/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "diagnostic-core.h"
#include "basic-block.h"
#include "target.h"
#include "target-def.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "df.h"
#include "tm-constrs.h"
#include "sched-int.h"
#include "sbitmap.h"
#include "fibheap.h"
#include "diagnostic.h"

enum upper_128bits_state
{
  unknown = 0,
  unused,
  used
};

typedef struct block_info_def
{
  /* State of the upper 128bits of AVX registers at exit.  */
  enum upper_128bits_state state;
  /* TRUE if state of the upper 128bits of AVX registers is unchanged
     in this block.  */
  bool unchanged;
  /* TRUE if block has been processed.  */
  bool processed;
  /* TRUE if block has been scanned.  */
  bool scanned;
  /* Previous state of the upper 128bits of AVX registers at entry.  */
  enum upper_128bits_state prev;
} *block_info;

#define BLOCK_INFO(B) ((block_info) (B)->aux)
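
/* This pass keeps its per-block dataflow facts in the basic_block aux
   field, allocated by alloc_aux_for_blocks and released by
   free_aux_for_blocks in move_or_delete_vzeroupper below; BLOCK_INFO
   is simply a typed accessor for that field.  */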

enum call_avx256_state
{
  /* Callee returns 256bit AVX register.  */
  callee_return_avx256 = -1,
  /* Callee returns and passes 256bit AVX register.  */
  callee_return_pass_avx256,
  /* Callee passes 256bit AVX register.  */
  callee_pass_avx256,
  /* Callee doesn't return nor pass 256bit AVX register, or no
     256bit AVX register in function return.  */
  call_no_avx256,
  /* vzeroupper intrinsic.  */
  vzeroupper_intrinsic
};
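
/* A call_avx256_state value travels as operand 0 of the vzeroupper
   UNSPEC_VOLATILE pattern emitted at a call site; the block scan below
   recovers it with INTVAL (XVECEXP (pat, 0, 0)) to decide whether the
   vzeroupper can be deleted.  */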

/* Check if a 256bit AVX register is referenced in stores.  */

static void
check_avx256_stores (rtx dest, const_rtx set, void *data)
{
  if ((REG_P (dest)
       && VALID_AVX256_REG_MODE (GET_MODE (dest)))
      || (GET_CODE (set) == SET
	  && REG_P (SET_SRC (set))
	  && VALID_AVX256_REG_MODE (GET_MODE (SET_SRC (set)))))
    {
      enum upper_128bits_state *state
	= (enum upper_128bits_state *) data;
      *state = used;
    }
}
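
/* check_avx256_stores has the note_stores callback signature; the call
   note_stores (pat, check_avx256_stores, &state) below runs it once per
   store destination in PAT, flipping STATE to "used" as soon as a
   256bit AVX register is written, or is the source of a SET.  */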

/* Helper function for move_or_delete_vzeroupper_1.  Look for vzeroupper
   in basic block BB.  Delete it if upper 128bit AVX registers are
   unused.  If it isn't deleted, move it to just before a jump insn.

   STATE is state of the upper 128bits of AVX registers at entry.  */

static void
move_or_delete_vzeroupper_2 (basic_block bb,
			     enum upper_128bits_state state)
{
  rtx insn, bb_end;
  rtx vzeroupper_insn = NULL_RTX;
  rtx pat;
  int avx256;
  bool unchanged;

  if (BLOCK_INFO (bb)->unchanged)
    {
      if (dump_file)
	fprintf (dump_file, " [bb %i] unchanged: upper 128bits: %d\n",
		 bb->index, state);

      BLOCK_INFO (bb)->state = state;
      return;
    }

  if (BLOCK_INFO (bb)->scanned && BLOCK_INFO (bb)->prev == state)
    {
      if (dump_file)
	fprintf (dump_file, " [bb %i] scanned: upper 128bits: %d\n",
		 bb->index, BLOCK_INFO (bb)->state);
      return;
    }

  BLOCK_INFO (bb)->prev = state;

  if (dump_file)
    fprintf (dump_file, " [bb %i] entry: upper 128bits: %d\n",
	     bb->index, state);

  unchanged = true;

  /* BB_END changes when it is deleted.  */
  bb_end = BB_END (bb);
  insn = BB_HEAD (bb);
  while (insn != bb_end)
    {
      insn = NEXT_INSN (insn);

      if (!NONDEBUG_INSN_P (insn))
	continue;

      /* Move vzeroupper before jump/call.  */
      if (JUMP_P (insn) || CALL_P (insn))
	{
	  if (!vzeroupper_insn)
	    continue;

	  if (PREV_INSN (insn) != vzeroupper_insn)
	    {
	      if (dump_file)
		{
		  fprintf (dump_file, "Move vzeroupper after:\n");
		  print_rtl_single (dump_file, PREV_INSN (insn));
		  fprintf (dump_file, "before:\n");
		  print_rtl_single (dump_file, insn);
		}
	      reorder_insns_nobb (vzeroupper_insn, vzeroupper_insn,
				  PREV_INSN (insn));
	    }
	  vzeroupper_insn = NULL_RTX;
	  continue;
	}

      pat = PATTERN (insn);

      /* Check insn for vzeroupper intrinsic.  */
      if (GET_CODE (pat) == UNSPEC_VOLATILE
	  && XINT (pat, 1) == UNSPECV_VZEROUPPER)
	{
	  if (dump_file)
	    {
	      /* Found vzeroupper intrinsic.  */
	      fprintf (dump_file, "Found vzeroupper:\n");
	      print_rtl_single (dump_file, insn);
	    }
	}
      else
	{
	  /* Check insn for vzeroall intrinsic.  */
	  if (GET_CODE (pat) == PARALLEL
	      && GET_CODE (XVECEXP (pat, 0, 0)) == UNSPEC_VOLATILE
	      && XINT (XVECEXP (pat, 0, 0), 1) == UNSPECV_VZEROALL)
	    {
	      state = unused;
	      unchanged = false;

	      /* Delete pending vzeroupper insertion.  */
	      if (vzeroupper_insn)
		{
		  delete_insn (vzeroupper_insn);
		  vzeroupper_insn = NULL_RTX;
		}
	    }
	  else if (state != used)
	    {
	      note_stores (pat, check_avx256_stores, &state);
	      if (state == used)
		unchanged = false;
	    }
	  continue;
	}

      /* Process vzeroupper intrinsic.  */
      avx256 = INTVAL (XVECEXP (pat, 0, 0));

      if (state == unused)
	{
	  /* Since the upper 128bits are cleared, callee must not pass
	     256bit AVX register.  We only need to check if callee
	     returns 256bit AVX register.  */
	  if (avx256 == callee_return_avx256)
	    {
	      state = used;
	      unchanged = false;
	    }

	  /* Remove unnecessary vzeroupper since upper 128bits are
	     cleared.  */
	  if (dump_file)
	    {
	      fprintf (dump_file, "Delete redundant vzeroupper:\n");
	      print_rtl_single (dump_file, insn);
	    }
	  delete_insn (insn);
	}
      else
	{
	  /* Set state to UNUSED if callee doesn't return 256bit AVX
	     register.  */
	  if (avx256 != callee_return_pass_avx256)
	    state = unused;

	  if (avx256 == callee_return_pass_avx256
	      || avx256 == callee_pass_avx256)
	    {
	      /* Must remove vzeroupper since callee passes in 256bit
		 AVX register.  */
	      if (dump_file)
		{
		  fprintf (dump_file, "Delete callee pass vzeroupper:\n");
		  print_rtl_single (dump_file, insn);
		}
	      delete_insn (insn);
	    }
	  else
	    {
	      vzeroupper_insn = insn;
	      unchanged = false;
	    }
	}
    }

  BLOCK_INFO (bb)->state = state;
  BLOCK_INFO (bb)->unchanged = unchanged;
  BLOCK_INFO (bb)->scanned = true;

  if (dump_file)
    fprintf (dump_file, " [bb %i] exit: %s: upper 128bits: %d\n",
	     bb->index, unchanged ? "unchanged" : "changed",
	     state);
}

/* Helper function for move_or_delete_vzeroupper.  Process vzeroupper
   in BLOCK and check its predecessor blocks.  Treat UNKNOWN state
   as UNUSED if UNKNOWN_IS_UNUSED is true.  Return TRUE if the exit
   state is changed.  */

static bool
move_or_delete_vzeroupper_1 (basic_block block, bool unknown_is_unused)
{
  edge e;
  edge_iterator ei;
  enum upper_128bits_state state, old_state, new_state;
  bool seen_unknown;

  if (dump_file)
    fprintf (dump_file, " Process [bb %i]: status: %d\n",
	     block->index, BLOCK_INFO (block)->processed);

  if (BLOCK_INFO (block)->processed)
    return false;

  state = unused;

  /* Check all predecessor edges of this block.  */
  seen_unknown = false;
  FOR_EACH_EDGE (e, ei, block->preds)
    {
      if (e->src == block)
	continue;
      switch (BLOCK_INFO (e->src)->state)
	{
	case unknown:
	  if (!unknown_is_unused)
	    seen_unknown = true;
	  continue;
	case unused:
	  break;
	case used:
	  state = used;
	  goto done;
	}
    }

  if (seen_unknown)
    state = unknown;

 done:
  old_state = BLOCK_INFO (block)->state;
  move_or_delete_vzeroupper_2 (block, state);
  new_state = BLOCK_INFO (block)->state;

  if (state != unknown || new_state == used)
    BLOCK_INFO (block)->processed = true;

  /* Need to rescan if the upper 128bits of AVX registers are changed
     to USED at exit.  */
  if (new_state != old_state)
    {
      if (new_state == used)
	cfun->machine->rescan_vzeroupper_p = 1;
      return true;
    }
  else
    return false;
}

/* Go through the instruction stream looking for vzeroupper.  Delete
   it if upper 128bit AVX registers are unused.  If it isn't deleted,
   move it to just before a jump insn.  */

static void
move_or_delete_vzeroupper (void)
{
  edge e;
  edge_iterator ei;
  basic_block bb;
  fibheap_t worklist, pending, fibheap_swap;
  sbitmap visited, in_worklist, in_pending, sbitmap_swap;
  int *bb_order;
  int *rc_order;
  int i;

  /* Set up block info for each basic block.  */
  alloc_aux_for_blocks (sizeof (struct block_info_def));

  /* Process outgoing edges of entry point.  */
  if (dump_file)
    fprintf (dump_file, "Process outgoing edges of entry point\n");

  FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR->succs)
    {
      move_or_delete_vzeroupper_2 (e->dest,
				   cfun->machine->caller_pass_avx256_p
				   ? used : unused);
      BLOCK_INFO (e->dest)->processed = true;
    }

  /* Compute reverse completion order of depth first search of the CFG
     so that the data-flow runs faster.  */
  rc_order = XNEWVEC (int, n_basic_blocks - NUM_FIXED_BLOCKS);
  bb_order = XNEWVEC (int, last_basic_block);
  pre_and_rev_post_order_compute (NULL, rc_order, false);
  for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
    bb_order[rc_order[i]] = i;
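
  /* bb_order[] now maps a block index to its position in the reverse
     completion order, so extracting minimum keys from the fibonacci
     heaps below visits blocks roughly from entry towards exit; most
     blocks thus see their predecessors solved first, and fewer rounds
     are needed for the dataflow to settle.  */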
  free (rc_order);

  worklist = fibheap_new ();
  pending = fibheap_new ();
  visited = sbitmap_alloc (last_basic_block);
  in_worklist = sbitmap_alloc (last_basic_block);
  in_pending = sbitmap_alloc (last_basic_block);
  sbitmap_zero (in_worklist);

  /* Don't check outgoing edges of entry point.  */
  sbitmap_ones (in_pending);
  FOR_EACH_BB (bb)
    if (BLOCK_INFO (bb)->processed)
      RESET_BIT (in_pending, bb->index);
    else
      {
	move_or_delete_vzeroupper_1 (bb, false);
	fibheap_insert (pending, bb_order[bb->index], bb);
      }

  if (dump_file)
    fprintf (dump_file, "Check remaining basic blocks\n");

  while (!fibheap_empty (pending))
    {
      fibheap_swap = pending;
      pending = worklist;
      worklist = fibheap_swap;
      sbitmap_swap = in_pending;
      in_pending = in_worklist;
      in_worklist = sbitmap_swap;

      sbitmap_zero (visited);

      cfun->machine->rescan_vzeroupper_p = 0;

      while (!fibheap_empty (worklist))
	{
	  bb = (basic_block) fibheap_extract_min (worklist);
	  RESET_BIT (in_worklist, bb->index);
	  gcc_assert (!TEST_BIT (visited, bb->index));
	  if (!TEST_BIT (visited, bb->index))
	    {
	      edge_iterator ei;

	      SET_BIT (visited, bb->index);

	      if (move_or_delete_vzeroupper_1 (bb, false))
		FOR_EACH_EDGE (e, ei, bb->succs)
		  {
		    if (e->dest == EXIT_BLOCK_PTR
			|| BLOCK_INFO (e->dest)->processed)
		      continue;

		    if (TEST_BIT (visited, e->dest->index))
		      {
			if (!TEST_BIT (in_pending, e->dest->index))
			  {
			    /* Send E->DEST to next round.  */
			    SET_BIT (in_pending, e->dest->index);
			    fibheap_insert (pending,
					    bb_order[e->dest->index],
					    e->dest);
			  }
		      }
		    else if (!TEST_BIT (in_worklist, e->dest->index))
		      {
			/* Add E->DEST to current round.  */
			SET_BIT (in_worklist, e->dest->index);
			fibheap_insert (worklist, bb_order[e->dest->index],
					e->dest);
		      }
		  }
	    }
	}

      if (!cfun->machine->rescan_vzeroupper_p)
	break;
    }

  free (bb_order);
  fibheap_delete (worklist);
  fibheap_delete (pending);
  sbitmap_free (visited);
  sbitmap_free (in_worklist);
  sbitmap_free (in_pending);

  if (dump_file)
    fprintf (dump_file, "Process remaining basic blocks\n");

  FOR_EACH_BB (bb)
    move_or_delete_vzeroupper_1 (bb, true);

  free_aux_for_blocks ();
}

static rtx legitimize_dllimport_symbol (rtx, bool);

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode)			\
  ((mode) == QImode ? 0				\
   : (mode) == HImode ? 1			\
   : (mode) == SImode ? 2			\
   : (mode) == DImode ? 3			\
   : 4)
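
/* For example, MODE_INDEX (SImode) == 2 picks the SImode slot of the
   five-element multiply and divide cost arrays in the processor_costs
   tables below; any mode wider than DImode lands in the trailing
   "other" slot (index 4).  */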

/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)
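
/* Sanity check on the scale: with COSTS_N_INSNS (N) == (N) * 4, a 2-byte
   addition costs COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1), so the byte
   counts in ix86_size_cost below are directly comparable with the insn
   counts used by the speed-tuned tables.  */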

#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
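
/* Each cost table carries a pair of stringop_algs entries per operation;
   the second entry of the pair is the one used when compiling for 64-bit.
   Tables for processors that cannot run 64-bit code plug
   DUMMY_STRINGOP_ALGS into that slot, which simply punts to a libcall.  */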

static const
struct processor_costs ix86_size_cost = {/* costs for tuning for size */
  COSTS_N_BYTES (2),			/* cost of an add instruction */
  COSTS_N_BYTES (3),			/* cost of a lea instruction */
  COSTS_N_BYTES (2),			/* variable shift costs */
  COSTS_N_BYTES (3),			/* constant shift costs */
  {COSTS_N_BYTES (3),			/* cost of starting multiply for QI */
   COSTS_N_BYTES (3),			/*				 HI */
   COSTS_N_BYTES (3),			/*				 SI */
   COSTS_N_BYTES (3),			/*				 DI */
   COSTS_N_BYTES (5)},			/*			      other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_BYTES (3),			/* cost of a divide/mod for QI */
   COSTS_N_BYTES (3),			/*			    HI */
   COSTS_N_BYTES (3),			/*			    SI */
   COSTS_N_BYTES (3),			/*			    DI */
   COSTS_N_BYTES (5)},			/*			 other */
  COSTS_N_BYTES (3),			/* cost of movsx */
  COSTS_N_BYTES (3),			/* cost of movzx */
  0,					/* "large" insn */
  2,					/* cost for loading QImode using movzbl */
  {2, 2, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 2},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {2, 2, 2},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  3,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {3, 3},				/* cost of storing MMX registers
					   in SImode and DImode */
  3,					/* cost of moving SSE register */
  {3, 3, 3},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {3, 3, 3},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of l1 cache  */
  0,					/* size of l2 cache  */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  COSTS_N_BYTES (2),			/* cost of FADD and FSUB insns.  */
  COSTS_N_BYTES (2),			/* cost of FMUL instruction.  */
  COSTS_N_BYTES (2),			/* cost of FDIV instruction.  */
  COSTS_N_BYTES (2),			/* cost of FABS instruction.  */
  COSTS_N_BYTES (2),			/* cost of FCHS instruction.  */
  COSTS_N_BYTES (2),			/* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar_load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  1,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  1,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {	/* 386 specific costs */
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (3),			/* variable shift costs */
  COSTS_N_INSNS (2),			/* constant shift costs */
  {COSTS_N_INSNS (6),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (6),			/*				 HI */
   COSTS_N_INSNS (6),			/*				 SI */
   COSTS_N_INSNS (6),			/*				 DI */
   COSTS_N_INSNS (6)},			/*			      other */
  COSTS_N_INSNS (1),			/* cost of multiply per each bit set */
  {COSTS_N_INSNS (23),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),			/*			    HI */
   COSTS_N_INSNS (23),			/*			    SI */
   COSTS_N_INSNS (23),			/*			    DI */
   COSTS_N_INSNS (23)},			/*			 other */
  COSTS_N_INSNS (3),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  15,					/* "large" insn */
  4,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of l1 cache  */
  0,					/* size of l2 cache  */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  COSTS_N_INSNS (23),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (27),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (88),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (22),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (24),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (122),			/* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar_load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs i486_cost = {	/* 486 specific costs */
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (3),			/* variable shift costs */
  COSTS_N_INSNS (2),			/* constant shift costs */
  {COSTS_N_INSNS (12),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (12),			/*				 HI */
   COSTS_N_INSNS (12),			/*				 SI */
   COSTS_N_INSNS (12),			/*				 DI */
   COSTS_N_INSNS (12)},			/*			      other */
  1,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (40),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (40),			/*			    HI */
   COSTS_N_INSNS (40),			/*			    SI */
   COSTS_N_INSNS (40),			/*			    DI */
   COSTS_N_INSNS (40)},			/*			 other */
  COSTS_N_INSNS (3),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  15,					/* "large" insn */
  4,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  4,					/* size of l1 cache.  486 has 8kB cache
					   shared for code and data, so 4kB is
					   not really precise.  */
  4,					/* size of l2 cache  */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (16),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (73),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (3),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (83),			/* cost of FSQRT instruction.  */
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar_load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (4),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (11),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (11),			/*				 HI */
   COSTS_N_INSNS (11),			/*				 SI */
   COSTS_N_INSNS (11),			/*				 DI */
   COSTS_N_INSNS (11)},			/*			      other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (25),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (25),			/*			    HI */
   COSTS_N_INSNS (25),			/*			    SI */
   COSTS_N_INSNS (25),			/*			    DI */
   COSTS_N_INSNS (25)},			/*			 other */
  COSTS_N_INSNS (3),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  8,					/* "large" insn */
  6,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  8,					/* cost of moving MMX register */
  {8, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {8, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  8,					/* size of l1 cache.  */
  8,					/* size of l2 cache  */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  COSTS_N_INSNS (3),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (3),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (39),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (1),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (70),			/* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar_load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (4),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/*				 HI */
   COSTS_N_INSNS (4),			/*				 SI */
   COSTS_N_INSNS (4),			/*				 DI */
   COSTS_N_INSNS (4)},			/*			      other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (17),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (17),			/*			    HI */
   COSTS_N_INSNS (17),			/*			    SI */
   COSTS_N_INSNS (17),			/*			    DI */
   COSTS_N_INSNS (17)},			/*			 other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  2,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  8,					/* size of l1 cache.  */
  256,					/* size of l2 cache  */
  32,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  COSTS_N_INSNS (3),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),			/* cost of FSQRT instruction.  */
  /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
     (we ensure the alignment).  For small blocks the inline loop is still a
     noticeable win; for bigger blocks either rep movsl or rep movsb is the
     way to go.  Rep movsb apparently has a more expensive startup time in the
     CPU, but after 4K the difference is down in the noise.  */
  {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
			{8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{1024, unrolled_loop},
			{8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
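  /* Reading these tables: the leading algorithm is used when the block
     size is not known at compile time; each following {max, alg} pair
     says "use ALG for blocks of at most MAX bytes", with -1 terminating
     the list and covering all larger sizes.  The memcpy entry above thus
     inlines a simple loop up to 128 bytes, an unrolled loop up to 1kB,
     rep movsl up to 8kB, and rep movsb beyond that.  */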
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar_load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs geode_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (2),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/*				 HI */
   COSTS_N_INSNS (7),			/*				 SI */
   COSTS_N_INSNS (7),			/*				 DI */
   COSTS_N_INSNS (7)},			/*			      other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (15),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),			/*			    HI */
   COSTS_N_INSNS (39),			/*			    SI */
   COSTS_N_INSNS (39),			/*			    DI */
   COSTS_N_INSNS (39)},			/*			 other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  1,					/* cost for loading QImode using movzbl */
  {1, 1, 1},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {1, 1, 1},				/* cost of storing integer registers */
  1,					/* cost of reg,reg fld/fst */
  {1, 1, 1},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 6, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */

  1,					/* cost of moving MMX register */
  {1, 1},				/* cost of loading MMX registers
					   in SImode and DImode */
  {1, 1},				/* cost of storing MMX registers
					   in SImode and DImode */
  1,					/* cost of moving SSE register */
  {1, 1, 1},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {1, 1, 1},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  1,					/* MMX or SSE register to integer */
  64,					/* size of l1 cache.  */
  128,					/* size of l2 cache.  */
  32,					/* size of prefetch block */
  1,					/* number of parallel prefetches */
  COSTS_N_INSNS (6),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (11),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (47),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (1),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (54),			/* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar_load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs k6_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (3),			/*				 HI */
   COSTS_N_INSNS (3),			/*				 SI */
   COSTS_N_INSNS (3),			/*				 DI */
   COSTS_N_INSNS (3)},			/*			      other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (18),			/*			    HI */
   COSTS_N_INSNS (18),			/*			    SI */
   COSTS_N_INSNS (18),			/*			    DI */
   COSTS_N_INSNS (18)},			/*			 other */
  COSTS_N_INSNS (2),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  8,					/* "large" insn */
  3,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {6, 6, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 4},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  6,					/* MMX or SSE register to integer */
  32,					/* size of l1 cache.  */
  32,					/* size of l2 cache.  Some models
					   have integrated l2 cache, but
					   optimizing for k6 is not important
					   enough to worry about that.  */
  32,					/* size of prefetch block */
  1,					/* number of parallel prefetches */
  COSTS_N_INSNS (2),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (2),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),			/* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar_load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (5),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (5),			/*				 HI */
   COSTS_N_INSNS (5),			/*				 SI */
   COSTS_N_INSNS (5),			/*				 DI */
   COSTS_N_INSNS (5)},			/*			      other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),			/*			    HI */
   COSTS_N_INSNS (42),			/*			    SI */
   COSTS_N_INSNS (74),			/*			    DI */
   COSTS_N_INSNS (74)},			/*			 other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 4},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 6},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  64,					/* size of l1 cache.  */
  256,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  5,					/* Branch cost */
  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (24),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */
  /* For some reason, Athlon deals better with the REP prefix (relative to
     loops) than K8.  Alignment becomes important after 8 bytes for memcpy
     and 128 bytes for memset.  */
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar_load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs k8_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/*				 HI */
   COSTS_N_INSNS (3),			/*				 SI */
   COSTS_N_INSNS (4),			/*				 DI */
   COSTS_N_INSNS (5)},			/*			      other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),			/*			    HI */
   COSTS_N_INSNS (42),			/*			    SI */
   COSTS_N_INSNS (74),			/*			    DI */
   COSTS_N_INSNS (74)},			/*			 other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 3, 6},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  64,					/* size of l1 cache.  */
  512,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it probably is not a
     good idea not to limit the number of prefetches at all, as their
     execution also takes some time).  */
  100,					/* number of parallel prefetches */
  3,					/* Branch cost */
  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */
  /* K8 has an optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use a loop.  For large blocks, a
     libcall can do non-temporal accesses and beat inline code
     considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,					/* scalar_stmt_cost.  */
  2,					/* scalar_load_cost.  */
  2,					/* scalar_store_cost.  */
  5,					/* vec_stmt_cost.  */
  0,					/* vec_to_scalar_cost.  */
  2,					/* scalar_to_vec_cost.  */
  2,					/* vec_align_load_cost.  */
  3,					/* vec_unalign_load_cost.  */
  3,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  2,					/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs amdfam10_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/*				 HI */
   COSTS_N_INSNS (3),			/*				 SI */
   COSTS_N_INSNS (4),			/*				 DI */
   COSTS_N_INSNS (5)},			/*			      other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),			/*			    HI */
   COSTS_N_INSNS (51),			/*			    SI */
   COSTS_N_INSNS (83),			/*			    DI */
   COSTS_N_INSNS (83)},			/*			 other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 3},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
					/* On K8:
					    MOVD reg64, xmmreg Double FSTORE 4
					    MOVD reg32, xmmreg Double FSTORE 4
					   On AMDFAM10:
					    MOVD reg64, xmmreg Double FADD 3
							       1/1  1/1
					    MOVD reg32, xmmreg Double FADD 3
							       1/1  1/1 */
  64,					/* size of l1 cache.  */
  512,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it probably is not a
     good idea not to limit the number of prefetches at all, as their
     execution also takes some time).  */
  100,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */

  /* AMDFAM10 has an optimized REP instruction for medium sized blocks, but
     for very small blocks it is better to use a loop.  For large blocks, a
     libcall can do non-temporal accesses and beat inline code
     considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,					/* scalar_stmt_cost.  */
  2,					/* scalar_load_cost.  */
  2,					/* scalar_store_cost.  */
  6,					/* vec_stmt_cost.  */
  0,					/* vec_to_scalar_cost.  */
  2,					/* scalar_to_vec_cost.  */
  2,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  2,					/* vec_store_cost.  */
  2,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs bdver1_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (4),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/*				 HI */
   COSTS_N_INSNS (4),			/*				 SI */
   COSTS_N_INSNS (6),			/*				 DI */
   COSTS_N_INSNS (6)},			/*			      other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),			/*			    HI */
   COSTS_N_INSNS (51),			/*			    SI */
   COSTS_N_INSNS (83),			/*			    DI */
   COSTS_N_INSNS (83)},			/*			 other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  4,					/* cost for loading QImode using movzbl */
  {5, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {5, 5, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 4},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 4},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 4},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  2,					/* MMX or SSE register to integer */
					/* On K8:
					    MOVD reg64, xmmreg Double FSTORE 4
					    MOVD reg32, xmmreg Double FSTORE 4
					   On AMDFAM10:
					    MOVD reg64, xmmreg Double FADD 3
							       1/1  1/1
					    MOVD reg32, xmmreg Double FADD 3
							       1/1  1/1 */
  16,					/* size of l1 cache.  */
  2048,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it probably is not a
     good idea not to limit the number of prefetches at all, as their
     execution also takes some time).  */
  100,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_INSNS (6),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),			/* cost of FSQRT instruction.  */

  /* BDVER1 has an optimized REP instruction for medium sized blocks, but
     for very small blocks it is better to use a loop.  For large blocks, a
     libcall can do non-temporal accesses and beat inline code
     considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  6,					/* scalar_stmt_cost.  */
  4,					/* scalar_load_cost.  */
  4,					/* scalar_store_cost.  */
  6,					/* vec_stmt_cost.  */
  0,					/* vec_to_scalar_cost.  */
  2,					/* scalar_to_vec_cost.  */
  4,					/* vec_align_load_cost.  */
  4,					/* vec_unalign_load_cost.  */
  4,					/* vec_store_cost.  */
  2,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs bdver2_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (4),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/*				 HI */
   COSTS_N_INSNS (4),			/*				 SI */
   COSTS_N_INSNS (6),			/*				 DI */
   COSTS_N_INSNS (6)},			/*			      other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),			/*			    HI */
   COSTS_N_INSNS (51),			/*			    SI */
   COSTS_N_INSNS (83),			/*			    DI */
   COSTS_N_INSNS (83)},			/*			 other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  4,					/* cost for loading QImode using movzbl */
  {5, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {5, 5, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 4},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 4},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 4},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  2,					/* MMX or SSE register to integer */
					/* On K8:
					    MOVD reg64, xmmreg Double FSTORE 4
					    MOVD reg32, xmmreg Double FSTORE 4
					   On AMDFAM10:
					    MOVD reg64, xmmreg Double FADD 3
							       1/1  1/1
					    MOVD reg32, xmmreg Double FADD 3
							       1/1  1/1 */
  16,					/* size of l1 cache.  */
  2048,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it probably is not a
     good idea not to limit the number of prefetches at all, as their
     execution also takes some time).  */
  100,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_INSNS (6),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),			/* cost of FSQRT instruction.  */

  /* BDVER2 has an optimized REP instruction for medium sized blocks, but
     for very small blocks it is better to use a loop.  For large blocks, a
     libcall can do non-temporal accesses and beat inline code
     considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  6,					/* scalar_stmt_cost.  */
  4,					/* scalar_load_cost.  */
  4,					/* scalar_store_cost.  */
  6,					/* vec_stmt_cost.  */
  0,					/* vec_to_scalar_cost.  */
  2,					/* scalar_to_vec_cost.  */
  4,					/* vec_align_load_cost.  */
  4,					/* vec_unalign_load_cost.  */
  4,					/* vec_store_cost.  */
  2,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs btver1_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/*				 HI */
   COSTS_N_INSNS (3),			/*				 SI */
   COSTS_N_INSNS (4),			/*				 DI */
   COSTS_N_INSNS (5)},			/*			      other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),			/*			    HI */
   COSTS_N_INSNS (51),			/*			    SI */
   COSTS_N_INSNS (83),			/*			    DI */
   COSTS_N_INSNS (83)},			/*			 other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 3},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
					/* On K8:
					    MOVD reg64, xmmreg Double FSTORE 4
					    MOVD reg32, xmmreg Double FSTORE 4
					   On AMDFAM10:
					    MOVD reg64, xmmreg Double FADD 3
							       1/1  1/1
					    MOVD reg32, xmmreg Double FADD 3
							       1/1  1/1 */
  32,					/* size of l1 cache.  */
  512,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  100,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */

  /* BTVER1 has an optimized REP instruction for medium sized blocks, but
     for very small blocks it is better to use a loop.  For large blocks, a
     libcall can do non-temporal accesses and beat inline code
     considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,					/* scalar_stmt_cost.  */
  2,					/* scalar_load_cost.  */
  2,					/* scalar_store_cost.  */
  6,					/* vec_stmt_cost.  */
  0,					/* vec_to_scalar_cost.  */
  2,					/* scalar_to_vec_cost.  */
  2,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  2,					/* vec_store_cost.  */
  2,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs pentium4_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (3),			/* cost of a lea instruction */
  COSTS_N_INSNS (4),			/* variable shift costs */
  COSTS_N_INSNS (4),			/* constant shift costs */
  {COSTS_N_INSNS (15),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (15),			/*				 HI */
   COSTS_N_INSNS (15),			/*				 SI */
   COSTS_N_INSNS (15),			/*				 DI */
   COSTS_N_INSNS (15)},			/*			      other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (56),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (56),			/*			    HI */
   COSTS_N_INSNS (56),			/*			    SI */
   COSTS_N_INSNS (56),			/*			    DI */
   COSTS_N_INSNS (56)},			/*			 other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  16,					/* "large" insn */
  2,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  12,					/* cost of moving SSE register */
  {12, 12, 12},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  10,					/* MMX or SSE register to integer */
  8,					/* size of l1 cache.  */
  256,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_INSNS (5),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (7),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (43),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (43),			/* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
	      {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar_load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs nocona_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (10),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (10),			/*				 HI */
   COSTS_N_INSNS (10),			/*				 SI */
   COSTS_N_INSNS (10),			/*				 DI */
   COSTS_N_INSNS (10)},			/*			      other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (66),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (66),			/*			    HI */
   COSTS_N_INSNS (66),			/*			    SI */
   COSTS_N_INSNS (66),			/*			    DI */
   COSTS_N_INSNS (66)},			/*			 other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  16,					/* "large" insn */
  17,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  3,					/* cost of reg,reg fld/fst */
  {12, 12, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 4},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  6,					/* cost of moving MMX register */
  {12, 12},				/* cost of loading MMX registers
					   in SImode and DImode */
  {12, 12},				/* cost of storing MMX registers
					   in SImode and DImode */
  6,					/* cost of moving SSE register */
  {12, 12, 12},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {12, 12, 12},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  8,					/* MMX or SSE register to integer */
  8,					/* size of l1 cache.  */
  1024,					/* size of l2 cache.  */
  128,					/* size of prefetch block */
  8,					/* number of parallel prefetches */
  1,					/* Branch cost */
  COSTS_N_INSNS (6),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (40),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (3),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (44),			/* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
	      {100000, unrolled_loop}, {-1, libcall}}}},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
	      {-1, libcall}}},
   {libcall, {{24, loop}, {64, unrolled_loop},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar_load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
1655 struct processor_costs atom_cost = {
1656 COSTS_N_INSNS (1), /* cost of an add instruction */
1657 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1658 COSTS_N_INSNS (1), /* variable shift costs */
1659 COSTS_N_INSNS (1), /* constant shift costs */
1660 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1661 COSTS_N_INSNS (4), /* HI */
1662 COSTS_N_INSNS (3), /* SI */
1663 COSTS_N_INSNS (4), /* DI */
1664 COSTS_N_INSNS (2)}, /* other */
1665 0, /* cost of multiply per each bit set */
1666 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1667 COSTS_N_INSNS (26), /* HI */
1668 COSTS_N_INSNS (42), /* SI */
1669 COSTS_N_INSNS (74), /* DI */
1670 COSTS_N_INSNS (74)}, /* other */
1671 COSTS_N_INSNS (1), /* cost of movsx */
1672 COSTS_N_INSNS (1), /* cost of movzx */
1673 8, /* "large" insn */
1674 17, /* MOVE_RATIO */
1675 4, /* cost for loading QImode using movzbl */
1676 {4, 4, 4}, /* cost of loading integer registers
1677 in QImode, HImode and SImode.
1678 Relative to reg-reg move (2). */
1679 {4, 4, 4}, /* cost of storing integer registers */
1680 4, /* cost of reg,reg fld/fst */
1681 {12, 12, 12}, /* cost of loading fp registers
1682 in SFmode, DFmode and XFmode */
1683 {6, 6, 8}, /* cost of storing fp registers
1684 in SFmode, DFmode and XFmode */
1685 2, /* cost of moving MMX register */
1686 {8, 8}, /* cost of loading MMX registers
1687 in SImode and DImode */
1688 {8, 8}, /* cost of storing MMX registers
1689 in SImode and DImode */
1690 2, /* cost of moving SSE register */
1691 {8, 8, 8}, /* cost of loading SSE registers
1692 in SImode, DImode and TImode */
1693 {8, 8, 8}, /* cost of storing SSE registers
1694 in SImode, DImode and TImode */
1695 5, /* MMX or SSE register to integer */
1696 32, /* size of l1 cache. */
1697 256, /* size of l2 cache. */
1698 64, /* size of prefetch block */
1699 6, /* number of parallel prefetches */
1700 3, /* Branch cost */
1701 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1702 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1703 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1704 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1705 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1706 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1707 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1708 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1709 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1710 {{libcall, {{8, loop}, {15, unrolled_loop},
1711 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1712 {libcall, {{24, loop}, {32, unrolled_loop},
1713 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1714 1, /* scalar_stmt_cost. */
1715 1, /* scalar load_cost. */
1716 1, /* scalar_store_cost. */
1717 1, /* vec_stmt_cost. */
1718 1, /* vec_to_scalar_cost. */
1719 1, /* scalar_to_vec_cost. */
1720 1, /* vec_align_load_cost. */
1721 2, /* vec_unalign_load_cost. */
1722 1, /* vec_store_cost. */
1723 3, /* cond_taken_branch_cost. */
1724 1, /* cond_not_taken_branch_cost. */
1725 };
1727 /* Generic64 should produce code tuned for Nocona and K8. */
1728 static const
1729 struct processor_costs generic64_cost = {
1730 COSTS_N_INSNS (1), /* cost of an add instruction */
1731 /* On all chips taken into consideration lea is 2 cycles and more. With
1732 this cost however our current implementation of synth_mult results in
1733 use of unnecessary temporary registers causing regression on several
1734 SPECfp benchmarks. */
1735 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1736 COSTS_N_INSNS (1), /* variable shift costs */
1737 COSTS_N_INSNS (1), /* constant shift costs */
1738 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1739 COSTS_N_INSNS (4), /* HI */
1740 COSTS_N_INSNS (3), /* SI */
1741 COSTS_N_INSNS (4), /* DI */
1742 COSTS_N_INSNS (2)}, /* other */
1743 0, /* cost of multiply per each bit set */
1744 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1745 COSTS_N_INSNS (26), /* HI */
1746 COSTS_N_INSNS (42), /* SI */
1747 COSTS_N_INSNS (74), /* DI */
1748 COSTS_N_INSNS (74)}, /* other */
1749 COSTS_N_INSNS (1), /* cost of movsx */
1750 COSTS_N_INSNS (1), /* cost of movzx */
1751 8, /* "large" insn */
1752 17, /* MOVE_RATIO */
1753 4, /* cost for loading QImode using movzbl */
1754 {4, 4, 4}, /* cost of loading integer registers
1755 in QImode, HImode and SImode.
1756 Relative to reg-reg move (2). */
1757 {4, 4, 4}, /* cost of storing integer registers */
1758 4, /* cost of reg,reg fld/fst */
1759 {12, 12, 12}, /* cost of loading fp registers
1760 in SFmode, DFmode and XFmode */
1761 {6, 6, 8}, /* cost of storing fp registers
1762 in SFmode, DFmode and XFmode */
1763 2, /* cost of moving MMX register */
1764 {8, 8}, /* cost of loading MMX registers
1765 in SImode and DImode */
1766 {8, 8}, /* cost of storing MMX registers
1767 in SImode and DImode */
1768 2, /* cost of moving SSE register */
1769 {8, 8, 8}, /* cost of loading SSE registers
1770 in SImode, DImode and TImode */
1771 {8, 8, 8}, /* cost of storing SSE registers
1772 in SImode, DImode and TImode */
1773 5, /* MMX or SSE register to integer */
1774 32, /* size of l1 cache. */
1775 512, /* size of l2 cache. */
1776 64, /* size of prefetch block */
1777 6, /* number of parallel prefetches */
1778 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this
1779 value is increased to perhaps more appropriate value of 5. */
1780 3, /* Branch cost */
1781 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1782 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1783 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1784 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1785 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1786 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1787 {DUMMY_STRINGOP_ALGS,
1788 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1789 {DUMMY_STRINGOP_ALGS,
1790 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1791 1, /* scalar_stmt_cost. */
1792 1, /* scalar load_cost. */
1793 1, /* scalar_store_cost. */
1794 1, /* vec_stmt_cost. */
1795 1, /* vec_to_scalar_cost. */
1796 1, /* scalar_to_vec_cost. */
1797 1, /* vec_align_load_cost. */
1798 2, /* vec_unalign_load_cost. */
1799 1, /* vec_store_cost. */
1800 3, /* cond_taken_branch_cost. */
1801 1, /* cond_not_taken_branch_cost. */
1802 };
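/* Illustrative sketch, not part of GCC: how the per-mode rows above are
   indexed.  COSTS_N_INSNS (N) (from rtl.h) expresses N average insns in
   rtx-cost units, and the multiply/divide rows are ordered QI, HI, SI,
   DI, other.  The field name mult_init assumes struct processor_costs
   from i386.h.  */

static int ATTRIBUTE_UNUSED
example_mult_init_cost (const struct processor_costs *cost,
			enum machine_mode mode)
{
  switch (mode)
    {
    case QImode: return cost->mult_init[0];
    case HImode: return cost->mult_init[1];
    case SImode: return cost->mult_init[2];
    case DImode: return cost->mult_init[3];
    default:     return cost->mult_init[4];
    }
}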
1804 /* Generic32 should produce code tuned for PPro, Pentium4, Nocona,
1805 Core 2 and K8. */
1806 static const
1807 struct processor_costs generic32_cost = {
1808 COSTS_N_INSNS (1), /* cost of an add instruction */
1809 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1810 COSTS_N_INSNS (1), /* variable shift costs */
1811 COSTS_N_INSNS (1), /* constant shift costs */
1812 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1813 COSTS_N_INSNS (4), /* HI */
1814 COSTS_N_INSNS (3), /* SI */
1815 COSTS_N_INSNS (4), /* DI */
1816 COSTS_N_INSNS (2)}, /* other */
1817 0, /* cost of multiply per each bit set */
1818 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1819 COSTS_N_INSNS (26), /* HI */
1820 COSTS_N_INSNS (42), /* SI */
1821 COSTS_N_INSNS (74), /* DI */
1822 COSTS_N_INSNS (74)}, /* other */
1823 COSTS_N_INSNS (1), /* cost of movsx */
1824 COSTS_N_INSNS (1), /* cost of movzx */
1825 8, /* "large" insn */
1826 17, /* MOVE_RATIO */
1827 4, /* cost for loading QImode using movzbl */
1828 {4, 4, 4}, /* cost of loading integer registers
1829 in QImode, HImode and SImode.
1830 Relative to reg-reg move (2). */
1831 {4, 4, 4}, /* cost of storing integer registers */
1832 4, /* cost of reg,reg fld/fst */
1833 {12, 12, 12}, /* cost of loading fp registers
1834 in SFmode, DFmode and XFmode */
1835 {6, 6, 8}, /* cost of storing fp registers
1836 in SFmode, DFmode and XFmode */
1837 2, /* cost of moving MMX register */
1838 {8, 8}, /* cost of loading MMX registers
1839 in SImode and DImode */
1840 {8, 8}, /* cost of storing MMX registers
1841 in SImode and DImode */
1842 2, /* cost of moving SSE register */
1843 {8, 8, 8}, /* cost of loading SSE registers
1844 in SImode, DImode and TImode */
1845 {8, 8, 8}, /* cost of storing SSE registers
1846 in SImode, DImode and TImode */
1847 5, /* MMX or SSE register to integer */
1848 32, /* size of l1 cache. */
1849 256, /* size of l2 cache. */
1850 64, /* size of prefetch block */
1851 6, /* number of parallel prefetches */
1852 3, /* Branch cost */
1853 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1854 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1855 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1856 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1857 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1858 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1859 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1860 DUMMY_STRINGOP_ALGS},
1861 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1862 DUMMY_STRINGOP_ALGS},
1863 1, /* scalar_stmt_cost. */
1864 1, /* scalar load_cost. */
1865 1, /* scalar_store_cost. */
1866 1, /* vec_stmt_cost. */
1867 1, /* vec_to_scalar_cost. */
1868 1, /* scalar_to_vec_cost. */
1869 1, /* vec_align_load_cost. */
1870 2, /* vec_unalign_load_cost. */
1871 1, /* vec_store_cost. */
1872 3, /* cond_taken_branch_cost. */
1873 1, /* cond_not_taken_branch_cost. */
1874 };
1876 const struct processor_costs *ix86_cost = &pentium_cost;
1878 /* Processor feature/optimization bitmasks. */
1879 #define m_386 (1<<PROCESSOR_I386)
1880 #define m_486 (1<<PROCESSOR_I486)
1881 #define m_PENT (1<<PROCESSOR_PENTIUM)
1882 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1883 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1884 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1885 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
1886 #define m_CORE2_32 (1<<PROCESSOR_CORE2_32)
1887 #define m_CORE2_64 (1<<PROCESSOR_CORE2_64)
1888 #define m_COREI7_32 (1<<PROCESSOR_COREI7_32)
1889 #define m_COREI7_64 (1<<PROCESSOR_COREI7_64)
1890 #define m_COREI7 (m_COREI7_32 | m_COREI7_64)
1891 #define m_CORE2I7_32 (m_CORE2_32 | m_COREI7_32)
1892 #define m_CORE2I7_64 (m_CORE2_64 | m_COREI7_64)
1893 #define m_CORE2I7 (m_CORE2I7_32 | m_CORE2I7_64)
1894 #define m_ATOM (1<<PROCESSOR_ATOM)
1896 #define m_GEODE (1<<PROCESSOR_GEODE)
1897 #define m_K6 (1<<PROCESSOR_K6)
1898 #define m_K6_GEODE (m_K6 | m_GEODE)
1899 #define m_K8 (1<<PROCESSOR_K8)
1900 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1901 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1902 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1903 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
1904 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
1905 #define m_BDVER (m_BDVER1 | m_BDVER2)
1906 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
1907 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1)
1909 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1910 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1912 /* Generic instruction choice should be common subset of supported CPUs
1913 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1914 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
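/* Illustrative sketch, not part of GCC: each m_* macro above is the bit
   for one PROCESSOR_* enumerator (or a union of such bits), so deciding
   whether a tuning or feature row applies to a processor is a single
   bit test, mirroring how ix86_tune_features is filled in from
   initial_ix86_tune_features below.  */

static bool ATTRIBUTE_UNUSED
example_mask_has_processor (unsigned int mask, enum processor_type cpu)
{
  return (mask & (1u << cpu)) != 0;
}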
1916 /* Feature tests against the various tunings. */
1917 unsigned char ix86_tune_features[X86_TUNE_LAST];
1919 /* Feature tests against the various tunings used to create ix86_tune_features
1920 based on the processor mask. */
1921 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
1922 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1923 negatively, so enabling for Generic64 seems like good code size
1924 tradeoff. We can't enable it for 32bit generic because it does not
1925 work well with PPro base chips. */
1926 m_386 | m_CORE2I7_64 | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC64,
1928 /* X86_TUNE_PUSH_MEMORY */
1929 m_386 | m_P4_NOCONA | m_CORE2I7 | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
1931 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1934 /* X86_TUNE_UNROLL_STRLEN */
1935 m_486 | m_PENT | m_PPRO | m_ATOM | m_CORE2I7 | m_K6 | m_AMD_MULTIPLE | m_GENERIC,
1937 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in the P4
1938 based on simulation results, but after the P4 shipped no performance
1939 benefit was observed from branch hints, and they increase code size.
1940 As a result, icc never generates branch hints. */
1943 /* X86_TUNE_DOUBLE_WITH_ADD */
1946 /* X86_TUNE_USE_SAHF */
1947 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC,
1949 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1950 partial dependencies. */
1951 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GEODE | m_AMD_MULTIPLE | m_GENERIC,
1953 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1954 register stalls on the Generic32 compilation setting as well. However,
1955 in the current implementation partial register stalls are not
1956 eliminated very well: they can be introduced via subregs synthesized
1957 by combine and can happen in caller/callee saving sequences. Because
1958 this option pays back little on PPro based chips and is in conflict
1959 with the partial reg dependencies used by Athlon/P4 based chips, it is
1960 better to leave it off for generic32 for now. */
1963 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1964 m_CORE2I7 | m_GENERIC,
1966 /* X86_TUNE_USE_HIMODE_FIOP */
1967 m_386 | m_486 | m_K6_GEODE,
1969 /* X86_TUNE_USE_SIMODE_FIOP */
1970 ~(m_PENT | m_PPRO | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC),
1972 /* X86_TUNE_USE_MOV0 */
1975 /* X86_TUNE_USE_CLTD */
1976 ~(m_PENT | m_CORE2I7 | m_ATOM | m_K6 | m_GENERIC),
1978 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1981 /* X86_TUNE_SPLIT_LONG_MOVES */
1984 /* X86_TUNE_READ_MODIFY_WRITE */
1987 /* X86_TUNE_READ_MODIFY */
1990 /* X86_TUNE_PROMOTE_QIMODE */
1991 m_386 | m_486 | m_PENT | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
1993 /* X86_TUNE_FAST_PREFIX */
1994 ~(m_386 | m_486 | m_PENT),
1996 /* X86_TUNE_SINGLE_STRINGOP */
1997 m_386 | m_P4_NOCONA,
1999 /* X86_TUNE_QIMODE_MATH */
2002 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
2003 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
2004 might be considered for Generic32 if our scheme for avoiding partial
2005 stalls was more effective. */
2008 /* X86_TUNE_PROMOTE_QI_REGS */
2011 /* X86_TUNE_PROMOTE_HI_REGS */
2014 /* X86_TUNE_SINGLE_POP: Enable if single pop insn is preferred
2015 over esp addition. */
2016 m_386 | m_486 | m_PENT | m_PPRO,
2018 /* X86_TUNE_DOUBLE_POP: Enable if double pop insn is preferred
2019 over esp addition. */
2022 /* X86_TUNE_SINGLE_PUSH: Enable if single push insn is preferred
2023 over esp subtraction. */
2024 m_386 | m_486 | m_PENT | m_K6_GEODE,
2026 /* X86_TUNE_DOUBLE_PUSH: Enable if double push insn is preferred
2027 over esp subtraction. */
2028 m_PENT | m_K6_GEODE,
2030 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
2031 for DFmode copies */
2032 ~(m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GEODE | m_AMD_MULTIPLE | m_GENERIC),
2034 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
2035 m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
2037 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
2038 conflict here between PPro/Pentium4 based chips that treat 128bit
2039 SSE registers as single units and K8 based chips that divide SSE
2040 registers into two 64bit halves. This knob promotes all store
2041 destinations to 128bit to allow register renaming on 128bit SSE units,
2042 but usually results in one extra microop on 64bit SSE units.
2043 Experimental results show that disabling this option on P4 brings over
2044 20% SPECfp regression, while enabling it on K8 brings a roughly 2.4%
2045 regression that can be partly masked by careful scheduling of moves. */
2046 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMDFAM10 | m_BDVER | m_GENERIC,
2048 /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
2049 m_COREI7 | m_AMDFAM10 | m_BDVER | m_BTVER1,
2051 /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
2054 /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
2057 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
2058 are resolved on SSE register parts instead of whole registers, so we may
2059 maintain just the lower part of scalar values in the proper format,
2060 leaving the upper part undefined. */
2063 /* X86_TUNE_SSE_TYPELESS_STORES */
2066 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
2067 m_PPRO | m_P4_NOCONA,
2069 /* X86_TUNE_MEMORY_MISMATCH_STALL */
2070 m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
2072 /* X86_TUNE_PROLOGUE_USING_MOVE */
2073 m_PPRO | m_CORE2I7 | m_ATOM | m_ATHLON_K8 | m_GENERIC,
2075 /* X86_TUNE_EPILOGUE_USING_MOVE */
2076 m_PPRO | m_CORE2I7 | m_ATOM | m_ATHLON_K8 | m_GENERIC,
2078 /* X86_TUNE_SHIFT1 */
2081 /* X86_TUNE_USE_FFREEP */
2084 /* X86_TUNE_INTER_UNIT_MOVES */
2085 ~(m_AMD_MULTIPLE | m_GENERIC),
2087 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
2088 ~(m_AMDFAM10 | m_BDVER),
2090 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
2091 than 4 branch instructions in the 16 byte window. */
2092 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
2094 /* X86_TUNE_SCHEDULE */
2095 m_PENT | m_PPRO | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
2097 /* X86_TUNE_USE_BT */
2098 m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
2100 /* X86_TUNE_USE_INCDEC */
2101 ~(m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GENERIC),
2103 /* X86_TUNE_PAD_RETURNS */
2104 m_CORE2I7 | m_AMD_MULTIPLE | m_GENERIC,
2106 /* X86_TUNE_PAD_SHORT_FUNCTION: Pad short functions. */
2109 /* X86_TUNE_EXT_80387_CONSTANTS */
2110 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_ATHLON_K8 | m_GENERIC,
2112 /* X86_TUNE_SHORTEN_X87_SSE */
2115 /* X86_TUNE_AVOID_VECTOR_DECODE */
2116 m_CORE2I7_64 | m_K8 | m_GENERIC64,
2118 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for HImode
2119 and SImode multiply, but the 386 and 486 do HImode multiply faster. */
2122 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
2123 vector path on AMD machines. */
2124 m_CORE2I7_64 | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC64,
2126 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
2127 machines. */
2128 m_CORE2I7_64 | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC64,
2130 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
2131 than a MOV. */
2134 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
2135 but one byte longer. */
2138 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
2139 operand that cannot be represented using a modRM byte. The XOR
2140 replacement is long decoded, so this split helps here as well. */
2143 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
2144 from FP to FP. */
2145 m_CORE2I7 | m_AMDFAM10 | m_GENERIC,
2147 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
2148 from integer to FP. */
2151 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
2152 with a subsequent conditional jump instruction into a single
2153 compare-and-branch uop. */
2156 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
2157 will impact LEA instruction selection. */
2160 /* X86_TUNE_VECTORIZE_DOUBLE: Enable double precision vector
2161 instructions. */
2164 /* X86_TUNE_SOFTWARE_PREFETCHING_BENEFICIAL: Enable software prefetching
2165 at -O3. For the moment, the prefetching seems badly tuned for Intel
2166 chips. */
2167 m_K6_GEODE | m_AMD_MULTIPLE,
2169 /* X86_TUNE_AVX128_OPTIMAL: Enable 128-bit AVX instruction generation for
2170 the auto-vectorizer. */
2173 /* X86_TUNE_REASSOC_INT_TO_PARALLEL: Try to produce parallel computations
2174 during reassociation of integer computation. */
2177 /* X86_TUNE_REASSOC_FP_TO_PARALLEL: Try to produce parallel computations
2178 during reassociation of fp computation. */
2180 };
2182 /* Feature tests against the various architecture variations. */
2183 unsigned char ix86_arch_features[X86_ARCH_LAST];
2185 /* Feature tests against the various architecture variations, used to create
2186 ix86_arch_features based on the processor mask. */
2187 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2188 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
2189 ~(m_386 | m_486 | m_PENT | m_K6),
2191 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2194 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2197 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2200 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2201 ~m_386,
2202 };
2204 static const unsigned int x86_accumulate_outgoing_args
2205 = m_PPRO | m_P4_NOCONA | m_ATOM | m_CORE2I7 | m_AMD_MULTIPLE | m_GENERIC;
2207 static const unsigned int x86_arch_always_fancy_math_387
2208 = m_PENT | m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC;
2210 static const unsigned int x86_avx256_split_unaligned_load
2211 = m_COREI7 | m_GENERIC;
2213 static const unsigned int x86_avx256_split_unaligned_store
2214 = m_COREI7 | m_BDVER | m_GENERIC;
2216 /* In case the average insn count for single function invocation is
2217 lower than this constant, emit fast (but longer) prologue and
2218 epilogue code. */
2219 #define FAST_PROLOGUE_INSN_COUNT 20
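/* Illustrative sketch, not part of GCC: the prologue style choice
   compares an estimated insn count for the function body against
   FAST_PROLOGUE_INSN_COUNT; the real comparison is made while the
   frame layout is computed.  */

static bool ATTRIBUTE_UNUSED
example_want_fast_prologue (int estimated_insn_count)
{
  return estimated_insn_count < FAST_PROLOGUE_INSN_COUNT;
}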
2221 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
2222 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2223 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2224 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2226 /* Array of the smallest class containing reg number REGNO, indexed by
2227 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2229 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2230 {
2231 /* ax, dx, cx, bx */
2232 AREG, DREG, CREG, BREG,
2233 /* si, di, bp, sp */
2234 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2236 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2237 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2240 /* flags, fpsr, fpcr, frame */
2241 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2243 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2246 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2249 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2250 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2251 /* SSE REX registers */
2252 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2253 SSE_REGS, SSE_REGS,
2254 };
2256 /* The "default" register map used in 32bit mode. */
2258 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2259 {
2260 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2261 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2262 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2263 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2264 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2265 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2266 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2267 };
2269 /* The "default" register map used in 64bit mode. */
2271 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2272 {
2273 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2274 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2275 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2276 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2277 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2278 8,9,10,11,12,13,14,15, /* extended integer registers */
2279 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2280 };
2282 /* Define the register numbers to be used in Dwarf debugging information.
2283 The SVR4 reference port C compiler uses the following register numbers
2284 in its Dwarf output code:
2285 0 for %eax (gcc regno = 0)
2286 1 for %ecx (gcc regno = 2)
2287 2 for %edx (gcc regno = 1)
2288 3 for %ebx (gcc regno = 3)
2289 4 for %esp (gcc regno = 7)
2290 5 for %ebp (gcc regno = 6)
2291 6 for %esi (gcc regno = 4)
2292 7 for %edi (gcc regno = 5)
2293 The following three DWARF register numbers are never generated by
2294 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2295 believes these numbers have these meanings.
2296 8 for %eip (no gcc equivalent)
2297 9 for %eflags (gcc regno = 17)
2298 10 for %trapno (no gcc equivalent)
2299 It is not at all clear how we should number the FP stack registers
2300 for the x86 architecture. If the version of SDB on x86/svr4 were
2301 a bit less brain dead with respect to floating-point then we would
2302 have a precedent to follow with respect to DWARF register numbers
2303 for x86 FP registers, but the SDB on x86/svr4 is so completely
2304 broken with respect to FP registers that it is hardly worth thinking
2305 of it as something to strive for compatibility with.
2306 The version of x86/svr4 SDB I have at the moment does (partially)
2307 seem to believe that DWARF register number 11 is associated with
2308 the x86 register %st(0), but that's about all. Higher DWARF
2309 register numbers don't seem to be associated with anything in
2310 particular, and even for DWARF regno 11, SDB only seems to under-
2311 stand that it should say that a variable lives in %st(0) (when
2312 asked via an `=' command) if we said it was in DWARF regno 11,
2313 but SDB still prints garbage when asked for the value of the
2314 variable in question (via a `/' command).
2315 (Also note that the labels SDB prints for various FP stack regs
2316 when doing an `x' command are all wrong.)
2317 Note that these problems generally don't affect the native SVR4
2318 C compiler because it doesn't allow the use of -O with -g and
2319 because when it is *not* optimizing, it allocates a memory
2320 location for each floating-point variable, and the memory
2321 location is what gets described in the DWARF AT_location
2322 attribute for the variable in question.
2323 Regardless of the severe mental illness of the x86/svr4 SDB, we
2324 do something sensible here and we use the following DWARF
2325 register numbers. Note that these are all stack-top-relative
2326 numbers.
2327 11 for %st(0) (gcc regno = 8)
2328 12 for %st(1) (gcc regno = 9)
2329 13 for %st(2) (gcc regno = 10)
2330 14 for %st(3) (gcc regno = 11)
2331 15 for %st(4) (gcc regno = 12)
2332 16 for %st(5) (gcc regno = 13)
2333 17 for %st(6) (gcc regno = 14)
2334 18 for %st(7) (gcc regno = 15)
2335 */
2336 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2337 {
2338 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2339 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2340 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2341 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2342 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2343 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2344 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2345 };
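/* Illustrative sketch, not part of GCC: translating a GCC hard register
   number into its Dwarf number is a plain table lookup, e.g. GCC regno
   7 (%esp) yields Dwarf regno 4 under the SVR4 numbering above, and -1
   entries have no Dwarf equivalent.  DBX_REGISTER_NUMBER in the headers
   selects among these maps.  */

static int ATTRIBUTE_UNUSED
example_svr4_dwarf_regno (unsigned int regno)
{
  gcc_assert (regno < FIRST_PSEUDO_REGISTER);
  return svr4_dbx_register_map[regno];
}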
2347 /* Define parameter passing and return registers. */
2349 static int const x86_64_int_parameter_registers[6] =
2350 {
2351 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2352 };
2354 static int const x86_64_ms_abi_int_parameter_registers[4] =
2355 {
2356 CX_REG, DX_REG, R8_REG, R9_REG
2357 };
2359 static int const x86_64_int_return_registers[4] =
2360 {
2361 AX_REG, DX_REG, DI_REG, SI_REG
2362 };
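/* Illustrative sketch, not part of GCC's argument-passing code: the
   N-th integer argument (counting integer arguments only) is passed in
   the N-th entry of the per-ABI table above, six registers for the
   SysV ABI and four for the MS ABI.  */

static int ATTRIBUTE_UNUSED
example_int_arg_regno (enum calling_abi abi, int n)
{
  if (abi == MS_ABI)
    return n < 4 ? x86_64_ms_abi_int_parameter_registers[n] : -1;
  return n < 6 ? x86_64_int_parameter_registers[n] : -1;
}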
2364 /* Define the structure for the machine field in struct function. */
2366 struct GTY(()) stack_local_entry {
2367 unsigned short mode;
2368 unsigned short n;
2369 rtx rtl;
2370 struct stack_local_entry *next;
2371 };
2373 /* Structure describing stack frame layout.
2374 Stack grows downward:
2376 [arguments]
2377 <- ARG_POINTER
2378 saved pc
2380 saved static chain if ix86_static_chain_on_stack
2382 saved frame pointer if frame_pointer_needed
2383 <- HARD_FRAME_POINTER
2385 [saved regs]
2386 <- reg_save_offset
2387 [padding0]
2389 [saved SSE regs]
2390 <- sse_regs_save_offset
2391 [padding1] |
2392 [va_arg registers] |
2394 [frame] |
2396 [padding2] | = to_allocate
2397 <- STACK_POINTER */
2398 struct ix86_frame
2399 {
2405 int outgoing_arguments_size;
2406 HOST_WIDE_INT frame;
2408 /* The offsets relative to ARG_POINTER. */
2409 HOST_WIDE_INT frame_pointer_offset;
2410 HOST_WIDE_INT hard_frame_pointer_offset;
2411 HOST_WIDE_INT stack_pointer_offset;
2412 HOST_WIDE_INT hfp_save_offset;
2413 HOST_WIDE_INT reg_save_offset;
2414 HOST_WIDE_INT sse_reg_save_offset;
2416 /* When save_regs_using_mov is set, emit prologue using
2417 move instead of push instructions. */
2418 bool save_regs_using_mov;
2421 /* Which cpu are we scheduling for. */
2422 enum attr_cpu ix86_schedule;
2424 /* Which cpu are we optimizing for. */
2425 enum processor_type ix86_tune;
2427 /* Which instruction set architecture to use. */
2428 enum processor_type ix86_arch;
2430 /* true if sse prefetch instruction is not NOOP. */
2431 int x86_prefetch_sse;
2433 /* -mstackrealign option */
2434 static const char ix86_force_align_arg_pointer_string[]
2435 = "force_align_arg_pointer";
2437 static rtx (*ix86_gen_leave) (void);
2438 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2439 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2440 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2441 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2442 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2443 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2444 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2445 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2446 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2448 /* Preferred alignment for stack boundary in bits. */
2449 unsigned int ix86_preferred_stack_boundary;
2451 /* Alignment for incoming stack boundary in bits specified at
2452 command line. */
2453 static unsigned int ix86_user_incoming_stack_boundary;
2455 /* Default alignment for incoming stack boundary in bits. */
2456 static unsigned int ix86_default_incoming_stack_boundary;
2458 /* Alignment for incoming stack boundary in bits. */
2459 unsigned int ix86_incoming_stack_boundary;
2461 /* Calling abi specific va_list type nodes. */
2462 static GTY(()) tree sysv_va_list_type_node;
2463 static GTY(()) tree ms_va_list_type_node;
2465 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2466 char internal_label_prefix[16];
2467 int internal_label_prefix_len;
2469 /* Fence to use after loop using movnt. */
2470 tree x86_mfence;
2472 /* Register class used for passing a given 64bit part of the argument.
2473 These represent classes as documented by the psABI, with the exception
2474 of the SSESF and SSEDF classes, which are basically the SSE class:
2475 gcc just uses SF or DFmode moves instead of DImode to avoid
2476 reformatting penalties.
2477 Similarly we play games with the INTEGERSI_CLASS to use cheaper SImode
2478 moves whenever possible (the upper half does contain padding). */
2479 enum x86_64_reg_class
2480 {
2481 X86_64_NO_CLASS,
2482 X86_64_INTEGER_CLASS,
2483 X86_64_INTEGERSI_CLASS,
2484 X86_64_SSE_CLASS,
2485 X86_64_SSESF_CLASS,
2486 X86_64_SSEDF_CLASS,
2487 X86_64_SSEUP_CLASS,
2488 X86_64_X87_CLASS,
2489 X86_64_X87UP_CLASS,
2490 X86_64_COMPLEX_X87_CLASS,
2491 X86_64_MEMORY_CLASS
2492 };
2494 #define MAX_CLASSES 4
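/* Illustrative sketch, not part of GCC's classifier: under the psABI a
   16-byte struct { double d; long l; } splits into two eightbytes
   classified SSEDF and INTEGER, so it travels in one SSE and one
   integer register.  The real classifier fills an array of up to
   MAX_CLASSES classes, one per eightbyte.  */

static int ATTRIBUTE_UNUSED
example_classify_double_long (enum x86_64_reg_class classes[MAX_CLASSES])
{
  classes[0] = X86_64_SSEDF_CLASS;	/* First eightbyte: the double.  */
  classes[1] = X86_64_INTEGER_CLASS;	/* Second eightbyte: the long.  */
  return 2;				/* Number of eightbytes used.  */
}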
2496 /* Table of constants used by fldpi, fldln2, etc.... */
2497 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2498 static bool ext_80387_constants_init = 0;
2501 static struct machine_function * ix86_init_machine_status (void);
2502 static rtx ix86_function_value (const_tree, const_tree, bool);
2503 static bool ix86_function_value_regno_p (const unsigned int);
2504 static unsigned int ix86_function_arg_boundary (enum machine_mode,
2506 static rtx ix86_static_chain (const_tree, bool);
2507 static int ix86_function_regparm (const_tree, const_tree);
2508 static void ix86_compute_frame_layout (struct ix86_frame *);
2509 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
2511 static void ix86_add_new_builtins (HOST_WIDE_INT);
2512 static tree ix86_canonical_va_list_type (tree);
2513 static void predict_jump (int);
2514 static unsigned int split_stack_prologue_scratch_regno (void);
2515 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2517 enum ix86_function_specific_strings
2518 {
2519 IX86_FUNCTION_SPECIFIC_ARCH,
2520 IX86_FUNCTION_SPECIFIC_TUNE,
2521 IX86_FUNCTION_SPECIFIC_MAX
2522 };
2524 static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
2525 const char *, enum fpmath_unit, bool);
2526 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
2527 static void ix86_function_specific_save (struct cl_target_option *);
2528 static void ix86_function_specific_restore (struct cl_target_option *);
2529 static void ix86_function_specific_print (FILE *, int,
2530 struct cl_target_option *);
2531 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2532 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2533 struct gcc_options *);
2534 static bool ix86_can_inline_p (tree, tree);
2535 static void ix86_set_current_function (tree);
2536 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2538 static enum calling_abi ix86_function_abi (const_tree);
2541 #ifndef SUBTARGET32_DEFAULT_CPU
2542 #define SUBTARGET32_DEFAULT_CPU "i386"
2543 #endif
2545 /* The svr4 ABI for the i386 says that records and unions are returned
2546 in memory. */
2547 #ifndef DEFAULT_PCC_STRUCT_RETURN
2548 #define DEFAULT_PCC_STRUCT_RETURN 1
2549 #endif
2551 /* Whether -mtune= or -march= were specified */
2552 static int ix86_tune_defaulted;
2553 static int ix86_arch_specified;
2555 /* Vectorization library interface and handlers. */
2556 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2558 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2559 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2561 /* Processor target table, indexed by processor number */
2562 static const struct ptt
2563 {
2564 const struct processor_costs *cost; /* Processor costs */
2565 const int align_loop; /* Default alignments. */
2566 const int align_loop_max_skip;
2567 const int align_jump;
2568 const int align_jump_max_skip;
2569 const int align_func;
2570 };
2572 static const struct ptt processor_target_table[PROCESSOR_max] =
2573 {
2574 {&i386_cost, 4, 3, 4, 3, 4},
2575 {&i486_cost, 16, 15, 16, 15, 16},
2576 {&pentium_cost, 16, 7, 16, 7, 16},
2577 {&pentiumpro_cost, 16, 15, 16, 10, 16},
2578 {&geode_cost, 0, 0, 0, 0, 0},
2579 {&k6_cost, 32, 7, 32, 7, 32},
2580 {&athlon_cost, 16, 7, 16, 7, 16},
2581 {&pentium4_cost, 0, 0, 0, 0, 0},
2582 {&k8_cost, 16, 7, 16, 7, 16},
2583 {&nocona_cost, 0, 0, 0, 0, 0},
2584 /* Core 2 32-bit. */
2585 {&generic32_cost, 16, 10, 16, 10, 16},
2586 /* Core 2 64-bit. */
2587 {&generic64_cost, 16, 10, 16, 10, 16},
2588 /* Core i7 32-bit. */
2589 {&generic32_cost, 16, 10, 16, 10, 16},
2590 /* Core i7 64-bit. */
2591 {&generic64_cost, 16, 10, 16, 10, 16},
2592 {&generic32_cost, 16, 7, 16, 7, 16},
2593 {&generic64_cost, 16, 10, 16, 10, 16},
2594 {&amdfam10_cost, 32, 24, 32, 7, 32},
2595 {&bdver1_cost, 32, 24, 32, 7, 32},
2596 {&bdver2_cost, 32, 24, 32, 7, 32},
2597 {&btver1_cost, 32, 24, 32, 7, 32},
2598 {&atom_cost, 16, 15, 16, 7, 16}
2599 };
2601 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
2631 /* Return true if a red-zone is in use. */
2633 static inline bool
2634 ix86_using_red_zone (void)
2635 {
2636 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2637 }
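/* Illustrative sketch, not part of GCC: a typical consumer of
   ix86_using_red_zone checks whether a small amount of data can live
   below the stack pointer without adjusting it, the SysV x86-64 ABI
   red zone being 128 bytes.  */

static bool ATTRIBUTE_UNUSED
example_fits_in_red_zone (HOST_WIDE_INT size)
{
  return ix86_using_red_zone () && size <= 128;
}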
2639 /* Return a string that documents the current -m options. The caller is
2640 responsible for freeing the string. */
2642 static char *
2643 ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
2644 const char *tune, enum fpmath_unit fpmath,
2645 bool add_nl_p)
2646 {
2647 struct ix86_target_opts
2648 {
2649 const char *option; /* option string */
2650 HOST_WIDE_INT mask; /* isa mask options */
2651 };
2653 /* This table is ordered so that options like -msse4.2 that imply
2654 preceding options are matched first. */
2655 static struct ix86_target_opts isa_opts[] =
2657 { "-m64", OPTION_MASK_ISA_64BIT },
2658 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2659 { "-mfma", OPTION_MASK_ISA_FMA },
2660 { "-mxop", OPTION_MASK_ISA_XOP },
2661 { "-mlwp", OPTION_MASK_ISA_LWP },
2662 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2663 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2664 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2665 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2666 { "-msse3", OPTION_MASK_ISA_SSE3 },
2667 { "-msse2", OPTION_MASK_ISA_SSE2 },
2668 { "-msse", OPTION_MASK_ISA_SSE },
2669 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2670 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2671 { "-mmmx", OPTION_MASK_ISA_MMX },
2672 { "-mabm", OPTION_MASK_ISA_ABM },
2673 { "-mbmi", OPTION_MASK_ISA_BMI },
2674 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
2675 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
2676 { "-mtbm", OPTION_MASK_ISA_TBM },
2677 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2678 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2679 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2680 { "-maes", OPTION_MASK_ISA_AES },
2681 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2682 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2683 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2684 { "-mf16c", OPTION_MASK_ISA_F16C },
2688 static struct ix86_target_opts flag_opts[] =
2690 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2691 { "-m80387", MASK_80387 },
2692 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2693 { "-malign-double", MASK_ALIGN_DOUBLE },
2694 { "-mcld", MASK_CLD },
2695 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2696 { "-mieee-fp", MASK_IEEE_FP },
2697 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2698 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2699 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2700 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2701 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2702 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2703 { "-mno-red-zone", MASK_NO_RED_ZONE },
2704 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2705 { "-mrecip", MASK_RECIP },
2706 { "-mrtd", MASK_RTD },
2707 { "-msseregparm", MASK_SSEREGPARM },
2708 { "-mstack-arg-probe", MASK_STACK_PROBE },
2709 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2710 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2711 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2712 { "-mvzeroupper", MASK_VZEROUPPER },
2713 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
2714 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
2715 { "-mprefer-avx128", MASK_PREFER_AVX128},
2718 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2720 char isa_other[40];
2721 char target_other[40];
2722 unsigned num = 0;
2723 unsigned i, j;
2724 char *ret;
2725 char *ptr;
2726 size_t len;
2727 size_t line_len;
2728 size_t sep_len;
2730 memset (opts, '\0', sizeof (opts));
2732 /* Add -march= option. */
2733 if (arch)
2734 {
2735 opts[num][0] = "-march=";
2736 opts[num++][1] = arch;
2737 }
2739 /* Add -mtune= option. */
2740 if (tune)
2741 {
2742 opts[num][0] = "-mtune=";
2743 opts[num++][1] = tune;
2744 }
2746 /* Pick out the options in isa options. */
2747 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2748 {
2749 if ((isa & isa_opts[i].mask) != 0)
2750 {
2751 opts[num++][0] = isa_opts[i].option;
2752 isa &= ~ isa_opts[i].mask;
2753 }
2754 }
2756 if (isa && add_nl_p)
2757 {
2758 opts[num++][0] = isa_other;
2759 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
2760 isa);
2761 }
2763 /* Add flag options. */
2764 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2765 {
2766 if ((flags & flag_opts[i].mask) != 0)
2767 {
2768 opts[num++][0] = flag_opts[i].option;
2769 flags &= ~ flag_opts[i].mask;
2770 }
2771 }
2773 if (flags && add_nl_p)
2774 {
2775 opts[num++][0] = target_other;
2776 sprintf (target_other, "(other flags: %#x)", flags);
2777 }
2779 /* Add -fpmath= option. */
2780 if (fpmath)
2781 {
2782 opts[num][0] = "-mfpmath=";
2783 switch ((int) fpmath)
2784 {
2785 case FPMATH_387:
2786 opts[num++][1] = "387";
2787 break;
2789 case FPMATH_SSE:
2790 opts[num++][1] = "sse";
2791 break;
2793 case FPMATH_387 | FPMATH_SSE:
2794 opts[num++][1] = "sse+387";
2795 break;
2797 default:
2798 gcc_unreachable ();
2799 }
2800 }
2806 gcc_assert (num < ARRAY_SIZE (opts));
2808 /* Size the string. */
2809 len = 0;
2810 sep_len = (add_nl_p) ? 3 : 1;
2811 for (i = 0; i < num; i++)
2812 {
2813 len += sep_len;
2814 for (j = 0; j < 2; j++)
2815 if (opts[i][j])
2816 len += strlen (opts[i][j]);
2817 }
2819 /* Build the string. */
2820 ret = ptr = (char *) xmalloc (len);
2821 line_len = 0;
2823 for (i = 0; i < num; i++)
2824 {
2825 size_t len2[2];
2827 for (j = 0; j < 2; j++)
2828 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2830 if (i != 0)
2831 {
2832 *ptr++ = ' ';
2833 line_len++;
2835 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2836 {
2837 *ptr++ = '\\';
2838 *ptr++ = '\n';
2839 line_len = 0;
2840 }
2841 }
2843 for (j = 0; j < 2; j++)
2844 if (opts[i][j])
2845 {
2846 memcpy (ptr, opts[i][j], len2[j]);
2847 ptr += len2[j];
2848 line_len += len2[j];
2849 }
2850 }
2852 *ptr = '\0';
2853 gcc_assert (ret + len >= ptr);
2855 return ret;
2856 }
2858 /* Return true if profiling code should be emitted before the
2859 prologue, and false otherwise.
2860 Note: for x86 with "hotfix" this case is rejected with sorry (). */
2861 static bool
2862 ix86_profile_before_prologue (void)
2863 {
2864 return flag_fentry != 0;
2865 }
2867 /* Function that is callable from the debugger to print the current
2868 options. */
2869 static void
2870 ix86_debug_options (void)
2871 {
2872 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2873 ix86_arch_string, ix86_tune_string,
2874 ix86_fpmath, true);
2876 if (opts)
2877 {
2878 fprintf (stderr, "%s\n\n", opts);
2879 free (opts);
2880 }
2881 else
2882 fputs ("<no options>\n\n", stderr);
2883 }
2887 /* Override various settings based on options. If MAIN_ARGS_P, the
2888 options are from the command line, otherwise they are from
2889 attributes. */
2891 static void
2892 ix86_option_override_internal (bool main_args_p)
2893 {
2894 int i;
2895 unsigned int ix86_arch_mask, ix86_tune_mask;
2896 const bool ix86_tune_specified = (ix86_tune_string != NULL);
2897 const char *prefix;
2898 const char *suffix;
2899 const char *sw;
2901 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
2902 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
2903 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
2904 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
2905 #define PTA_AES (HOST_WIDE_INT_1 << 4)
2906 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
2907 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
2908 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
2909 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
2910 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
2911 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
2912 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
2913 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
2914 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
2915 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
2916 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
2917 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
2918 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
2919 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
2920 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
2921 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
2922 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
2923 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
2924 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
2925 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
2926 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
2927 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
2928 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
2929 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
2930 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
2931 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
2932 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
2933 /* if this reaches 64, need to widen struct pta flags below */
2935 static struct pta
2936 {
2937 const char *const name; /* processor name or nickname. */
2938 const enum processor_type processor;
2939 const enum attr_cpu schedule;
2940 const unsigned HOST_WIDE_INT flags;
2941 }
2942 const processor_alias_table[] =
2943 {
2944 {"i386", PROCESSOR_I386, CPU_NONE, 0},
2945 {"i486", PROCESSOR_I486, CPU_NONE, 0},
2946 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2947 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2948 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
2949 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
2950 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2951 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2952 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
2953 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2954 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2955 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
2956 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2958 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2960 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2961 PTA_MMX | PTA_SSE | PTA_SSE2},
2962 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
2963 PTA_MMX | PTA_SSE | PTA_SSE2},
2964 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
2965 PTA_MMX | PTA_SSE | PTA_SSE2},
2966 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
2967 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2968 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
2969 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2970 | PTA_CX16 | PTA_NO_SAHF},
2971 {"core2", PROCESSOR_CORE2_64, CPU_CORE2,
2972 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2973 | PTA_SSSE3 | PTA_CX16},
2974 {"corei7", PROCESSOR_COREI7_64, CPU_COREI7,
2975 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2976 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_CX16},
2977 {"corei7-avx", PROCESSOR_COREI7_64, CPU_COREI7,
2978 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2979 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX
2980 | PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL},
2981 {"core-avx-i", PROCESSOR_COREI7_64, CPU_COREI7,
2982 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2983 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX
2984 | PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL | PTA_FSGSBASE
2985 | PTA_RDRND | PTA_F16C},
2986 {"core-avx2", PROCESSOR_COREI7_64, CPU_COREI7,
2987 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2988 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX | PTA_AVX2
2989 | PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL | PTA_FSGSBASE
2990 | PTA_RDRND | PTA_F16C | PTA_BMI | PTA_BMI2 | PTA_LZCNT
2991 | PTA_FMA | PTA_MOVBE},
2992 {"atom", PROCESSOR_ATOM, CPU_ATOM,
2993 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2994 | PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
2995 {"geode", PROCESSOR_GEODE, CPU_GEODE,
2996 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2997 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
2998 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2999 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
3000 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3001 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3002 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3003 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3004 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3005 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
3006 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3007 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
3008 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3009 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
3010 {"x86-64", PROCESSOR_K8, CPU_K8,
3011 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
3012 {"k8", PROCESSOR_K8, CPU_K8,
3013 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3014 | PTA_SSE2 | PTA_NO_SAHF},
3015 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3016 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3017 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3018 {"opteron", PROCESSOR_K8, CPU_K8,
3019 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3020 | PTA_SSE2 | PTA_NO_SAHF},
3021 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3022 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3023 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3024 {"athlon64", PROCESSOR_K8, CPU_K8,
3025 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3026 | PTA_SSE2 | PTA_NO_SAHF},
3027 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3028 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3029 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3030 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3031 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3032 | PTA_SSE2 | PTA_NO_SAHF},
3033 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3034 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3035 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
3036 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3037 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3038 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
3039 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3040 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3041 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3042 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3043 | PTA_XOP | PTA_LWP},
3044 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3045 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3046 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3047 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3048 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3049 | PTA_FMA},
3050 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC64,
3051 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3052 | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16},
3053 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
3054 0 /* flags are only used for -march switch. */ },
3055 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
3056 PTA_64BIT /* flags are only used for -march switch. */ },
3057 };
3059 /* -mrecip options. */
3060 static struct
3061 {
3062 const char *string; /* option name */
3063 unsigned int mask; /* mask bits to set */
3064 }
3065 const recip_options[] =
3066 {
3067 { "all", RECIP_MASK_ALL },
3068 { "none", RECIP_MASK_NONE },
3069 { "div", RECIP_MASK_DIV },
3070 { "sqrt", RECIP_MASK_SQRT },
3071 { "vec-div", RECIP_MASK_VEC_DIV },
3072 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
3075 int const pta_size = ARRAY_SIZE (processor_alias_table);
3077 /* Set up prefix/suffix so the error messages refer to either the command
3078 line argument, or the attribute(target). */
3079 if (main_args_p)
3080 {
3081 prefix = "-m";
3082 suffix = "";
3083 sw = "switch";
3084 }
3085 else
3086 {
3087 prefix = "option(\"";
3088 suffix = "\")";
3089 sw = "attribute";
3090 }
3092 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3093 SUBTARGET_OVERRIDE_OPTIONS;
3094 #endif
3096 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3097 SUBSUBTARGET_OVERRIDE_OPTIONS;
3098 #endif
3100 if (TARGET_X32)
3101 ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3103 /* -fPIC is the default for x86_64. */
3104 if (TARGET_MACHO && TARGET_64BIT)
3105 flag_pic = 2;
3107 /* Need to check -mtune=generic first. */
3108 if (ix86_tune_string)
3109 {
3110 if (!strcmp (ix86_tune_string, "generic")
3111 || !strcmp (ix86_tune_string, "i686")
3112 /* As special support for cross compilers we read -mtune=native
3113 as -mtune=generic. With native compilers we won't see the
3114 -mtune=native, as it was changed by the driver. */
3115 || !strcmp (ix86_tune_string, "native"))
3118 ix86_tune_string = "generic64";
3120 ix86_tune_string = "generic32";
3122 /* If this call is for setting the option attribute, allow the
3123 generic32/generic64 that was previously set. */
3124 else if (!main_args_p
3125 && (!strcmp (ix86_tune_string, "generic32")
3126 || !strcmp (ix86_tune_string, "generic64")))
3128 else if (!strncmp (ix86_tune_string, "generic", 7))
3129 error ("bad value (%s) for %stune=%s %s",
3130 ix86_tune_string, prefix, suffix, sw);
3131 else if (!strcmp (ix86_tune_string, "x86-64"))
3132 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3133 "%stune=k8%s or %stune=generic%s instead as appropriate",
3134 prefix, suffix, prefix, suffix, prefix, suffix);
3135 }
3136 else
3137 {
3138 if (ix86_arch_string)
3139 ix86_tune_string = ix86_arch_string;
3140 if (!ix86_tune_string)
3141 {
3142 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
3143 ix86_tune_defaulted = 1;
3144 }
3146 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
3147 need to use a sensible tune option. */
3148 if (!strcmp (ix86_tune_string, "generic")
3149 || !strcmp (ix86_tune_string, "x86-64")
3150 || !strcmp (ix86_tune_string, "i686"))
3153 ix86_tune_string = "generic64";
3155 ix86_tune_string = "generic32";
3159 if (ix86_stringop_alg == rep_prefix_8_byte && !TARGET_64BIT)
3160 {
3161 /* rep; movq isn't available in 32-bit code. */
3162 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3163 ix86_stringop_alg = no_stringop;
3164 }
3166 if (!ix86_arch_string)
3167 ix86_arch_string = TARGET_64BIT ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3168 else
3169 ix86_arch_specified = 1;
3171 if (!global_options_set.x_ix86_abi)
3172 ix86_abi = DEFAULT_ABI;
3174 if (global_options_set.x_ix86_cmodel)
3176 switch (ix86_cmodel)
3181 ix86_cmodel = CM_SMALL_PIC;
3183 error ("code model %qs not supported in the %s bit mode",
3190 ix86_cmodel = CM_MEDIUM_PIC;
3192 error ("code model %qs not supported in the %s bit mode",
3194 else if (TARGET_X32)
3195 error ("code model %qs not supported in x32 mode",
3202 ix86_cmodel = CM_LARGE_PIC;
3204 error ("code model %qs not supported in the %s bit mode",
3206 else if (TARGET_X32)
3207 error ("code model %qs not supported in x32 mode",
3213 error ("code model %s does not support PIC mode", "32");
3215 error ("code model %qs not supported in the %s bit mode",
3222 error ("code model %s does not support PIC mode", "kernel");
3223 ix86_cmodel = CM_32;
3226 error ("code model %qs not supported in the %s bit mode",
3236 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3237 use of rip-relative addressing. This eliminates fixups that
3238 would otherwise be needed if this object is to be placed in a
3239 DLL, and is essentially just as efficient as direct addressing. */
3240 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
3241 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
3242 else if (TARGET_64BIT)
3243 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
3244 else
3245 ix86_cmodel = CM_32;
3247 if (TARGET_MACHO && ix86_asm_dialect == ASM_INTEL)
3248 {
3249 error ("-masm=intel not supported in this configuration");
3250 ix86_asm_dialect = ASM_ATT;
3251 }
3252 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3253 sorry ("%i-bit mode not compiled in",
3254 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3256 for (i = 0; i < pta_size; i++)
3257 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
3258 {
3259 ix86_schedule = processor_alias_table[i].schedule;
3260 ix86_arch = processor_alias_table[i].processor;
3261 /* Default cpu tuning to the architecture. */
3262 ix86_tune = ix86_arch;
3264 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3265 error ("CPU you selected does not support x86-64 "
3268 if (processor_alias_table[i].flags & PTA_MMX
3269 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3270 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3271 if (processor_alias_table[i].flags & PTA_3DNOW
3272 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3273 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3274 if (processor_alias_table[i].flags & PTA_3DNOW_A
3275 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3276 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3277 if (processor_alias_table[i].flags & PTA_SSE
3278 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3279 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3280 if (processor_alias_table[i].flags & PTA_SSE2
3281 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3282 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3283 if (processor_alias_table[i].flags & PTA_SSE3
3284 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3285 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3286 if (processor_alias_table[i].flags & PTA_SSSE3
3287 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3288 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3289 if (processor_alias_table[i].flags & PTA_SSE4_1
3290 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3291 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3292 if (processor_alias_table[i].flags & PTA_SSE4_2
3293 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3294 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3295 if (processor_alias_table[i].flags & PTA_AVX
3296 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3297 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3298 if (processor_alias_table[i].flags & PTA_AVX2
3299 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
3300 ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
3301 if (processor_alias_table[i].flags & PTA_FMA
3302 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3303 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3304 if (processor_alias_table[i].flags & PTA_SSE4A
3305 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3306 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3307 if (processor_alias_table[i].flags & PTA_FMA4
3308 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3309 ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3310 if (processor_alias_table[i].flags & PTA_XOP
3311 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3312 ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3313 if (processor_alias_table[i].flags & PTA_LWP
3314 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3315 ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3316 if (processor_alias_table[i].flags & PTA_ABM
3317 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3318 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3319 if (processor_alias_table[i].flags & PTA_BMI
3320 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3321 ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3322 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
3323 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
3324 ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
3325 if (processor_alias_table[i].flags & PTA_TBM
3326 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3327 ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3328 if (processor_alias_table[i].flags & PTA_BMI2
3329 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
3330 ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
3331 if (processor_alias_table[i].flags & PTA_CX16
3332 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3333 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3334 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3335 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3336 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3337 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
3338 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3339 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3340 if (processor_alias_table[i].flags & PTA_MOVBE
3341 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3342 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3343 if (processor_alias_table[i].flags & PTA_AES
3344 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3345 ix86_isa_flags |= OPTION_MASK_ISA_AES;
3346 if (processor_alias_table[i].flags & PTA_PCLMUL
3347 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3348 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3349 if (processor_alias_table[i].flags & PTA_FSGSBASE
3350 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3351 ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3352 if (processor_alias_table[i].flags & PTA_RDRND
3353 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3354 ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3355 if (processor_alias_table[i].flags & PTA_F16C
3356 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3357 ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3358 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3359 x86_prefetch_sse = true;
3361 break;
3362 }
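/* Note (illustrative, not GCC code): most of the if chain above is a
   one-to-one mapping that could be driven by a table such as

     static const struct { unsigned HOST_WIDE_INT pta; int isa; }
     pta_isa_map[] = {
       { PTA_MMX, OPTION_MASK_ISA_MMX },
       { PTA_SSE, OPTION_MASK_ISA_SSE },
       ...
     };

   it stays expanded because a few entries (LZCNT/POPCNT implied by
   PTA_ABM, the inverted PTA_NO_SAHF test) do not fit that pattern.  */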
  if (!strcmp (ix86_arch_string, "generic"))
    error ("generic CPU can be used only for %stune=%s %s",
	   prefix, suffix, sw);
  else if (!strncmp (ix86_arch_string, "generic", 7) || i == pta_size)
    error ("bad value (%s) for %sarch=%s %s",
	   ix86_arch_string, prefix, suffix, sw);

  ix86_arch_mask = 1u << ix86_arch;
  for (i = 0; i < X86_ARCH_LAST; ++i)
    ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
      {
	ix86_schedule = processor_alias_table[i].schedule;
	ix86_tune = processor_alias_table[i].processor;
	if (TARGET_64BIT)
	  {
	    if (!(processor_alias_table[i].flags & PTA_64BIT))
	      {
		if (ix86_tune_defaulted)
		  {
		    ix86_tune_string = "x86-64";
		    for (i = 0; i < pta_size; i++)
		      if (! strcmp (ix86_tune_string,
				    processor_alias_table[i].name))
			break;
		    ix86_schedule = processor_alias_table[i].schedule;
		    ix86_tune = processor_alias_table[i].processor;
		  }
		else
		  error ("CPU you selected does not support x86-64 "
			 "instruction set");
	      }
	  }
	else
	  {
	    /* Adjust tuning when compiling for the 32-bit ABI.  */
	    switch (ix86_tune)
	      {
	      case PROCESSOR_GENERIC64:
		ix86_tune = PROCESSOR_GENERIC32;
		ix86_schedule = CPU_PENTIUMPRO;
		break;

	      case PROCESSOR_CORE2_64:
		ix86_tune = PROCESSOR_CORE2_32;
		break;

	      case PROCESSOR_COREI7_64:
		ix86_tune = PROCESSOR_COREI7_32;
		break;

	      default:
		break;
	      }
	  }
	/* Intel CPUs have always interpreted SSE prefetch instructions as
	   NOPs; so, we can enable SSE prefetch instructions even when
	   -mtune (rather than -march) points us to a processor that has them.
	   However, the VIA C3 gives a SIGILL, so we only do that for i686 and
	   higher processors.  */
	if (TARGET_CMOVE
	    && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
	  x86_prefetch_sse = true;
	break;
      }
  if (ix86_tune_specified && i == pta_size)
    error ("bad value (%s) for %stune=%s %s",
	   ix86_tune_string, prefix, suffix, sw);

  ix86_tune_mask = 1u << ix86_tune;
  for (i = 0; i < X86_TUNE_LAST; ++i)
    ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
#ifndef USE_IX86_FRAME_POINTER
#define USE_IX86_FRAME_POINTER 0
#endif

#ifndef USE_X86_64_FRAME_POINTER
#define USE_X86_64_FRAME_POINTER 0
#endif

  /* Set the default values for switches whose default depends on TARGET_64BIT
     in case they weren't overwritten by command line options.  */
  if (TARGET_64BIT)
    {
      if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
	flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
      if (flag_asynchronous_unwind_tables == 2)
	flag_unwind_tables = flag_asynchronous_unwind_tables = 1;
      if (flag_pcc_struct_return == 2)
	flag_pcc_struct_return = 0;
    }
  else
    {
      if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
	flag_omit_frame_pointer = !(USE_IX86_FRAME_POINTER || optimize_size);
      if (flag_asynchronous_unwind_tables == 2)
	flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
      if (flag_pcc_struct_return == 2)
	flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
    }

  if (optimize_size)
    ix86_cost = &ix86_size_cost;
  else
    ix86_cost = processor_target_table[ix86_tune].cost;
  /* Arrange to set up i386_stack_locals for all functions.  */
  init_machine_status = ix86_init_machine_status;

  /* Validate the -mregparm= value.  */
  if (global_options_set.x_ix86_regparm)
    {
      if (TARGET_64BIT)
	warning (0, "-mregparm is ignored in 64-bit mode");
      if (ix86_regparm > REGPARM_MAX)
	{
	  error ("-mregparm=%d is not between 0 and %d",
		 ix86_regparm, REGPARM_MAX);
	  ix86_regparm = 0;
	}
    }
  if (TARGET_64BIT)
    ix86_regparm = REGPARM_MAX;
  /* Default align_* from the processor table.  */
  if (align_loops == 0)
    {
      align_loops = processor_target_table[ix86_tune].align_loop;
      align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
    }
  if (align_jumps == 0)
    {
      align_jumps = processor_target_table[ix86_tune].align_jump;
      align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
    }
  if (align_functions == 0)
    align_functions = processor_target_table[ix86_tune].align_func;

  /* Provide a default for the -mbranch-cost= value.  */
  if (!global_options_set.x_ix86_branch_cost)
    ix86_branch_cost = ix86_cost->branch_cost;
  if (TARGET_64BIT)
    {
      target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;

      /* Enable by default the SSE and MMX builtins.  Do allow the user to
	 explicitly disable any of these.  In particular, disabling SSE and
	 MMX for kernel code is extremely useful.  */
      if (!ix86_arch_specified)
	ix86_isa_flags
	  |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
	       | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);

      if (TARGET_RTD)
	warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
    }
  else
    {
      target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;

      if (!ix86_arch_specified)
	ix86_isa_flags
	  |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;

      /* The i386 ABI does not specify a red zone.  It still makes sense to
	 use one when the programmer takes care to keep the stack from
	 being destroyed.  */
      if (!(target_flags_explicit & MASK_NO_RED_ZONE))
	target_flags |= MASK_NO_RED_ZONE;
    }
  /* Keep nonleaf frame pointers.  */
  if (flag_omit_frame_pointer)
    target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
  else if (TARGET_OMIT_LEAF_FRAME_POINTER)
    flag_omit_frame_pointer = 1;
  /* If we're doing fast math, we don't care about comparison order
     wrt NaNs.  This lets us use a shorter comparison sequence.  */
  if (flag_finite_math_only)
    target_flags &= ~MASK_IEEE_FP;

  /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
     since the insns won't need emulation.  */
  if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
    target_flags &= ~MASK_NO_FANCY_MATH_387;

  /* Likewise, if the target doesn't have a 387, or we've specified
     software floating point, don't use 387 inline intrinsics.  */
  if (!TARGET_80387)
    target_flags |= MASK_NO_FANCY_MATH_387;

  /* Turn on MMX builtins for -msse.  */
  if (TARGET_SSE)
    {
      ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
      x86_prefetch_sse = true;
    }

  /* Turn on the popcnt instruction for -msse4.2 or -mabm.  */
  if (TARGET_SSE4_2 || TARGET_ABM)
    ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;

  /* Turn on the lzcnt instruction for -mabm.  */
  if (TARGET_ABM)
    ix86_isa_flags |= OPTION_MASK_ISA_LZCNT & ~ix86_isa_flags_explicit;
  /* Validate the -mpreferred-stack-boundary= value or default it to
     PREFERRED_STACK_BOUNDARY_DEFAULT.  */
  ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
  if (global_options_set.x_ix86_preferred_stack_boundary_arg)
    {
      int min = (TARGET_64BIT ? 4 : 2);
      int max = (TARGET_SEH ? 4 : 12);

      if (ix86_preferred_stack_boundary_arg < min
	  || ix86_preferred_stack_boundary_arg > max)
	{
	  if (min == max)
	    error ("-mpreferred-stack-boundary is not supported "
		   "for this target");
	  else
	    error ("-mpreferred-stack-boundary=%d is not between %d and %d",
		   ix86_preferred_stack_boundary_arg, min, max);
	}
      else
	ix86_preferred_stack_boundary
	  = (1 << ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
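      /* E.g. -mpreferred-stack-boundary=4 gives (1 << 4) * 8 = 128 bits,
	 i.e. a 16-byte stack boundary.  */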
    }

  /* Set the default value for -mstackrealign.  */
  if (ix86_force_align_arg_pointer == -1)
    ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;

  ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;

  /* Validate the -mincoming-stack-boundary= value or default it to
     MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY.  */
  ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
  if (global_options_set.x_ix86_incoming_stack_boundary_arg)
    {
      if (ix86_incoming_stack_boundary_arg < (TARGET_64BIT ? 4 : 2)
	  || ix86_incoming_stack_boundary_arg > 12)
	error ("-mincoming-stack-boundary=%d is not between %d and 12",
	       ix86_incoming_stack_boundary_arg, TARGET_64BIT ? 4 : 2);
      else
	{
	  ix86_user_incoming_stack_boundary
	    = (1 << ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
	  ix86_incoming_stack_boundary
	    = ix86_user_incoming_stack_boundary;
	}
    }
  /* Accept -msseregparm only if at least SSE support is enabled.  */
  if (TARGET_SSEREGPARM
      && !TARGET_SSE)
    error ("%ssseregparm%s used without SSE enabled", prefix, suffix);

  if (global_options_set.x_ix86_fpmath)
    {
      if (ix86_fpmath & FPMATH_SSE)
	{
	  if (!TARGET_SSE)
	    {
	      warning (0, "SSE instruction set disabled, using 387 arithmetic");
	      ix86_fpmath = FPMATH_387;
	    }
	  else if ((ix86_fpmath & FPMATH_387) && !TARGET_80387)
	    {
	      warning (0, "387 instruction set disabled, using SSE arithmetic");
	      ix86_fpmath = FPMATH_SSE;
	    }
	}
    }
  else
    ix86_fpmath = TARGET_FPMATH_DEFAULT;
  /* If the i387 is disabled, then do not return values in it.  */
  if (!TARGET_80387)
    target_flags &= ~MASK_FLOAT_RETURNS;

  /* Use an external vectorized library when vectorizing intrinsics.  */
  if (global_options_set.x_ix86_veclibabi_type)
    switch (ix86_veclibabi_type)
      {
      case ix86_veclibabi_type_svml:
	ix86_veclib_handler = ix86_veclibabi_svml;
	break;

      case ix86_veclibabi_type_acml:
	ix86_veclib_handler = ix86_veclibabi_acml;
	break;

      default:
	gcc_unreachable ();
      }
  if ((!USE_IX86_FRAME_POINTER
       || (x86_accumulate_outgoing_args & ix86_tune_mask))
      && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
      && !optimize_size)
    target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;

  /* ??? Unwind info is not correct around the CFG unless either a frame
     pointer is present or M_A_O_A is set.  Fixing this requires rewriting
     unwind info generation to be aware of the CFG and propagating states
     around edges.  */
  if ((flag_unwind_tables || flag_asynchronous_unwind_tables
       || flag_exceptions || flag_non_call_exceptions)
      && flag_omit_frame_pointer
      && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
    {
      if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
	warning (0, "unwind tables currently require either a frame pointer "
		 "or %saccumulate-outgoing-args%s for correctness",
		 prefix, suffix);
      target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
    }

  /* If stack probes are required, the space used for large function
     arguments on the stack must also be probed, so enable
     -maccumulate-outgoing-args so this happens in the prologue.  */
  if (TARGET_STACK_PROBE
      && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
    {
      if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
	warning (0, "stack probing requires %saccumulate-outgoing-args%s "
		 "for correctness", prefix, suffix);
      target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
    }

  /* For sane SSE instruction set generation we need the fcomi instruction.
     It is safe to enable all CMOVE instructions.  Also, the RDRAND intrinsic
     expands to a sequence that includes a conditional move.  */
  if (TARGET_SSE || TARGET_RDRND)
    TARGET_CMOVE = 1;
  /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix.  */
  {
    char *p;
    ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
    p = strchr (internal_label_prefix, 'X');
    internal_label_prefix_len = p - internal_label_prefix;
    *p = '\0';
  }

  /* When a scheduling description is not available, disable the scheduler
     passes so they won't slow down compilation and make x87 code slower.  */
  if (!TARGET_SCHEDULE)
    flag_schedule_insns_after_reload = flag_schedule_insns = 0;
  maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
			 ix86_cost->simultaneous_prefetches,
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, ix86_cost->prefetch_block,
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  maybe_set_param_value (PARAM_L1_CACHE_SIZE, ix86_cost->l1_cache_size,
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  maybe_set_param_value (PARAM_L2_CACHE_SIZE, ix86_cost->l2_cache_size,
			 global_options.x_param_values,
			 global_options_set.x_param_values);
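  /* These defaults take effect only when the user did not set the
     corresponding --param explicitly; they expose the selected CPU's
     cache geometry to passes such as software loop prefetching.  */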
  /* Enable software prefetching at -O3 for CPUs where prefetching is
     helpful.  */
  if (flag_prefetch_loop_arrays < 0
      && HAVE_prefetch
      && optimize >= 3
      && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
    flag_prefetch_loop_arrays = 1;

  /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
     can be optimized to ap = __builtin_next_arg (0).  */
  if (!TARGET_64BIT && !flag_split_stack)
    targetm.expand_builtin_va_start = NULL;
  if (TARGET_64BIT)
    {
      ix86_gen_leave = gen_leave_rex64;
      ix86_gen_add3 = gen_adddi3;
      ix86_gen_sub3 = gen_subdi3;
      ix86_gen_sub3_carry = gen_subdi3_carry;
      ix86_gen_one_cmpl2 = gen_one_cmpldi2;
      ix86_gen_monitor = gen_sse3_monitor64;
      ix86_gen_andsp = gen_anddi3;
      ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
      ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
      ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
    }
  else
    {
      ix86_gen_leave = gen_leave;
      ix86_gen_add3 = gen_addsi3;
      ix86_gen_sub3 = gen_subsi3;
      ix86_gen_sub3_carry = gen_subsi3_carry;
      ix86_gen_one_cmpl2 = gen_one_cmplsi2;
      ix86_gen_monitor = gen_sse3_monitor;
      ix86_gen_andsp = gen_andsi3;
      ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
      ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
      ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
    }
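  /* After this point, e.g. ix86_gen_add3 (dst, src1, src2) emits the
     word-sized add pattern (adddi3 in 64-bit mode, addsi3 otherwise),
     so callers need not repeat the TARGET_64BIT check.  */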
  /* Use -mcld by default for 32-bit code if configured with --enable-cld.  */
  if (!TARGET_64BIT)
    target_flags |= MASK_CLD & ~target_flags_explicit;

  if (!TARGET_64BIT && flag_pic)
    {
      if (flag_fentry > 0)
	sorry ("-mfentry isn%'t supported for 32-bit in combination "
	       "with -fpic");
      flag_fentry = 0;
    }
  else if (TARGET_SEH)
    {
      if (flag_fentry == 0)
	sorry ("-mno-fentry isn%'t compatible with SEH");
      flag_fentry = 1;
    }
  else if (flag_fentry < 0)
    {
#if defined(PROFILE_BEFORE_PROLOGUE)
      flag_fentry = 1;
#else
      flag_fentry = 0;
#endif
    }
  if (TARGET_AVX)
    {
      /* When not optimizing for size, enable the vzeroupper optimization
	 for TARGET_AVX with -fexpensive-optimizations and split 32-byte
	 AVX unaligned loads/stores.  */
      if (!optimize_size)
	{
	  if (flag_expensive_optimizations
	      && !(target_flags_explicit & MASK_VZEROUPPER))
	    target_flags |= MASK_VZEROUPPER;
	  if ((x86_avx256_split_unaligned_load & ix86_tune_mask)
	      && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
	    target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
	  if ((x86_avx256_split_unaligned_store & ix86_tune_mask)
	      && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_STORE))
	    target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
	  /* Enable 128-bit AVX instruction generation
	     for the auto-vectorizer.  */
	  if (TARGET_AVX128_OPTIMAL
	      && !(target_flags_explicit & MASK_PREFER_AVX128))
	    target_flags |= MASK_PREFER_AVX128;
	}
    }
  else
    {
      /* Disable the vzeroupper pass if TARGET_AVX is disabled.  */
      target_flags &= ~MASK_VZEROUPPER;
    }
  if (ix86_recip_name)
    {
      char *p = ASTRDUP (ix86_recip_name);
      char *q;
      unsigned int mask, i;
      bool invert;

      while ((q = strtok (p, ",")) != NULL)
	{
	  p = NULL;
	  if (*q == '!')
	    {
	      invert = true;
	      q++;
	    }
	  else
	    invert = false;

	  if (!strcmp (q, "default"))
	    mask = RECIP_MASK_ALL;
	  else
	    {
	      for (i = 0; i < ARRAY_SIZE (recip_options); i++)
		if (!strcmp (q, recip_options[i].string))
		  {
		    mask = recip_options[i].mask;
		    break;
		  }

	      if (i == ARRAY_SIZE (recip_options))
		{
		  error ("unknown option for -mrecip=%s", q);
		  invert = false;
		  mask = RECIP_MASK_NONE;
		}
	    }

	  recip_mask_explicit |= mask;
	  if (invert)
	    recip_mask &= ~mask;
	  else
	    recip_mask |= mask;
	}
    }

  if (TARGET_RECIP)
    recip_mask |= RECIP_MASK_ALL & ~recip_mask_explicit;
  else if (target_flags_explicit & MASK_RECIP)
    recip_mask &= ~(RECIP_MASK_ALL & ~recip_mask_explicit);
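  /* For instance (assuming "sqrt" is one of the recip_options entries),
     -mrecip=all,!sqrt enables every reciprocal approximation except the
     scalar square root: the "!" prefix sets INVERT, so that option's
     bits are cleared from recip_mask again.  */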
  /* Save the initial options in case the user does function specific
     options.  */
  target_option_default_node = target_option_current_node
    = build_target_option_node ();
}

/* Return TRUE if VAL is passed in a register with 256bit AVX modes.  */

static bool
function_pass_avx256_p (const_rtx val)
{
  if (!val)
    return false;

  if (REG_P (val) && VALID_AVX256_REG_MODE (GET_MODE (val)))
    return true;

  if (GET_CODE (val) == PARALLEL)
    {
      int i;
      rtx r;

      for (i = XVECLEN (val, 0) - 1; i >= 0; i--)
	{
	  r = XVECEXP (val, 0, i);
	  if (GET_CODE (r) == EXPR_LIST
	      && XEXP (r, 0)
	      && REG_P (XEXP (r, 0))
	      && (GET_MODE (XEXP (r, 0)) == OImode
		  || VALID_AVX256_REG_MODE (GET_MODE (XEXP (r, 0)))))
	    return true;
	}
    }

  return false;
}
/* Implement the TARGET_OPTION_OVERRIDE hook.  */

static void
ix86_option_override (void)
{
  ix86_option_override_internal (true);
}
/* Update register usage after having seen the compiler flags.  */

static void
ix86_conditional_register_usage (void)
{
  int i;
  unsigned int j;

  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    {
      if (fixed_regs[i] > 1)
	fixed_regs[i] = (fixed_regs[i] == (TARGET_64BIT ? 3 : 2));
      if (call_used_regs[i] > 1)
	call_used_regs[i] = (call_used_regs[i] == (TARGET_64BIT ? 3 : 2));
    }

  /* The PIC register, if it exists, is fixed.  */
  j = PIC_OFFSET_TABLE_REGNUM;
  if (j != INVALID_REGNUM)
    fixed_regs[j] = call_used_regs[j] = 1;
  /* The 64-bit MS_ABI changes the set of call-used registers.  */
  if (TARGET_64BIT_MS_ABI)
    {
      call_used_regs[SI_REG] = 0;
      call_used_regs[DI_REG] = 0;
      call_used_regs[XMM6_REG] = 0;
      call_used_regs[XMM7_REG] = 0;
      for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
	call_used_regs[i] = 0;
    }

  /* The default setting of CLOBBERED_REGS is for 32-bit; add in the
     other call-clobbered regs for 64-bit.  */
  if (TARGET_64BIT)
    {
      CLEAR_HARD_REG_SET (reg_class_contents[(int) CLOBBERED_REGS]);

      for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
	if (TEST_HARD_REG_BIT (reg_class_contents[(int) GENERAL_REGS], i)
	    && call_used_regs[i])
	  SET_HARD_REG_BIT (reg_class_contents[(int) CLOBBERED_REGS], i);
    }

  /* If MMX is disabled, squash the registers.  */
  if (! TARGET_MMX)
    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
      if (TEST_HARD_REG_BIT (reg_class_contents[(int) MMX_REGS], i))
	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";

  /* If SSE is disabled, squash the registers.  */
  if (! TARGET_SSE)
    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
      if (TEST_HARD_REG_BIT (reg_class_contents[(int) SSE_REGS], i))
	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";

  /* If the FPU is disabled, squash the registers.  */
  if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
      if (TEST_HARD_REG_BIT (reg_class_contents[(int) FLOAT_REGS], i))
	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";

  /* If 32-bit, squash the 64-bit registers.  */
  if (! TARGET_64BIT)
    {
      for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
	reg_names[i] = "";
      for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
	reg_names[i] = "";
    }
}
/* Save the current options.  */

static void
ix86_function_specific_save (struct cl_target_option *ptr)
{
  ptr->arch = ix86_arch;
  ptr->schedule = ix86_schedule;
  ptr->tune = ix86_tune;
  ptr->branch_cost = ix86_branch_cost;
  ptr->tune_defaulted = ix86_tune_defaulted;
  ptr->arch_specified = ix86_arch_specified;
  ptr->x_ix86_isa_flags_explicit = ix86_isa_flags_explicit;
  ptr->ix86_target_flags_explicit = target_flags_explicit;
  ptr->x_recip_mask_explicit = recip_mask_explicit;

  /* The fields are char but the variables are not; make sure the
     values fit in the fields.  */
  gcc_assert (ptr->arch == ix86_arch);
  gcc_assert (ptr->schedule == ix86_schedule);
  gcc_assert (ptr->tune == ix86_tune);
  gcc_assert (ptr->branch_cost == ix86_branch_cost);
}
/* Restore the current options.  */

static void
ix86_function_specific_restore (struct cl_target_option *ptr)
{
  enum processor_type old_tune = ix86_tune;
  enum processor_type old_arch = ix86_arch;
  unsigned int ix86_arch_mask, ix86_tune_mask;
  int i;

  ix86_arch = (enum processor_type) ptr->arch;
  ix86_schedule = (enum attr_cpu) ptr->schedule;
  ix86_tune = (enum processor_type) ptr->tune;
  ix86_branch_cost = ptr->branch_cost;
  ix86_tune_defaulted = ptr->tune_defaulted;
  ix86_arch_specified = ptr->arch_specified;
  ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
  target_flags_explicit = ptr->ix86_target_flags_explicit;
  recip_mask_explicit = ptr->x_recip_mask_explicit;

  /* Recreate the arch feature tests if the arch changed.  */
  if (old_arch != ix86_arch)
    {
      ix86_arch_mask = 1u << ix86_arch;
      for (i = 0; i < X86_ARCH_LAST; ++i)
	ix86_arch_features[i]
	  = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
    }

  /* Recreate the tune optimization tests.  */
  if (old_tune != ix86_tune)
    {
      ix86_tune_mask = 1u << ix86_tune;
      for (i = 0; i < X86_TUNE_LAST; ++i)
	ix86_tune_features[i]
	  = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
    }
}
/* Print the current options.  */

static void
ix86_function_specific_print (FILE *file, int indent,
			      struct cl_target_option *ptr)
{
  char *target_string
    = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
			  NULL, NULL, ptr->x_ix86_fpmath, false);

  fprintf (file, "%*sarch = %d (%s)\n",
	   indent, "",
	   ptr->arch,
	   ((ptr->arch < TARGET_CPU_DEFAULT_max)
	    ? cpu_names[ptr->arch]
	    : "<unknown>"));

  fprintf (file, "%*stune = %d (%s)\n",
	   indent, "",
	   ptr->tune,
	   ((ptr->tune < TARGET_CPU_DEFAULT_max)
	    ? cpu_names[ptr->tune]
	    : "<unknown>"));

  fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);

  if (target_string)
    {
      fprintf (file, "%*s%s\n", indent, "", target_string);
      free (target_string);
    }
}
/* Inner function to process the attribute((target(...))): take one argument
   and set the current options from that argument.  If we have a list,
   recursively go over the list.  */

static bool
ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
				     struct gcc_options *enum_opts_set)
{
  char *next_optstr;
  bool ret = true;
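  /* For example, a declaration like

	 int foo (void) __attribute__((target ("avx,no-sse4a,fpmath=sse")));

     reaches this function as the STRING_CST "avx,no-sse4a,fpmath=sse";
     each comma-separated piece is matched against the table below, with
     a "no-" prefix inverting the option.  */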
#define IX86_ATTR_ISA(S,O)   { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
#define IX86_ATTR_STR(S,O)   { S, sizeof (S)-1, ix86_opt_str, O, 0 }
#define IX86_ATTR_ENUM(S,O)  { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
#define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
#define IX86_ATTR_NO(S,O,M)  { S, sizeof (S)-1, ix86_opt_no, O, M }

  enum ix86_opt_type
  {
    ix86_opt_unknown,
    ix86_opt_yes,
    ix86_opt_no,
    ix86_opt_str,
    ix86_opt_enum,
    ix86_opt_isa
  };

  static const struct
  {
    const char *string;
    size_t len;
    enum ix86_opt_type type;
    int opt;
    int mask;
  } attrs[] = {
    /* isa options */
    IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
    IX86_ATTR_ISA ("abm", OPT_mabm),
    IX86_ATTR_ISA ("bmi", OPT_mbmi),
    IX86_ATTR_ISA ("bmi2", OPT_mbmi2),
    IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
    IX86_ATTR_ISA ("tbm", OPT_mtbm),
    IX86_ATTR_ISA ("aes", OPT_maes),
    IX86_ATTR_ISA ("avx", OPT_mavx),
    IX86_ATTR_ISA ("avx2", OPT_mavx2),
    IX86_ATTR_ISA ("mmx", OPT_mmmx),
    IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
    IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
    IX86_ATTR_ISA ("sse", OPT_msse),
    IX86_ATTR_ISA ("sse2", OPT_msse2),
    IX86_ATTR_ISA ("sse3", OPT_msse3),
    IX86_ATTR_ISA ("sse4", OPT_msse4),
    IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
    IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
    IX86_ATTR_ISA ("sse4a", OPT_msse4a),
    IX86_ATTR_ISA ("ssse3", OPT_mssse3),
    IX86_ATTR_ISA ("fma4", OPT_mfma4),
    IX86_ATTR_ISA ("fma", OPT_mfma),
    IX86_ATTR_ISA ("xop", OPT_mxop),
    IX86_ATTR_ISA ("lwp", OPT_mlwp),
    IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
    IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
    IX86_ATTR_ISA ("f16c", OPT_mf16c),

    /* enum options */
    IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),

    /* string options */
    IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
    IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),

    /* flag options */
    IX86_ATTR_YES ("cld",
		   OPT_mcld,
		   MASK_CLD),

    IX86_ATTR_NO ("fancy-math-387",
		  OPT_mfancy_math_387,
		  MASK_NO_FANCY_MATH_387),

    IX86_ATTR_YES ("ieee-fp",
		   OPT_mieee_fp,
		   MASK_IEEE_FP),

    IX86_ATTR_YES ("inline-all-stringops",
		   OPT_minline_all_stringops,
		   MASK_INLINE_ALL_STRINGOPS),

    IX86_ATTR_YES ("inline-stringops-dynamically",
		   OPT_minline_stringops_dynamically,
		   MASK_INLINE_STRINGOPS_DYNAMICALLY),

    IX86_ATTR_NO ("align-stringops",
		  OPT_mno_align_stringops,
		  MASK_NO_ALIGN_STRINGOPS),

    IX86_ATTR_YES ("recip",
		   OPT_mrecip,
		   MASK_RECIP),
  };
  /* If this is a list, recurse to get the options.  */
  if (TREE_CODE (args) == TREE_LIST)
    {
      bool ret = true;

      for (; args; args = TREE_CHAIN (args))
	if (TREE_VALUE (args)
	    && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
						     p_strings, enum_opts_set))
	  ret = false;

      return ret;
    }

  else if (TREE_CODE (args) != STRING_CST)
    gcc_unreachable ();

  /* Handle multiple arguments separated by commas.  */
  next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
  while (next_optstr && *next_optstr != '\0')
    {
      char *p = next_optstr;
      char *orig_p = p;
      char *comma = strchr (next_optstr, ',');
      const char *opt_string;
      size_t len, opt_len;
      int opt;
      bool opt_set_p;
      char ch;
      unsigned i;
      enum ix86_opt_type type = ix86_opt_unknown;
      int mask = 0;

      if (comma)
	{
	  *comma = '\0';
	  len = comma - next_optstr;
	  next_optstr = comma + 1;
	}
      else
	{
	  len = strlen (p);
	  next_optstr = NULL;
	}

      /* Recognize no-xxx.  */
      if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
	{
	  opt_set_p = false;
	  p += 3;
	  len -= 3;
	}
      else
	opt_set_p = true;

      /* Find the option.  */
      ch = *p;
      opt = N_OPTS;
      for (i = 0; i < ARRAY_SIZE (attrs); i++)
	{
	  type = attrs[i].type;
	  opt_len = attrs[i].len;
	  if (ch == attrs[i].string[0]
	      && ((type != ix86_opt_str && type != ix86_opt_enum)
		  ? len == opt_len
		  : len > opt_len)
	      && memcmp (p, attrs[i].string, opt_len) == 0)
	    {
	      opt = attrs[i].opt;
	      mask = attrs[i].mask;
	      opt_string = attrs[i].string;
	      break;
	    }
	}
      /* Process the option.  */
      if (opt == N_OPTS)
	{
	  error ("attribute(target(\"%s\")) is unknown", orig_p);
	  ret = false;
	}

      else if (type == ix86_opt_isa)
	{
	  struct cl_decoded_option decoded;

	  generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
	  ix86_handle_option (&global_options, &global_options_set,
			      &decoded, input_location);
	}

      else if (type == ix86_opt_yes || type == ix86_opt_no)
	{
	  if (type == ix86_opt_no)
	    opt_set_p = !opt_set_p;

	  if (opt_set_p)
	    target_flags |= mask;
	  else
	    target_flags &= ~mask;
	}

      else if (type == ix86_opt_str)
	{
	  if (p_strings[opt])
	    {
	      error ("option(\"%s\") was already specified", opt_string);
	      ret = false;
	    }
	  else
	    p_strings[opt] = xstrdup (p + opt_len);
	}

      else if (type == ix86_opt_enum)
	{
	  bool arg_ok;
	  int value;

	  arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
	  if (arg_ok)
	    set_option (&global_options, enum_opts_set, opt, value,
			p + opt_len, DK_UNSPECIFIED, input_location,
			global_dc);
	  else
	    {
	      error ("attribute(target(\"%s\")) is unknown", orig_p);
	      ret = false;
	    }
	}

      else
	gcc_unreachable ();
    }

  return ret;
}
/* Return a TARGET_OPTION_NODE tree of the target options listed or NULL.  */

tree
ix86_valid_target_attribute_tree (tree args)
{
  const char *orig_arch_string = ix86_arch_string;
  const char *orig_tune_string = ix86_tune_string;
  enum fpmath_unit orig_fpmath_set = global_options_set.x_ix86_fpmath;
  int orig_tune_defaulted = ix86_tune_defaulted;
  int orig_arch_specified = ix86_arch_specified;
  char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
  tree t = NULL_TREE;
  int i;
  struct cl_target_option *def
    = TREE_TARGET_OPTION (target_option_default_node);
  struct gcc_options enum_opts_set;

  memset (&enum_opts_set, 0, sizeof (enum_opts_set));

  /* Process each of the options on the chain.  */
  if (! ix86_valid_target_attribute_inner_p (args, option_strings,
					     &enum_opts_set))
    return error_mark_node;

  /* If the changed options are different from the default, rerun
     ix86_option_override_internal, and then save the options away.
     The string options are attribute options, and will be undone
     when we copy the save structure.  */
  if (ix86_isa_flags != def->x_ix86_isa_flags
      || target_flags != def->x_target_flags
      || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
      || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
      || enum_opts_set.x_ix86_fpmath)
    {
      /* If we are using the default tune= or arch=, undo the string assigned,
	 and use the default.  */
      if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
	ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
      else if (!orig_arch_specified)
	ix86_arch_string = NULL;

      if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
	ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
      else if (orig_tune_defaulted)
	ix86_tune_string = NULL;

      /* If fpmath= is not set, and we now have sse2 on 32-bit, use it.  */
      if (enum_opts_set.x_ix86_fpmath)
	global_options_set.x_ix86_fpmath = (enum fpmath_unit) 1;
      else if (!TARGET_64BIT && TARGET_SSE)
	{
	  ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
	  global_options_set.x_ix86_fpmath = (enum fpmath_unit) 1;
	}

      /* Do any overrides, such as arch=xxx, or tune=xxx support.  */
      ix86_option_override_internal (false);

      /* Add any builtin functions with the new isa if any.  */
      ix86_add_new_builtins (ix86_isa_flags);

      /* Save the current options unless we are validating options for
	 #pragma.  */
      t = build_target_option_node ();

      ix86_arch_string = orig_arch_string;
      ix86_tune_string = orig_tune_string;
      global_options_set.x_ix86_fpmath = orig_fpmath_set;

      /* Free up the memory allocated to hold the strings.  */
      for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
	free (option_strings[i]);
    }

  return t;
}
/* Hook to validate attribute((target("string"))).  */

static bool
ix86_valid_target_attribute_p (tree fndecl,
			       tree ARG_UNUSED (name),
			       tree args,
			       int ARG_UNUSED (flags))
{
  struct cl_target_option cur_target;
  bool ret = true;
  tree old_optimize = build_optimization_node ();
  tree new_target, new_optimize;
  tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);

  /* If the function changed the optimization levels as well as setting target
     options, start with the optimizations specified.  */
  if (func_optimize && func_optimize != old_optimize)
    cl_optimization_restore (&global_options,
			     TREE_OPTIMIZATION (func_optimize));

  /* The target attributes may also change some optimization flags, so update
     the optimization options if necessary.  */
  cl_target_option_save (&cur_target, &global_options);
  new_target = ix86_valid_target_attribute_tree (args);
  new_optimize = build_optimization_node ();

  if (new_target == error_mark_node)
    ret = false;

  else if (fndecl && new_target)
    {
      DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;

      if (old_optimize != new_optimize)
	DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
    }

  cl_target_option_restore (&global_options, &cur_target);

  if (old_optimize != new_optimize)
    cl_optimization_restore (&global_options,
			     TREE_OPTIMIZATION (old_optimize));

  return ret;
}
/* Hook to determine if one function can safely inline another.  */

static bool
ix86_can_inline_p (tree caller, tree callee)
{
  bool ret = false;
  tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);

  /* If the callee has no option attributes, then it is ok to inline.  */
  if (!callee_tree)
    ret = true;

  /* If the caller has no option attributes, but the callee does, then it is
     not ok to inline.  */
  else if (!caller_tree)
    ret = false;

  else
    {
      struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
      struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);

      /* The callee's isa options should be a subset of the caller's, i.e. an
	 SSE4 function can inline an SSE2 function but an SSE2 function can't
	 inline an SSE4 function.  */
      if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
	  != callee_opts->x_ix86_isa_flags)
	ret = false;

      /* See if we have the same non-isa options.  */
      else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
	ret = false;

      /* See if arch, tune, etc. are the same.  */
      else if (caller_opts->arch != callee_opts->arch)
	ret = false;

      else if (caller_opts->tune != callee_opts->tune)
	ret = false;

      else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
	ret = false;

      else if (caller_opts->branch_cost != callee_opts->branch_cost)
	ret = false;

      else
	ret = true;
    }

  return ret;
}
/* Remember the last target of ix86_set_current_function.  */
static GTY(()) tree ix86_previous_fndecl;

/* Establish appropriate back-end context for processing the function
   FNDECL.  The argument might be NULL to indicate processing at top
   level, outside of any function scope.  */
static void
ix86_set_current_function (tree fndecl)
{
  /* Only change the context if the function changes.  This hook is called
     several times in the course of compiling a function, and we don't want to
     slow things down too much or call target_reinit when it isn't safe.  */
  if (fndecl && fndecl != ix86_previous_fndecl)
    {
      tree old_tree = (ix86_previous_fndecl
		       ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
		       : NULL_TREE);

      tree new_tree = (fndecl
		       ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
		       : NULL_TREE);

      ix86_previous_fndecl = fndecl;
      if (old_tree == new_tree)
	;

      else if (new_tree)
	{
	  cl_target_option_restore (&global_options,
				    TREE_TARGET_OPTION (new_tree));
	  target_reinit ();
	}

      else if (old_tree)
	{
	  struct cl_target_option *def
	    = TREE_TARGET_OPTION (target_option_current_node);

	  cl_target_option_restore (&global_options, def);
	  target_reinit ();
	}
    }
}
/* Return true if this goes in large data/bss.  */

static bool
ix86_in_large_data_p (tree exp)
{
  if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
    return false;

  /* Functions are never large data.  */
  if (TREE_CODE (exp) == FUNCTION_DECL)
    return false;

  if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
    {
      const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
      if (strcmp (section, ".ldata") == 0
	  || strcmp (section, ".lbss") == 0)
	return true;
      return false;
    }
  else
    {
      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));

      /* If this is an incomplete type with size 0, then we can't put it
	 in data because it might be too big when completed.  */
      if (!size || size > ix86_section_threshold)
	return true;
    }

  return false;
}
/* Switch to the appropriate section for output of DECL.
   DECL is either a `VAR_DECL' node or a constant of some sort.
   RELOC indicates whether forming the initial value of DECL requires
   link-time relocations.  */

static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
	ATTRIBUTE_UNUSED;

static section *
x86_64_elf_select_section (tree decl, int reloc,
			   unsigned HOST_WIDE_INT align)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && ix86_in_large_data_p (decl))
    {
      const char *sname = NULL;
      unsigned int flags = SECTION_WRITE;
      switch (categorize_decl_for_section (decl, reloc))
	{
	case SECCAT_DATA:
	  sname = ".ldata";
	  break;
	case SECCAT_DATA_REL:
	  sname = ".ldata.rel";
	  break;
	case SECCAT_DATA_REL_LOCAL:
	  sname = ".ldata.rel.local";
	  break;
	case SECCAT_DATA_REL_RO:
	  sname = ".ldata.rel.ro";
	  break;
	case SECCAT_DATA_REL_RO_LOCAL:
	  sname = ".ldata.rel.ro.local";
	  break;
	case SECCAT_BSS:
	  sname = ".lbss";
	  flags |= SECTION_BSS;
	  break;
	case SECCAT_RODATA:
	case SECCAT_RODATA_MERGE_STR:
	case SECCAT_RODATA_MERGE_STR_INIT:
	case SECCAT_RODATA_MERGE_CONST:
	  sname = ".lrodata";
	  flags = 0;
	  break;
	case SECCAT_SRODATA:
	case SECCAT_SDATA:
	case SECCAT_SBSS:
	  gcc_unreachable ();
	case SECCAT_TEXT:
	case SECCAT_TDATA:
	case SECCAT_TBSS:
	  /* We don't split these for the medium model.  Place them into
	     default sections and hope for the best.  */
	  break;
	}
      if (sname)
	{
	  /* We might get called with string constants, but get_named_section
	     doesn't like them as they are not DECLs.  Also, we need to set
	     flags in that case.  */
	  if (!DECL_P (decl))
	    return get_section (sname, flags, NULL);
	  return get_named_section (decl, sname, reloc);
	}
    }
  return default_elf_select_section (decl, reloc, align);
}
/* Build up a unique section name, expressed as a
   STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
   RELOC indicates whether the initial value of EXP requires
   link-time relocations.  */

static void ATTRIBUTE_UNUSED
x86_64_elf_unique_section (tree decl, int reloc)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && ix86_in_large_data_p (decl))
    {
      const char *prefix = NULL;
      /* We only need to use .gnu.linkonce if we don't have COMDAT groups.  */
      bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;

      switch (categorize_decl_for_section (decl, reloc))
	{
	case SECCAT_DATA:
	case SECCAT_DATA_REL:
	case SECCAT_DATA_REL_LOCAL:
	case SECCAT_DATA_REL_RO:
	case SECCAT_DATA_REL_RO_LOCAL:
	  prefix = one_only ? ".ld" : ".ldata";
	  break;
	case SECCAT_BSS:
	  prefix = one_only ? ".lb" : ".lbss";
	  break;
	case SECCAT_RODATA:
	case SECCAT_RODATA_MERGE_STR:
	case SECCAT_RODATA_MERGE_STR_INIT:
	case SECCAT_RODATA_MERGE_CONST:
	  prefix = one_only ? ".lr" : ".lrodata";
	  break;
	case SECCAT_SRODATA:
	case SECCAT_SDATA:
	case SECCAT_SBSS:
	  gcc_unreachable ();
	case SECCAT_TEXT:
	case SECCAT_TDATA:
	case SECCAT_TBSS:
	  /* We don't split these for the medium model.  Place them into
	     default sections and hope for the best.  */
	  break;
	}
      if (prefix)
	{
	  const char *name, *linkonce;
	  char *string;

	  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
	  name = targetm.strip_name_encoding (name);

	  /* If we're using one_only, then there needs to be a .gnu.linkonce
	     prefix to the section name.  */
	  linkonce = one_only ? ".gnu.linkonce" : "";

	  string = ACONCAT ((linkonce, prefix, ".", name, NULL));

	  DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
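	  /* For instance, a one-only large-data variable "foo" gets the
	     section name ".gnu.linkonce.ld.foo"; without one_only it would
	     simply be ".ldata.foo".  */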
	  return;
	}
    }
  default_unique_section (decl, reloc);
}
#ifdef COMMON_ASM_OP
/* This says how to output assembler code to declare an
   uninitialized external linkage data object.

   For the medium model on x86-64 we need to use the .largecomm opcode for
   large objects.  */
void
x86_elf_aligned_common (FILE *file,
			const char *name, unsigned HOST_WIDE_INT size,
			int align)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && size > (unsigned int) ix86_section_threshold)
    fputs (".largecomm\t", file);
  else
    fputs (COMMON_ASM_OP, file);
  assemble_name (file, name);
  fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
	   size, align / BITS_PER_UNIT);
}
#endif
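/* As an example, a 100000-byte object "buf" with 256-bit alignment in the
   medium model (size above ix86_section_threshold) is emitted as
   ".largecomm buf,100000,32"; smaller objects use the normal COMMON_ASM_OP
   directive.  The last operand is the alignment converted to bytes.  */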
/* Utility function for targets to use in implementing
   ASM_OUTPUT_ALIGNED_BSS.  */

void
x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
			const char *name, unsigned HOST_WIDE_INT size,
			int align)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && size > (unsigned int) ix86_section_threshold)
    switch_to_section (get_named_section (decl, ".lbss", 0));
  else
    switch_to_section (bss_section);
  ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
#ifdef ASM_DECLARE_OBJECT_NAME
  last_assemble_variable_decl = decl;
  ASM_DECLARE_OBJECT_NAME (file, name, decl);
#else
  /* The standard thing is just to output a label for the object.  */
  ASM_OUTPUT_LABEL (file, name);
#endif /* ASM_DECLARE_OBJECT_NAME */
  ASM_OUTPUT_SKIP (file, size ? size : 1);
}
/* Decide whether we must probe the stack before any space allocation
   on this target.  It's essentially TARGET_STACK_PROBE except when
   -fstack-check causes the stack to be already probed differently.  */

bool
ix86_target_stack_probe (void)
{
  /* Do not probe the stack twice if static stack checking is enabled.  */
  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
    return false;

  return TARGET_STACK_PROBE;
}
/* Decide whether we can make a sibling call to a function.  DECL is the
   declaration of the function being targeted by the call and EXP is the
   CALL_EXPR representing the call.  */

static bool
ix86_function_ok_for_sibcall (tree decl, tree exp)
{
  tree type, decl_or_type;
  rtx a, b;

  /* If we are generating position-independent code, we cannot sibcall
     optimize any indirect call, or a direct call to a global function,
     as the PLT requires %ebx be live.  (Darwin does not have a PLT.)  */
  if (!TARGET_MACHO
      && !TARGET_64BIT
      && flag_pic
      && (!decl || !targetm.binds_local_p (decl)))
    return false;

  /* If we need to align the outgoing stack, then sibcalling would
     unalign the stack, which may break the called function.  */
  if (ix86_minimum_incoming_stack_boundary (true)
      < PREFERRED_STACK_BOUNDARY)
    return false;

  if (decl)
    {
      decl_or_type = decl;
      type = TREE_TYPE (decl);
    }
  else
    {
      /* We're looking at the CALL_EXPR, we need the type of the function.  */
      type = CALL_EXPR_FN (exp);	/* pointer expression */
      type = TREE_TYPE (type);		/* pointer type */
      type = TREE_TYPE (type);		/* function type */
      decl_or_type = type;
    }

  /* Check that the return value locations are the same.  For instance,
     if we are returning floats on the 80387 register stack, we cannot
     make a sibcall from a function that doesn't return a float to a
     function that does or, conversely, from a function that does return
     a float to a function that doesn't; the necessary stack adjustment
     would not be executed.  This is also the place we notice
     differences in the return value ABI.  Note that it is ok for one
     of the functions to have void return type as long as the return
     value of the other is passed in a register.  */
  a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
  b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
			   cfun->decl, false);
  if (STACK_REG_P (a) || STACK_REG_P (b))
    {
      if (!rtx_equal_p (a, b))
	return false;
    }
  else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
    {
      /* Disable the sibcall if we need to generate vzeroupper after
	 the callee returns.  */
      if (TARGET_VZEROUPPER
	  && cfun->machine->callee_return_avx256_p
	  && !cfun->machine->caller_return_avx256_p)
	return false;
    }
  else if (!rtx_equal_p (a, b))
    return false;

  if (TARGET_64BIT)
    {
      /* The SYSV ABI has more call-clobbered registers;
	 disallow sibcalls from MS to SYSV.  */
      if (cfun->machine->call_abi == MS_ABI
	  && ix86_function_type_abi (type) == SYSV_ABI)
	return false;
    }
  else
    {
      /* If this call is indirect, we'll need to be able to use a
	 call-clobbered register for the address of the target function.
	 Make sure that all such registers are not used for passing
	 parameters.  Note that DLLIMPORT functions are indirect.  */
      if (!decl
	  || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
	{
	  if (ix86_function_regparm (type, NULL) >= 3)
	    {
	      /* ??? Need to count the actual number of registers to be used,
		 not the possible number of registers.  Fix later.  */
	      return false;
	    }
	}
    }

  /* Otherwise okay.  That also includes certain types of indirect calls.  */
  return true;
}
/* Handle the "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
   and "sseregparm" calling convention attributes;
   arguments as in struct attribute_spec.handler.  */

static tree
ix86_handle_cconv_attribute (tree *node, tree name,
			     tree args,
			     int flags ATTRIBUTE_UNUSED,
			     bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
      return NULL_TREE;
    }

  /* Can combine regparm with all attributes but fastcall and thiscall.  */
  if (is_attribute_p ("regparm", name))
    {
      tree cst;

      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
	{
	  error ("fastcall and regparm attributes are not compatible");
	}
      if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
	{
	  error ("regparm and thiscall attributes are not compatible");
	}

      cst = TREE_VALUE (args);
      if (TREE_CODE (cst) != INTEGER_CST)
	{
	  warning (OPT_Wattributes,
		   "%qE attribute requires an integer constant argument",
		   name);
	  *no_add_attrs = true;
	}
      else if (compare_tree_int (cst, REGPARM_MAX) > 0)
	{
	  warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
		   name, REGPARM_MAX);
	  *no_add_attrs = true;
	}

      return NULL_TREE;
    }

  if (TARGET_64BIT)
    {
      /* Do not warn when emulating the MS ABI.  */
      if ((TREE_CODE (*node) != FUNCTION_TYPE
	   && TREE_CODE (*node) != METHOD_TYPE)
	  || ix86_function_type_abi (*node) != MS_ABI)
	warning (OPT_Wattributes, "%qE attribute ignored",
		 name);
      *no_add_attrs = true;
      return NULL_TREE;
    }

  /* Can combine fastcall with stdcall (redundant) and sseregparm.  */
  if (is_attribute_p ("fastcall", name))
    {
      if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
	{
	  error ("fastcall and cdecl attributes are not compatible");
	}
      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
	{
	  error ("fastcall and stdcall attributes are not compatible");
	}
      if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
	{
	  error ("fastcall and regparm attributes are not compatible");
	}
      if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
	{
	  error ("fastcall and thiscall attributes are not compatible");
	}
    }

  /* Can combine stdcall with fastcall (redundant), regparm and
     sseregparm.  */
  else if (is_attribute_p ("stdcall", name))
    {
      if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
	{
	  error ("stdcall and cdecl attributes are not compatible");
	}
      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
	{
	  error ("stdcall and fastcall attributes are not compatible");
	}
      if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
	{
	  error ("stdcall and thiscall attributes are not compatible");
	}
    }

  /* Can combine cdecl with regparm and sseregparm.  */
  else if (is_attribute_p ("cdecl", name))
    {
      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
	{
	  error ("stdcall and cdecl attributes are not compatible");
	}
      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
	{
	  error ("fastcall and cdecl attributes are not compatible");
	}
      if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
	{
	  error ("cdecl and thiscall attributes are not compatible");
	}
    }
  else if (is_attribute_p ("thiscall", name))
    {
      if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
	warning (OPT_Wattributes, "%qE attribute is used for non-class method",
		 name);
      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
	{
	  error ("stdcall and thiscall attributes are not compatible");
	}
      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
	{
	  error ("fastcall and thiscall attributes are not compatible");
	}
      if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
	{
	  error ("cdecl and thiscall attributes are not compatible");
	}
    }

  /* Can combine sseregparm with all attributes.  */

  return NULL_TREE;
}
/* The transactional memory builtins are implicitly regparm or fastcall
   depending on the ABI.  Override the generic do-nothing attribute that
   these builtins were declared with, and replace it with one of the two
   attributes that we expect elsewhere.  */

static tree
ix86_handle_tm_regparm_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
				  tree args ATTRIBUTE_UNUSED,
				  int flags ATTRIBUTE_UNUSED,
				  bool *no_add_attrs)
{
  tree alt;

  /* In no case do we want to add the placeholder attribute.  */
  *no_add_attrs = true;

  /* The 64-bit ABI is unchanged for transactional memory.  */
  if (TARGET_64BIT)
    return NULL_TREE;

  /* ??? Is there a better way to validate 32-bit windows?  We have
     cfun->machine->call_abi, but that seems to be set only for 64-bit.  */
  if (CHECK_STACK_LIMIT > 0)
    alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
  else
    {
      alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
      alt = tree_cons (get_identifier ("regparm"), alt, NULL);
    }
  decl_attributes (node, alt, flags);

  return NULL_TREE;
}
/* This function determines from TYPE the calling convention.  */
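/* For instance, on 32-bit targets a plain prototyped, non-variadic
   function type with no attributes yields IX86_CALLCVT_CDECL, or
   IX86_CALLCVT_STDCALL when -mrtd is in effect; in 64-bit mode everything
   is reported as IX86_CALLCVT_CDECL.  */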
unsigned int
ix86_get_callcvt (const_tree type)
{
  unsigned int ret = 0;
  bool is_stdarg;
  tree attrs;

  if (TARGET_64BIT)
    return IX86_CALLCVT_CDECL;

  attrs = TYPE_ATTRIBUTES (type);
  if (attrs != NULL_TREE)
    {
      if (lookup_attribute ("cdecl", attrs))
	ret |= IX86_CALLCVT_CDECL;
      else if (lookup_attribute ("stdcall", attrs))
	ret |= IX86_CALLCVT_STDCALL;
      else if (lookup_attribute ("fastcall", attrs))
	ret |= IX86_CALLCVT_FASTCALL;
      else if (lookup_attribute ("thiscall", attrs))
	ret |= IX86_CALLCVT_THISCALL;

      /* Regparm isn't allowed for thiscall and fastcall.  */
      if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
	{
	  if (lookup_attribute ("regparm", attrs))
	    ret |= IX86_CALLCVT_REGPARM;
	  if (lookup_attribute ("sseregparm", attrs))
	    ret |= IX86_CALLCVT_SSEREGPARM;
	}

      if (IX86_BASE_CALLCVT (ret) != 0)
	return ret;
    }

  is_stdarg = stdarg_p (type);
  if (TARGET_RTD && !is_stdarg)
    return IX86_CALLCVT_STDCALL | ret;

  if (ret != 0
      || is_stdarg
      || TREE_CODE (type) != METHOD_TYPE
      || ix86_function_type_abi (type) != MS_ABI)
    return IX86_CALLCVT_CDECL | ret;

  return IX86_CALLCVT_THISCALL;
}
/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */

static int
ix86_comp_type_attributes (const_tree type1, const_tree type2)
{
  unsigned int ccvt1, ccvt2;

  if (TREE_CODE (type1) != FUNCTION_TYPE
      && TREE_CODE (type1) != METHOD_TYPE)
    return 1;

  ccvt1 = ix86_get_callcvt (type1);
  ccvt2 = ix86_get_callcvt (type2);
  if (ccvt1 != ccvt2)
    return 0;
  if (ix86_function_regparm (type1, NULL)
      != ix86_function_regparm (type2, NULL))
    return 0;

  return 1;
}
/* Return the regparm value for a function with the indicated TYPE and DECL.
   DECL may be NULL when calling the function indirectly
   or considering a libcall.  */

static int
ix86_function_regparm (const_tree type, const_tree decl)
{
  tree attr;
  int regparm;
  unsigned int ccvt;

  if (TARGET_64BIT)
    return (ix86_function_type_abi (type) == SYSV_ABI
	    ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
  ccvt = ix86_get_callcvt (type);
  regparm = ix86_regparm;

  if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
    {
      attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
      if (attr)
	{
	  regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
	  return regparm;
	}
    }
  else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
    return 2;
  else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
    return 1;

  /* Use the register calling convention for local functions when possible.  */
  if (decl
      && TREE_CODE (decl) == FUNCTION_DECL
      && optimize
      && !(profile_flag && !flag_fentry))
    {
      /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified.  */
      struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
      if (i && i->local && i->can_change_signature)
	{
	  int local_regparm, globals = 0, regno;

	  /* Make sure no regparm register is taken by a
	     fixed register variable.  */
	  for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
	    if (fixed_regs[local_regparm])
	      break;

	  /* We don't want to use regparm(3) for nested functions as
	     these use a static chain pointer in the third argument.  */
	  if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
	    local_regparm = 2;

	  /* In 32-bit mode save a register for the split stack.  */
	  if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
	    local_regparm = 2;

	  /* Each fixed register usage increases register pressure,
	     so fewer registers should be used for argument passing.
	     This functionality can be overridden by an explicit
	     regparm value.  */
	  for (regno = 0; regno <= DI_REG; regno++)
	    if (fixed_regs[regno])
	      globals++;

	  local_regparm
	    = globals < local_regparm ? local_regparm - globals : 0;

	  if (local_regparm > regparm)
	    regparm = local_regparm;
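	  /* For example, if %ebx is made fixed with -ffixed-ebx, GLOBALS
	     is 1 and the local regparm limit drops from 3 to 2.  */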
	}
    }

  return regparm;
}

/* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
   DFmode (2) arguments in SSE registers for a function with the
   indicated TYPE and DECL.  DECL may be NULL when calling the function
   indirectly or considering a libcall.  Otherwise return 0.  */

static int
ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
{
  gcc_assert (!TARGET_64BIT);

  /* Use SSE registers to pass SFmode and DFmode arguments if requested
     by the sseregparm attribute.  */
  if (TARGET_SSEREGPARM
      || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
    {
      if (!TARGET_SSE)
	{
	  if (warn)
	    {
	      if (decl)
		error ("calling %qD with attribute sseregparm without "
		       "SSE/SSE2 enabled", decl);
	      else
		error ("calling %qT with attribute sseregparm without "
		       "SSE/SSE2 enabled", type);
	    }
	  return 0;
	}

      return 2;
    }

  /* For local functions, pass up to SSE_REGPARM_MAX SFmode
     (and DFmode for SSE2) arguments in SSE registers.  */
  if (decl && TARGET_SSE_MATH && optimize
      && !(profile_flag && !flag_fentry))
    {
      /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified.  */
      struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
      if (i && i->local && i->can_change_signature)
	return TARGET_SSE2 ? 2 : 1;
    }

  return 0;
}
/* Return true if EAX is live at the start of the function.  Used by
   ix86_expand_prologue to determine if we need special help before
   calling allocate_stack_worker.  */

static bool
ix86_eax_live_at_start_p (void)
{
  /* Cheat.  Don't bother working forward from ix86_function_regparm
     to the function type to whether an actual argument is located in
     eax.  Instead just look at cfg info, which is still close enough
     to correct at this point.  This gives false positives for broken
     functions that might use uninitialized data that happens to be
     allocated in eax, but who cares?  */
  return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
}
static bool
ix86_keep_aggregate_return_pointer (tree fntype)
{
  tree attr;

  if (!TARGET_64BIT)
    {
      attr = lookup_attribute ("callee_pop_aggregate_return",
			       TYPE_ATTRIBUTES (fntype));
      if (attr)
	return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);

      /* For the 32-bit MS ABI the default is to keep the aggregate
	 return pointer.  */
      if (ix86_function_type_abi (fntype) == MS_ABI)
	return true;
    }
  return KEEP_AGGREGATE_RETURN_POINTER != 0;
}
/* Value is the number of bytes of arguments automatically
   popped when returning from a subroutine call.
   FUNDECL is the declaration node of the function (as a tree),
   FUNTYPE is the data type of the function (as a tree),
   or for a library call it is an identifier node for the subroutine name.
   SIZE is the number of bytes of arguments passed on the stack.

   On the 80386, the RTD insn may be used to pop them if the number
   of args is fixed, but if the number is variable then the caller
   must pop them all.  RTD can't be used for library calls now
   because the library is compiled with the Unix compiler.
   Use of RTD is a selectable option, since it is incompatible with
   standard Unix calling sequences.  If the option is not selected,
   the caller must always pop the args.

   The attribute stdcall is equivalent to RTD on a per module basis.  */
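/* For example, a 32-bit function declared
   'void __attribute__((stdcall)) f (int, int);' pops its own arguments
   with "ret $8", so this hook returns 8 (SIZE) for it, while a plain
   cdecl function yields 0 and leaves the popping to the caller.  */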
static int
ix86_return_pops_args (tree fundecl, tree funtype, int size)
{
  unsigned int ccvt;

  /* None of the 64-bit ABIs pop arguments.  */
  if (TARGET_64BIT)
    return 0;

  ccvt = ix86_get_callcvt (funtype);

  if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
	       | IX86_CALLCVT_THISCALL)) != 0
      && ! stdarg_p (funtype))
    return size;

  /* Lose any fake structure return argument if it is passed on the stack.  */
  if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
      && !ix86_keep_aggregate_return_pointer (funtype))
    {
      int nregs = ix86_function_regparm (funtype, fundecl);
      if (nregs == 0)
	return GET_MODE_SIZE (Pmode);
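      /* i.e. the callee pops the 4-byte hidden pointer to the
	 aggregate return slot with "ret $4".  */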
    }

  return 0;
}

/* Argument support functions.  */

/* Return true when a register may be used to pass function parameters.  */

bool
ix86_function_arg_regno_p (int regno)
{
  int i;
  const int *parm_regs;

  if (!TARGET_64BIT)
    {
      if (TARGET_MACHO)
	return (regno < REGPARM_MAX
		|| (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
      else
	return (regno < REGPARM_MAX
		|| (TARGET_MMX && MMX_REGNO_P (regno)
		    && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
		|| (TARGET_SSE && SSE_REGNO_P (regno)
		    && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
    }

  if (TARGET_MACHO)
    {
      if (SSE_REGNO_P (regno) && TARGET_SSE)
	return true;
    }
  else
    {
      if (TARGET_SSE && SSE_REGNO_P (regno)
	  && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
	return true;
    }

  /* TODO: The function should depend on the current function ABI but
     builtins.c would need updating then.  Therefore we use the
     default ABI.  */

  /* RAX is used as a hidden argument to va_arg functions.  */
  if (ix86_abi == SYSV_ABI && regno == AX_REG)
    return true;

  if (ix86_abi == MS_ABI)
    parm_regs = x86_64_ms_abi_int_parameter_registers;
  else
    parm_regs = x86_64_int_parameter_registers;
  for (i = 0; i < (ix86_abi == MS_ABI
		   ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
    if (regno == parm_regs[i])
      return true;
  return false;
}
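/* For instance, under the default 64-bit SysV ABI this accepts the integer
   argument registers %rdi, %rsi, %rdx, %rcx, %r8 and %r9, the SSE argument
   registers when SSE is enabled, and %rax (used as a hidden argument that
   carries the number of vector registers in varargs calls).  */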
/* Return true if we do not know how to pass TYPE solely in registers.  */

static bool
ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
{
  if (must_pass_in_stack_var_size_or_pad (mode, type))
    return true;

  /* For 32-bit, we want TImode aggregates to go on the stack.  But watch out!
     The layout_type routine is crafty and tries to trick us into passing
     currently unsupported vector types on the stack by using TImode.  */
  return (!TARGET_64BIT && mode == TImode
	  && type && TREE_CODE (type) != VECTOR_TYPE);
}

/* Return the size, in bytes, of the area reserved for arguments passed
   in registers for the function represented by FNDECL, depending on the
   calling ABI used.  */
int
ix86_reg_parm_stack_space (const_tree fndecl)
{
  enum calling_abi call_abi = SYSV_ABI;
  if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
    call_abi = ix86_function_abi (fndecl);
  else
    call_abi = ix86_function_type_abi (fndecl);
  if (TARGET_64BIT && call_abi == MS_ABI)
    return 32;
  return 0;
}
/* Returns the value SYSV_ABI or MS_ABI, dependent on FNTYPE, specifying the
   call abi used.  */
enum calling_abi
ix86_function_type_abi (const_tree fntype)
{
  if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
    {
      enum calling_abi abi = ix86_abi;
      if (abi == SYSV_ABI)
	{
	  if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
	    abi = MS_ABI;
	}
      else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
	abi = SYSV_ABI;
      return abi;
    }
  return ix86_abi;
}

static bool
ix86_function_ms_hook_prologue (const_tree fn)
{
  if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
    {
      if (decl_function_context (fn) != NULL_TREE)
	error_at (DECL_SOURCE_LOCATION (fn),
		  "ms_hook_prologue is not compatible with nested function");
      else
	return true;
    }
  return false;
}
static enum calling_abi
ix86_function_abi (const_tree fndecl)
{
  if (! fndecl)
    return ix86_abi;
  return ix86_function_type_abi (TREE_TYPE (fndecl));
}

/* Returns the value SYSV_ABI or MS_ABI, dependent on CFUN, specifying the
   call abi used.  */
enum calling_abi
ix86_cfun_abi (void)
{
  if (! cfun)
    return ix86_abi;
  return cfun->machine->call_abi;
}
/* Write the extra assembler code needed to declare a function properly.  */

void
ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
				tree decl)
{
  bool is_ms_hook = ix86_function_ms_hook_prologue (decl);

  if (is_ms_hook)
    {
      int i, filler_count = (TARGET_64BIT ? 32 : 16);
      unsigned int filler_cc = 0xcccccccc;

      for (i = 0; i < filler_count; i += 4)
	fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
    }

#ifdef SUBTARGET_ASM_UNWIND_INIT
  SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
#endif

  ASM_OUTPUT_LABEL (asm_out_file, fname);

  /* Output the magic byte marker, if the hot-patch attribute is set.  */
  if (is_ms_hook)
    {
      if (TARGET_64BIT)
	{
	  /* leaq [%rsp + 0], %rsp  */
	  asm_fprintf (asm_out_file, ASM_BYTE
		       "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
	}
      else
	{
	  /* movl.s %edi, %edi
	     push   %ebp
	     movl.s %esp, %ebp  */
	  asm_fprintf (asm_out_file, ASM_BYTE
		       "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
	}
    }
}
extern void init_regs (void);

/* Implementation of the call-abi switching target hook.  The call register
   sets specific to FNDECL are set up here.  See also
   ix86_conditional_register_usage for more details.  */
void
ix86_call_abi_override (const_tree fndecl)
{
  if (fndecl == NULL_TREE)
    cfun->machine->call_abi = ix86_abi;
  else
    cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
}

/* The 64-bit MS and SYSV ABIs have different sets of call-used registers.
   Avoid expensive re-initialization of init_regs each time we switch
   function context, since this is needed only during RTL expansion.  */
static void
ix86_maybe_switch_abi (void)
{
  if (TARGET_64BIT &&
      call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
    reinit_regs ();
}
5546 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5547 for a call to a function whose data type is FNTYPE.
5548 For a library call, FNTYPE is 0. */
5551 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
5552 tree fntype, /* tree ptr for function decl */
5553 rtx libname, /* SYMBOL_REF of library name or 0 */
5557 struct cgraph_local_info *i;
5560 memset (cum, 0, sizeof (*cum));
5562 /* Initialize for the current callee. */
5565 cfun->machine->callee_pass_avx256_p = false;
5566 cfun->machine->callee_return_avx256_p = false;
5571 i = cgraph_local_info (fndecl);
5572 cum->call_abi = ix86_function_abi (fndecl);
5573 fnret_type = TREE_TYPE (TREE_TYPE (fndecl));
5578 cum->call_abi = ix86_function_type_abi (fntype);
5580 fnret_type = TREE_TYPE (fntype);
5585 if (TARGET_VZEROUPPER && fnret_type)
5587 rtx fnret_value = ix86_function_value (fnret_type, fntype,
5589 if (function_pass_avx256_p (fnret_value))
5591 /* The return value of this function uses 256bit AVX modes. */
5593 cfun->machine->callee_return_avx256_p = true;
5595 cfun->machine->caller_return_avx256_p = true;
5599 cum->caller = caller;
5601 /* Set up the number of registers to use for passing arguments. */
5603 if (TARGET_64BIT && cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
5604 sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
5605 "or subtarget optimization implying it");
5606 cum->nregs = ix86_regparm;
5609 cum->nregs = (cum->call_abi == SYSV_ABI
5610 ? X86_64_REGPARM_MAX
5611 : X86_64_MS_REGPARM_MAX);
5615 cum->sse_nregs = SSE_REGPARM_MAX;
5618 cum->sse_nregs = (cum->call_abi == SYSV_ABI
5619 ? X86_64_SSE_REGPARM_MAX
5620 : X86_64_MS_SSE_REGPARM_MAX);
5624 cum->mmx_nregs = MMX_REGPARM_MAX;
5625 cum->warn_avx = true;
5626 cum->warn_sse = true;
5627 cum->warn_mmx = true;
5629 /* Because the type might differ between caller and callee, we need to
5630 use the actual type of the function for local calls.
5631 FIXME: cgraph_analyze can be told to actually record if a function uses
5632 va_start, so for local functions maybe_vaarg can be made more aggressive.
5634 FIXME: once the type system is fixed, we won't need this code anymore. */
5635 if (i && i->local && i->can_change_signature)
5636 fntype = TREE_TYPE (fndecl);
5637 cum->maybe_vaarg = (fntype
5638 ? (!prototype_p (fntype) || stdarg_p (fntype))
5643 /* If there are variable arguments, then we won't pass anything
5644 in registers in 32-bit mode. */
5645 if (stdarg_p (fntype))
5656 /* Use ecx and edx registers if function has fastcall attribute,
5657 else look for regparm information. */
5660 unsigned int ccvt = ix86_get_callcvt (fntype);
5661 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5664 cum->fastcall = 1; /* Same first register as in fastcall. */
5666 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5672 cum->nregs = ix86_function_regparm (fntype, fndecl);
5675 /* Set up the number of SSE registers used for passing SFmode
5676 and DFmode arguments. Warn for mismatching ABI. */
5677 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
5681 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
5682 But in the case of vector types, it is some vector mode.
5684 When we have only some of our vector isa extensions enabled, then there
5685 are some modes for which vector_mode_supported_p is false. For these
5686 modes, the generic vector support in gcc will choose some non-vector mode
5687 in order to implement the type. By computing the natural mode, we'll
5688 select the proper ABI location for the operand and not depend on whatever
5689 the middle-end decides to do with these vector types.
5691 The middle-end can't deal with vector types larger than 16 bytes. In this
5692 case, we return the original mode and warn about the ABI change if CUM isn't NULL. */
5695 static enum machine_mode
5696 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum)
5698 enum machine_mode mode = TYPE_MODE (type);
5700 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
5702 HOST_WIDE_INT size = int_size_in_bytes (type);
5703 if ((size == 8 || size == 16 || size == 32)
5704 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
5705 && TYPE_VECTOR_SUBPARTS (type) > 1)
5707 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
5709 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
5710 mode = MIN_MODE_VECTOR_FLOAT;
5712 mode = MIN_MODE_VECTOR_INT;
5714 /* Get the mode which has this inner mode and number of units. */
5715 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
5716 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
5717 && GET_MODE_INNER (mode) == innermode)
5719 if (size == 32 && !TARGET_AVX)
5721 static bool warnedavx;
5728 warning (0, "AVX vector argument without AVX "
5729 "enabled changes the ABI");
5731 return TYPE_MODE (type);
5744 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
5745 this may not agree with the mode that the type system has chosen for the
5746 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
5747 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
5750 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
5755 if (orig_mode != BLKmode)
5756 tmp = gen_rtx_REG (orig_mode, regno);
5759 tmp = gen_rtx_REG (mode, regno);
5760 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
5761 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
5767 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
5768 of this code is to classify each 8bytes of incoming argument by the register
5769 class and assign registers accordingly. */
5771 /* Return the union class of CLASS1 and CLASS2.
5772 See the x86-64 PS ABI for details. */
5774 static enum x86_64_reg_class
5775 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
5777 /* Rule #1: If both classes are equal, this is the resulting class. */
5778 if (class1 == class2)
5781 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
5783 if (class1 == X86_64_NO_CLASS)
5785 if (class2 == X86_64_NO_CLASS)
5788 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
5789 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
5790 return X86_64_MEMORY_CLASS;
5792 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
5793 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
5794 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
5795 return X86_64_INTEGERSI_CLASS;
5796 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
5797 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
5798 return X86_64_INTEGER_CLASS;
5800 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
5802 if (class1 == X86_64_X87_CLASS
5803 || class1 == X86_64_X87UP_CLASS
5804 || class1 == X86_64_COMPLEX_X87_CLASS
5805 || class2 == X86_64_X87_CLASS
5806 || class2 == X86_64_X87UP_CLASS
5807 || class2 == X86_64_COMPLEX_X87_CLASS)
5808 return X86_64_MEMORY_CLASS;
5810 /* Rule #6: Otherwise class SSE is used. */
5811 return X86_64_SSE_CLASS;
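/* Worked example of the rules above (illustrative, not used by any code
   in this file):

     struct s { double d; long l; };  -- two separate eightbytes,
					 classified SSE and INTEGER;
					 nothing to merge
     union u { double d; long l; };   -- both fields share one eightbyte;
					 merging SSEDF with INTEGER
					 yields INTEGER by rule #4, so
					 the union is passed in a GPR  */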
5814 /* Classify the argument of type TYPE and mode MODE.
5815 CLASSES will be filled by the register class used to pass each word
5816 of the operand. The number of words is returned. In case the parameter
5817 should be passed in memory, 0 is returned. As a special case for zero
5818 sized containers, classes[0] will be NO_CLASS and 1 is returned.
5820 BIT_OFFSET is used internally for handling records and specifies the
5821 offset in bits modulo 256 to avoid overflow cases.
5823 See the x86-64 PS ABI for details.
5827 classify_argument (enum machine_mode mode, const_tree type,
5828 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
5830 HOST_WIDE_INT bytes =
5831 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5832 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5834 /* Variable sized entities are always passed/returned in memory. */
5838 if (mode != VOIDmode
5839 && targetm.calls.must_pass_in_stack (mode, type))
5842 if (type && AGGREGATE_TYPE_P (type))
5846 enum x86_64_reg_class subclasses[MAX_CLASSES];
5848 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
5852 for (i = 0; i < words; i++)
5853 classes[i] = X86_64_NO_CLASS;
5855 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
5856 signal the memory class, so handle it as a special case. */
5859 classes[0] = X86_64_NO_CLASS;
5863 /* Classify each field of record and merge classes. */
5864 switch (TREE_CODE (type))
5867 /* And now merge the fields of structure. */
5868 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5870 if (TREE_CODE (field) == FIELD_DECL)
5874 if (TREE_TYPE (field) == error_mark_node)
5877 /* Bitfields are always classified as integer. Handle them
5878 early, since later code would consider them to be
5879 misaligned integers. */
5880 if (DECL_BIT_FIELD (field))
5882 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5883 i < ((int_bit_position (field) + (bit_offset % 64))
5884 + tree_low_cst (DECL_SIZE (field), 0)
5887 merge_classes (X86_64_INTEGER_CLASS,
5894 type = TREE_TYPE (field);
5896 /* Flexible array member is ignored. */
5897 if (TYPE_MODE (type) == BLKmode
5898 && TREE_CODE (type) == ARRAY_TYPE
5899 && TYPE_SIZE (type) == NULL_TREE
5900 && TYPE_DOMAIN (type) != NULL_TREE
5901 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
5906 if (!warned && warn_psabi)
5909 inform (input_location,
5910 "the ABI of passing struct with"
5911 " a flexible array member has"
5912 " changed in GCC 4.4");
5916 num = classify_argument (TYPE_MODE (type), type,
5918 (int_bit_position (field)
5919 + bit_offset) % 256);
5922 pos = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5923 for (i = 0; i < num && (i + pos) < words; i++)
5925 merge_classes (subclasses[i], classes[i + pos]);
5932 /* Arrays are handled as small records. */
5935 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
5936 TREE_TYPE (type), subclasses, bit_offset);
5940 /* The partial classes are now full classes. */
5941 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
5942 subclasses[0] = X86_64_SSE_CLASS;
5943 if (subclasses[0] == X86_64_INTEGERSI_CLASS
5944 && !((bit_offset % 64) == 0 && bytes == 4))
5945 subclasses[0] = X86_64_INTEGER_CLASS;
5947 for (i = 0; i < words; i++)
5948 classes[i] = subclasses[i % num];
5953 case QUAL_UNION_TYPE:
5954 /* Unions are similar to RECORD_TYPE but the offset is always 0. */
5956 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5958 if (TREE_CODE (field) == FIELD_DECL)
5962 if (TREE_TYPE (field) == error_mark_node)
5965 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
5966 TREE_TYPE (field), subclasses,
5970 for (i = 0; i < num; i++)
5971 classes[i] = merge_classes (subclasses[i], classes[i]);
5982 /* When size > 16 bytes, if the first eightbyte isn't
5983 X86_64_SSE_CLASS or any of the others isn't
5984 X86_64_SSEUP_CLASS, everything should be passed in memory. */
5986 if (classes[0] != X86_64_SSE_CLASS)
5989 for (i = 1; i < words; i++)
5990 if (classes[i] != X86_64_SSEUP_CLASS)
5994 /* Final merger cleanup. */
5995 for (i = 0; i < words; i++)
5997 /* If one class is MEMORY, everything should be passed in memory. */
5999 if (classes[i] == X86_64_MEMORY_CLASS)
6002 /* The X86_64_SSEUP_CLASS should always be preceded by
6003 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6004 if (classes[i] == X86_64_SSEUP_CLASS
6005 && classes[i - 1] != X86_64_SSE_CLASS
6006 && classes[i - 1] != X86_64_SSEUP_CLASS)
6008 /* The first one should never be X86_64_SSEUP_CLASS. */
6009 gcc_assert (i != 0);
6010 classes[i] = X86_64_SSE_CLASS;
6013 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6014 everything should be passed in memory. */
6015 if (classes[i] == X86_64_X87UP_CLASS
6016 && (classes[i - 1] != X86_64_X87_CLASS))
6020 /* The first one should never be X86_64_X87UP_CLASS. */
6021 gcc_assert (i != 0);
6022 if (!warned && warn_psabi)
6025 inform (input_location,
6026 "the ABI of passing union with long double"
6027 " has changed in GCC 4.4");
6035 /* Compute the alignment needed. We align all types to natural boundaries with
6036 the exception of XFmode, which is aligned to 64 bits. */
6037 if (mode != VOIDmode && mode != BLKmode)
6039 int mode_alignment = GET_MODE_BITSIZE (mode);
6042 mode_alignment = 128;
6043 else if (mode == XCmode)
6044 mode_alignment = 256;
6045 if (COMPLEX_MODE_P (mode))
6046 mode_alignment /= 2;
6047 /* Misaligned fields are always returned in memory. */
6048 if (bit_offset % mode_alignment)
6052 /* For V1xx modes, just use the base mode. */
6053 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
6054 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
6055 mode = GET_MODE_INNER (mode);
6057 /* Classification of atomic types. */
6062 classes[0] = X86_64_SSE_CLASS;
6065 classes[0] = X86_64_SSE_CLASS;
6066 classes[1] = X86_64_SSEUP_CLASS;
6076 int size = (bit_offset % 64)+ (int) GET_MODE_BITSIZE (mode);
6080 classes[0] = X86_64_INTEGERSI_CLASS;
6083 else if (size <= 64)
6085 classes[0] = X86_64_INTEGER_CLASS;
6088 else if (size <= 64+32)
6090 classes[0] = X86_64_INTEGER_CLASS;
6091 classes[1] = X86_64_INTEGERSI_CLASS;
6094 else if (size <= 64+64)
6096 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6104 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6108 /* OImode shouldn't be used directly. */
6113 if (!(bit_offset % 64))
6114 classes[0] = X86_64_SSESF_CLASS;
6116 classes[0] = X86_64_SSE_CLASS;
6119 classes[0] = X86_64_SSEDF_CLASS;
6122 classes[0] = X86_64_X87_CLASS;
6123 classes[1] = X86_64_X87UP_CLASS;
6126 classes[0] = X86_64_SSE_CLASS;
6127 classes[1] = X86_64_SSEUP_CLASS;
6130 classes[0] = X86_64_SSE_CLASS;
6131 if (!(bit_offset % 64))
6137 if (!warned && warn_psabi)
6140 inform (input_location,
6141 "the ABI of passing structure with complex float"
6142 " member has changed in GCC 4.4");
6144 classes[1] = X86_64_SSESF_CLASS;
6148 classes[0] = X86_64_SSEDF_CLASS;
6149 classes[1] = X86_64_SSEDF_CLASS;
6152 classes[0] = X86_64_COMPLEX_X87_CLASS;
6155 /* These modes are larger than 16 bytes. */
6163 classes[0] = X86_64_SSE_CLASS;
6164 classes[1] = X86_64_SSEUP_CLASS;
6165 classes[2] = X86_64_SSEUP_CLASS;
6166 classes[3] = X86_64_SSEUP_CLASS;
6174 classes[0] = X86_64_SSE_CLASS;
6175 classes[1] = X86_64_SSEUP_CLASS;
6183 classes[0] = X86_64_SSE_CLASS;
6189 gcc_assert (VECTOR_MODE_P (mode));
6194 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
6196 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
6197 classes[0] = X86_64_INTEGERSI_CLASS;
6199 classes[0] = X86_64_INTEGER_CLASS;
6200 classes[1] = X86_64_INTEGER_CLASS;
6201 return 1 + (bytes > 8);
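/* For instance (illustrative only): a 32-byte AVX vector type such as

     typedef float v8sf __attribute__ ((vector_size (32)));

   takes the V8SF case above and classifies as SSE, SSEUP, SSEUP, SSEUP,
   so with AVX enabled it travels in a single %ymm register.  */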
6205 /* Examine the argument and return the number of registers required in each
6206 class. Return 0 iff the parameter should be passed in memory. */
6208 examine_argument (enum machine_mode mode, const_tree type, int in_return,
6209 int *int_nregs, int *sse_nregs)
6211 enum x86_64_reg_class regclass[MAX_CLASSES];
6212 int n = classify_argument (mode, type, regclass, 0);
6218 for (n--; n >= 0; n--)
6219 switch (regclass[n])
6221 case X86_64_INTEGER_CLASS:
6222 case X86_64_INTEGERSI_CLASS:
6225 case X86_64_SSE_CLASS:
6226 case X86_64_SSESF_CLASS:
6227 case X86_64_SSEDF_CLASS:
6230 case X86_64_NO_CLASS:
6231 case X86_64_SSEUP_CLASS:
6233 case X86_64_X87_CLASS:
6234 case X86_64_X87UP_CLASS:
6238 case X86_64_COMPLEX_X87_CLASS:
6239 return in_return ? 2 : 0;
6240 case X86_64_MEMORY_CLASS:
6246 /* Construct container for the argument used by GCC interface. See
6247 FUNCTION_ARG for the detailed description. */
6250 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
6251 const_tree type, int in_return, int nintregs, int nsseregs,
6252 const int *intreg, int sse_regno)
6254 /* The following variables hold the static issued_error state. */
6255 static bool issued_sse_arg_error;
6256 static bool issued_sse_ret_error;
6257 static bool issued_x87_ret_error;
6259 enum machine_mode tmpmode;
6261 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6262 enum x86_64_reg_class regclass[MAX_CLASSES];
6266 int needed_sseregs, needed_intregs;
6267 rtx exp[MAX_CLASSES];
6270 n = classify_argument (mode, type, regclass, 0);
6273 if (!examine_argument (mode, type, in_return, &needed_intregs,
6276 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
6279 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
6280 some less clueful developer tries to use floating-point anyway. */
6281 if (needed_sseregs && !TARGET_SSE)
6285 if (!issued_sse_ret_error)
6287 error ("SSE register return with SSE disabled");
6288 issued_sse_ret_error = true;
6291 else if (!issued_sse_arg_error)
6293 error ("SSE register argument with SSE disabled");
6294 issued_sse_arg_error = true;
6299 /* Likewise, error if the ABI requires us to return values in the
6300 x87 registers and the user specified -mno-80387. */
6301 if (!TARGET_80387 && in_return)
6302 for (i = 0; i < n; i++)
6303 if (regclass[i] == X86_64_X87_CLASS
6304 || regclass[i] == X86_64_X87UP_CLASS
6305 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
6307 if (!issued_x87_ret_error)
6309 error ("x87 register return with x87 disabled");
6310 issued_x87_ret_error = true;
6315 /* First construct simple cases. Avoid SCmode, since we want to use
6316 a single register to pass this type. */
6317 if (n == 1 && mode != SCmode)
6318 switch (regclass[0])
6320 case X86_64_INTEGER_CLASS:
6321 case X86_64_INTEGERSI_CLASS:
6322 return gen_rtx_REG (mode, intreg[0]);
6323 case X86_64_SSE_CLASS:
6324 case X86_64_SSESF_CLASS:
6325 case X86_64_SSEDF_CLASS:
6326 if (mode != BLKmode)
6327 return gen_reg_or_parallel (mode, orig_mode,
6328 SSE_REGNO (sse_regno));
6330 case X86_64_X87_CLASS:
6331 case X86_64_COMPLEX_X87_CLASS:
6332 return gen_rtx_REG (mode, FIRST_STACK_REG);
6333 case X86_64_NO_CLASS:
6334 /* Zero sized array, struct or class. */
6339 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
6340 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
6341 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
6343 && regclass[0] == X86_64_SSE_CLASS
6344 && regclass[1] == X86_64_SSEUP_CLASS
6345 && regclass[2] == X86_64_SSEUP_CLASS
6346 && regclass[3] == X86_64_SSEUP_CLASS
6348 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
6351 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
6352 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
6353 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
6354 && regclass[1] == X86_64_INTEGER_CLASS
6355 && (mode == CDImode || mode == TImode || mode == TFmode)
6356 && intreg[0] + 1 == intreg[1])
6357 return gen_rtx_REG (mode, intreg[0]);
6359 /* Otherwise figure out the entries of the PARALLEL. */
6360 for (i = 0; i < n; i++)
6364 switch (regclass[i])
6366 case X86_64_NO_CLASS:
6368 case X86_64_INTEGER_CLASS:
6369 case X86_64_INTEGERSI_CLASS:
6370 /* Merge TImodes on aligned occasions here too. */
6371 if (i * 8 + 8 > bytes)
6372 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
6373 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
6377 /* We've requested 24 bytes for which we don't have a mode. Use DImode. */
6378 if (tmpmode == BLKmode)
6380 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6381 gen_rtx_REG (tmpmode, *intreg),
6385 case X86_64_SSESF_CLASS:
6386 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6387 gen_rtx_REG (SFmode,
6388 SSE_REGNO (sse_regno)),
6392 case X86_64_SSEDF_CLASS:
6393 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6394 gen_rtx_REG (DFmode,
6395 SSE_REGNO (sse_regno)),
6399 case X86_64_SSE_CLASS:
6407 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
6417 && regclass[1] == X86_64_SSEUP_CLASS
6418 && regclass[2] == X86_64_SSEUP_CLASS
6419 && regclass[3] == X86_64_SSEUP_CLASS);
6426 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6427 gen_rtx_REG (tmpmode,
6428 SSE_REGNO (sse_regno)),
6437 /* Empty aligned struct, union or class. */
6441 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
6442 for (i = 0; i < nexps; i++)
6443 XVECEXP (ret, 0, i) = exp [i];
6447 /* Update the data in CUM to advance over an argument of mode MODE
6448 and data type TYPE. (TYPE is null for libcalls where that information
6449 may not be available.) */
6452 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6453 const_tree type, HOST_WIDE_INT bytes,
6454 HOST_WIDE_INT words)
6470 cum->words += words;
6471 cum->nregs -= words;
6472 cum->regno += words;
6474 if (cum->nregs <= 0)
6482 /* OImode shouldn't be used directly. */
6486 if (cum->float_in_sse < 2)
6489 if (cum->float_in_sse < 1)
6506 if (!type || !AGGREGATE_TYPE_P (type))
6508 cum->sse_words += words;
6509 cum->sse_nregs -= 1;
6510 cum->sse_regno += 1;
6511 if (cum->sse_nregs <= 0)
6525 if (!type || !AGGREGATE_TYPE_P (type))
6527 cum->mmx_words += words;
6528 cum->mmx_nregs -= 1;
6529 cum->mmx_regno += 1;
6530 if (cum->mmx_nregs <= 0)
6541 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6542 const_tree type, HOST_WIDE_INT words, bool named)
6544 int int_nregs, sse_nregs;
6546 /* Unnamed 256bit vector mode parameters are passed on stack. */
6547 if (!named && VALID_AVX256_REG_MODE (mode))
6550 if (examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
6551 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
6553 cum->nregs -= int_nregs;
6554 cum->sse_nregs -= sse_nregs;
6555 cum->regno += int_nregs;
6556 cum->sse_regno += sse_nregs;
6560 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
6561 cum->words = (cum->words + align - 1) & ~(align - 1);
6562 cum->words += words;
6567 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
6568 HOST_WIDE_INT words)
6570 /* Otherwise, this should be passed indirectly. */
6571 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
6573 cum->words += words;
6581 /* Update the data in CUM to advance over an argument of mode MODE and
6582 data type TYPE. (TYPE is null for libcalls where that information
6583 may not be available.) */
6586 ix86_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
6587 const_tree type, bool named)
6589 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6590 HOST_WIDE_INT bytes, words;
6592 if (mode == BLKmode)
6593 bytes = int_size_in_bytes (type);
6595 bytes = GET_MODE_SIZE (mode);
6596 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6599 mode = type_natural_mode (type, NULL);
6601 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6602 function_arg_advance_ms_64 (cum, bytes, words);
6603 else if (TARGET_64BIT)
6604 function_arg_advance_64 (cum, mode, type, words, named);
6606 function_arg_advance_32 (cum, mode, type, bytes, words);
6609 /* Define where to put the arguments to a function.
6610 Value is zero to push the argument on the stack,
6611 or a hard register in which to store the argument.
6613 MODE is the argument's machine mode.
6614 TYPE is the data type of the argument (as a tree).
6615 This is null for libcalls where that information may
6617 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6618 the preceding args and about the function being called.
6619 NAMED is nonzero if this argument is a named parameter
6620 (otherwise it is an extra parameter matching an ellipsis). */
6623 function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6624 enum machine_mode orig_mode, const_tree type,
6625 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
6627 static bool warnedsse, warnedmmx;
6629 /* Avoid the AL settings for the Unix64 ABI. */
6630 if (mode == VOIDmode)
6646 if (words <= cum->nregs)
6648 int regno = cum->regno;
6650 /* Fastcall allocates the first two DWORD (SImode) or
6651 smaller arguments to ECX and EDX if it isn't an
6657 || (type && AGGREGATE_TYPE_P (type)))
6660 /* ECX not EAX is the first allocated register. */
6661 if (regno == AX_REG)
6664 return gen_rtx_REG (mode, regno);
6669 if (cum->float_in_sse < 2)
6672 if (cum->float_in_sse < 1)
6676 /* In 32bit, we pass TImode in xmm registers. */
6683 if (!type || !AGGREGATE_TYPE_P (type))
6685 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
6688 warning (0, "SSE vector argument without SSE enabled "
6692 return gen_reg_or_parallel (mode, orig_mode,
6693 cum->sse_regno + FIRST_SSE_REG);
6698 /* OImode shouldn't be used directly. */
6707 if (!type || !AGGREGATE_TYPE_P (type))
6710 return gen_reg_or_parallel (mode, orig_mode,
6711 cum->sse_regno + FIRST_SSE_REG);
6721 if (!type || !AGGREGATE_TYPE_P (type))
6723 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
6726 warning (0, "MMX vector argument without MMX enabled "
6730 return gen_reg_or_parallel (mode, orig_mode,
6731 cum->mmx_regno + FIRST_MMX_REG);
6740 function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6741 enum machine_mode orig_mode, const_tree type, bool named)
6743 /* Handle a hidden AL argument containing number of registers
6744 for varargs x86-64 functions. */
6745 if (mode == VOIDmode)
6746 return GEN_INT (cum->maybe_vaarg
6747 ? (cum->sse_nregs < 0
6748 ? X86_64_SSE_REGPARM_MAX
6763 /* Unnamed 256bit vector mode parameters are passed on stack. */
6769 return construct_container (mode, orig_mode, type, 0, cum->nregs,
6771 &x86_64_int_parameter_registers [cum->regno],
6776 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6777 enum machine_mode orig_mode, bool named,
6778 HOST_WIDE_INT bytes)
6782 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
6783 We use the value -2 to specify that the current function call is MSABI. */
6784 if (mode == VOIDmode)
6785 return GEN_INT (-2);
6787 /* If we've run out of registers, it goes on the stack. */
6788 if (cum->nregs == 0)
6791 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
6793 /* Only floating point modes are passed in anything but integer regs. */
6794 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
6797 regno = cum->regno + FIRST_SSE_REG;
6802 /* Unnamed floating parameters are passed in both the
6803 SSE and integer registers. */
6804 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
6805 t2 = gen_rtx_REG (mode, regno);
6806 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
6807 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
6808 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
6811 /* Handle aggregate types passed in registers. */
6812 if (orig_mode == BLKmode)
6814 if (bytes > 0 && bytes <= 8)
6815 mode = (bytes > 4 ? DImode : SImode);
6816 if (mode == BLKmode)
6820 return gen_reg_or_parallel (mode, orig_mode, regno);
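/* Illustrative sketch (guarded out): under the MS 64-bit ABI, the
   PARALLEL built above duplicates an unnamed floating argument into
   both the SSE and the integer register, so a va_arg fetch from the
   GPR shadow area still sees the bits.  */
#if 0
#include <stdarg.h>

double
ms_sum (int n, ...)	/* doubles arrive in XMM1..XMM3 and RDX/R8/R9 */
{
  va_list ap;
  double s = 0;
  va_start (ap, n);
  while (n-- > 0)
    s += va_arg (ap, double);
  va_end (ap);
  return s;
}
#endif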
6823 /* Return where to put the arguments to a function.
6824 Return zero to push the argument on the stack, or a hard register in which to store the argument.
6826 MODE is the argument's machine mode. TYPE is the data type of the
6827 argument. It is null for libcalls where that information may not be
6828 available. CUM gives information about the preceding args and about
6829 the function being called. NAMED is nonzero if this argument is a
6830 named parameter (otherwise it is an extra parameter matching an
6834 ix86_function_arg (cumulative_args_t cum_v, enum machine_mode omode,
6835 const_tree type, bool named)
6837 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6838 enum machine_mode mode = omode;
6839 HOST_WIDE_INT bytes, words;
6842 if (mode == BLKmode)
6843 bytes = int_size_in_bytes (type);
6845 bytes = GET_MODE_SIZE (mode);
6846 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6848 /* To simplify the code below, represent vector types with a vector mode
6849 even if MMX/SSE are not active. */
6850 if (type && TREE_CODE (type) == VECTOR_TYPE)
6851 mode = type_natural_mode (type, cum);
6853 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6854 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
6855 else if (TARGET_64BIT)
6856 arg = function_arg_64 (cum, mode, omode, type, named);
6858 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
6860 if (TARGET_VZEROUPPER && function_pass_avx256_p (arg))
6862 /* This argument uses 256bit AVX modes. */
6864 cfun->machine->callee_pass_avx256_p = true;
6866 cfun->machine->caller_pass_avx256_p = true;
6872 /* A C expression that indicates when an argument must be passed by
6873 reference. If nonzero for an argument, a copy of that argument is
6874 made in memory and a pointer to the argument is passed instead of
6875 the argument itself. The pointer is passed in whatever way is
6876 appropriate for passing a pointer to that type. */
6879 ix86_pass_by_reference (cumulative_args_t cum_v ATTRIBUTE_UNUSED,
6880 enum machine_mode mode ATTRIBUTE_UNUSED,
6881 const_tree type, bool named ATTRIBUTE_UNUSED)
6883 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6885 /* See Windows x64 Software Convention. */
6886 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6888 int msize = (int) GET_MODE_SIZE (mode);
6891 /* Arrays are passed by reference. */
6892 if (TREE_CODE (type) == ARRAY_TYPE)
6895 if (AGGREGATE_TYPE_P (type))
6897 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
6898 are passed by reference. */
6899 msize = int_size_in_bytes (type);
6903 /* __m128 is passed by reference. */
6905 case 1: case 2: case 4: case 8:
6911 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
6917 /* Return true when TYPE should be 128bit aligned for 32bit argument
6918 passing ABI. XXX: This function is obsolete and is only used for
6919 checking psABI compatibility with previous versions of GCC. */
6922 ix86_compat_aligned_value_p (const_tree type)
6924 enum machine_mode mode = TYPE_MODE (type);
6925 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
6929 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
6931 if (TYPE_ALIGN (type) < 128)
6934 if (AGGREGATE_TYPE_P (type))
6936 /* Walk the aggregates recursively. */
6937 switch (TREE_CODE (type))
6941 case QUAL_UNION_TYPE:
6945 /* Walk all the structure fields. */
6946 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6948 if (TREE_CODE (field) == FIELD_DECL
6949 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
6956 /* Just for use if some languages pass arrays by value. */
6957 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
6968 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
6969 XXX: This function is obsolete and is only used for checking psABI
6970 compatibility with previous versions of GCC. */
6973 ix86_compat_function_arg_boundary (enum machine_mode mode,
6974 const_tree type, unsigned int align)
6976 /* In 32bit, only _Decimal128 and __float128 are aligned to their
6977 natural boundaries. */
6978 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
6980 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
6981 make an exception for SSE modes since these require 128bit alignment.
6984 The handling here differs from field_alignment. ICC aligns MMX
6985 arguments to 4 byte boundaries, while structure fields are aligned
6986 to 8 byte boundaries. */
6989 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
6990 align = PARM_BOUNDARY;
6994 if (!ix86_compat_aligned_value_p (type))
6995 align = PARM_BOUNDARY;
6998 if (align > BIGGEST_ALIGNMENT)
6999 align = BIGGEST_ALIGNMENT;
7003 /* Return true when TYPE should be 128bit aligned for the 32bit argument passing ABI. */
7007 ix86_contains_aligned_value_p (const_tree type)
7009 enum machine_mode mode = TYPE_MODE (type);
7011 if (mode == XFmode || mode == XCmode)
7014 if (TYPE_ALIGN (type) < 128)
7017 if (AGGREGATE_TYPE_P (type))
7019 /* Walk the aggregates recursively. */
7020 switch (TREE_CODE (type))
7024 case QUAL_UNION_TYPE:
7028 /* Walk all the structure fields. */
7029 for (field = TYPE_FIELDS (type);
7031 field = DECL_CHAIN (field))
7033 if (TREE_CODE (field) == FIELD_DECL
7034 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
7041 /* Just for use if some languages pass arrays by value. */
7042 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
7051 return TYPE_ALIGN (type) >= 128;
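/* For example (illustrative): on -m32 an aggregate that embeds a
   128-bit aligned member type propagates that alignment to the
   argument slot:

     typedef float v4sf __attribute__ ((vector_size (16)));
     struct s { v4sf v; };

   ix86_contains_aligned_value_p returns true for struct s, so
   ix86_function_arg_boundary below keeps the 128-bit boundary instead
   of dropping to PARM_BOUNDARY.  */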
7056 /* Gives the alignment boundary, in bits, of an argument with the
7057 specified mode and type. */
7060 ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
7065 /* Since the main variant type is used for the call, convert
7066 the type to its main variant. */
7067 type = TYPE_MAIN_VARIANT (type);
7068 align = TYPE_ALIGN (type);
7071 align = GET_MODE_ALIGNMENT (mode);
7072 if (align < PARM_BOUNDARY)
7073 align = PARM_BOUNDARY;
7077 unsigned int saved_align = align;
7081 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
7084 if (mode == XFmode || mode == XCmode)
7085 align = PARM_BOUNDARY;
7087 else if (!ix86_contains_aligned_value_p (type))
7088 align = PARM_BOUNDARY;
7091 align = PARM_BOUNDARY;
7096 && align != ix86_compat_function_arg_boundary (mode, type,
7100 inform (input_location,
7101 "The ABI for passing parameters with %d-byte"
7102 " alignment has changed in GCC 4.6",
7103 align / BITS_PER_UNIT);
7110 /* Return true if N is a possible register number for a function value. */
7113 ix86_function_value_regno_p (const unsigned int regno)
7120 case FIRST_FLOAT_REG:
7121 /* TODO: The function should depend on current function ABI but
7122 builtins.c would need updating then. Therefore we use the default ABI. */
7124 if (TARGET_64BIT && ix86_abi == MS_ABI)
7126 return TARGET_FLOAT_RETURNS_IN_80387;
7132 if (TARGET_MACHO || TARGET_64BIT)
7140 /* Define how to find the value returned by a function.
7141 VALTYPE is the data type of the value (as a tree).
7142 If the precise function being called is known, FUNC is its FUNCTION_DECL;
7143 otherwise, FUNC is 0. */
7146 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
7147 const_tree fntype, const_tree fn)
7151 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
7152 we normally prevent this case when mmx is not available. However
7153 some ABIs may require the result to be returned like DImode. */
7154 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7155 regno = FIRST_MMX_REG;
7157 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
7158 we prevent this case when sse is not available. However some ABIs
7159 may require the result to be returned like integer TImode. */
7160 else if (mode == TImode
7161 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7162 regno = FIRST_SSE_REG;
7164 /* 32-byte vector modes in %ymm0. */
7165 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
7166 regno = FIRST_SSE_REG;
7168 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
7169 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
7170 regno = FIRST_FLOAT_REG;
7172 /* Most things go in %eax. */
7175 /* Override FP return register with %xmm0 for local functions when
7176 SSE math is enabled or for functions with sseregparm attribute. */
7177 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
7179 int sse_level = ix86_function_sseregparm (fntype, fn, false);
7180 if ((sse_level >= 1 && mode == SFmode)
7181 || (sse_level == 2 && mode == DFmode))
7182 regno = FIRST_SSE_REG;
7185 /* OImode shouldn't be used directly. */
7186 gcc_assert (mode != OImode);
7188 return gen_rtx_REG (orig_mode, regno);
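/* Illustrative sketch (guarded out): with -m32 and the default options,
   the choices above give each of these return values its own register.  */
#if 0
float ret_f (void) { return 1.0f; }	/* %st(0): X87_FLOAT_MODE_P  */
long long ret_ll (void) { return 1; }	/* %edx:%eax pair via AX_REG */
typedef int v4si __attribute__ ((vector_size (16)));
v4si ret_v (void) { return (v4si) { 1, 2, 3, 4 }; }	/* %xmm0 */
#endif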
7192 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
7197 /* Handle libcalls, which don't provide a type node. */
7198 if (valtype == NULL)
7212 regno = FIRST_SSE_REG;
7216 regno = FIRST_FLOAT_REG;
7224 return gen_rtx_REG (mode, regno);
7226 else if (POINTER_TYPE_P (valtype))
7228 /* Pointers are always returned in Pmode. */
7232 ret = construct_container (mode, orig_mode, valtype, 1,
7233 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
7234 x86_64_int_return_registers, 0);
7236 /* For zero sized structures, construct_container returns NULL, but we
7237 need to keep the rest of the compiler happy by returning a meaningful value. */
7239 ret = gen_rtx_REG (orig_mode, AX_REG);
7245 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
7247 unsigned int regno = AX_REG;
7251 switch (GET_MODE_SIZE (mode))
7254 if((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7255 && !COMPLEX_MODE_P (mode))
7256 regno = FIRST_SSE_REG;
7260 if (mode == SFmode || mode == DFmode)
7261 regno = FIRST_SSE_REG;
7267 return gen_rtx_REG (orig_mode, regno);
7271 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
7272 enum machine_mode orig_mode, enum machine_mode mode)
7274 const_tree fn, fntype;
7277 if (fntype_or_decl && DECL_P (fntype_or_decl))
7278 fn = fntype_or_decl;
7279 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
7281 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
7282 return function_value_ms_64 (orig_mode, mode);
7283 else if (TARGET_64BIT)
7284 return function_value_64 (orig_mode, mode, valtype);
7286 return function_value_32 (orig_mode, mode, fntype, fn);
7290 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
7291 bool outgoing ATTRIBUTE_UNUSED)
7293 enum machine_mode mode, orig_mode;
7295 orig_mode = TYPE_MODE (valtype);
7296 mode = type_natural_mode (valtype, NULL);
7297 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
7300 /* Pointer function arguments and return values are promoted to Pmode. */
7302 static enum machine_mode
7303 ix86_promote_function_mode (const_tree type, enum machine_mode mode,
7304 int *punsignedp, const_tree fntype,
7307 if (type != NULL_TREE && POINTER_TYPE_P (type))
7309 *punsignedp = POINTERS_EXTEND_UNSIGNED;
7312 return default_promote_function_mode (type, mode, punsignedp, fntype,
7317 ix86_libcall_value (enum machine_mode mode)
7319 return ix86_function_value_1 (NULL, NULL, mode, mode);
7322 /* Return true iff type is returned in memory. */
7324 static bool ATTRIBUTE_UNUSED
7325 return_in_memory_32 (const_tree type, enum machine_mode mode)
7329 if (mode == BLKmode)
7332 size = int_size_in_bytes (type);
7334 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
7337 if (VECTOR_MODE_P (mode) || mode == TImode)
7339 /* User-created vectors small enough to fit in EAX. */
7343 /* MMX/3dNow values are returned in MM0,
7344 except when it doesn't exist or the ABI prescribes otherwise. */
7346 return !TARGET_MMX || TARGET_VECT8_RETURNS;
7348 /* SSE values are returned in XMM0, except when it doesn't exist. */
7352 /* AVX values are returned in YMM0, except when it doesn't exist. */
7363 /* OImode shouldn't be used directly. */
7364 gcc_assert (mode != OImode);
7369 static bool ATTRIBUTE_UNUSED
7370 return_in_memory_64 (const_tree type, enum machine_mode mode)
7372 int needed_intregs, needed_sseregs;
7373 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
7376 static bool ATTRIBUTE_UNUSED
7377 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
7379 HOST_WIDE_INT size = int_size_in_bytes (type);
7381 /* __m128 is returned in xmm0. */
7382 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7383 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
7386 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes. */
7387 return size != 1 && size != 2 && size != 4 && size != 8;
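/* For example (illustrative), under the MS 64-bit ABI:

     struct ok  { int a, b; };	-- 8 bytes, comes back in RAX
     struct odd { char c[3]; };	-- 3 bytes, returned in memory via a
				   hidden pointer

   since only sizes 1, 2, 4 and 8, plus 16-byte __m128-style vector
   modes, are returned in registers.  */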
7391 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7393 #ifdef SUBTARGET_RETURN_IN_MEMORY
7394 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
7396 const enum machine_mode mode = type_natural_mode (type, NULL);
7400 if (ix86_function_type_abi (fntype) == MS_ABI)
7401 return return_in_memory_ms_64 (type, mode);
7403 return return_in_memory_64 (type, mode);
7406 return return_in_memory_32 (type, mode);
7410 /* When returning SSE vector types, we have a choice of either
7411 (1) being abi incompatible with a -march switch, or
7412 (2) generating an error.
7413 Given no good solution, I think the safest thing is one warning.
7414 The user won't be able to use -Werror, but....
7416 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
7417 called in response to actually generating a caller or callee that
7418 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
7419 via aggregate_value_p for general type probing from tree-ssa. */
7422 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
7424 static bool warnedsse, warnedmmx;
7426 if (!TARGET_64BIT && type)
7428 /* Look at the return type of the function, not the function type. */
7429 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
7431 if (!TARGET_SSE && !warnedsse)
7434 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7437 warning (0, "SSE vector return without SSE enabled "
7442 if (!TARGET_MMX && !warnedmmx)
7444 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7447 warning (0, "MMX vector return without MMX enabled "
7457 /* Create the va_list data type. */
7459 /* Returns the calling-convention-specific va_list data type.
7460 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
7463 ix86_build_builtin_va_list_abi (enum calling_abi abi)
7465 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
7467 /* For i386 we use a plain pointer to the argument area. */
7468 if (!TARGET_64BIT || abi == MS_ABI)
7469 return build_pointer_type (char_type_node);
7471 record = lang_hooks.types.make_type (RECORD_TYPE);
7472 type_decl = build_decl (BUILTINS_LOCATION,
7473 TYPE_DECL, get_identifier ("__va_list_tag"), record);
7475 f_gpr = build_decl (BUILTINS_LOCATION,
7476 FIELD_DECL, get_identifier ("gp_offset"),
7477 unsigned_type_node);
7478 f_fpr = build_decl (BUILTINS_LOCATION,
7479 FIELD_DECL, get_identifier ("fp_offset"),
7480 unsigned_type_node);
7481 f_ovf = build_decl (BUILTINS_LOCATION,
7482 FIELD_DECL, get_identifier ("overflow_arg_area"),
7484 f_sav = build_decl (BUILTINS_LOCATION,
7485 FIELD_DECL, get_identifier ("reg_save_area"),
7488 va_list_gpr_counter_field = f_gpr;
7489 va_list_fpr_counter_field = f_fpr;
7491 DECL_FIELD_CONTEXT (f_gpr) = record;
7492 DECL_FIELD_CONTEXT (f_fpr) = record;
7493 DECL_FIELD_CONTEXT (f_ovf) = record;
7494 DECL_FIELD_CONTEXT (f_sav) = record;
7496 TYPE_STUB_DECL (record) = type_decl;
7497 TYPE_NAME (record) = type_decl;
7498 TYPE_FIELDS (record) = f_gpr;
7499 DECL_CHAIN (f_gpr) = f_fpr;
7500 DECL_CHAIN (f_fpr) = f_ovf;
7501 DECL_CHAIN (f_ovf) = f_sav;
7503 layout_type (record);
7505 /* The correct type is an array type of one element. */
7506 return build_array_type (record, build_index_type (size_zero_node));
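/* The record built above matches the x86-64 psABI declaration (shown
   for illustration only; the compiler never parses this form):

     typedef struct __va_list_tag
     {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } va_list[1];
*/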
7509 /* Setup the builtin va_list data type and for 64-bit the additional
7510 calling convention specific va_list data types. */
7513 ix86_build_builtin_va_list (void)
7515 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
7517 /* Initialize abi specific va_list builtin types. */
7521 if (ix86_abi == MS_ABI)
7523 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
7524 if (TREE_CODE (t) != RECORD_TYPE)
7525 t = build_variant_type_copy (t);
7526 sysv_va_list_type_node = t;
7531 if (TREE_CODE (t) != RECORD_TYPE)
7532 t = build_variant_type_copy (t);
7533 sysv_va_list_type_node = t;
7535 if (ix86_abi != MS_ABI)
7537 t = ix86_build_builtin_va_list_abi (MS_ABI);
7538 if (TREE_CODE (t) != RECORD_TYPE)
7539 t = build_variant_type_copy (t);
7540 ms_va_list_type_node = t;
7545 if (TREE_CODE (t) != RECORD_TYPE)
7546 t = build_variant_type_copy (t);
7547 ms_va_list_type_node = t;
7554 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
7557 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
7563 /* GPR size of varargs save area. */
7564 if (cfun->va_list_gpr_size)
7565 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
7567 ix86_varargs_gpr_size = 0;
7569 /* FPR size of varargs save area. We don't need it if we don't pass
7570 anything in SSE registers. */
7571 if (TARGET_SSE && cfun->va_list_fpr_size)
7572 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
7574 ix86_varargs_fpr_size = 0;
7576 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
7579 save_area = frame_pointer_rtx;
7580 set = get_varargs_alias_set ();
7582 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
7583 if (max > X86_64_REGPARM_MAX)
7584 max = X86_64_REGPARM_MAX;
7586 for (i = cum->regno; i < max; i++)
7588 mem = gen_rtx_MEM (Pmode,
7589 plus_constant (save_area, i * UNITS_PER_WORD));
7590 MEM_NOTRAP_P (mem) = 1;
7591 set_mem_alias_set (mem, set);
7592 emit_move_insn (mem, gen_rtx_REG (Pmode,
7593 x86_64_int_parameter_registers[i]));
7596 if (ix86_varargs_fpr_size)
7598 enum machine_mode smode;
7601 /* Now emit code to save SSE registers. The AX parameter contains the number
7602 of SSE parameter registers used to call this function, though all we
7603 actually check here is the zero/non-zero status. */
7605 label = gen_label_rtx ();
7606 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
7607 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
7610 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
7611 we used movdqa (i.e. TImode) instead? Perhaps even better would
7612 be if we could determine the real mode of the data, via a hook
7613 into pass_stdarg. Ignore all that for now. */
7615 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
7616 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
7618 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
7619 if (max > X86_64_SSE_REGPARM_MAX)
7620 max = X86_64_SSE_REGPARM_MAX;
7622 for (i = cum->sse_regno; i < max; ++i)
7624 mem = plus_constant (save_area, i * 16 + ix86_varargs_gpr_size);
7625 mem = gen_rtx_MEM (smode, mem);
7626 MEM_NOTRAP_P (mem) = 1;
7627 set_mem_alias_set (mem, set);
7628 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
7630 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
7638 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
7640 alias_set_type set = get_varargs_alias_set ();
7643 /* Reset to zero, as there might be a sysv vaarg used before. */
7645 ix86_varargs_gpr_size = 0;
7646 ix86_varargs_fpr_size = 0;
7648 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
7652 mem = gen_rtx_MEM (Pmode,
7653 plus_constant (virtual_incoming_args_rtx,
7654 i * UNITS_PER_WORD));
7655 MEM_NOTRAP_P (mem) = 1;
7656 set_mem_alias_set (mem, set);
7658 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
7659 emit_move_insn (mem, reg);
7664 ix86_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
7665 tree type, int *pretend_size ATTRIBUTE_UNUSED,
7668 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7669 CUMULATIVE_ARGS next_cum;
7672 /* This argument doesn't appear to be used anymore. Which is good,
7673 because the old code here didn't suppress rtl generation. */
7674 gcc_assert (!no_rtl);
7679 fntype = TREE_TYPE (current_function_decl);
7681 /* For varargs, we do not want to skip the dummy va_dcl argument.
7682 For stdargs, we do want to skip the last named argument. */
7684 if (stdarg_p (fntype))
7685 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
7688 if (cum->call_abi == MS_ABI)
7689 setup_incoming_varargs_ms_64 (&next_cum);
7691 setup_incoming_varargs_64 (&next_cum);
7694 /* Checks if TYPE is of kind va_list char *. */
7697 is_va_list_char_pointer (tree type)
7701 /* For 32-bit it is always true. */
7704 canonic = ix86_canonical_va_list_type (type);
7705 return (canonic == ms_va_list_type_node
7706 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
7709 /* Implement va_start. */
7712 ix86_va_start (tree valist, rtx nextarg)
7714 HOST_WIDE_INT words, n_gpr, n_fpr;
7715 tree f_gpr, f_fpr, f_ovf, f_sav;
7716 tree gpr, fpr, ovf, sav, t;
7720 if (flag_split_stack
7721 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7723 unsigned int scratch_regno;
7725 /* When we are splitting the stack, we can't refer to the stack
7726 arguments using internal_arg_pointer, because they may be on
7727 the old stack. The split stack prologue will arrange to
7728 leave a pointer to the old stack arguments in a scratch
7729 register, which we here copy to a pseudo-register. The split
7730 stack prologue can't set the pseudo-register directly because
7731 it (the prologue) runs before any registers have been saved. */
7733 scratch_regno = split_stack_prologue_scratch_regno ();
7734 if (scratch_regno != INVALID_REGNUM)
7738 reg = gen_reg_rtx (Pmode);
7739 cfun->machine->split_stack_varargs_pointer = reg;
7742 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
7746 push_topmost_sequence ();
7747 emit_insn_after (seq, entry_of_function ());
7748 pop_topmost_sequence ();
7752 /* Only the 64-bit target needs something special. */
7753 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7755 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7756 std_expand_builtin_va_start (valist, nextarg);
7761 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
7762 next = expand_binop (ptr_mode, add_optab,
7763 cfun->machine->split_stack_varargs_pointer,
7764 crtl->args.arg_offset_rtx,
7765 NULL_RTX, 0, OPTAB_LIB_WIDEN);
7766 convert_move (va_r, next, 0);
7771 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7772 f_fpr = DECL_CHAIN (f_gpr);
7773 f_ovf = DECL_CHAIN (f_fpr);
7774 f_sav = DECL_CHAIN (f_ovf);
7776 valist = build_simple_mem_ref (valist);
7777 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
7778 /* The following should be folded into the MEM_REF offset. */
7779 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
7781 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
7783 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
7785 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
7788 /* Count number of gp and fp argument registers used. */
7789 words = crtl->args.info.words;
7790 n_gpr = crtl->args.info.regno;
7791 n_fpr = crtl->args.info.sse_regno;
7793 if (cfun->va_list_gpr_size)
7795 type = TREE_TYPE (gpr);
7796 t = build2 (MODIFY_EXPR, type,
7797 gpr, build_int_cst (type, n_gpr * 8));
7798 TREE_SIDE_EFFECTS (t) = 1;
7799 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7802 if (TARGET_SSE && cfun->va_list_fpr_size)
7804 type = TREE_TYPE (fpr);
7805 t = build2 (MODIFY_EXPR, type, fpr,
7806 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
7807 TREE_SIDE_EFFECTS (t) = 1;
7808 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7811 /* Find the overflow area. */
7812 type = TREE_TYPE (ovf);
7813 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7814 ovf_rtx = crtl->args.internal_arg_pointer;
7816 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
7817 t = make_tree (type, ovf_rtx);
7819 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
7820 t = build2 (MODIFY_EXPR, type, ovf, t);
7821 TREE_SIDE_EFFECTS (t) = 1;
7822 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7824 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
7826 /* Find the register save area.
7827 The function prologue saves it right above the stack frame. */
7828 type = TREE_TYPE (sav);
7829 t = make_tree (type, frame_pointer_rtx);
7830 if (!ix86_varargs_gpr_size)
7831 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
7832 t = build2 (MODIFY_EXPR, type, sav, t);
7833 TREE_SIDE_EFFECTS (t) = 1;
7834 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
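/* Layout sketch of the register save area used above (SYSV ABI,
   illustrative): the GPR images occupy the first 6 * 8 = 48 bytes and
   the SSE images the next 8 * 16 = 128 bytes, which is why va_start
   seeds gp_offset with n_gpr * 8 and fp_offset with
   8 * X86_64_REGPARM_MAX + n_fpr * 16.  */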
7838 /* Implement va_arg. */
7841 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7844 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
7845 tree f_gpr, f_fpr, f_ovf, f_sav;
7846 tree gpr, fpr, ovf, sav, t;
7848 tree lab_false, lab_over = NULL_TREE;
7853 enum machine_mode nat_mode;
7854 unsigned int arg_boundary;
7856 /* Only the 64-bit target needs something special. */
7857 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7858 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
7860 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7861 f_fpr = DECL_CHAIN (f_gpr);
7862 f_ovf = DECL_CHAIN (f_fpr);
7863 f_sav = DECL_CHAIN (f_ovf);
7865 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
7866 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
7867 valist = build_va_arg_indirect_ref (valist);
7868 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
7869 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
7870 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
7872 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
7874 type = build_pointer_type (type);
7875 size = int_size_in_bytes (type);
7876 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7878 nat_mode = type_natural_mode (type, NULL);
7887 /* Unnamed 256bit vector mode parameters are passed on stack. */
7888 if (!TARGET_64BIT_MS_ABI)
7895 container = construct_container (nat_mode, TYPE_MODE (type),
7896 type, 0, X86_64_REGPARM_MAX,
7897 X86_64_SSE_REGPARM_MAX, intreg,
7902 /* Pull the value out of the saved registers. */
7904 addr = create_tmp_var (ptr_type_node, "addr");
7908 int needed_intregs, needed_sseregs;
7910 tree int_addr, sse_addr;
7912 lab_false = create_artificial_label (UNKNOWN_LOCATION);
7913 lab_over = create_artificial_label (UNKNOWN_LOCATION);
7915 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
7917 need_temp = (!REG_P (container)
7918 && ((needed_intregs && TYPE_ALIGN (type) > 64)
7919 || TYPE_ALIGN (type) > 128));
7921 /* In case we are passing a structure, verify that it is a consecutive block
7922 in the register save area. If not, we need to do moves. */
7923 if (!need_temp && !REG_P (container))
7925 /* Verify that all registers are strictly consecutive. */
7926 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
7930 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7932 rtx slot = XVECEXP (container, 0, i);
7933 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
7934 || INTVAL (XEXP (slot, 1)) != i * 16)
7942 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7944 rtx slot = XVECEXP (container, 0, i);
7945 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
7946 || INTVAL (XEXP (slot, 1)) != i * 8)
7958 int_addr = create_tmp_var (ptr_type_node, "int_addr");
7959 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
7962 /* First ensure that we fit completely in registers. */
7965 t = build_int_cst (TREE_TYPE (gpr),
7966 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
7967 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
7968 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7969 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7970 gimplify_and_add (t, pre_p);
7974 t = build_int_cst (TREE_TYPE (fpr),
7975 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
7976 + X86_64_REGPARM_MAX * 8);
7977 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
7978 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7979 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7980 gimplify_and_add (t, pre_p);
7983 /* Compute index to start of area used for integer regs. */
7986 /* int_addr = gpr + sav; */
7987 t = fold_build_pointer_plus (sav, gpr);
7988 gimplify_assign (int_addr, t, pre_p);
7992 /* sse_addr = fpr + sav; */
7993 t = fold_build_pointer_plus (sav, fpr);
7994 gimplify_assign (sse_addr, t, pre_p);
7998 int i, prev_size = 0;
7999 tree temp = create_tmp_var (type, "va_arg_tmp");
8002 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
8003 gimplify_assign (addr, t, pre_p);
8005 for (i = 0; i < XVECLEN (container, 0); i++)
8007 rtx slot = XVECEXP (container, 0, i);
8008 rtx reg = XEXP (slot, 0);
8009 enum machine_mode mode = GET_MODE (reg);
8015 tree dest_addr, dest;
8016 int cur_size = GET_MODE_SIZE (mode);
8018 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
8019 prev_size = INTVAL (XEXP (slot, 1));
8020 if (prev_size + cur_size > size)
8022 cur_size = size - prev_size;
8023 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
8024 if (mode == BLKmode)
8027 piece_type = lang_hooks.types.type_for_mode (mode, 1);
8028 if (mode == GET_MODE (reg))
8029 addr_type = build_pointer_type (piece_type);
8031 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8033 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8036 if (SSE_REGNO_P (REGNO (reg)))
8038 src_addr = sse_addr;
8039 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
8043 src_addr = int_addr;
8044 src_offset = REGNO (reg) * 8;
8046 src_addr = fold_convert (addr_type, src_addr);
8047 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
8049 dest_addr = fold_convert (daddr_type, addr);
8050 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
8051 if (cur_size == GET_MODE_SIZE (mode))
8053 src = build_va_arg_indirect_ref (src_addr);
8054 dest = build_va_arg_indirect_ref (dest_addr);
8056 gimplify_assign (dest, src, pre_p);
8061 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
8062 3, dest_addr, src_addr,
8063 size_int (cur_size));
8064 gimplify_and_add (copy, pre_p);
8066 prev_size += cur_size;
8072 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
8073 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
8074 gimplify_assign (gpr, t, pre_p);
8079 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
8080 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
8081 gimplify_assign (fpr, t, pre_p);
8084 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
8086 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
8089 /* ... otherwise out of the overflow area. */
8091 /* When we align a parameter on the stack for the caller, if its
8092 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
8093 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We match the callee
8094 here with the caller. */
8095 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
8096 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
8097 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
8099 /* Care for on-stack alignment if needed. */
8100 if (arg_boundary <= 64 || size == 0)
8104 HOST_WIDE_INT align = arg_boundary / 8;
8105 t = fold_build_pointer_plus_hwi (ovf, align - 1);
8106 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
8107 build_int_cst (TREE_TYPE (t), -align));
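/* The two statements above compute t = (ovf + align - 1) & -align,
   the usual idiom for rounding OVF up to the next multiple of ALIGN;
   e.g. with align == 16, an offset of 0x1c is rounded up to 0x20. */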
8110 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
8111 gimplify_assign (addr, t, pre_p);
8113 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
8114 gimplify_assign (unshare_expr (ovf), t, pre_p);
8117 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
8119 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
8120 addr = fold_convert (ptrtype, addr);
8123 addr = build_va_arg_indirect_ref (addr);
8124 return build_va_arg_indirect_ref (addr);
8127 /* Return true if OPNUM's MEM should be matched
8128 in movabs* patterns. */
8131 ix86_check_movabs (rtx insn, int opnum)
8135 set = PATTERN (insn);
8136 if (GET_CODE (set) == PARALLEL)
8137 set = XVECEXP (set, 0, 0);
8138 gcc_assert (GET_CODE (set) == SET);
8139 mem = XEXP (set, opnum);
8140 while (GET_CODE (mem) == SUBREG)
8141 mem = SUBREG_REG (mem);
8142 gcc_assert (MEM_P (mem));
8143 return volatile_ok || !MEM_VOLATILE_P (mem);
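/* Note: the movabs forms encode a full 64-bit absolute address, so the
   only thing checked here is that the MEM may actually be touched,
   i.e. it is not volatile unless volatile operands are allowed. */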
8146 /* Initialize the table of extra 80387 mathematical constants. */
8149 init_ext_80387_constants (void)
8151 static const char * cst[5] =
8153 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
8154 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
8155 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
8156 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
8157 "3.1415926535897932385128089594061862044", /* 4: fldpi */
8161 for (i = 0; i < 5; i++)
8163 real_from_string (&ext_80387_constants_table[i], cst[i]);
8164 /* Ensure each constant is rounded to XFmode precision. */
8165 real_convert (&ext_80387_constants_table[i],
8166 XFmode, &ext_80387_constants_table[i]);
8169 ext_80387_constants_init = 1;
8172 /* Return non-zero if the constant is something that
8173 can be loaded with a special instruction. */
8176 standard_80387_constant_p (rtx x)
8178 enum machine_mode mode = GET_MODE (x);
8182 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
8185 if (x == CONST0_RTX (mode))
8187 if (x == CONST1_RTX (mode))
8190 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8192 /* For XFmode constants, try to find a special 80387 instruction when
8193 optimizing for size or on those CPUs that benefit from them. */
8195 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
8199 if (! ext_80387_constants_init)
8200 init_ext_80387_constants ();
8202 for (i = 0; i < 5; i++)
8203 if (real_identical (&r, &ext_80387_constants_table[i]))
8207 /* A load of the constant -0.0 or -1.0 will be split into an
8208 fldz;fchs or fld1;fchs sequence. */
8209 if (real_isnegzero (&r))
8211 if (real_identical (&r, &dconstm1))
8217 /* Return the opcode of the special instruction to be used to load
8218 the constant X. */
8221 standard_80387_constant_opcode (rtx x)
8223 switch (standard_80387_constant_p (x))
8247 /* Return the CONST_DOUBLE representing the 80387 constant that is
8248 loaded by the specified special instruction. The argument IDX
8249 matches the return value from standard_80387_constant_p. */
8252 standard_80387_constant_rtx (int idx)
8256 if (! ext_80387_constants_init)
8257 init_ext_80387_constants ();
8273 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
8277 /* Return 1 if X is all 0s and 2 if X is all 1s
8278 in a supported SSE/AVX vector mode. */
8281 standard_sse_constant_p (rtx x)
8283 enum machine_mode mode = GET_MODE (x);
8285 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
8287 if (vector_all_ones_operand (x, mode))
8309 /* Return the opcode of the special instruction to be used to load
8310 the constant X. */
8313 standard_sse_constant_opcode (rtx insn, rtx x)
8315 switch (standard_sse_constant_p (x))
8318 switch (get_attr_mode (insn))
8321 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8322 return "%vpxor\t%0, %d0";
8324 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8325 return "%vxorpd\t%0, %d0";
8327 return "%vxorps\t%0, %d0";
8330 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8331 return "vpxor\t%x0, %x0, %x0";
8333 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8334 return "vxorpd\t%x0, %x0, %x0";
8336 return "vxorps\t%x0, %x0, %x0";
8344 return "vpcmpeqd\t%0, %0, %0";
8346 return "pcmpeqd\t%0, %0";
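/* Note: XORing a register with itself yields all-zero bits, and
   PCMPEQD of a register with itself yields all-one bits, so neither
   constant requires a load from memory. */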
8354 /* Return true if OP contains a symbol reference. */
8357 symbolic_reference_mentioned_p (rtx op)
8362 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
8365 fmt = GET_RTX_FORMAT (GET_CODE (op));
8366 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
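/* In the rtx format string, 'E' marks a vector of sub-expressions and
   'e' a single sub-expression; recurse into both kinds. */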
8372 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
8373 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
8377 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
8384 /* Return true if it is appropriate to emit `ret' instructions in the
8385 body of a function. Do this only if the epilogue is simple, needing a
8386 couple of insns. Prior to reloading, we can't tell how many registers
8387 must be saved, so return false then. Return false if there is no frame
8388 marker to de-allocate. */
8391 ix86_can_use_return_insn_p (void)
8393 struct ix86_frame frame;
8395 if (! reload_completed || frame_pointer_needed)
8398 /* Don't allow more than 32k pop, since that's all we can do
8399 with one instruction. */
8400 if (crtl->args.pops_args && crtl->args.size >= 32768)
8403 ix86_compute_frame_layout (&frame);
8404 return (frame.stack_pointer_offset == UNITS_PER_WORD
8405 && (frame.nregs + frame.nsseregs) == 0);
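/* That is, a bare "ret" is used only when the epilogue would do
   nothing but pop the return address: no saved registers to restore
   and no local stack allocation to undo. */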
8408 /* Value should be nonzero if functions must have frame pointers.
8409 Zero means the frame pointer need not be set up (and parms may
8410 be accessed via the stack pointer) in functions that seem suitable. */
8413 ix86_frame_pointer_required (void)
8415 /* If we accessed previous frames, then the generated code expects
8416 to be able to access the saved ebp value in our frame. */
8417 if (cfun->machine->accesses_prev_frame)
8420 /* Several x86 OSes need a frame pointer for other reasons,
8421 usually pertaining to setjmp. */
8422 if (SUBTARGET_FRAME_POINTER_REQUIRED)
8425 /* For older 32-bit runtimes setjmp requires valid frame-pointer. */
8426 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
8429 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
8430 turns off the frame pointer by default. Turn it back on now if
8431 we've not got a leaf function. */
8432 if (TARGET_OMIT_LEAF_FRAME_POINTER
8433 && (!current_function_is_leaf
8434 || ix86_current_function_calls_tls_descriptor))
8437 if (crtl->profile && !flag_fentry)
8443 /* Record that the current function accesses previous call frames. */
8446 ix86_setup_frame_addresses (void)
8448 cfun->machine->accesses_prev_frame = 1;
8451 #ifndef USE_HIDDEN_LINKONCE
8452 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
8453 # define USE_HIDDEN_LINKONCE 1
8455 # define USE_HIDDEN_LINKONCE 0
8459 static int pic_labels_used;
8461 /* Fills in the label name that should be used for a pc thunk for
8462 the given register. */
8465 get_pc_thunk_name (char name[32], unsigned int regno)
8467 gcc_assert (!TARGET_64BIT);
8469 if (USE_HIDDEN_LINKONCE)
8470 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
8472 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
8476 /* This function generates code for -fpic that loads %ebx with
8477 the return address of the caller and then returns. */
8480 ix86_code_end (void)
8485 for (regno = AX_REG; regno <= SP_REG; regno++)
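/* output_set_got records, in pic_labels_used, each register for which
   a __x86.get_pc_thunk.* helper was referenced; emit the body of one
   thunk per such register. */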
8490 if (!(pic_labels_used & (1 << regno)))
8493 get_pc_thunk_name (name, regno);
8495 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
8496 get_identifier (name),
8497 build_function_type_list (void_type_node, NULL_TREE));
8498 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
8499 NULL_TREE, void_type_node);
8500 TREE_PUBLIC (decl) = 1;
8501 TREE_STATIC (decl) = 1;
8506 switch_to_section (darwin_sections[text_coal_section]);
8507 fputs ("\t.weak_definition\t", asm_out_file);
8508 assemble_name (asm_out_file, name);
8509 fputs ("\n\t.private_extern\t", asm_out_file);
8510 assemble_name (asm_out_file, name);
8511 putc ('\n', asm_out_file);
8512 ASM_OUTPUT_LABEL (asm_out_file, name);
8513 DECL_WEAK (decl) = 1;
8517 if (USE_HIDDEN_LINKONCE)
8519 DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
8521 targetm.asm_out.unique_section (decl, 0);
8522 switch_to_section (get_named_section (decl, NULL, 0));
8524 targetm.asm_out.globalize_label (asm_out_file, name);
8525 fputs ("\t.hidden\t", asm_out_file);
8526 assemble_name (asm_out_file, name);
8527 putc ('\n', asm_out_file);
8528 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
8532 switch_to_section (text_section);
8533 ASM_OUTPUT_LABEL (asm_out_file, name);
8536 DECL_INITIAL (decl) = make_node (BLOCK);
8537 current_function_decl = decl;
8538 init_function_start (decl);
8539 first_function_block_is_cold = false;
8540 /* Make sure unwind info is emitted for the thunk if needed. */
8541 final_start_function (emit_barrier (), asm_out_file, 1);
8543 /* Pad stack IP move with 4 instructions (two NOPs count
8544 as one instruction). */
8545 if (TARGET_PAD_SHORT_FUNCTION)
8550 fputs ("\tnop\n", asm_out_file);
8553 xops[0] = gen_rtx_REG (Pmode, regno);
8554 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
8555 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
8556 fputs ("\tret\n", asm_out_file);
8557 final_end_function ();
8558 init_insn_lengths ();
8559 free_after_compilation (cfun);
8561 current_function_decl = NULL;
8564 if (flag_split_stack)
8565 file_end_indicate_split_stack ();
8568 /* Emit code for the SET_GOT patterns. */
8571 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
8577 if (TARGET_VXWORKS_RTP && flag_pic)
8579 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
8580 xops[2] = gen_rtx_MEM (Pmode,
8581 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
8582 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
8584 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
8585 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
8586 an unadorned address. */
8587 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
8588 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
8589 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
8593 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
8597 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
8599 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
8602 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8603 is what will be referenced by the Mach-O PIC subsystem. */
8605 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8608 targetm.asm_out.internal_label (asm_out_file, "L",
8609 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
8614 get_pc_thunk_name (name, REGNO (dest));
8615 pic_labels_used |= 1 << REGNO (dest);
8617 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
8618 xops[2] = gen_rtx_MEM (QImode, xops[2]);
8619 output_asm_insn ("call\t%X2", xops);
8620 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8621 is what will be referenced by the Mach-O PIC subsystem. */
8624 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8626 targetm.asm_out.internal_label (asm_out_file, "L",
8627 CODE_LABEL_NUMBER (label));
8632 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
8637 /* Generate a "push" pattern for input ARG. */
8642 struct machine_function *m = cfun->machine;
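/* A push moves the stack pointer down one word; mirror that in the
   frame-state bookkeeping, including the CFA offset while the CFA is
   still being computed from the stack pointer. */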
8644 if (m->fs.cfa_reg == stack_pointer_rtx)
8645 m->fs.cfa_offset += UNITS_PER_WORD;
8646 m->fs.sp_offset += UNITS_PER_WORD;
8648 return gen_rtx_SET (VOIDmode,
8650 gen_rtx_PRE_DEC (Pmode,
8651 stack_pointer_rtx)),
8655 /* Generate a "pop" pattern for input ARG. */
8660 return gen_rtx_SET (VOIDmode,
8663 gen_rtx_POST_INC (Pmode,
8664 stack_pointer_rtx)));
8667 /* Return >= 0 if there is an unused call-clobbered register available
8668 for the entire function. */
8671 ix86_select_alt_pic_regnum (void)
8673 if (current_function_is_leaf
8675 && !ix86_current_function_calls_tls_descriptor)
8678 /* Can't use the same register for both PIC and DRAP. */
8680 drap = REGNO (crtl->drap_reg);
8683 for (i = 2; i >= 0; --i)
8684 if (i != drap && !df_regs_ever_live_p (i))
8688 return INVALID_REGNUM;
8691 /* Return TRUE if we need to save REGNO. */
8694 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
8696 if (pic_offset_table_rtx
8697 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
8698 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
8700 || crtl->calls_eh_return
8701 || crtl->uses_const_pool))
8702 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
8704 if (crtl->calls_eh_return && maybe_eh_return)
8709 unsigned test = EH_RETURN_DATA_REGNO (i);
8710 if (test == INVALID_REGNUM)
8717 if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
8720 return (df_regs_ever_live_p (regno)
8721 && !call_used_regs[regno]
8722 && !fixed_regs[regno]
8723 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
8726 /* Return number of saved general purpose registers. */
8729 ix86_nsaved_regs (void)
8734 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8735 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8740 /* Return number of saved SSE registers. */
8743 ix86_nsaved_sseregs (void)
8748 if (!TARGET_64BIT_MS_ABI)
8750 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8751 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8756 /* Given FROM and TO register numbers, say whether this elimination is
8757 allowed. If stack alignment is needed, we can only replace argument
8758 pointer with hard frame pointer, or replace frame pointer with stack
8759 pointer. Otherwise, frame pointer elimination is automatically
8760 handled and all other eliminations are valid. */
8763 ix86_can_eliminate (const int from, const int to)
8765 if (stack_realign_fp)
8766 return ((from == ARG_POINTER_REGNUM
8767 && to == HARD_FRAME_POINTER_REGNUM)
8768 || (from == FRAME_POINTER_REGNUM
8769 && to == STACK_POINTER_REGNUM));
8771 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
8774 /* Return the offset between two registers, one to be eliminated, and the other
8775 its replacement, at the start of a routine. */
8778 ix86_initial_elimination_offset (int from, int to)
8780 struct ix86_frame frame;
8781 ix86_compute_frame_layout (&frame);
8783 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8784 return frame.hard_frame_pointer_offset;
8785 else if (from == FRAME_POINTER_REGNUM
8786 && to == HARD_FRAME_POINTER_REGNUM)
8787 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
8790 gcc_assert (to == STACK_POINTER_REGNUM);
8792 if (from == ARG_POINTER_REGNUM)
8793 return frame.stack_pointer_offset;
8795 gcc_assert (from == FRAME_POINTER_REGNUM);
8796 return frame.stack_pointer_offset - frame.frame_pointer_offset;
8800 /* In a dynamically-aligned function, we can't know the offset from
8801 stack pointer to frame pointer, so we must ensure that setjmp
8802 eliminates fp against the hard fp (%ebp) rather than trying to
8803 index from %esp up to the top of the frame across a gap that is
8804 of unknown (at compile-time) size. */
8806 ix86_builtin_setjmp_frame_value (void)
8808 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
8811 /* When using -fsplit-stack, the allocation routines set a field in
8812 the TCB to the bottom of the stack plus this much space, measured
8813 in bytes. */
8815 #define SPLIT_STACK_AVAILABLE 256
8817 /* Fill structure ix86_frame describing the frame of the currently compiled function. */
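/* Roughly, the layout computed below runs from the CFA downwards:
   return address, optional pushed static chain, saved hard frame
   pointer, integer register save area, 16-byte-aligned SSE register
   save area, va_arg registers, local variables, then the outgoing
   argument area. */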
8820 ix86_compute_frame_layout (struct ix86_frame *frame)
8822 unsigned int stack_alignment_needed;
8823 HOST_WIDE_INT offset;
8824 unsigned int preferred_alignment;
8825 HOST_WIDE_INT size = get_frame_size ();
8826 HOST_WIDE_INT to_allocate;
8828 frame->nregs = ix86_nsaved_regs ();
8829 frame->nsseregs = ix86_nsaved_sseregs ();
8831 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
8832 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
8834 /* The 64-bit MS ABI seems to require stack alignment to always be 16,
8835 except for function prologues and leaf functions. */
8836 if ((TARGET_64BIT_MS_ABI && preferred_alignment < 16)
8837 && (!current_function_is_leaf || cfun->calls_alloca != 0
8838 || ix86_current_function_calls_tls_descriptor))
8840 preferred_alignment = 16;
8841 stack_alignment_needed = 16;
8842 crtl->preferred_stack_boundary = 128;
8843 crtl->stack_alignment_needed = 128;
8846 gcc_assert (!size || stack_alignment_needed);
8847 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
8848 gcc_assert (preferred_alignment <= stack_alignment_needed);
8850 /* For SEH we have to limit the amount of code movement into the prologue.
8851 At present we do this via a BLOCKAGE, at which point there's very little
8852 scheduling that can be done, which means that there's very little point
8853 in doing anything except PUSHs. */
8855 cfun->machine->use_fast_prologue_epilogue = false;
8857 /* During reload iteration the number of registers saved can change.
8858 Recompute the value as needed. Do not recompute when the number of registers
8859 didn't change, as reload does multiple calls to the function and does not
8860 expect the decision to change within a single iteration. */
8861 else if (!optimize_function_for_size_p (cfun)
8862 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
8864 int count = frame->nregs;
8865 struct cgraph_node *node = cgraph_get_node (current_function_decl);
8867 cfun->machine->use_fast_prologue_epilogue_nregs = count;
8869 /* The fast prologue uses move instead of push to save registers. This
8870 is significantly longer, but also executes faster as modern hardware
8871 can execute the moves in parallel, but can't do that for push/pop.
8873 Be careful about choosing which prologue to emit: when the function
8874 takes many instructions to execute, we may use the slow version, as
8875 well as when the function is known to be outside a hot spot (this is
8876 known with feedback only). Weight the size of the function by the
8877 number of registers to save, as it is cheap to use one or two push
8878 instructions but very slow to use many of them. */
8880 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
8881 if (node->frequency < NODE_FREQUENCY_NORMAL
8882 || (flag_branch_probabilities
8883 && node->frequency < NODE_FREQUENCY_HOT))
8884 cfun->machine->use_fast_prologue_epilogue = false;
8886 cfun->machine->use_fast_prologue_epilogue
8887 = !expensive_function_p (count);
8890 frame->save_regs_using_mov
8891 = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
8892 /* If static stack checking is enabled and done with probes,
8893 the registers need to be saved before allocating the frame. */
8894 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
8896 /* Skip return address. */
8897 offset = UNITS_PER_WORD;
8899 /* Skip pushed static chain. */
8900 if (ix86_static_chain_on_stack)
8901 offset += UNITS_PER_WORD;
8903 /* Skip saved base pointer. */
8904 if (frame_pointer_needed)
8905 offset += UNITS_PER_WORD;
8906 frame->hfp_save_offset = offset;
8908 /* The traditional frame pointer location is at the top of the frame. */
8909 frame->hard_frame_pointer_offset = offset;
8911 /* Register save area */
8912 offset += frame->nregs * UNITS_PER_WORD;
8913 frame->reg_save_offset = offset;
8915 /* Align and set SSE register save area. */
8916 if (frame->nsseregs)
8918 /* The only ABI that has saved SSE registers (Win64) also has a
8919 16-byte aligned default stack, and thus we don't need to be
8920 within the re-aligned local stack frame to save them. */
8921 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
8922 offset = (offset + 16 - 1) & -16;
8923 offset += frame->nsseregs * 16;
8925 frame->sse_reg_save_offset = offset;
8927 /* The re-aligned stack starts here. Values before this point are not
8928 directly comparable with values below this point. In order to make
8929 sure that no value happens to be the same before and after, force
8930 the alignment computation below to add a non-zero value. */
8931 if (stack_realign_fp)
8932 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
8935 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
8936 offset += frame->va_arg_size;
8938 /* Align start of frame for local function. */
8939 if (stack_realign_fp
8940 || offset != frame->sse_reg_save_offset
8942 || !current_function_is_leaf
8943 || cfun->calls_alloca
8944 || ix86_current_function_calls_tls_descriptor)
8945 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
8947 /* Frame pointer points here. */
8948 frame->frame_pointer_offset = offset;
8952 /* Add outgoing arguments area. Can be skipped if we eliminated
8953 all the function calls as dead code.
8954 Skipping is however impossible when the function calls alloca. The alloca
8955 expander assumes that the last crtl->outgoing_args_size bytes
8956 of the stack frame are unused. */
8957 if (ACCUMULATE_OUTGOING_ARGS
8958 && (!current_function_is_leaf || cfun->calls_alloca
8959 || ix86_current_function_calls_tls_descriptor))
8961 offset += crtl->outgoing_args_size;
8962 frame->outgoing_arguments_size = crtl->outgoing_args_size;
8965 frame->outgoing_arguments_size = 0;
8967 /* Align stack boundary. Only needed if we're calling another function
8968 or using alloca. */
8969 if (!current_function_is_leaf || cfun->calls_alloca
8970 || ix86_current_function_calls_tls_descriptor)
8971 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
8973 /* We've reached the end of the stack frame. */
8974 frame->stack_pointer_offset = offset;
8976 /* The size the prologue needs to allocate. */
8977 to_allocate = offset - frame->sse_reg_save_offset;
8979 if ((!to_allocate && frame->nregs <= 1)
8980 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
8981 frame->save_regs_using_mov = false;
8983 if (ix86_using_red_zone ()
8984 && current_function_sp_is_unchanging
8985 && current_function_is_leaf
8986 && !ix86_current_function_calls_tls_descriptor)
8988 frame->red_zone_size = to_allocate;
8989 if (frame->save_regs_using_mov)
8990 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
8991 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
8992 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
8995 frame->red_zone_size = 0;
8996 frame->stack_pointer_offset -= frame->red_zone_size;
8998 /* The SEH frame pointer location is near the bottom of the frame.
8999 This is enforced by the fact that the difference between the
9000 stack pointer and the frame pointer is limited to 240 bytes in
9001 the unwind data structure. */
9006 /* If we can leave the frame pointer where it is, do so. */
9007 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
9008 if (diff > 240 || (diff & 15) != 0)
9010 /* Ideally we'd determine what portion of the local stack frame
9011 (within the constraint of the lowest 240) is most heavily used.
9012 But without that complication, simply bias the frame pointer
9013 by 128 bytes so as to maximize the amount of the local stack
9014 frame that is addressable with 8-bit offsets. */
9015 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
9020 /* This is semi-inlined memory_address_length, but simplified
9021 since we know that we're always dealing with reg+offset, and
9022 to avoid having to create and discard all that rtl. */
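/* The returned length counts the extra bytes needed to encode the
   address: 0 for a bare register, 1 for a disp8, 4 for a disp32,
   plus one for the SIB byte that esp/r12 always require; ebp/r13
   need at least a disp8 even for a zero offset. */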
9025 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
9031 /* EBP and R13 cannot be encoded without an offset. */
9032 len = (regno == BP_REG || regno == R13_REG);
9034 else if (IN_RANGE (offset, -128, 127))
9037 /* ESP and R12 must be encoded with a SIB byte. */
9038 if (regno == SP_REG || regno == R12_REG)
9044 /* Return an RTX that points to CFA_OFFSET within the stack frame.
9045 The valid base registers are taken from CFUN->MACHINE->FS. */
9048 choose_baseaddr (HOST_WIDE_INT cfa_offset)
9050 const struct machine_function *m = cfun->machine;
9051 rtx base_reg = NULL;
9052 HOST_WIDE_INT base_offset = 0;
9054 if (m->use_fast_prologue_epilogue)
9056 /* Choose the base register most likely to allow the most scheduling
9057 opportunities. Generally FP is valid throughout the function,
9058 while DRAP must be reloaded within the epilogue. But choose either
9059 over the SP due to increased encoding size. */
9063 base_reg = hard_frame_pointer_rtx;
9064 base_offset = m->fs.fp_offset - cfa_offset;
9066 else if (m->fs.drap_valid)
9068 base_reg = crtl->drap_reg;
9069 base_offset = 0 - cfa_offset;
9071 else if (m->fs.sp_valid)
9073 base_reg = stack_pointer_rtx;
9074 base_offset = m->fs.sp_offset - cfa_offset;
9079 HOST_WIDE_INT toffset;
9082 /* Choose the base register with the smallest address encoding.
9083 With a tie, choose FP > DRAP > SP. */
9086 base_reg = stack_pointer_rtx;
9087 base_offset = m->fs.sp_offset - cfa_offset;
9088 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
9090 if (m->fs.drap_valid)
9092 toffset = 0 - cfa_offset;
9093 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
9096 base_reg = crtl->drap_reg;
9097 base_offset = toffset;
9103 toffset = m->fs.fp_offset - cfa_offset;
9104 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
9107 base_reg = hard_frame_pointer_rtx;
9108 base_offset = toffset;
9113 gcc_assert (base_reg != NULL);
9115 return plus_constant (base_reg, base_offset);
9118 /* Emit code to save registers in the prologue. */
9121 ix86_emit_save_regs (void)
9126 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
9127 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9129 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
9130 RTX_FRAME_RELATED_P (insn) = 1;
9134 /* Emit a single register save at CFA - CFA_OFFSET. */
9137 ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
9138 HOST_WIDE_INT cfa_offset)
9140 struct machine_function *m = cfun->machine;
9141 rtx reg = gen_rtx_REG (mode, regno);
9142 rtx mem, addr, base, insn;
9144 addr = choose_baseaddr (cfa_offset);
9145 mem = gen_frame_mem (mode, addr);
9147 /* For SSE saves, we need to indicate the 128-bit alignment. */
9148 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
9150 insn = emit_move_insn (mem, reg);
9151 RTX_FRAME_RELATED_P (insn) = 1;
9154 if (GET_CODE (base) == PLUS)
9155 base = XEXP (base, 0);
9156 gcc_checking_assert (REG_P (base));
9158 /* When saving registers into a re-aligned local stack frame, avoid
9159 any tricky guessing by dwarf2out. */
9160 if (m->fs.realigned)
9162 gcc_checking_assert (stack_realign_drap);
9164 if (regno == REGNO (crtl->drap_reg))
9166 /* A bit of a hack. We force the DRAP register to be saved in
9167 the re-aligned stack frame, which provides us with a copy
9168 of the CFA that will last past the prologue. Install it. */
9169 gcc_checking_assert (cfun->machine->fs.fp_valid);
9170 addr = plus_constant (hard_frame_pointer_rtx,
9171 cfun->machine->fs.fp_offset - cfa_offset);
9172 mem = gen_rtx_MEM (mode, addr);
9173 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
9177 /* The frame pointer is a stable reference within the
9178 aligned frame. Use it. */
9179 gcc_checking_assert (cfun->machine->fs.fp_valid);
9180 addr = plus_constant (hard_frame_pointer_rtx,
9181 cfun->machine->fs.fp_offset - cfa_offset);
9182 mem = gen_rtx_MEM (mode, addr);
9183 add_reg_note (insn, REG_CFA_EXPRESSION,
9184 gen_rtx_SET (VOIDmode, mem, reg));
9188 /* The memory may not be relative to the current CFA register,
9189 which means that we may need to generate a new pattern for
9190 use by the unwind info. */
9191 else if (base != m->fs.cfa_reg)
9193 addr = plus_constant (m->fs.cfa_reg, m->fs.cfa_offset - cfa_offset);
9194 mem = gen_rtx_MEM (mode, addr);
9195 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
9199 /* Emit code to save registers using MOV insns.
9200 First register is stored at CFA - CFA_OFFSET. */
9202 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
9206 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9207 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9209 ix86_emit_save_reg_using_mov (Pmode, regno, cfa_offset);
9210 cfa_offset -= UNITS_PER_WORD;
9214 /* Emit code to save SSE registers using MOV insns.
9215 First register is stored at CFA - CFA_OFFSET. */
9217 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
9221 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9222 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9224 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
9229 static GTY(()) rtx queued_cfa_restores;
9231 /* Add a REG_CFA_RESTORE REG note to INSN or queue it until the next stack
9232 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
9233 Don't add the note if the previously saved value will be left untouched
9234 within the stack red zone till return, as unwinders can find the same value
9235 in the register and on the stack. */
9238 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
9240 if (!crtl->shrink_wrapped
9241 && cfa_offset <= cfun->machine->fs.red_zone_offset)
9246 add_reg_note (insn, REG_CFA_RESTORE, reg);
9247 RTX_FRAME_RELATED_P (insn) = 1;
9251 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
9254 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
9257 ix86_add_queued_cfa_restore_notes (rtx insn)
9260 if (!queued_cfa_restores)
9262 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
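/* LAST now points at the final queued note; chain INSN's existing
   notes after it and install the whole queue as INSN's note list. */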
9264 XEXP (last, 1) = REG_NOTES (insn);
9265 REG_NOTES (insn) = queued_cfa_restores;
9266 queued_cfa_restores = NULL_RTX;
9267 RTX_FRAME_RELATED_P (insn) = 1;
9270 /* Expand prologue or epilogue stack adjustment.
9271 The pattern exists to put a dependency on all ebp-based memory accesses.
9272 STYLE should be negative if instructions should be marked as frame related,
9273 zero if the %r11 register is live and cannot be freely used, and positive
9274 otherwise. */
9277 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
9278 int style, bool set_cfa)
9280 struct machine_function *m = cfun->machine;
9282 bool add_frame_related_expr = false;
9285 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
9286 else if (x86_64_immediate_operand (offset, DImode))
9287 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
9291 /* r11 is used by indirect sibcall return as well, set before the
9292 epilogue and used after the epilogue. */
9294 tmp = gen_rtx_REG (DImode, R11_REG);
9297 gcc_assert (src != hard_frame_pointer_rtx
9298 && dest != hard_frame_pointer_rtx);
9299 tmp = hard_frame_pointer_rtx;
9301 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
9303 add_frame_related_expr = true;
9305 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
9308 insn = emit_insn (insn);
9310 ix86_add_queued_cfa_restore_notes (insn);
9316 gcc_assert (m->fs.cfa_reg == src);
9317 m->fs.cfa_offset += INTVAL (offset);
9318 m->fs.cfa_reg = dest;
9320 r = gen_rtx_PLUS (Pmode, src, offset);
9321 r = gen_rtx_SET (VOIDmode, dest, r);
9322 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
9323 RTX_FRAME_RELATED_P (insn) = 1;
9327 RTX_FRAME_RELATED_P (insn) = 1;
9328 if (add_frame_related_expr)
9330 rtx r = gen_rtx_PLUS (Pmode, src, offset);
9331 r = gen_rtx_SET (VOIDmode, dest, r);
9332 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
9336 if (dest == stack_pointer_rtx)
9338 HOST_WIDE_INT ooffset = m->fs.sp_offset;
9339 bool valid = m->fs.sp_valid;
9341 if (src == hard_frame_pointer_rtx)
9343 valid = m->fs.fp_valid;
9344 ooffset = m->fs.fp_offset;
9346 else if (src == crtl->drap_reg)
9348 valid = m->fs.drap_valid;
9353 /* Else there are two possibilities: SP itself, which we set
9354 up as the default above. Or EH_RETURN_STACKADJ_RTX, which is
9355 taken care of by hand along the eh_return path. */
9356 gcc_checking_assert (src == stack_pointer_rtx
9357 || offset == const0_rtx);
9360 m->fs.sp_offset = ooffset - INTVAL (offset);
9361 m->fs.sp_valid = valid;
9365 /* Find an available register to be used as the dynamic realign argument
9366 pointer register. Such a register will be written in the prologue and
9367 used at the beginning of the body, so it must not be
9368 1. a parameter passing register.
9369 2. the GOT pointer.
9370 We reuse the static-chain register if it is available. Otherwise, we
9371 use DI for i386 and R13 for x86-64. We chose R13 since it has
9372 longer encoding.
9374 Return: the regno of the chosen register. */
9377 find_drap_reg (void)
9379 tree decl = cfun->decl;
9383 /* Use R13 for a nested function or a function that needs a static chain.
9384 Since a function with a tail call may use any caller-saved
9385 registers in the epilogue, the DRAP must not use a caller-saved
9386 register in such a case. */
9387 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
9394 /* Use DI for a nested function or a function that needs a static chain.
9395 Since a function with a tail call may use any caller-saved
9396 registers in the epilogue, the DRAP must not use a caller-saved
9397 register in such a case. */
9398 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
9401 /* Reuse the static chain register if it isn't used for parameter
9402 passing. */
9403 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
9405 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
9406 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
9413 /* Return minimum incoming stack alignment. */
9416 ix86_minimum_incoming_stack_boundary (bool sibcall)
9418 unsigned int incoming_stack_boundary;
9420 /* Prefer the one specified at command line. */
9421 if (ix86_user_incoming_stack_boundary)
9422 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
9423 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
9424 if -mstackrealign is used, this isn't a check for a sibcall, and the
9425 estimated stack alignment is 128 bits. */
9428 && ix86_force_align_arg_pointer
9429 && crtl->stack_alignment_estimated == 128)
9430 incoming_stack_boundary = MIN_STACK_BOUNDARY;
9432 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
9434 /* Incoming stack alignment can be changed on individual functions
9435 via force_align_arg_pointer attribute. We use the smallest
9436 incoming stack boundary. */
9437 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
9438 && lookup_attribute (ix86_force_align_arg_pointer_string,
9439 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
9440 incoming_stack_boundary = MIN_STACK_BOUNDARY;
9442 /* The incoming stack frame has to be aligned at least at
9443 parm_stack_boundary. */
9444 if (incoming_stack_boundary < crtl->parm_stack_boundary)
9445 incoming_stack_boundary = crtl->parm_stack_boundary;
9447 /* The stack at the entry of main is aligned by the runtime. We use the
9448 smallest incoming stack boundary. */
9449 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
9450 && DECL_NAME (current_function_decl)
9451 && MAIN_NAME_P (DECL_NAME (current_function_decl))
9452 && DECL_FILE_SCOPE_P (current_function_decl))
9453 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
9455 return incoming_stack_boundary;
9458 /* Update incoming stack boundary and estimated stack alignment. */
9461 ix86_update_stack_boundary (void)
9463 ix86_incoming_stack_boundary
9464 = ix86_minimum_incoming_stack_boundary (false);
9466 /* x86_64 varargs need 16-byte stack alignment for the register save
9467 area. */
9470 && crtl->stack_alignment_estimated < 128)
9471 crtl->stack_alignment_estimated = 128;
9474 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
9475 needed or an rtx for DRAP otherwise. */
9478 ix86_get_drap_rtx (void)
9480 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
9481 crtl->need_drap = true;
9483 if (stack_realign_drap)
9485 /* Assign DRAP to vDRAP and return vDRAP. */
9486 unsigned int regno = find_drap_reg ();
9491 arg_ptr = gen_rtx_REG (Pmode, regno);
9492 crtl->drap_reg = arg_ptr;
9495 drap_vreg = copy_to_reg (arg_ptr);
9499 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
9502 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
9503 RTX_FRAME_RELATED_P (insn) = 1;
9511 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
9514 ix86_internal_arg_pointer (void)
9516 return virtual_incoming_args_rtx;
9519 struct scratch_reg {
9524 /* Return a short-lived scratch register for use on function entry.
9525 In 32-bit mode, it is valid only after the registers are saved
9526 in the prologue. This register must be released by means of
9527 release_scratch_register_on_entry once it is dead. */
9530 get_scratch_register_on_entry (struct scratch_reg *sr)
9538 /* We always use R11 in 64-bit mode. */
9543 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
9545 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
9546 bool static_chain_p = DECL_STATIC_CHAIN (decl);
9547 int regparm = ix86_function_regparm (fntype, decl);
9549 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
9551 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
9552 for the static chain register. */
9553 if ((regparm < 1 || (fastcall_p && !static_chain_p))
9554 && drap_regno != AX_REG)
9556 else if (regparm < 2 && drap_regno != DX_REG)
9558 /* ecx is the static chain register. */
9559 else if (regparm < 3 && !fastcall_p && !static_chain_p
9560 && drap_regno != CX_REG)
9562 else if (ix86_save_reg (BX_REG, true))
9564 /* esi is the static chain register. */
9565 else if (!(regparm == 3 && static_chain_p)
9566 && ix86_save_reg (SI_REG, true))
9568 else if (ix86_save_reg (DI_REG, true))
9572 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
9577 sr->reg = gen_rtx_REG (Pmode, regno);
9580 rtx insn = emit_insn (gen_push (sr->reg));
9581 RTX_FRAME_RELATED_P (insn) = 1;
9585 /* Release a scratch register obtained from the preceding function. */
9588 release_scratch_register_on_entry (struct scratch_reg *sr)
9592 rtx x, insn = emit_insn (gen_pop (sr->reg));
9594 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
9595 RTX_FRAME_RELATED_P (insn) = 1;
9596 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
9597 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
9598 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
9602 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
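/* Assuming the default STACK_CHECK_PROBE_INTERVAL_EXP of 12, this
   probes every 4096 bytes, i.e. once per page. */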
9604 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
9607 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
9609 /* We skip the probe for the first interval + a small dope of 4 words and
9610 probe that many bytes past the specified size to maintain a protection
9611 area at the bottom of the stack. */
9612 const int dope = 4 * UNITS_PER_WORD;
9613 rtx size_rtx = GEN_INT (size), last;
9615 /* See if we have a constant small number of probes to generate. If so,
9616 that's the easy case. The run-time loop is made up of 11 insns in the
9617 generic case while the compile-time loop is made up of 3+2*(n-1) insns
9618 for n # of intervals. */
9619 if (size <= 5 * PROBE_INTERVAL)
9621 HOST_WIDE_INT i, adjust;
9622 bool first_probe = true;
9624 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
9625 values of N from 1 until it exceeds SIZE. If only one probe is
9626 needed, this will not generate any code. Then adjust and probe
9627 to PROBE_INTERVAL + SIZE. */
9628 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
9632 adjust = 2 * PROBE_INTERVAL + dope;
9633 first_probe = false;
9636 adjust = PROBE_INTERVAL;
9638 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9639 plus_constant (stack_pointer_rtx, -adjust)));
9640 emit_stack_probe (stack_pointer_rtx);
9644 adjust = size + PROBE_INTERVAL + dope;
9646 adjust = size + PROBE_INTERVAL - i;
9648 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9649 plus_constant (stack_pointer_rtx, -adjust)));
9650 emit_stack_probe (stack_pointer_rtx);
9652 /* Adjust back to account for the additional first interval. */
9653 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9654 plus_constant (stack_pointer_rtx,
9655 PROBE_INTERVAL + dope)));
9658 /* Otherwise, do the same as above, but in a loop. Note that we must be
9659 extra careful with variables wrapping around because we might be at
9660 the very top (or the very bottom) of the address space and we have
9661 to be able to handle this case properly; in particular, we use an
9662 equality test for the loop condition. */
9665 HOST_WIDE_INT rounded_size;
9666 struct scratch_reg sr;
9668 get_scratch_register_on_entry (&sr);
9671 /* Step 1: round SIZE to the previous multiple of the interval. */
9673 rounded_size = size & -PROBE_INTERVAL;
9676 /* Step 2: compute initial and final value of the loop counter. */
9678 /* SP = SP_0 + PROBE_INTERVAL. */
9679 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9680 plus_constant (stack_pointer_rtx,
9681 - (PROBE_INTERVAL + dope))));
9683 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
9684 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
9685 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
9686 gen_rtx_PLUS (Pmode, sr.reg,
9687 stack_pointer_rtx)));
9692 while (SP != LAST_ADDR)
9693 {
9694 SP = SP + PROBE_INTERVAL
9695 probe at SP
9696 }
9698 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
9699 values of N from 1 until it is equal to ROUNDED_SIZE. */
9701 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
9704 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
9705 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
9707 if (size != rounded_size)
9709 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9710 plus_constant (stack_pointer_rtx,
9711 rounded_size - size)));
9712 emit_stack_probe (stack_pointer_rtx);
9715 /* Adjust back to account for the additional first interval. */
9716 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9717 plus_constant (stack_pointer_rtx,
9718 PROBE_INTERVAL + dope)));
9720 release_scratch_register_on_entry (&sr);
9723 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
9725 /* Even if the stack pointer isn't the CFA register, we need to correctly
9726 describe the adjustments made to it, in particular differentiate the
9727 frame-related ones from the frame-unrelated ones. */
9730 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
9731 XVECEXP (expr, 0, 0)
9732 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9733 plus_constant (stack_pointer_rtx, -size));
9734 XVECEXP (expr, 0, 1)
9735 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9736 plus_constant (stack_pointer_rtx,
9737 PROBE_INTERVAL + dope + size));
9738 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
9739 RTX_FRAME_RELATED_P (last) = 1;
9741 cfun->machine->fs.sp_offset += size;
9744 /* Make sure nothing is scheduled before we are done. */
9745 emit_insn (gen_blockage ());
9748 /* Adjust the stack pointer up to REG while probing it. */
9751 output_adjust_stack_and_probe (rtx reg)
9753 static int labelno = 0;
9754 char loop_lab[32], end_lab[32];
9757 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
9758 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
9760 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
9762 /* Jump to END_LAB if SP == LAST_ADDR. */
9763 xops[0] = stack_pointer_rtx;
9765 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
9766 fputs ("\tje\t", asm_out_file);
9767 assemble_name_raw (asm_out_file, end_lab);
9768 fputc ('\n', asm_out_file);
9770 /* SP = SP + PROBE_INTERVAL. */
9771 xops[1] = GEN_INT (PROBE_INTERVAL);
9772 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
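/* Probe at SP by or'ing zero into the word it points to. */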
9775 xops[1] = const0_rtx;
9776 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
9778 fprintf (asm_out_file, "\tjmp\t");
9779 assemble_name_raw (asm_out_file, loop_lab);
9780 fputc ('\n', asm_out_file);
9782 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
9787 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
9788 inclusive. These are offsets from the current stack pointer. */
9791 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
9793 /* See if we have a constant small number of probes to generate. If so,
9794 that's the easy case. The run-time loop is made up of 7 insns in the
9795 generic case while the compile-time loop is made up of n insns for n #
9796 of intervals. */
9797 if (size <= 7 * PROBE_INTERVAL)
9801 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
9802 it exceeds SIZE. If only one probe is needed, this will not
9803 generate any code. Then probe at FIRST + SIZE. */
9804 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
9805 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + i)));
9807 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + size)));
9810 /* Otherwise, do the same as above, but in a loop. Note that we must be
9811 extra careful with variables wrapping around because we might be at
9812 the very top (or the very bottom) of the address space and we have
9813 to be able to handle this case properly; in particular, we use an
9814 equality test for the loop condition. */
9817 HOST_WIDE_INT rounded_size, last;
9818 struct scratch_reg sr;
9820 get_scratch_register_on_entry (&sr);
9823 /* Step 1: round SIZE to the previous multiple of the interval. */
9825 rounded_size = size & -PROBE_INTERVAL;
9828 /* Step 2: compute initial and final value of the loop counter. */
9830 /* TEST_OFFSET = FIRST. */
9831 emit_move_insn (sr.reg, GEN_INT (-first));
9833 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
9834 last = first + rounded_size;
9839 while (TEST_ADDR != LAST_ADDR)
9840 {
9841 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
9842 probe at TEST_ADDR
9843 }
9845 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
9846 until it is equal to ROUNDED_SIZE. */
9848 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
9851 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
9852 that SIZE is equal to ROUNDED_SIZE. */
9854 if (size != rounded_size)
9855 emit_stack_probe (plus_constant (gen_rtx_PLUS (Pmode,
9858 rounded_size - size));
9860 release_scratch_register_on_entry (&sr);
9863 /* Make sure nothing is scheduled before we are done. */
9864 emit_insn (gen_blockage ());
9867 /* Probe a range of stack addresses from REG to END, inclusive. These are
9868 offsets from the current stack pointer. */
9871 output_probe_stack_range (rtx reg, rtx end)
9873 static int labelno = 0;
9874 char loop_lab[32], end_lab[32];
9877 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
9878 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
9880 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
9882 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
9885 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
9886 fputs ("\tje\t", asm_out_file);
9887 assemble_name_raw (asm_out_file, end_lab);
9888 fputc ('\n', asm_out_file);
9890 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
9891 xops[1] = GEN_INT (PROBE_INTERVAL);
9892 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
9894 /* Probe at TEST_ADDR. */
9895 xops[0] = stack_pointer_rtx;
9897 xops[2] = const0_rtx;
9898 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
9900 fprintf (asm_out_file, "\tjmp\t");
9901 assemble_name_raw (asm_out_file, loop_lab);
9902 fputc ('\n', asm_out_file);
9904 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
9909 /* Finalize the stack_realign_needed flag, which guides the
9910 prologue/epilogue so that it is generated in the correct form. */
9912 ix86_finalize_stack_realign_flags (void)
9914 /* Check if stack realignment is really needed after reload, and
9915 store the result in cfun. */
9916 unsigned int incoming_stack_boundary
9917 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
9918 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
9919 unsigned int stack_realign = (incoming_stack_boundary
9920 < (current_function_is_leaf
9921 ? crtl->max_used_stack_slot_alignment
9922 : crtl->stack_alignment_needed));
9924 if (crtl->stack_realign_finalized)
9926 /* After stack_realign_needed is finalized, we can no longer
9927 update it. */
9928 gcc_assert (crtl->stack_realign_needed == stack_realign);
9932 /* If the only reason for frame_pointer_needed is that we conservatively
9933 assumed stack realignment might be needed, but in the end nothing that
9934 needed the stack alignment had been spilled, clear frame_pointer_needed
9935 and say we don't need stack realignment. */
9938 && frame_pointer_needed
9939 && current_function_is_leaf
9940 && flag_omit_frame_pointer
9941 && current_function_sp_is_unchanging
9942 && !ix86_current_function_calls_tls_descriptor
9943 && !crtl->accesses_prior_frames
9944 && !cfun->calls_alloca
9945 && !crtl->calls_eh_return
9946 && !(flag_stack_check && STACK_CHECK_MOVING_SP)
9947 && !ix86_frame_pointer_required ()
9948 && get_frame_size () == 0
9949 && ix86_nsaved_sseregs () == 0
9950 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
9952 HARD_REG_SET set_up_by_prologue, prologue_used;
9955 CLEAR_HARD_REG_SET (prologue_used);
9956 CLEAR_HARD_REG_SET (set_up_by_prologue);
9957 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
9958 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
9959 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
9960 HARD_FRAME_POINTER_REGNUM);
9964 FOR_BB_INSNS (bb, insn)
9965 if (NONDEBUG_INSN_P (insn)
9966 && requires_stack_frame_p (insn, prologue_used,
9967 set_up_by_prologue))
9969 crtl->stack_realign_needed = stack_realign;
9970 crtl->stack_realign_finalized = true;
9975 frame_pointer_needed = false;
9976 stack_realign = false;
9977 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
9978 crtl->stack_alignment_needed = incoming_stack_boundary;
9979 crtl->stack_alignment_estimated = incoming_stack_boundary;
9980 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
9981 crtl->preferred_stack_boundary = incoming_stack_boundary;
9982 df_finish_pass (true);
9983 df_scan_alloc (NULL);
9985 df_compute_regs_ever_live (true);
9989 crtl->stack_realign_needed = stack_realign;
9990 crtl->stack_realign_finalized = true;
9993 /* Expand the prologue into a bunch of separate insns. */
9996 ix86_expand_prologue (void)
9998 struct machine_function *m = cfun->machine;
10001 struct ix86_frame frame;
10002 HOST_WIDE_INT allocate;
10003 bool int_registers_saved;
10005 ix86_finalize_stack_realign_flags ();
10007 /* DRAP should not coexist with stack_realign_fp */
10008 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
10010 memset (&m->fs, 0, sizeof (m->fs));
10012 /* Initialize CFA state for before the prologue. */
10013 m->fs.cfa_reg = stack_pointer_rtx;
10014 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
10016 /* Track SP offset to the CFA. We continue tracking this after we've
10017 swapped the CFA register away from SP. In the case of re-alignment
10018 this is fudged; we're interested in offsets within the local frame. */
10019 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10020 m->fs.sp_valid = true;
10022 ix86_compute_frame_layout (&frame);
10024 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
10026 /* We should have already generated an error for any use of
10027 ms_hook on a nested function. */
10028 gcc_checking_assert (!ix86_static_chain_on_stack);
10030 /* Check if profiling is active and whether we shall use the
10031 profiling-before-prologue variant. If so, issue a sorry. */
10032 if (crtl->profile && flag_fentry != 0)
10033 sorry ("ms_hook_prologue attribute isn%'t compatible "
10034 "with -mfentry for 32-bit");
10036 /* In ix86_asm_output_function_label we emitted:
10037 8b ff movl.s %edi,%edi
10038 55    push   %ebp
10039 8b ec movl.s %esp,%ebp
10041 This matches the hookable function prologue in Win32 API
10042 functions in Microsoft Windows XP Service Pack 2 and newer.
10043 Wine uses this to enable Windows apps to hook the Win32 API
10044 functions provided by Wine.
10046 What that means is that we've already set up the frame pointer. */
10048 if (frame_pointer_needed
10049 && !(crtl->drap_reg && crtl->stack_realign_needed))
10053 /* We've decided to use the frame pointer already set up.
10054 Describe this to the unwinder by pretending that both
10055 push and mov insns happen right here.
10057 Putting the unwind info here at the end of the ms_hook
10058 is done so that we can make absolutely certain we get
10059 the required byte sequence at the start of the function,
10060 rather than relying on an assembler that can produce
10061 the exact encoding required.
10063 However it does mean (in the unpatched case) that we have
10064 a 1 insn window where the asynchronous unwind info is
10065 incorrect. However, if we placed the unwind info at
10066 its correct location we would have incorrect unwind info
10067 in the patched case. Which is probably all moot since
10068 I don't expect Wine generates dwarf2 unwind info for the
10069 system libraries that use this feature. */
10071 insn = emit_insn (gen_blockage ());
10073 push = gen_push (hard_frame_pointer_rtx);
10074 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
10075 stack_pointer_rtx);
10076 RTX_FRAME_RELATED_P (push) = 1;
10077 RTX_FRAME_RELATED_P (mov) = 1;
10079 RTX_FRAME_RELATED_P (insn) = 1;
10080 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10081 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
10083 /* Note that gen_push incremented m->fs.cfa_offset, even
10084 though we didn't emit the push insn here. */
10085 m->fs.cfa_reg = hard_frame_pointer_rtx;
10086 m->fs.fp_offset = m->fs.cfa_offset;
10087 m->fs.fp_valid = true;
10091 /* The frame pointer is not needed so pop %ebp again.
10092 This leaves us with a pristine state. */
10093 emit_insn (gen_pop (hard_frame_pointer_rtx));
10097 /* The first insn of a function that accepts its static chain on the
10098 stack is to push the register that would be filled in by a direct
10099 call. This insn will be skipped by the trampoline. */
10100 else if (ix86_static_chain_on_stack)
10102 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
10103 emit_insn (gen_blockage ());
10105 /* We don't want to interpret this push insn as a register save,
10106 only as a stack adjustment. The real copy of the register as
10107 a save will be done later, if needed. */
10108 t = plus_constant (stack_pointer_rtx, -UNITS_PER_WORD);
10109 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
10110 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
10111 RTX_FRAME_RELATED_P (insn) = 1;
10114 /* Emit prologue code to adjust stack alignment and set up the DRAP, in case
10115 the DRAP is needed and stack realignment is really needed after reload. */
10116 if (stack_realign_drap)
10118 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10120 /* Only need to push parameter pointer reg if it is caller saved. */
10121 if (!call_used_regs[REGNO (crtl->drap_reg)])
10123 /* Push the arg pointer reg. */
10124 insn = emit_insn (gen_push (crtl->drap_reg));
10125 RTX_FRAME_RELATED_P (insn) = 1;
10128 /* Grab the argument pointer. */
10129 t = plus_constant (stack_pointer_rtx, m->fs.sp_offset);
10130 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10131 RTX_FRAME_RELATED_P (insn) = 1;
10132 m->fs.cfa_reg = crtl->drap_reg;
10133 m->fs.cfa_offset = 0;
10135 /* Align the stack. */
10136 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10138 GEN_INT (-align_bytes)));
10139 RTX_FRAME_RELATED_P (insn) = 1;
10141 /* Replicate the return address on the stack so that the return
10142 address can be reached via the (argp - 1) slot. This is needed
10143 to implement macro RETURN_ADDR_RTX and intrinsic function
10144 expand_builtin_return_addr etc. */
10145 t = plus_constant (crtl->drap_reg, -UNITS_PER_WORD);
10146 t = gen_frame_mem (Pmode, t);
10147 insn = emit_insn (gen_push (t));
10148 RTX_FRAME_RELATED_P (insn) = 1;
10150 /* For the purposes of frame and register save area addressing,
10151 we've started over with a new frame. */
10152 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10153 m->fs.realigned = true;
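 /* Sketch (assuming %ecx is the DRAP register, 16-byte alignment,
 and nothing pushed beforehand) of the ia32 sequence emitted by
 the block above:

 leal 4(%esp), %ecx ; grab the argument pointer
 andl $-16, %esp ; align the stack
 pushl -4(%ecx) ; replicate the return address */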
10156 if (frame_pointer_needed && !m->fs.fp_valid)
10158 /* Note: AT&T enter does NOT have reversed args. Enter is probably
10159 slower on all targets. Also sdb doesn't like it. */
10160 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
10161 RTX_FRAME_RELATED_P (insn) = 1;
10163 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
10165 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
10166 RTX_FRAME_RELATED_P (insn) = 1;
10168 if (m->fs.cfa_reg == stack_pointer_rtx)
10169 m->fs.cfa_reg = hard_frame_pointer_rtx;
10170 m->fs.fp_offset = m->fs.sp_offset;
10171 m->fs.fp_valid = true;
10175 int_registers_saved = (frame.nregs == 0);
10177 if (!int_registers_saved)
10179 /* If saving registers via PUSH, do so now. */
10180 if (!frame.save_regs_using_mov)
10182 ix86_emit_save_regs ();
10183 int_registers_saved = true;
10184 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
10187 /* When using red zone we may start register saving before allocating
10188 the stack frame saving one cycle of the prologue. However, avoid
10189 doing this if we have to probe the stack; at least on x86_64 the
10190 stack probe can turn into a call that clobbers a red zone location. */
10191 else if (ix86_using_red_zone ()
10192 && (! TARGET_STACK_PROBE
10193 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
10195 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10196 int_registers_saved = true;
10200 if (stack_realign_fp)
10202 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10203 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
10205 /* The computation of the size of the re-aligned stack frame means
10206 that we must allocate the size of the register save area before
10207 performing the actual alignment. Otherwise we cannot guarantee
10208 that there's enough storage above the realignment point. */
10209 if (m->fs.sp_offset != frame.sse_reg_save_offset)
10210 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10211 GEN_INT (m->fs.sp_offset
10212 - frame.sse_reg_save_offset),
10215 /* Align the stack. */
10216 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10218 GEN_INT (-align_bytes)));
10220 /* For the purposes of register save area addressing, the stack
10221 pointer is no longer valid. As for the value of sp_offset,
10222 see ix86_compute_frame_layout, which we need to match in order
10223 to pass verification of stack_pointer_offset at the end. */
10224 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
10225 m->fs.sp_valid = false;
10228 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
10230 if (flag_stack_usage_info)
10232 /* We start to count from ARG_POINTER. */
10233 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
10235 /* If it was realigned, take into account the fake frame. */
10236 if (stack_realign_drap)
10238 if (ix86_static_chain_on_stack)
10239 stack_size += UNITS_PER_WORD;
10241 if (!call_used_regs[REGNO (crtl->drap_reg)])
10242 stack_size += UNITS_PER_WORD;
10244 /* This over-estimates by 1 minimal-stack-alignment-unit but
10245 mitigates that by counting in the new return address slot. */
10246 current_function_dynamic_stack_size
10247 += crtl->stack_alignment_needed / BITS_PER_UNIT;
10250 current_function_static_stack_size = stack_size;
10253 /* The stack has already been decremented by the instruction calling us
10254 so probe if the size is non-negative to preserve the protection area. */
10255 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
10257 /* We expect the registers to be saved when probes are used. */
10258 gcc_assert (int_registers_saved);
10260 if (STACK_CHECK_MOVING_SP)
10262 ix86_adjust_stack_and_probe (allocate);
10267 HOST_WIDE_INT size = allocate;
10269 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
10270 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
10272 if (TARGET_STACK_PROBE)
10273 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
10275 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
10281 else if (!ix86_target_stack_probe ()
10282 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
10284 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10285 GEN_INT (-allocate), -1,
10286 m->fs.cfa_reg == stack_pointer_rtx);
10290 rtx eax = gen_rtx_REG (Pmode, AX_REG);
10292 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
10294 bool eax_live = false;
10295 bool r10_live = false;
10298 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
10299 if (!TARGET_64BIT_MS_ABI)
10300 eax_live = ix86_eax_live_at_start_p ();
10304 emit_insn (gen_push (eax));
10305 allocate -= UNITS_PER_WORD;
10309 r10 = gen_rtx_REG (Pmode, R10_REG);
10310 emit_insn (gen_push (r10));
10311 allocate -= UNITS_PER_WORD;
10314 emit_move_insn (eax, GEN_INT (allocate));
10315 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
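 /* ix86_gen_allocate_stack_worker expands to a call to a chkstk-style
 probing helper (the exact callee is a Windows target detail); it
 touches the new area page by page so that guard pages are hit in
 order, and %eax/%rax still holds the allocation size afterwards. */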
10317 /* Use the fact that AX still contains ALLOCATE. */
10318 adjust_stack_insn = (TARGET_64BIT
10319 ? gen_pro_epilogue_adjust_stack_di_sub
10320 : gen_pro_epilogue_adjust_stack_si_sub);
10322 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
10323 stack_pointer_rtx, eax));
10325 /* Note that SEH directives need to continue tracking the stack
10326 pointer even after the frame pointer has been set up. */
10327 if (m->fs.cfa_reg == stack_pointer_rtx || TARGET_SEH)
10329 if (m->fs.cfa_reg == stack_pointer_rtx)
10330 m->fs.cfa_offset += allocate;
10332 RTX_FRAME_RELATED_P (insn) = 1;
10333 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10334 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10335 plus_constant (stack_pointer_rtx,
10338 m->fs.sp_offset += allocate;
10340 if (r10_live && eax_live)
10342 t = choose_baseaddr (m->fs.sp_offset - allocate);
10343 emit_move_insn (r10, gen_frame_mem (Pmode, t));
10344 t = choose_baseaddr (m->fs.sp_offset - allocate - UNITS_PER_WORD);
10345 emit_move_insn (eax, gen_frame_mem (Pmode, t));
10347 else if (eax_live || r10_live)
10349 t = choose_baseaddr (m->fs.sp_offset - allocate);
10350 emit_move_insn ((eax_live ? eax : r10), gen_frame_mem (Pmode, t));
10353 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
10355 /* If we haven't already set up the frame pointer, do so now. */
10356 if (frame_pointer_needed && !m->fs.fp_valid)
10358 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
10359 GEN_INT (frame.stack_pointer_offset
10360 - frame.hard_frame_pointer_offset));
10361 insn = emit_insn (insn);
10362 RTX_FRAME_RELATED_P (insn) = 1;
10363 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
10365 if (m->fs.cfa_reg == stack_pointer_rtx)
10366 m->fs.cfa_reg = hard_frame_pointer_rtx;
10367 m->fs.fp_offset = frame.hard_frame_pointer_offset;
10368 m->fs.fp_valid = true;
10371 if (!int_registers_saved)
10372 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10373 if (frame.nsseregs)
10374 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
10376 pic_reg_used = false;
10377 if (pic_offset_table_rtx
10378 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
10381 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
10383 if (alt_pic_reg_used != INVALID_REGNUM)
10384 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
10386 pic_reg_used = true;
10393 if (ix86_cmodel == CM_LARGE_PIC)
10395 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
10396 rtx label = gen_label_rtx ();
10397 emit_label (label);
10398 LABEL_PRESERVE_P (label) = 1;
10399 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
10400 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
10401 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
10402 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
10403 pic_offset_table_rtx, tmp_reg));
10406 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
10410 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
10411 RTX_FRAME_RELATED_P (insn) = 1;
10412 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
10416 /* In the pic_reg_used case, make sure that the got load isn't deleted
10417 when mcount needs it. Blockage to avoid call movement across mcount
10418 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
10420 if (crtl->profile && !flag_fentry && pic_reg_used)
10421 emit_insn (gen_prologue_use (pic_offset_table_rtx));
10423 if (crtl->drap_reg && !crtl->stack_realign_needed)
10425 /* The vDRAP is set up, but after reload it turns out that stack
10426 realignment isn't necessary; here we emit the prologue to set up
10427 the DRAP without the stack realignment adjustment. */
10428 t = choose_baseaddr (0);
10429 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10432 /* Prevent instructions from being scheduled into the register save push
10433 sequence when access to the redzone area is done through the frame pointer.
10434 The offset between the frame pointer and the stack pointer is calculated
10435 relative to the value of the stack pointer at the end of the function
10436 prologue, and moving instructions that access redzone area via frame
10437 pointer inside push sequence violates this assumption. */
10438 if (frame_pointer_needed && frame.red_zone_size)
10439 emit_insn (gen_memory_blockage ());
10441 /* Emit cld instruction if stringops are used in the function. */
10442 if (TARGET_CLD && ix86_current_function_needs_cld)
10443 emit_insn (gen_cld ());
10445 /* SEH requires that the prologue end within 256 bytes of the start of
10446 the function. Prevent instruction schedules that would extend that.
10447 Further, prevent alloca modifications to the stack pointer from being
10448 combined with prologue modifications. */
10450 emit_insn (gen_prologue_use (stack_pointer_rtx));
10453 /* Emit code to restore REG using a POP insn. */
10456 ix86_emit_restore_reg_using_pop (rtx reg)
10458 struct machine_function *m = cfun->machine;
10459 rtx insn = emit_insn (gen_pop (reg));
10461 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
10462 m->fs.sp_offset -= UNITS_PER_WORD;
10464 if (m->fs.cfa_reg == crtl->drap_reg
10465 && REGNO (reg) == REGNO (crtl->drap_reg))
10467 /* Previously we'd represented the CFA as an expression
10468 like *(%ebp - 8). We've just popped that value from
10469 the stack, which means we need to reset the CFA to
10470 the drap register. This will remain until we restore
10471 the stack pointer. */
10472 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
10473 RTX_FRAME_RELATED_P (insn) = 1;
10475 /* This means that the DRAP register is valid for addressing too. */
10476 m->fs.drap_valid = true;
10480 if (m->fs.cfa_reg == stack_pointer_rtx)
10482 rtx x = plus_constant (stack_pointer_rtx, UNITS_PER_WORD);
10483 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10484 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
10485 RTX_FRAME_RELATED_P (insn) = 1;
10487 m->fs.cfa_offset -= UNITS_PER_WORD;
10490 /* When the frame pointer is the CFA, and we pop it, we are
10491 swapping back to the stack pointer as the CFA. This happens
10492 for stack frames that don't allocate other data, so we assume
10493 the stack pointer is now pointing at the return address, i.e.
10494 the function entry state, which makes the offset be 1 word. */
10495 if (reg == hard_frame_pointer_rtx)
10497 m->fs.fp_valid = false;
10498 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
10500 m->fs.cfa_reg = stack_pointer_rtx;
10501 m->fs.cfa_offset -= UNITS_PER_WORD;
10503 add_reg_note (insn, REG_CFA_DEF_CFA,
10504 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10505 GEN_INT (m->fs.cfa_offset)));
10506 RTX_FRAME_RELATED_P (insn) = 1;
10511 /* Emit code to restore saved registers using POP insns. */
10514 ix86_emit_restore_regs_using_pop (void)
10516 unsigned int regno;
10518 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10519 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
10520 ix86_emit_restore_reg_using_pop (gen_rtx_REG (Pmode, regno));
10523 /* Emit code and notes for the LEAVE instruction. */
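/* leave is equivalent to the two-insn sequence "movl %ebp, %esp;
 popl %ebp", which is why the frame state below becomes sp_valid
 with sp_offset = fp_offset - UNITS_PER_WORD while fp_valid goes
 false. */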
10526 ix86_emit_leave (void)
10528 struct machine_function *m = cfun->machine;
10529 rtx insn = emit_insn (ix86_gen_leave ());
10531 ix86_add_queued_cfa_restore_notes (insn);
10533 gcc_assert (m->fs.fp_valid);
10534 m->fs.sp_valid = true;
10535 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
10536 m->fs.fp_valid = false;
10538 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
10540 m->fs.cfa_reg = stack_pointer_rtx;
10541 m->fs.cfa_offset = m->fs.sp_offset;
10543 add_reg_note (insn, REG_CFA_DEF_CFA,
10544 plus_constant (stack_pointer_rtx, m->fs.sp_offset));
10545 RTX_FRAME_RELATED_P (insn) = 1;
10547 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
10551 /* Emit code to restore saved registers using MOV insns.
10552 First register is restored from CFA - CFA_OFFSET. */
10554 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
10555 bool maybe_eh_return)
10557 struct machine_function *m = cfun->machine;
10558 unsigned int regno;
10560 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10561 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
10563 rtx reg = gen_rtx_REG (Pmode, regno);
10566 mem = choose_baseaddr (cfa_offset);
10567 mem = gen_frame_mem (Pmode, mem);
10568 insn = emit_move_insn (reg, mem);
10570 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
10572 /* Previously we'd represented the CFA as an expression
10573 like *(%ebp - 8). We've just reloaded that value from
10574 the stack, which means we need to reset the CFA to
10575 the drap register. This will remain until we restore
10576 the stack pointer. */
10577 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
10578 RTX_FRAME_RELATED_P (insn) = 1;
10580 /* This means that the DRAP register is valid for addressing. */
10581 m->fs.drap_valid = true;
10584 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
10586 cfa_offset -= UNITS_PER_WORD;
10590 /* Emit code to restore saved SSE registers using MOV insns.
10591 First register is restored from CFA - CFA_OFFSET. */
10593 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
10594 bool maybe_eh_return)
10596 unsigned int regno;
10598 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10599 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
10601 rtx reg = gen_rtx_REG (V4SFmode, regno);
10604 mem = choose_baseaddr (cfa_offset);
10605 mem = gen_rtx_MEM (V4SFmode, mem);
10606 set_mem_align (mem, 128);
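 /* The SSE save area is kept 16-byte aligned by the frame layout,
 so declaring 128-bit alignment lets the move below be emitted
 as an aligned (movaps) rather than an unaligned load. */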
10607 emit_move_insn (reg, mem);
10609 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
10615 /* Emit vzeroupper if needed. */
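/* (vzeroupper clears the upper 128 bits of every YMM register; doing
 this before returning to a caller that may execute legacy SSE code
 avoids the expensive AVX<->SSE transition stalls present on some
 microarchitectures.) */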
10618 ix86_maybe_emit_epilogue_vzeroupper (void)
10620 if (TARGET_VZEROUPPER
10621 && !TREE_THIS_VOLATILE (cfun->decl)
10622 && !cfun->machine->caller_return_avx256_p)
10623 emit_insn (gen_avx_vzeroupper (GEN_INT (call_no_avx256)));
10626 /* Restore function stack, frame, and registers. */
10629 ix86_expand_epilogue (int style)
10631 struct machine_function *m = cfun->machine;
10632 struct machine_frame_state frame_state_save = m->fs;
10633 struct ix86_frame frame;
10634 bool restore_regs_via_mov;
10637 ix86_finalize_stack_realign_flags ();
10638 ix86_compute_frame_layout (&frame);
10640 m->fs.sp_valid = (!frame_pointer_needed
10641 || (current_function_sp_is_unchanging
10642 && !stack_realign_fp));
10643 gcc_assert (!m->fs.sp_valid
10644 || m->fs.sp_offset == frame.stack_pointer_offset);
10646 /* The FP must be valid if the frame pointer is present. */
10647 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
10648 gcc_assert (!m->fs.fp_valid
10649 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
10651 /* We must have *some* valid pointer to the stack frame. */
10652 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
10654 /* The DRAP is never valid at this point. */
10655 gcc_assert (!m->fs.drap_valid);
10657 /* See the comment about red zone and frame
10658 pointer usage in ix86_expand_prologue. */
10659 if (frame_pointer_needed && frame.red_zone_size)
10660 emit_insn (gen_memory_blockage ());
10662 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
10663 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
10665 /* Determine the CFA offset of the end of the red-zone. */
10666 m->fs.red_zone_offset = 0;
10667 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
10669 /* The red-zone begins below the return address. */
10670 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
10672 /* When the register save area is in the aligned portion of
10673 the stack, determine the maximum runtime displacement that
10674 matches up with the aligned frame. */
10675 if (stack_realign_drap)
10676 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
10680 /* Special care must be taken for the normal return case of a function
10681 using eh_return: the eax and edx registers are marked as saved, but
10682 not restored along this path. Adjust the save location to match. */
10683 if (crtl->calls_eh_return && style != 2)
10684 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
10686 /* EH_RETURN requires the use of moves to function properly. */
10687 if (crtl->calls_eh_return)
10688 restore_regs_via_mov = true;
10689 /* SEH requires the use of pops to identify the epilogue. */
10690 else if (TARGET_SEH)
10691 restore_regs_via_mov = false;
10692 /* If we're only restoring one register and sp is not valid, then
10693 use a move instruction to restore the register, since it's
10694 less work than reloading sp and popping the register. */
10695 else if (!m->fs.sp_valid && frame.nregs <= 1)
10696 restore_regs_via_mov = true;
10697 else if (TARGET_EPILOGUE_USING_MOVE
10698 && cfun->machine->use_fast_prologue_epilogue
10699 && (frame.nregs > 1
10700 || m->fs.sp_offset != frame.reg_save_offset))
10701 restore_regs_via_mov = true;
10702 else if (frame_pointer_needed
10704 && m->fs.sp_offset != frame.reg_save_offset)
10705 restore_regs_via_mov = true;
10706 else if (frame_pointer_needed
10707 && TARGET_USE_LEAVE
10708 && cfun->machine->use_fast_prologue_epilogue
10709 && frame.nregs == 1)
10710 restore_regs_via_mov = true;
10712 restore_regs_via_mov = false;
10714 if (restore_regs_via_mov || frame.nsseregs)
10716 /* Ensure that the entire register save area is addressable via
10717 the stack pointer, if we will restore via sp. */
10719 && m->fs.sp_offset > 0x7fffffff
10720 && !(m->fs.fp_valid || m->fs.drap_valid)
10721 && (frame.nsseregs + frame.nregs) != 0)
10723 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10724 GEN_INT (m->fs.sp_offset
10725 - frame.sse_reg_save_offset),
10727 m->fs.cfa_reg == stack_pointer_rtx);
10731 /* If there are any SSE registers to restore, then we have to do it
10732 via moves, since there's obviously no pop for SSE regs. */
10733 if (frame.nsseregs)
10734 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
10737 if (restore_regs_via_mov)
10742 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
10744 /* eh_return epilogues need %ecx added to the stack pointer. */
10747 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
10749 /* Stack align doesn't work with eh_return. */
10750 gcc_assert (!stack_realign_drap);
10752 /* Neither do regparm nested functions. */
10752 gcc_assert (!ix86_static_chain_on_stack);
10754 if (frame_pointer_needed)
10756 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
10757 t = plus_constant (t, m->fs.fp_offset - UNITS_PER_WORD);
10758 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
10760 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
10761 insn = emit_move_insn (hard_frame_pointer_rtx, t);
10763 /* Note that we use SA as a temporary CFA, as the return
10764 address is at the proper place relative to it. We
10765 pretend this happens at the FP restore insn because
10766 prior to this insn the FP would be stored at the wrong
10767 offset relative to SA, and after this insn we have no
10768 other reasonable register to use for the CFA. We don't
10769 bother resetting the CFA to the SP for the duration of
10770 the return insn. */
10771 add_reg_note (insn, REG_CFA_DEF_CFA,
10772 plus_constant (sa, UNITS_PER_WORD));
10773 ix86_add_queued_cfa_restore_notes (insn);
10774 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
10775 RTX_FRAME_RELATED_P (insn) = 1;
10777 m->fs.cfa_reg = sa;
10778 m->fs.cfa_offset = UNITS_PER_WORD;
10779 m->fs.fp_valid = false;
10781 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
10782 const0_rtx, style, false);
10786 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
10787 t = plus_constant (t, m->fs.sp_offset - UNITS_PER_WORD);
10788 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
10789 ix86_add_queued_cfa_restore_notes (insn);
10791 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
10792 if (m->fs.cfa_offset != UNITS_PER_WORD)
10794 m->fs.cfa_offset = UNITS_PER_WORD;
10795 add_reg_note (insn, REG_CFA_DEF_CFA,
10796 plus_constant (stack_pointer_rtx,
10798 RTX_FRAME_RELATED_P (insn) = 1;
10801 m->fs.sp_offset = UNITS_PER_WORD;
10802 m->fs.sp_valid = true;
10807 /* SEH requires that the function end with (1) a stack adjustment
10808 if necessary, (2) a sequence of pops, and (3) a return or
10809 jump instruction. Prevent insns from the function body from
10810 being scheduled into this sequence. */
10813 /* Prevent a catch region from being adjacent to the standard
10814 epilogue sequence. Unfortunately neither crtl->uses_eh_lsda nor
10815 several other flags that would be interesting to test are set up yet. */
10817 if (flag_non_call_exceptions)
10818 emit_insn (gen_nops (const1_rtx));
10820 emit_insn (gen_blockage ());
10823 /* First step is to deallocate the stack frame so that we can
10824 pop the registers. */
10825 if (!m->fs.sp_valid)
10827 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
10828 GEN_INT (m->fs.fp_offset
10829 - frame.reg_save_offset),
10832 else if (m->fs.sp_offset != frame.reg_save_offset)
10834 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10835 GEN_INT (m->fs.sp_offset
10836 - frame.reg_save_offset),
10838 m->fs.cfa_reg == stack_pointer_rtx);
10841 ix86_emit_restore_regs_using_pop ();
10844 /* If we used a frame pointer and haven't already got rid of it, then do so now. */
10846 if (m->fs.fp_valid)
10848 /* If the stack pointer is valid and pointing at the frame
10849 pointer store address, then we only need a pop. */
10850 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
10851 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10852 /* Leave results in shorter dependency chains on CPUs that are
10853 able to grok it fast. */
10854 else if (TARGET_USE_LEAVE
10855 || optimize_function_for_size_p (cfun)
10856 || !cfun->machine->use_fast_prologue_epilogue)
10857 ix86_emit_leave ();
10860 pro_epilogue_adjust_stack (stack_pointer_rtx,
10861 hard_frame_pointer_rtx,
10862 const0_rtx, style, !using_drap);
10863 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10869 int param_ptr_offset = UNITS_PER_WORD;
10872 gcc_assert (stack_realign_drap);
10874 if (ix86_static_chain_on_stack)
10875 param_ptr_offset += UNITS_PER_WORD;
10876 if (!call_used_regs[REGNO (crtl->drap_reg)])
10877 param_ptr_offset += UNITS_PER_WORD;
10879 insn = emit_insn (gen_rtx_SET
10880 (VOIDmode, stack_pointer_rtx,
10881 gen_rtx_PLUS (Pmode,
10883 GEN_INT (-param_ptr_offset))));
10884 m->fs.cfa_reg = stack_pointer_rtx;
10885 m->fs.cfa_offset = param_ptr_offset;
10886 m->fs.sp_offset = param_ptr_offset;
10887 m->fs.realigned = false;
10889 add_reg_note (insn, REG_CFA_DEF_CFA,
10890 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10891 GEN_INT (param_ptr_offset)));
10892 RTX_FRAME_RELATED_P (insn) = 1;
10894 if (!call_used_regs[REGNO (crtl->drap_reg)])
10895 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
10898 /* At this point the stack pointer must be valid, and we must have
10899 restored all of the registers. We may not have deallocated the
10900 entire stack frame. We've delayed this until now because it may
10901 be possible to merge the local stack deallocation with the
10902 deallocation forced by ix86_static_chain_on_stack. */
10903 gcc_assert (m->fs.sp_valid);
10904 gcc_assert (!m->fs.fp_valid);
10905 gcc_assert (!m->fs.realigned);
10906 if (m->fs.sp_offset != UNITS_PER_WORD)
10908 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10909 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
10913 ix86_add_queued_cfa_restore_notes (get_last_insn ());
10915 /* Sibcall epilogues don't want a return instruction. */
10918 m->fs = frame_state_save;
10922 /* Emit vzeroupper if needed. */
10923 ix86_maybe_emit_epilogue_vzeroupper ();
10925 if (crtl->args.pops_args && crtl->args.size)
10927 rtx popc = GEN_INT (crtl->args.pops_args);
10929 /* i386 can only pop 64K bytes. If asked to pop more, pop the return
10930 address, do an explicit add, and jump indirectly to the caller. */
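 /* Schematically, with N = crtl->args.pops_args, the two shapes
 emitted below are:

 ret $N N < 64K ("ret imm16")

 popl %ecx N >= 64K
 addl $N, %esp
 jmp *%ecx */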
10932 if (crtl->args.pops_args >= 65536)
10934 rtx ecx = gen_rtx_REG (SImode, CX_REG);
10937 /* There is no "pascal" calling convention in any 64bit ABI. */
10938 gcc_assert (!TARGET_64BIT);
10940 insn = emit_insn (gen_pop (ecx));
10941 m->fs.cfa_offset -= UNITS_PER_WORD;
10942 m->fs.sp_offset -= UNITS_PER_WORD;
10944 add_reg_note (insn, REG_CFA_ADJUST_CFA,
10945 copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
10946 add_reg_note (insn, REG_CFA_REGISTER,
10947 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
10948 RTX_FRAME_RELATED_P (insn) = 1;
10950 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10952 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
10955 emit_jump_insn (gen_simple_return_pop_internal (popc));
10958 emit_jump_insn (gen_simple_return_internal ());
10960 /* Restore the state back to the state from the prologue,
10961 so that it's correct for the next epilogue. */
10962 m->fs = frame_state_save;
10965 /* Reset from the function's potential modifications. */
10968 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
10969 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
10971 if (pic_offset_table_rtx)
10972 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
10974 /* Mach-O doesn't support labels at the end of objects, so if
10975 it looks like we might want one, insert a NOP. */
10977 rtx insn = get_last_insn ();
10978 rtx deleted_debug_label = NULL_RTX;
10981 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
10983 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
10984 notes; instead set their CODE_LABEL_NUMBER to -1.
10985 Otherwise there would be code generation differences
10986 between -g and -g0. */
10987 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
10988 deleted_debug_label = insn;
10989 insn = PREV_INSN (insn);
10994 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
10995 fputs ("\tnop\n", file);
10996 else if (deleted_debug_label)
10997 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
10998 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
10999 CODE_LABEL_NUMBER (insn) = -1;
11005 /* Return a scratch register to use in the split stack prologue. The
11006 split stack prologue is used for -fsplit-stack. It is the first
11007 instructions in the function, even before the regular prologue.
11008 The scratch register can be any caller-saved register which is not
11009 used for parameters or for the static chain. */
11011 static unsigned int
11012 split_stack_prologue_scratch_regno (void)
11021 is_fastcall = (lookup_attribute ("fastcall",
11022 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
11024 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
11028 if (DECL_STATIC_CHAIN (cfun->decl))
11030 sorry ("-fsplit-stack does not support fastcall with "
11031 "nested function");
11032 return INVALID_REGNUM;
11036 else if (regparm < 3)
11038 if (!DECL_STATIC_CHAIN (cfun->decl))
11044 sorry ("-fsplit-stack does not support 2 register "
11045 " parameters for a nested function");
11046 return INVALID_REGNUM;
11053 /* FIXME: We could make this work by pushing a register
11054 around the addition and comparison. */
11055 sorry ("-fsplit-stack does not support 3 register parameters");
11056 return INVALID_REGNUM;
11061 /* A SYMBOL_REF for the function which allocates new stack space for -fsplit-stack. */
11064 static GTY(()) rtx split_stack_fn;
11066 /* A SYMBOL_REF for the more-stack function when using the large code model. */
11069 static GTY(()) rtx split_stack_fn_large;
11071 /* Handle -fsplit-stack. These are the first instructions in the
11072 function, even before the regular prologue. */
11075 ix86_expand_split_stack_prologue (void)
11077 struct ix86_frame frame;
11078 HOST_WIDE_INT allocate;
11079 unsigned HOST_WIDE_INT args_size;
11080 rtx label, limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
11081 rtx scratch_reg = NULL_RTX;
11082 rtx varargs_label = NULL_RTX;
11085 gcc_assert (flag_split_stack && reload_completed);
11087 ix86_finalize_stack_realign_flags ();
11088 ix86_compute_frame_layout (&frame);
11089 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
11091 /* This is the label we will branch to if we have enough stack
11092 space. We expect the basic block reordering pass to reverse this
11093 branch if optimizing, so that we branch in the unlikely case. */
11094 label = gen_label_rtx ();
11096 /* We need to compare the stack pointer minus the frame size with
11097 the stack boundary in the TCB. The stack boundary always gives
11098 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
11099 can compare directly. Otherwise we need to do an addition. */
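 /* Schematically the comparison below becomes something like

 cmpl %gs:BOUNDARY, %esp (32-bit)
 cmpq %fs:BOUNDARY, %rsp (64-bit)

 where BOUNDARY is the TCB slot the UNSPEC_STACK_CHECK reference
 resolves to; the segment and offset are target-specific details
 shared with libgcc's __morestack. */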
11101 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
11102 UNSPEC_STACK_CHECK);
11103 limit = gen_rtx_CONST (Pmode, limit);
11104 limit = gen_rtx_MEM (Pmode, limit);
11105 if (allocate < SPLIT_STACK_AVAILABLE)
11106 current = stack_pointer_rtx;
11109 unsigned int scratch_regno;
11112 /* We need a scratch register to hold the stack pointer minus
11113 the required frame size. Since this is the very start of the
11114 function, the scratch register can be any caller-saved
11115 register which is not used for parameters. */
11116 offset = GEN_INT (- allocate);
11117 scratch_regno = split_stack_prologue_scratch_regno ();
11118 if (scratch_regno == INVALID_REGNUM)
11120 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11121 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
11123 /* We don't use ix86_gen_add3 in this case because it will
11124 want to split to lea, but when not optimizing the insn
11125 will not be split after this point. */
11126 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11127 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11132 emit_move_insn (scratch_reg, offset);
11133 emit_insn (gen_adddi3 (scratch_reg, scratch_reg,
11134 stack_pointer_rtx));
11136 current = scratch_reg;
11139 ix86_expand_branch (GEU, current, limit, label);
11140 jump_insn = get_last_insn ();
11141 JUMP_LABEL (jump_insn) = label;
11143 /* Mark the jump as very likely to be taken. */
11144 add_reg_note (jump_insn, REG_BR_PROB,
11145 GEN_INT (REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100));
11147 if (split_stack_fn == NULL_RTX)
11148 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
11149 fn = split_stack_fn;
11151 /* Get more stack space. We pass in the desired stack space and the
11152 size of the arguments to copy to the new stack. In 32-bit mode
11153 we push the parameters; __morestack will return on a new stack
11154 anyhow. In 64-bit mode we pass the parameters in r10 and r11. */
11156 allocate_rtx = GEN_INT (allocate);
11157 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
11158 call_fusage = NULL_RTX;
11163 reg10 = gen_rtx_REG (Pmode, R10_REG);
11164 reg11 = gen_rtx_REG (Pmode, R11_REG);
11166 /* If this function uses a static chain, it will be in %r10.
11167 Preserve it across the call to __morestack. */
11168 if (DECL_STATIC_CHAIN (cfun->decl))
11172 rax = gen_rtx_REG (Pmode, AX_REG);
11173 emit_move_insn (rax, reg10);
11174 use_reg (&call_fusage, rax);
11177 if (ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
11179 HOST_WIDE_INT argval;
11181 /* When using the large model we need to load the address
11182 into a register, and we've run out of registers. So we
11183 switch to a different calling convention, and we call a
11184 different function: __morestack_large. We pass the
11185 argument size in the upper 32 bits of r10 and pass the
11186 frame size in the lower 32 bits. */
11187 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
11188 gcc_assert ((args_size & 0xffffffff) == args_size);
11190 if (split_stack_fn_large == NULL_RTX)
11191 split_stack_fn_large =
11192 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
11194 if (ix86_cmodel == CM_LARGE_PIC)
11198 label = gen_label_rtx ();
11199 emit_label (label);
11200 LABEL_PRESERVE_P (label) = 1;
11201 emit_insn (gen_set_rip_rex64 (reg10, label));
11202 emit_insn (gen_set_got_offset_rex64 (reg11, label));
11203 emit_insn (gen_adddi3 (reg10, reg10, reg11));
11204 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
11206 x = gen_rtx_CONST (Pmode, x);
11207 emit_move_insn (reg11, x);
11208 x = gen_rtx_PLUS (Pmode, reg10, reg11);
11209 x = gen_const_mem (Pmode, x);
11210 emit_move_insn (reg11, x);
11213 emit_move_insn (reg11, split_stack_fn_large);
11217 argval = ((args_size << 16) << 16) + allocate;
11218 emit_move_insn (reg10, GEN_INT (argval));
11222 emit_move_insn (reg10, allocate_rtx);
11223 emit_move_insn (reg11, GEN_INT (args_size));
11224 use_reg (&call_fusage, reg11);
11227 use_reg (&call_fusage, reg10);
11231 emit_insn (gen_push (GEN_INT (args_size)));
11232 emit_insn (gen_push (allocate_rtx));
11234 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
11235 GEN_INT (UNITS_PER_WORD), constm1_rtx,
11237 add_function_usage_to (call_insn, call_fusage);
11239 /* In order to make call/return prediction work right, we now need
11240 to execute a return instruction. See
11241 libgcc/config/i386/morestack.S for the details on how this works.
11243 For flow purposes gcc must not see this as a return
11244 instruction--we need control flow to continue at the subsequent
11245 label. Therefore, we use an unspec. */
11246 gcc_assert (crtl->args.pops_args < 65536);
11247 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
11249 /* If we are in 64-bit mode and this function uses a static chain,
11250 we saved %r10 in %rax before calling __morestack. */
11251 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
11252 emit_move_insn (gen_rtx_REG (Pmode, R10_REG),
11253 gen_rtx_REG (Pmode, AX_REG));
11255 /* If this function calls va_start, we need to store a pointer to
11256 the arguments on the old stack, because they may not have been
11257 all copied to the new stack. At this point the old stack can be
11258 found at the frame pointer value used by __morestack, because
11259 __morestack has set that up before calling back to us. Here we
11260 store that pointer in a scratch register, and in
11261 ix86_expand_prologue we store the scratch register in a stack slot. */
11263 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11265 unsigned int scratch_regno;
11269 scratch_regno = split_stack_prologue_scratch_regno ();
11270 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11271 frame_reg = gen_rtx_REG (Pmode, BP_REG);
 /* In 64-bit mode the old stack is found at:
 fp -> old fp value
11275 return address within this function
11276 return address of caller of this function
 stack arguments
11278 So we add three words to get to the stack arguments.

 In 32-bit mode:
 fp -> old fp value
11282 return address within this function
11283 first argument to __morestack
11284 second argument to __morestack
11285 return address of caller of this function
 stack arguments
11287 So we add five words to get to the stack arguments. */
11289 words = TARGET_64BIT ? 3 : 5;
11290 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11291 gen_rtx_PLUS (Pmode, frame_reg,
11292 GEN_INT (words * UNITS_PER_WORD))));
11294 varargs_label = gen_label_rtx ();
11295 emit_jump_insn (gen_jump (varargs_label));
11296 JUMP_LABEL (get_last_insn ()) = varargs_label;
11301 emit_label (label);
11302 LABEL_NUSES (label) = 1;
11304 /* If this function calls va_start, we now have to set the scratch
11305 register for the case where we do not call __morestack. In this
11306 case we need to set it based on the stack pointer. */
11307 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11309 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11310 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11311 GEN_INT (UNITS_PER_WORD))));
11313 emit_label (varargs_label);
11314 LABEL_NUSES (varargs_label) = 1;
11318 /* We may have to tell the dataflow pass that the split stack prologue
11319 is initializing a scratch register. */
11322 ix86_live_on_entry (bitmap regs)
11324 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11326 gcc_assert (flag_split_stack);
11327 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
11331 /* Determine if op is a suitable SUBREG RTX for an address. */
11334 ix86_address_subreg_operand (rtx op)
11336 enum machine_mode mode;
11341 mode = GET_MODE (op);
11343 if (GET_MODE_CLASS (mode) != MODE_INT)
11346 /* Don't allow SUBREGs that span more than a word. They can lead to spill
11347 failures when the register is one word out of a two word structure. */
11348 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
11351 /* Allow only SUBREGs of non-eliminable hard registers. */
11352 return register_no_elim_operand (op, mode);
11355 /* Extract the parts of an RTL expression that is a valid memory address
11356 for an instruction. Return 0 if the structure of the address is
11357 grossly off. Return -1 if the address contains ASHIFT, so it is not
11358 strictly valid, but is still used for computing the length of a lea instruction. */
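/* For example, 12(%eax,%ebx,4) - i.e. (plus (plus (mult (reg ebx)
 (const_int 4)) (reg eax)) (const_int 12)) - decomposes into
 base = %eax, index = %ebx, scale = 4, disp = 12, seg = SEG_DEFAULT. */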
11361 ix86_decompose_address (rtx addr, struct ix86_address *out)
11363 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
11364 rtx base_reg, index_reg;
11365 HOST_WIDE_INT scale = 1;
11366 rtx scale_rtx = NULL_RTX;
11369 enum ix86_address_seg seg = SEG_DEFAULT;
11371 /* Allow zero-extended SImode addresses;
11372 they will be emitted with the addr32 prefix. */
11373 if (TARGET_64BIT && GET_MODE (addr) == DImode)
11375 if (GET_CODE (addr) == ZERO_EXTEND
11376 && GET_MODE (XEXP (addr, 0)) == SImode)
11377 addr = XEXP (addr, 0);
11378 else if (GET_CODE (addr) == AND
11379 && const_32bit_mask (XEXP (addr, 1), DImode))
11381 addr = XEXP (addr, 0);
11383 /* Strip subreg. */
11384 if (GET_CODE (addr) == SUBREG
11385 && GET_MODE (SUBREG_REG (addr)) == SImode)
11386 addr = SUBREG_REG (addr);
11392 else if (GET_CODE (addr) == SUBREG)
11394 if (ix86_address_subreg_operand (SUBREG_REG (addr)))
11399 else if (GET_CODE (addr) == PLUS)
11401 rtx addends[4], op;
11409 addends[n++] = XEXP (op, 1);
11412 while (GET_CODE (op) == PLUS);
11417 for (i = n; i >= 0; --i)
11420 switch (GET_CODE (op))
11425 index = XEXP (op, 0);
11426 scale_rtx = XEXP (op, 1);
11432 index = XEXP (op, 0);
11433 tmp = XEXP (op, 1);
11434 if (!CONST_INT_P (tmp))
11436 scale = INTVAL (tmp);
11437 if ((unsigned HOST_WIDE_INT) scale > 3)
11439 scale = 1 << scale;
11443 if (XINT (op, 1) == UNSPEC_TP
11444 && TARGET_TLS_DIRECT_SEG_REFS
11445 && seg == SEG_DEFAULT)
11446 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
11452 if (!ix86_address_subreg_operand (SUBREG_REG (op)))
11479 else if (GET_CODE (addr) == MULT)
11481 index = XEXP (addr, 0); /* index*scale */
11482 scale_rtx = XEXP (addr, 1);
11484 else if (GET_CODE (addr) == ASHIFT)
11486 /* We're called for lea too, which implements ashift on occasion. */
11487 index = XEXP (addr, 0);
11488 tmp = XEXP (addr, 1);
11489 if (!CONST_INT_P (tmp))
11491 scale = INTVAL (tmp);
11492 if ((unsigned HOST_WIDE_INT) scale > 3)
11494 scale = 1 << scale;
11498 disp = addr; /* displacement */
11504 else if (GET_CODE (index) == SUBREG
11505 && ix86_address_subreg_operand (SUBREG_REG (index)))
11511 /* Extract the integral value of scale. */
11514 if (!CONST_INT_P (scale_rtx))
11516 scale = INTVAL (scale_rtx);
11519 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
11520 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
11522 /* Avoid useless 0 displacement. */
11523 if (disp == const0_rtx && (base || index))
11526 /* Allow arg pointer and stack pointer as index if there is no scaling. */
11527 if (base_reg && index_reg && scale == 1
11528 && (index_reg == arg_pointer_rtx
11529 || index_reg == frame_pointer_rtx
11530 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
11533 tmp = base, base = index, index = tmp;
11534 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
11537 /* Special case: %ebp cannot be encoded as a base without a displacement; similarly %r13. */
11541 && (base_reg == hard_frame_pointer_rtx
11542 || base_reg == frame_pointer_rtx
11543 || base_reg == arg_pointer_rtx
11544 || (REG_P (base_reg)
11545 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
11546 || REGNO (base_reg) == R13_REG))))
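 /* (Encoding background: in ModR/M, mod=00 with a base field of 101
 means "disp32, no base", so %ebp and %r13 can only serve as a base
 together with an explicit - here zero - displacement.) */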
11549 /* Special case: on K6, [%esi] makes the instruction vector decoded.
11550 Avoid this by transforming to [%esi+0].
11551 Reload calls address legitimization without cfun defined, so we need
11552 to test cfun for being non-NULL. */
11553 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
11554 && base_reg && !index_reg && !disp
11555 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
11558 /* Special case: encode reg+reg instead of reg*2. */
11559 if (!base && index && scale == 2)
11560 base = index, base_reg = index_reg, scale = 1;
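 /* (An index without a base forces a 32-bit displacement field in the
 SIB byte, so reg+reg encodes shorter than reg*2; the same encoding
 restriction motivates the scale check just below.) */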
11562 /* Special case: scaling cannot be encoded without base or displacement. */
11563 if (!base && !disp && index && scale != 1)
11567 out->index = index;
11569 out->scale = scale;
11575 /* Return cost of the memory address x.
11576 For i386, it is better to use a complex address than let gcc copy
11577 the address into a reg and make a new pseudo. But not if the address
11578 requires two regs - that would mean more pseudos with longer lifetimes. */
11581 ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
11583 struct ix86_address parts;
11585 int ok = ix86_decompose_address (x, &parts);
11589 if (parts.base && GET_CODE (parts.base) == SUBREG)
11590 parts.base = SUBREG_REG (parts.base);
11591 if (parts.index && GET_CODE (parts.index) == SUBREG)
11592 parts.index = SUBREG_REG (parts.index);
11594 /* Attempt to minimize number of registers in the address. */
11596 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
11598 && (!REG_P (parts.index)
11599 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
11603 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
11605 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
11606 && parts.base != parts.index)
11609 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
11610 since its predecode logic can't detect the length of instructions
11611 and it degenerates to vector decoding. Increase the cost of such
11612 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
11613 to split such addresses or even refuse such addresses at all.
11615 Following addressing modes are affected:
11620 The first and last case may be avoidable by explicitly coding the zero in
11621 memory address, but I don't have an AMD-K6 machine handy to check this theory. */
11625 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
11626 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
11627 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
11633 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
11634 this is used to form addresses to local data when -fPIC is in effect. */
11638 darwin_local_data_pic (rtx disp)
11640 return (GET_CODE (disp) == UNSPEC
11641 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
11644 /* Determine if a given RTX is a valid constant. We already know this
11645 satisfies CONSTANT_P. */
11648 ix86_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
11650 switch (GET_CODE (x))
11655 if (GET_CODE (x) == PLUS)
11657 if (!CONST_INT_P (XEXP (x, 1)))
11662 if (TARGET_MACHO && darwin_local_data_pic (x))
11665 /* Only some unspecs are valid as "constants". */
11666 if (GET_CODE (x) == UNSPEC)
11667 switch (XINT (x, 1))
11670 case UNSPEC_GOTOFF:
11671 case UNSPEC_PLTOFF:
11672 return TARGET_64BIT;
11674 case UNSPEC_NTPOFF:
11675 x = XVECEXP (x, 0, 0);
11676 return (GET_CODE (x) == SYMBOL_REF
11677 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11678 case UNSPEC_DTPOFF:
11679 x = XVECEXP (x, 0, 0);
11680 return (GET_CODE (x) == SYMBOL_REF
11681 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
11686 /* We must have drilled down to a symbol. */
11687 if (GET_CODE (x) == LABEL_REF)
11689 if (GET_CODE (x) != SYMBOL_REF)
11694 /* TLS symbols are never valid. */
11695 if (SYMBOL_REF_TLS_MODEL (x))
11698 /* DLLIMPORT symbols are never valid. */
11699 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
11700 && SYMBOL_REF_DLLIMPORT_P (x))
11704 /* mdynamic-no-pic */
11705 if (MACHO_DYNAMIC_NO_PIC_P)
11706 return machopic_symbol_defined_p (x);
11711 if (GET_MODE (x) == TImode
11712 && x != CONST0_RTX (TImode)
11718 if (!standard_sse_constant_p (x))
11725 /* Otherwise we handle everything else in the move patterns. */
11729 /* Determine if it's legal to put X into the constant pool. This
11730 is not possible for the address of thread-local symbols, which
11731 is checked above. */
11734 ix86_cannot_force_const_mem (enum machine_mode mode, rtx x)
11736 /* We can always put integral constants and vectors in memory. */
11737 switch (GET_CODE (x))
11747 return !ix86_legitimate_constant_p (mode, x);
11751 /* Nonzero if the constant value X is a legitimate general operand
11752 when generating PIC code. It is given that flag_pic is on and
11753 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
11756 legitimate_pic_operand_p (rtx x)
11760 switch (GET_CODE (x))
11763 inner = XEXP (x, 0);
11764 if (GET_CODE (inner) == PLUS
11765 && CONST_INT_P (XEXP (inner, 1)))
11766 inner = XEXP (inner, 0);
11768 /* Only some unspecs are valid as "constants". */
11769 if (GET_CODE (inner) == UNSPEC)
11770 switch (XINT (inner, 1))
11773 case UNSPEC_GOTOFF:
11774 case UNSPEC_PLTOFF:
11775 return TARGET_64BIT;
11777 x = XVECEXP (inner, 0, 0);
11778 return (GET_CODE (x) == SYMBOL_REF
11779 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11780 case UNSPEC_MACHOPIC_OFFSET:
11781 return legitimate_pic_address_disp_p (x);
11789 return legitimate_pic_address_disp_p (x);
11796 /* Determine if a given CONST RTX is a valid memory displacement in PIC mode. */
11800 legitimate_pic_address_disp_p (rtx disp)
11804 /* In 64bit mode we can allow direct addresses of symbols and labels
11805 when they are not dynamic symbols. */
11808 rtx op0 = disp, op1;
11810 switch (GET_CODE (disp))
11816 if (GET_CODE (XEXP (disp, 0)) != PLUS)
11818 op0 = XEXP (XEXP (disp, 0), 0);
11819 op1 = XEXP (XEXP (disp, 0), 1);
11820 if (!CONST_INT_P (op1)
11821 || INTVAL (op1) >= 16*1024*1024
11822 || INTVAL (op1) < -16*1024*1024)
11824 if (GET_CODE (op0) == LABEL_REF)
11826 if (GET_CODE (op0) == CONST
11827 && GET_CODE (XEXP (op0, 0)) == UNSPEC
11828 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
11830 if (GET_CODE (op0) == UNSPEC
11831 && XINT (op0, 1) == UNSPEC_PCREL)
11833 if (GET_CODE (op0) != SYMBOL_REF)
11838 /* TLS references should always be enclosed in UNSPEC. */
11839 if (SYMBOL_REF_TLS_MODEL (op0))
11841 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
11842 && ix86_cmodel != CM_LARGE_PIC)
11850 if (GET_CODE (disp) != CONST)
11852 disp = XEXP (disp, 0);
11856 /* It is unsafe to allow PLUS expressions; this limits the reachable
11857 distance of GOT tables. We should not need these anyway. */
11858 if (GET_CODE (disp) != UNSPEC
11859 || (XINT (disp, 1) != UNSPEC_GOTPCREL
11860 && XINT (disp, 1) != UNSPEC_GOTOFF
11861 && XINT (disp, 1) != UNSPEC_PCREL
11862 && XINT (disp, 1) != UNSPEC_PLTOFF))
11865 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
11866 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
11872 if (GET_CODE (disp) == PLUS)
11874 if (!CONST_INT_P (XEXP (disp, 1)))
11876 disp = XEXP (disp, 0);
11880 if (TARGET_MACHO && darwin_local_data_pic (disp))
11883 if (GET_CODE (disp) != UNSPEC)
11886 switch (XINT (disp, 1))
11891 /* We need to check for both symbols and labels because VxWorks loads
11892 text labels with @GOT rather than @GOTOFF. See gotoff_operand for details. */
11894 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
11895 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
11896 case UNSPEC_GOTOFF:
11897 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
11898 While the ABI also specifies a 32bit relocation, we don't produce it
11899 in the small PIC model at all. */
11900 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
11901 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
11903 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
11905 case UNSPEC_GOTTPOFF:
11906 case UNSPEC_GOTNTPOFF:
11907 case UNSPEC_INDNTPOFF:
11910 disp = XVECEXP (disp, 0, 0);
11911 return (GET_CODE (disp) == SYMBOL_REF
11912 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
11913 case UNSPEC_NTPOFF:
11914 disp = XVECEXP (disp, 0, 0);
11915 return (GET_CODE (disp) == SYMBOL_REF
11916 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
11917 case UNSPEC_DTPOFF:
11918 disp = XVECEXP (disp, 0, 0);
11919 return (GET_CODE (disp) == SYMBOL_REF
11920 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
11926 /* Recognizes RTL expressions that are valid memory addresses for an
11927 instruction. The MODE argument is the machine mode for the MEM
11928 expression that wants to use this address.
11930 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
11931 convert common non-canonical forms to canonical form so that they will be recognized. */
11935 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
11936 rtx addr, bool strict)
11938 struct ix86_address parts;
11939 rtx base, index, disp;
11940 HOST_WIDE_INT scale;
11942 /* Since a constant address in x32 is sign extended to 64bit,
11943 we have to prevent addresses from 0x80000000 to 0xffffffff. */
11945 && CONST_INT_P (addr)
11946 && INTVAL (addr) < 0)
11949 if (ix86_decompose_address (addr, &parts) <= 0)
11950 /* Decomposition failed. */
11954 index = parts.index;
11956 scale = parts.scale;
11958 /* Validate base register. */
11965 else if (GET_CODE (base) == SUBREG && REG_P (SUBREG_REG (base)))
11966 reg = SUBREG_REG (base);
11968 /* Base is not a register. */
11971 if (GET_MODE (base) != SImode && GET_MODE (base) != DImode)
11974 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
11975 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
11976 /* Base is not valid. */
11980 /* Validate index register. */
11987 else if (GET_CODE (index) == SUBREG && REG_P (SUBREG_REG (index)))
11988 reg = SUBREG_REG (index);
11990 /* Index is not a register. */
11993 if (GET_MODE (index) != SImode && GET_MODE (index) != DImode)
11996 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
11997 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
11998 /* Index is not valid. */
12002 /* Index and base should have the same mode. */
12004 && GET_MODE (base) != GET_MODE (index))
12007 /* Validate scale factor. */
12011 /* Scale without index. */
12014 if (scale != 2 && scale != 4 && scale != 8)
12015 /* Scale is not a valid multiplier. */
12019 /* Validate displacement. */
12022 if (GET_CODE (disp) == CONST
12023 && GET_CODE (XEXP (disp, 0)) == UNSPEC
12024 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
12025 switch (XINT (XEXP (disp, 0), 1))
12027 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
12028 used. While the ABI also specifies 32bit relocations, we don't produce
12029 them at all and use IP-relative addressing instead. */
12031 case UNSPEC_GOTOFF:
12032 gcc_assert (flag_pic);
12034 goto is_legitimate_pic;
12036 /* 64bit address unspec. */
12039 case UNSPEC_GOTPCREL:
12041 gcc_assert (flag_pic);
12042 goto is_legitimate_pic;
12044 case UNSPEC_GOTTPOFF:
12045 case UNSPEC_GOTNTPOFF:
12046 case UNSPEC_INDNTPOFF:
12047 case UNSPEC_NTPOFF:
12048 case UNSPEC_DTPOFF:
12051 case UNSPEC_STACK_CHECK:
12052 gcc_assert (flag_split_stack);
12056 /* Invalid address unspec. */
12060 else if (SYMBOLIC_CONST (disp)
12064 && MACHOPIC_INDIRECT
12065 && !machopic_operand_p (disp)
12071 if (TARGET_64BIT && (index || base))
12073 /* foo@dtpoff(%rX) is ok. */
12074 if (GET_CODE (disp) != CONST
12075 || GET_CODE (XEXP (disp, 0)) != PLUS
12076 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
12077 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
12078 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
12079 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
12080 /* Non-constant pic memory reference. */
12083 else if ((!TARGET_MACHO || flag_pic)
12084 && ! legitimate_pic_address_disp_p (disp))
12085 /* Displacement is an invalid pic construct. */
12088 else if (MACHO_DYNAMIC_NO_PIC_P
12089 && !ix86_legitimate_constant_p (Pmode, disp))
12090 /* Displacement must be referenced via a non_lazy_pointer. */
12094 /* This code used to verify that a symbolic pic displacement
12095 includes the pic_offset_table_rtx register.
12097 While this is a good idea, unfortunately these constructs may
12098 be created by "adds using lea" optimization for incorrect
12107 This code is nonsensical, but results in addressing
12108 the GOT table with a pic_offset_table_rtx base. We can't
12109 just refuse it easily, since it gets matched by
12110 "addsi3" pattern, which later gets split to lea in the
12111 case the output register differs from the input. While this
12112 could be handled by a separate addsi pattern for this case
12113 that never results in lea, disabling this test seems to be
12114 the easier and correct fix for the crash. */
12116 else if (GET_CODE (disp) != LABEL_REF
12117 && !CONST_INT_P (disp)
12118 && (GET_CODE (disp) != CONST
12119 || !ix86_legitimate_constant_p (Pmode, disp))
12120 && (GET_CODE (disp) != SYMBOL_REF
12121 || !ix86_legitimate_constant_p (Pmode, disp)))
12122 /* Displacement is not constant. */
12124 else if (TARGET_64BIT
12125 && !x86_64_immediate_operand (disp, VOIDmode))
12126 /* Displacement is out of range. */
12130 /* Everything looks valid. */
12134 /* Determine if a given RTX is a valid constant address. */
12137 constant_address_p (rtx x)
12139 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
12142 /* Return a unique alias set for the GOT. */
12144 static alias_set_type
12145 ix86_GOT_alias_set (void)
12147 static alias_set_type set = -1;
12149 set = new_alias_set ();
12153 /* Return a legitimate reference for ORIG (an address) using the
12154 register REG. If REG is 0, a new pseudo is generated.
12156 There are two types of references that must be handled:
12158 1. Global data references must load the address from the GOT, via
12159 the PIC reg. An insn is emitted to do this load, and the reg is
12162 2. Static data references, constant pool addresses, and code labels
12163 compute the address as an offset from the GOT, whose base is in
12164 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
12165 differentiate them from global data objects. The returned
12166 address is the PIC reg + an unspec constant.
12168 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
12169 reg also appears in the address. */
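/* As a concrete ia32 sketch: a global symbol X legitimizes to
 (mem (plus pic_reg (unspec [X] UNSPEC_GOT))), i.e. the load
 "movl X@GOT(%ebx), %reg", while a local symbol becomes
 pic_reg + X@GOTOFF with no memory load at all. */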
12172 legitimize_pic_address (rtx orig, rtx reg)
12175 rtx new_rtx = orig;
12179 if (TARGET_MACHO && !TARGET_64BIT)
12182 reg = gen_reg_rtx (Pmode);
12183 /* Use the generic Mach-O PIC machinery. */
12184 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
12188 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
12190 else if (TARGET_64BIT
12191 && ix86_cmodel != CM_SMALL_PIC
12192 && gotoff_operand (addr, Pmode))
12195 /* This symbol may be referenced via a displacement from the PIC
12196 base address (@GOTOFF). */
12198 if (reload_in_progress)
12199 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12200 if (GET_CODE (addr) == CONST)
12201 addr = XEXP (addr, 0);
12202 if (GET_CODE (addr) == PLUS)
12204 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
12206 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
12209 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
12210 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12212 tmpreg = gen_reg_rtx (Pmode);
12215 emit_move_insn (tmpreg, new_rtx);
12219 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
12220 tmpreg, 1, OPTAB_DIRECT);
12223 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
12225 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
12227 /* This symbol may be referenced via a displacement from the PIC
12228 base address (@GOTOFF). */
12230 if (reload_in_progress)
12231 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12232 if (GET_CODE (addr) == CONST)
12233 addr = XEXP (addr, 0);
12234 if (GET_CODE (addr) == PLUS)
12236 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
12238 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
12241 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
12242 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12243 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12247 emit_move_insn (reg, new_rtx);
12251 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
12252 /* We can't use @GOTOFF for text labels on VxWorks;
12253 see gotoff_operand. */
12254 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
12256 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
12258 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
12259 return legitimize_dllimport_symbol (addr, true);
12260 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
12261 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
12262 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
12264 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
12265 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
12269 /* For x64 PE-COFF there is no GOT table, so we use the address
12270 directly. */
12271 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
12273 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
12274 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12277 reg = gen_reg_rtx (Pmode);
12278 emit_move_insn (reg, new_rtx);
12281 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
12283 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
12284 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12285 new_rtx = gen_const_mem (Pmode, new_rtx);
12286 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
12289 reg = gen_reg_rtx (Pmode);
12290 /* Use gen_movsi directly, otherwise the address is loaded
12291 into a register for CSE. We don't want to CSE these addresses;
12292 instead we CSE addresses from the GOT table, so skip this. */
12293 emit_insn (gen_movsi (reg, new_rtx));
12298 /* This symbol must be referenced via a load from the
12299 Global Offset Table (@GOT). */
12301 if (reload_in_progress)
12302 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12303 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
12304 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12306 new_rtx = force_reg (Pmode, new_rtx);
12307 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12308 new_rtx = gen_const_mem (Pmode, new_rtx);
12309 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
12312 reg = gen_reg_rtx (Pmode);
12313 emit_move_insn (reg, new_rtx);
12319 if (CONST_INT_P (addr)
12320 && !x86_64_immediate_operand (addr, VOIDmode))
12324 emit_move_insn (reg, addr);
12328 new_rtx = force_reg (Pmode, addr);
12330 else if (GET_CODE (addr) == CONST)
12332 addr = XEXP (addr, 0);
12334 /* We must match stuff we generate before. Assume the only
12335 unspecs that can get here are ours. Not that we could do
12336 anything with them anyway.... */
12337 if (GET_CODE (addr) == UNSPEC
12338 || (GET_CODE (addr) == PLUS
12339 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
12341 gcc_assert (GET_CODE (addr) == PLUS);
12343 if (GET_CODE (addr) == PLUS)
12345 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
12347 /* Check first to see if this is a constant offset from a @GOTOFF
12348 symbol reference. */
12349 if (gotoff_operand (op0, Pmode)
12350 && CONST_INT_P (op1))
12354 if (reload_in_progress)
12355 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12356 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
12358 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
12359 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12360 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12364 emit_move_insn (reg, new_rtx);
12370 if (INTVAL (op1) < -16*1024*1024
12371 || INTVAL (op1) >= 16*1024*1024)
12373 if (!x86_64_immediate_operand (op1, Pmode))
12374 op1 = force_reg (Pmode, op1);
12375 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
12381 base = legitimize_pic_address (XEXP (addr, 0), reg);
12382 new_rtx = legitimize_pic_address (XEXP (addr, 1),
12383 base == reg ? NULL_RTX : reg);
12385 if (CONST_INT_P (new_rtx))
12386 new_rtx = plus_constant (base, INTVAL (new_rtx));
12389 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
12391 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
12392 new_rtx = XEXP (new_rtx, 1);
12394 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
12402 /* Load the thread pointer. If TO_REG is true, force it into a register. */
12405 get_thread_pointer (bool to_reg)
12407 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
12409 if (GET_MODE (tp) != Pmode)
12410 tp = convert_to_mode (Pmode, tp, 1);
12413 tp = copy_addr_to_reg (tp);
12418 /* Construct the SYMBOL_REF for the tls_get_addr function. */
12420 static GTY(()) rtx ix86_tls_symbol;
12423 ix86_tls_get_addr (void)
12425 if (!ix86_tls_symbol)
12428 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
12429 ? "___tls_get_addr" : "__tls_get_addr");
12431 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
12434 return ix86_tls_symbol;
12437 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
12439 static GTY(()) rtx ix86_tls_module_base_symbol;
12442 ix86_tls_module_base (void)
12444 if (!ix86_tls_module_base_symbol)
12446 ix86_tls_module_base_symbol
12447 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
12449 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
12450 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
12453 return ix86_tls_module_base_symbol;
12456 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
12457 false if we expect this to be used for a memory address and true if
12458 we expect to load the address into a register. */
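/* For orientation, a hedged sketch of the x86-64 ELF access sequences
   the models below correspond to (details vary with TARGET_GNU2_TLS
   and the assembler):

     global-dynamic:  data16 leaq x@tlsgd(%rip), %rdi
                      call __tls_get_addr@PLT         ; &x in %rax
     local-dynamic:   leaq x@tlsld(%rip), %rdi
                      call __tls_get_addr@PLT         ; module base
                      leaq x@dtpoff(%rax), %reg       ; shared via the
                                                      ; REG_EQUAL note
     initial-exec:    movq x@gottpoff(%rip), %reg
                      movq %fs:(%reg), ...
     local-exec:      movq %fs:x@tpoff, ...  */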
12461 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
12463 rtx dest, base, off;
12464 rtx pic = NULL_RTX, tp = NULL_RTX;
12469 case TLS_MODEL_GLOBAL_DYNAMIC:
12470 dest = gen_reg_rtx (Pmode);
12475 pic = pic_offset_table_rtx;
12478 pic = gen_reg_rtx (Pmode);
12479 emit_insn (gen_set_got (pic));
12483 if (TARGET_GNU2_TLS)
12486 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
12488 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
12490 tp = get_thread_pointer (true);
12491 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
12493 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
12497 rtx caddr = ix86_tls_get_addr ();
12501 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
12504 emit_call_insn (gen_tls_global_dynamic_64 (rax, x, caddr));
12505 insns = get_insns ();
12508 RTL_CONST_CALL_P (insns) = 1;
12509 emit_libcall_block (insns, dest, rax, x);
12512 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
12516 case TLS_MODEL_LOCAL_DYNAMIC:
12517 base = gen_reg_rtx (Pmode);
12522 pic = pic_offset_table_rtx;
12525 pic = gen_reg_rtx (Pmode);
12526 emit_insn (gen_set_got (pic));
12530 if (TARGET_GNU2_TLS)
12532 rtx tmp = ix86_tls_module_base ();
12535 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
12537 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
12539 tp = get_thread_pointer (true);
12540 set_unique_reg_note (get_last_insn (), REG_EQUAL,
12541 gen_rtx_MINUS (Pmode, tmp, tp));
12545 rtx caddr = ix86_tls_get_addr ();
12549 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, eqv;
12552 emit_call_insn (gen_tls_local_dynamic_base_64 (rax, caddr));
12553 insns = get_insns ();
12556 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
12557 share the LD_BASE result with other LD model accesses. */
12558 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
12559 UNSPEC_TLS_LD_BASE);
12561 RTL_CONST_CALL_P (insns) = 1;
12562 emit_libcall_block (insns, base, rax, eqv);
12565 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
12568 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
12569 off = gen_rtx_CONST (Pmode, off);
12571 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
12573 if (TARGET_GNU2_TLS)
12575 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
12577 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
12581 case TLS_MODEL_INITIAL_EXEC:
12584 if (TARGET_SUN_TLS)
12586 /* The Sun linker took the AMD64 TLS spec literally
12587 and can only handle %rax as the destination of the
12588 initial-exec code sequence. */
12590 dest = gen_reg_rtx (Pmode);
12591 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
12596 type = UNSPEC_GOTNTPOFF;
12600 if (reload_in_progress)
12601 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12602 pic = pic_offset_table_rtx;
12603 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
12605 else if (!TARGET_ANY_GNU_TLS)
12607 pic = gen_reg_rtx (Pmode);
12608 emit_insn (gen_set_got (pic));
12609 type = UNSPEC_GOTTPOFF;
12614 type = UNSPEC_INDNTPOFF;
12617 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
12618 off = gen_rtx_CONST (Pmode, off);
12620 off = gen_rtx_PLUS (Pmode, pic, off);
12621 off = gen_const_mem (Pmode, off);
12622 set_mem_alias_set (off, ix86_GOT_alias_set ());
12624 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12626 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
12627 off = force_reg (Pmode, off);
12628 return gen_rtx_PLUS (Pmode, base, off);
12632 base = get_thread_pointer (true);
12633 dest = gen_reg_rtx (Pmode);
12634 emit_insn (gen_subsi3 (dest, base, off));
12638 case TLS_MODEL_LOCAL_EXEC:
12639 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
12640 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12641 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
12642 off = gen_rtx_CONST (Pmode, off);
12644 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12646 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
12647 return gen_rtx_PLUS (Pmode, base, off);
12651 base = get_thread_pointer (true);
12652 dest = gen_reg_rtx (Pmode);
12653 emit_insn (gen_subsi3 (dest, base, off));
12658 gcc_unreachable ();
12664 /* Create or return the unique __imp_DECL dllimport symbol corresponding
12667 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
12668 htab_t dllimport_map;
12671 get_dllimport_decl (tree decl)
12673 struct tree_map *h, in;
12676 const char *prefix;
12677 size_t namelen, prefixlen;
12682 if (!dllimport_map)
12683 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
12685 in.hash = htab_hash_pointer (decl);
12686 in.base.from = decl;
12687 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
12688 h = (struct tree_map *) *loc;
12692 *loc = h = ggc_alloc_tree_map ();
12694 h->base.from = decl;
12695 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
12696 VAR_DECL, NULL, ptr_type_node);
12697 DECL_ARTIFICIAL (to) = 1;
12698 DECL_IGNORED_P (to) = 1;
12699 DECL_EXTERNAL (to) = 1;
12700 TREE_READONLY (to) = 1;
12702 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
12703 name = targetm.strip_name_encoding (name);
12704 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
12705 ? "*__imp_" : "*__imp__";
12706 namelen = strlen (name);
12707 prefixlen = strlen (prefix);
12708 imp_name = (char *) alloca (namelen + prefixlen + 1);
12709 memcpy (imp_name, prefix, prefixlen);
12710 memcpy (imp_name + prefixlen, name, namelen + 1);
12712 name = ggc_alloc_string (imp_name, namelen + prefixlen);
12713 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
12714 SET_SYMBOL_REF_DECL (rtl, to);
12715 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
12717 rtl = gen_const_mem (Pmode, rtl);
12718 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
12720 SET_DECL_RTL (to, rtl);
12721 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
12726 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
12727 true if we require the result be a register. */
12730 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
12735 gcc_assert (SYMBOL_REF_DECL (symbol));
12736 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
12738 x = DECL_RTL (imp_decl);
12740 x = force_reg (Pmode, x);
12744 /* Try machine-dependent ways of modifying an illegitimate address
12745 to be legitimate. If we find one, return the new, valid address.
12746 This macro is used in only one place: `memory_address' in explow.c.
12748 OLDX is the address as it was before break_out_memory_refs was called.
12749 In some cases it is useful to look at this to decide what needs to be done.
12751 It is always safe for this macro to do nothing. It exists to recognize
12752 opportunities to optimize the output.
12754 For the 80386, we handle X+REG by loading X into a register R and
12755 using R+REG. R will go in a general reg and indexing will be used.
12756 However, if REG is a broken-out memory address or multiplication,
12757 nothing needs to be done because REG can certainly go in a general reg.
12759 When -fpic is used, special handling is needed for symbolic references.
12760 See comments by legitimize_pic_address in i386.c for details. */
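/* Worked example (illustrative): the canonicalizations below rewrite
   (plus (ashift R1 (const_int 2)) R2) as (plus (mult R1 (const_int 4)) R2),
   since the address recognizers want the base/index/scale form that
   assembles as, e.g.,

     leal (%ebx,%ecx,4), %eax

   rather than an explicit shift-and-add sequence.  */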
12763 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
12764 enum machine_mode mode)
12769 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
12771 return legitimize_tls_address (x, (enum tls_model) log, false);
12772 if (GET_CODE (x) == CONST
12773 && GET_CODE (XEXP (x, 0)) == PLUS
12774 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
12775 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
12777 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
12778 (enum tls_model) log, false);
12779 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
12782 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
12784 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
12785 return legitimize_dllimport_symbol (x, true);
12786 if (GET_CODE (x) == CONST
12787 && GET_CODE (XEXP (x, 0)) == PLUS
12788 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
12789 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
12791 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
12792 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
12796 if (flag_pic && SYMBOLIC_CONST (x))
12797 return legitimize_pic_address (x, 0);
12800 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
12801 return machopic_indirect_data_reference (x, 0);
12804 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
12805 if (GET_CODE (x) == ASHIFT
12806 && CONST_INT_P (XEXP (x, 1))
12807 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
12810 log = INTVAL (XEXP (x, 1));
12811 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
12812 GEN_INT (1 << log));
12815 if (GET_CODE (x) == PLUS)
12817 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
12819 if (GET_CODE (XEXP (x, 0)) == ASHIFT
12820 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
12821 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
12824 log = INTVAL (XEXP (XEXP (x, 0), 1));
12825 XEXP (x, 0) = gen_rtx_MULT (Pmode,
12826 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
12827 GEN_INT (1 << log));
12830 if (GET_CODE (XEXP (x, 1)) == ASHIFT
12831 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
12832 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
12835 log = INTVAL (XEXP (XEXP (x, 1), 1));
12836 XEXP (x, 1) = gen_rtx_MULT (Pmode,
12837 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
12838 GEN_INT (1 << log));
12841 /* Put multiply first if it isn't already. */
12842 if (GET_CODE (XEXP (x, 1)) == MULT)
12844 rtx tmp = XEXP (x, 0);
12845 XEXP (x, 0) = XEXP (x, 1);
12850 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
12851 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
12852 created by virtual register instantiation, register elimination, and
12853 similar optimizations. */
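/* For instance (hypothetical operands), the incoming address

     (plus (mult (reg A) (const_int 4))
           (plus (reg B) (const_int 8)))

   is re-associated into

     (plus (plus (mult (reg A) (const_int 4)) (reg B))
           (const_int 8))

   which matches the base + index*scale + disp addressing mode.  */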
12854 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
12857 x = gen_rtx_PLUS (Pmode,
12858 gen_rtx_PLUS (Pmode, XEXP (x, 0),
12859 XEXP (XEXP (x, 1), 0)),
12860 XEXP (XEXP (x, 1), 1));
12864 /* Canonicalize (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
12865 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
12866 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
12867 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
12868 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
12869 && CONSTANT_P (XEXP (x, 1)))
12872 rtx other = NULL_RTX;
12874 if (CONST_INT_P (XEXP (x, 1)))
12876 constant = XEXP (x, 1);
12877 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
12879 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
12881 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
12882 other = XEXP (x, 1);
12890 x = gen_rtx_PLUS (Pmode,
12891 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
12892 XEXP (XEXP (XEXP (x, 0), 1), 0)),
12893 plus_constant (other, INTVAL (constant)));
12897 if (changed && ix86_legitimate_address_p (mode, x, false))
12900 if (GET_CODE (XEXP (x, 0)) == MULT)
12903 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
12906 if (GET_CODE (XEXP (x, 1)) == MULT)
12909 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
12913 && REG_P (XEXP (x, 1))
12914 && REG_P (XEXP (x, 0)))
12917 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
12920 x = legitimize_pic_address (x, 0);
12923 if (changed && ix86_legitimate_address_p (mode, x, false))
12926 if (REG_P (XEXP (x, 0)))
12928 rtx temp = gen_reg_rtx (Pmode);
12929 rtx val = force_operand (XEXP (x, 1), temp);
12932 if (GET_MODE (val) != Pmode)
12933 val = convert_to_mode (Pmode, val, 1);
12934 emit_move_insn (temp, val);
12937 XEXP (x, 1) = temp;
12941 else if (REG_P (XEXP (x, 1)))
12943 rtx temp = gen_reg_rtx (Pmode);
12944 rtx val = force_operand (XEXP (x, 0), temp);
12947 if (GET_MODE (val) != Pmode)
12948 val = convert_to_mode (Pmode, val, 1);
12949 emit_move_insn (temp, val);
12952 XEXP (x, 0) = temp;
12960 /* Print an integer constant expression in assembler syntax. Addition
12961 and subtraction are the only arithmetic that may appear in these
12962 expressions. FILE is the stdio stream to write to, X is the rtx, and
12963 CODE is the operand print code from the output string. */
12966 output_pic_addr_const (FILE *file, rtx x, int code)
12970 switch (GET_CODE (x))
12973 gcc_assert (flag_pic);
12978 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
12979 output_addr_const (file, x);
12982 const char *name = XSTR (x, 0);
12984 /* Mark the decl as referenced so that cgraph will
12985 output the function. */
12986 if (SYMBOL_REF_DECL (x))
12987 mark_decl_referenced (SYMBOL_REF_DECL (x));
12990 if (MACHOPIC_INDIRECT
12991 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
12992 name = machopic_indirection_name (x, /*stub_p=*/true);
12994 assemble_name (file, name);
12996 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
12997 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
12998 fputs ("@PLT", file);
13005 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
13006 assemble_name (asm_out_file, buf);
13010 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
13014 /* This used to output parentheses around the expression,
13015 but that does not work on the 386 (either ATT or BSD assembler). */
13016 output_pic_addr_const (file, XEXP (x, 0), code);
13020 if (GET_MODE (x) == VOIDmode)
13022 /* We can use %d if the number is <32 bits and positive. */
13023 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
13024 fprintf (file, "0x%lx%08lx",
13025 (unsigned long) CONST_DOUBLE_HIGH (x),
13026 (unsigned long) CONST_DOUBLE_LOW (x));
13028 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
13031 /* We can't handle floating point constants;
13032 TARGET_PRINT_OPERAND must handle them. */
13033 output_operand_lossage ("floating constant misused");
13037 /* Some assemblers need integer constants to appear first. */
13038 if (CONST_INT_P (XEXP (x, 0)))
13040 output_pic_addr_const (file, XEXP (x, 0), code);
13042 output_pic_addr_const (file, XEXP (x, 1), code);
13046 gcc_assert (CONST_INT_P (XEXP (x, 1)));
13047 output_pic_addr_const (file, XEXP (x, 1), code);
13049 output_pic_addr_const (file, XEXP (x, 0), code);
13055 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
13056 output_pic_addr_const (file, XEXP (x, 0), code);
13058 output_pic_addr_const (file, XEXP (x, 1), code);
13060 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
13064 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
13066 bool f = i386_asm_output_addr_const_extra (file, x);
13071 gcc_assert (XVECLEN (x, 0) == 1);
13072 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
13073 switch (XINT (x, 1))
13076 fputs ("@GOT", file);
13078 case UNSPEC_GOTOFF:
13079 fputs ("@GOTOFF", file);
13081 case UNSPEC_PLTOFF:
13082 fputs ("@PLTOFF", file);
13085 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13086 "(%rip)" : "[rip]", file);
13088 case UNSPEC_GOTPCREL:
13089 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13090 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
13092 case UNSPEC_GOTTPOFF:
13093 /* FIXME: This might be @TPOFF in Sun ld too. */
13094 fputs ("@gottpoff", file);
13097 fputs ("@tpoff", file);
13099 case UNSPEC_NTPOFF:
13101 fputs ("@tpoff", file);
13103 fputs ("@ntpoff", file);
13105 case UNSPEC_DTPOFF:
13106 fputs ("@dtpoff", file);
13108 case UNSPEC_GOTNTPOFF:
13110 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13111 "@gottpoff(%rip)": "@gottpoff[rip]", file);
13113 fputs ("@gotntpoff", file);
13115 case UNSPEC_INDNTPOFF:
13116 fputs ("@indntpoff", file);
13119 case UNSPEC_MACHOPIC_OFFSET:
13121 machopic_output_function_base_name (file);
13125 output_operand_lossage ("invalid UNSPEC as operand");
13131 output_operand_lossage ("invalid expression as operand");
13135 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
13136 We need to emit DTP-relative relocations. */
13138 static void ATTRIBUTE_UNUSED
13139 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
13141 fputs (ASM_LONG, file);
13142 output_addr_const (file, x);
13143 fputs ("@dtpoff", file);
13149 fputs (", 0", file);
13152 gcc_unreachable ();
13156 /* Return true if X is a representation of the PIC register. This copes
13157 with calls from ix86_find_base_term, where the register might have
13158 been replaced by a cselib value. */
13161 ix86_pic_register_p (rtx x)
13163 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
13164 return (pic_offset_table_rtx
13165 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
13167 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
13170 /* Helper function for ix86_delegitimize_address.
13171 Attempt to delegitimize TLS local-exec accesses. */
13174 ix86_delegitimize_tls_address (rtx orig_x)
13176 rtx x = orig_x, unspec;
13177 struct ix86_address addr;
13179 if (!TARGET_TLS_DIRECT_SEG_REFS)
13183 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
13185 if (ix86_decompose_address (x, &addr) == 0
13186 || addr.seg != (TARGET_64BIT ? SEG_FS : SEG_GS)
13187 || addr.disp == NULL_RTX
13188 || GET_CODE (addr.disp) != CONST)
13190 unspec = XEXP (addr.disp, 0);
13191 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
13192 unspec = XEXP (unspec, 0);
13193 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
13195 x = XVECEXP (unspec, 0, 0);
13196 gcc_assert (GET_CODE (x) == SYMBOL_REF);
13197 if (unspec != XEXP (addr.disp, 0))
13198 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
13201 rtx idx = addr.index;
13202 if (addr.scale != 1)
13203 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
13204 x = gen_rtx_PLUS (Pmode, idx, x);
13207 x = gen_rtx_PLUS (Pmode, addr.base, x);
13208 if (MEM_P (orig_x))
13209 x = replace_equiv_address_nv (orig_x, x);
13213 /* In the name of slightly smaller debug output, and to cater to
13214 general assembler lossage, recognize PIC+GOTOFF and turn it back
13215 into a direct symbol reference.
13217 On Darwin, this is necessary to avoid a crash, because Darwin
13218 has a different PIC label for each routine but the DWARF debugging
13219 information is not associated with any particular routine, so it's
13220 necessary to remove references to the PIC label from RTL stored by
13221 the DWARF output code. */
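/* A rough picture of the inverse mapping (illustrative only):

     (plus (reg:SI ebx)  ; PIC register
           (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)))

   delegitimizes back to plain (symbol_ref "foo"), so debug info can
   refer to the symbol instead of a PIC-register expression.  */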
13224 ix86_delegitimize_address (rtx x)
13226 rtx orig_x = delegitimize_mem_from_attrs (x);
13227 /* addend is NULL or some rtx if x is something+GOTOFF where
13228 something doesn't include the PIC register. */
13229 rtx addend = NULL_RTX;
13230 /* reg_addend is NULL or a multiple of some register. */
13231 rtx reg_addend = NULL_RTX;
13232 /* const_addend is NULL or a const_int. */
13233 rtx const_addend = NULL_RTX;
13234 /* This is the result, or NULL. */
13235 rtx result = NULL_RTX;
13244 if (GET_CODE (x) != CONST
13245 || GET_CODE (XEXP (x, 0)) != UNSPEC
13246 || (XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
13247 && XINT (XEXP (x, 0), 1) != UNSPEC_PCREL)
13248 || (!MEM_P (orig_x) && XINT (XEXP (x, 0), 1) != UNSPEC_PCREL))
13249 return ix86_delegitimize_tls_address (orig_x);
13250 x = XVECEXP (XEXP (x, 0), 0, 0);
13251 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
13253 x = simplify_gen_subreg (GET_MODE (orig_x), x,
13261 if (GET_CODE (x) != PLUS
13262 || GET_CODE (XEXP (x, 1)) != CONST)
13263 return ix86_delegitimize_tls_address (orig_x);
13265 if (ix86_pic_register_p (XEXP (x, 0)))
13266 /* %ebx + GOT/GOTOFF */
13268 else if (GET_CODE (XEXP (x, 0)) == PLUS)
13270 /* %ebx + %reg * scale + GOT/GOTOFF */
13271 reg_addend = XEXP (x, 0);
13272 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
13273 reg_addend = XEXP (reg_addend, 1);
13274 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
13275 reg_addend = XEXP (reg_addend, 0);
13278 reg_addend = NULL_RTX;
13279 addend = XEXP (x, 0);
13283 addend = XEXP (x, 0);
13285 x = XEXP (XEXP (x, 1), 0);
13286 if (GET_CODE (x) == PLUS
13287 && CONST_INT_P (XEXP (x, 1)))
13289 const_addend = XEXP (x, 1);
13293 if (GET_CODE (x) == UNSPEC
13294 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
13295 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
13296 result = XVECEXP (x, 0, 0);
13298 if (TARGET_MACHO && darwin_local_data_pic (x)
13299 && !MEM_P (orig_x))
13300 result = XVECEXP (x, 0, 0);
13303 return ix86_delegitimize_tls_address (orig_x);
13306 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
13308 result = gen_rtx_PLUS (Pmode, reg_addend, result);
13311 /* If the rest of the original X doesn't involve the PIC register, add
13312 addend and subtract pic_offset_table_rtx. This can happen e.g.
13314 leal (%ebx, %ecx, 4), %ecx
13316 movl foo@GOTOFF(%ecx), %edx
13317 in which case we return (%ecx - %ebx) + foo. */
13318 if (pic_offset_table_rtx)
13319 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
13320 pic_offset_table_rtx),
13325 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
13327 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
13328 if (result == NULL_RTX)
13334 /* If X is a machine specific address (i.e. a symbol or label being
13335 referenced as a displacement from the GOT implemented using an
13336 UNSPEC), then return the base term. Otherwise return X. */
13339 ix86_find_base_term (rtx x)
13345 if (GET_CODE (x) != CONST)
13347 term = XEXP (x, 0);
13348 if (GET_CODE (term) == PLUS
13349 && (CONST_INT_P (XEXP (term, 1))
13350 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
13351 term = XEXP (term, 0);
13352 if (GET_CODE (term) != UNSPEC
13353 || (XINT (term, 1) != UNSPEC_GOTPCREL
13354 && XINT (term, 1) != UNSPEC_PCREL))
13357 return XVECEXP (term, 0, 0);
13360 return ix86_delegitimize_address (x);
13364 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
13365 int fp, FILE *file)
13367 const char *suffix;
13369 if (mode == CCFPmode || mode == CCFPUmode)
13371 code = ix86_fp_compare_code_to_integer (code);
13375 code = reverse_condition (code);
13426 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
13430 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
13431 Those same assemblers have the same but opposite lossage on cmov. */
13432 if (mode == CCmode)
13433 suffix = fp ? "nbe" : "a";
13434 else if (mode == CCCmode)
13437 gcc_unreachable ();
13453 gcc_unreachable ();
13457 gcc_assert (mode == CCmode || mode == CCCmode);
13474 gcc_unreachable ();
13478 /* ??? As above. */
13479 gcc_assert (mode == CCmode || mode == CCCmode);
13480 suffix = fp ? "nb" : "ae";
13483 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
13487 /* ??? As above. */
13488 if (mode == CCmode)
13490 else if (mode == CCCmode)
13491 suffix = fp ? "nb" : "ae";
13493 gcc_unreachable ();
13496 suffix = fp ? "u" : "p";
13499 suffix = fp ? "nu" : "np";
13502 gcc_unreachable ();
13504 fputs (suffix, file);
13507 /* Print the name of register X to FILE based on its machine mode and number.
13508 If CODE is 'w', pretend the mode is HImode.
13509 If CODE is 'b', pretend the mode is QImode.
13510 If CODE is 'k', pretend the mode is SImode.
13511 If CODE is 'q', pretend the mode is DImode.
13512 If CODE is 'x', pretend the mode is V4SFmode.
13513 If CODE is 't', pretend the mode is V8SFmode.
13514 If CODE is 'h', pretend the reg is the 'high' byte register.
13515 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
13516 If CODE is 'd', duplicate the operand for AVX instruction.
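/* Example (assuming the operand lives in the ax register): code 'b'
   prints "al", 'w' prints "ax", 'k' prints "eax", 'q' prints "rax",
   and 'h' prints "ah"; with no code, the name matching the operand's
   own mode is chosen.  */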
13520 print_reg (rtx x, int code, FILE *file)
13523 bool duplicated = code == 'd' && TARGET_AVX;
13525 gcc_assert (x == pc_rtx
13526 || (REGNO (x) != ARG_POINTER_REGNUM
13527 && REGNO (x) != FRAME_POINTER_REGNUM
13528 && REGNO (x) != FLAGS_REG
13529 && REGNO (x) != FPSR_REG
13530 && REGNO (x) != FPCR_REG));
13532 if (ASSEMBLER_DIALECT == ASM_ATT)
13537 gcc_assert (TARGET_64BIT);
13538 fputs ("rip", file);
13542 if (code == 'w' || MMX_REG_P (x))
13544 else if (code == 'b')
13546 else if (code == 'k')
13548 else if (code == 'q')
13550 else if (code == 'y')
13552 else if (code == 'h')
13554 else if (code == 'x')
13556 else if (code == 't')
13559 code = GET_MODE_SIZE (GET_MODE (x));
13561 /* Irritatingly, the AMD extended registers use a different naming
13562 convention from the normal registers: "r%d[bwd]". */
13563 if (REX_INT_REG_P (x))
13565 gcc_assert (TARGET_64BIT);
13567 fprint_ul (file, REGNO (x) - FIRST_REX_INT_REG + 8);
13571 error ("extended registers have no high halves");
13586 error ("unsupported operand size for extended register");
13596 if (STACK_TOP_P (x))
13605 if (! ANY_FP_REG_P (x))
13606 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
13611 reg = hi_reg_name[REGNO (x)];
13614 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
13616 reg = qi_reg_name[REGNO (x)];
13619 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
13621 reg = qi_high_reg_name[REGNO (x)];
13626 gcc_assert (!duplicated);
13628 fputs (hi_reg_name[REGNO (x)] + 1, file);
13633 gcc_unreachable ();
13639 if (ASSEMBLER_DIALECT == ASM_ATT)
13640 fprintf (file, ", %%%s", reg);
13642 fprintf (file, ", %s", reg);
13646 /* Locate some local-dynamic symbol still in use by this function
13647 so that we can print its name in some tls_local_dynamic_base
13648 pattern. */
13651 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
13655 if (GET_CODE (x) == SYMBOL_REF
13656 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
13658 cfun->machine->some_ld_name = XSTR (x, 0);
13665 static const char *
13666 get_some_local_dynamic_name (void)
13670 if (cfun->machine->some_ld_name)
13671 return cfun->machine->some_ld_name;
13673 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
13674 if (NONDEBUG_INSN_P (insn)
13675 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
13676 return cfun->machine->some_ld_name;
13681 /* Meaning of CODE:
13682 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
13683 C -- print opcode suffix for set/cmov insn.
13684 c -- like C, but print reversed condition
13685 F,f -- likewise, but for floating-point.
13686 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
13688 R -- print the prefix for register names.
13689 z -- print the opcode suffix for the size of the current operand.
13690 Z -- likewise, with special suffixes for x87 instructions.
13691 * -- print a star (in certain assembler syntax)
13692 A -- print an absolute memory reference.
13693 w -- print the operand as if it's a "word" (HImode) even if it isn't.
13694 s -- print a shift double count, followed by the assembler's argument
13695 delimiter.
13696 b -- print the QImode name of the register for the indicated operand.
13697 %b0 would print %al if operands[0] is reg 0.
13698 w -- likewise, print the HImode name of the register.
13699 k -- likewise, print the SImode name of the register.
13700 q -- likewise, print the DImode name of the register.
13701 x -- likewise, print the V4SFmode name of the register.
13702 t -- likewise, print the V8SFmode name of the register.
13703 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
13704 y -- print "st(0)" instead of "st" as a register.
13705 d -- print duplicated register operand for AVX instruction.
13706 D -- print condition for SSE cmp instruction.
13707 P -- if PIC, print an @PLT suffix.
13708 p -- print raw symbol name.
13709 X -- don't print any sort of PIC '@' suffix for a symbol.
13710 & -- print some in-use local-dynamic symbol name.
13711 H -- print a memory address offset by 8; used for sse high-parts
13712 Y -- print condition for XOP pcom* instruction.
13713 + -- print a branch hint as 'cs' or 'ds' prefix
13714 ; -- print a semicolon (after prefixes due to bug in older gas).
13715 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
13716 @ -- print a segment register of thread base pointer load
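/* A small worked example of the codes above (hypothetical template):
   with operands[0] in register ax and of SImode, "mov%z0\t{%1, %k0|%k0, %1}"
   emits the "movl" suffix via 'z' and prints "%eax" for "%k0" in AT&T
   syntax; "%b0" would print "%al" instead.  */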
13720 ix86_print_operand (FILE *file, rtx x, int code)
13727 if (ASSEMBLER_DIALECT == ASM_ATT)
13733 const char *name = get_some_local_dynamic_name ();
13735 output_operand_lossage ("'%%&' used without any "
13736 "local dynamic TLS references");
13738 assemble_name (file, name);
13743 switch (ASSEMBLER_DIALECT)
13750 /* Intel syntax. For absolute addresses, registers should not
13751 be surrounded by braces. */
13755 ix86_print_operand (file, x, 0);
13762 gcc_unreachable ();
13765 ix86_print_operand (file, x, 0);
13770 if (ASSEMBLER_DIALECT == ASM_ATT)
13775 if (ASSEMBLER_DIALECT == ASM_ATT)
13780 if (ASSEMBLER_DIALECT == ASM_ATT)
13785 if (ASSEMBLER_DIALECT == ASM_ATT)
13790 if (ASSEMBLER_DIALECT == ASM_ATT)
13795 if (ASSEMBLER_DIALECT == ASM_ATT)
13800 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
13802 /* Opcodes don't get size suffixes if using Intel opcodes. */
13803 if (ASSEMBLER_DIALECT == ASM_INTEL)
13806 switch (GET_MODE_SIZE (GET_MODE (x)))
13825 output_operand_lossage
13826 ("invalid operand size for operand code '%c'", code);
13831 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
13833 (0, "non-integer operand used with operand code '%c'", code);
13837 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
13838 if (ASSEMBLER_DIALECT == ASM_INTEL)
13841 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
13843 switch (GET_MODE_SIZE (GET_MODE (x)))
13846 #ifdef HAVE_AS_IX86_FILDS
13856 #ifdef HAVE_AS_IX86_FILDQ
13859 fputs ("ll", file);
13867 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
13869 /* 387 opcodes don't get size suffixes
13870 if the operands are registers. */
13871 if (STACK_REG_P (x))
13874 switch (GET_MODE_SIZE (GET_MODE (x)))
13895 output_operand_lossage
13896 ("invalid operand type used with operand code '%c'", code);
13900 output_operand_lossage
13901 ("invalid operand size for operand code '%c'", code);
13919 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
13921 ix86_print_operand (file, x, 0);
13922 fputs (", ", file);
13927 /* A little bit of brain damage here. The SSE compare instructions
13928 use completely different names for the comparisons than the fp
13929 conditional moves do. */
13932 switch (GET_CODE (x))
13935 fputs ("eq", file);
13938 fputs ("eq_us", file);
13941 fputs ("lt", file);
13944 fputs ("nge", file);
13947 fputs ("le", file);
13950 fputs ("ngt", file);
13953 fputs ("unord", file);
13956 fputs ("neq", file);
13959 fputs ("neq_oq", file);
13962 fputs ("ge", file);
13965 fputs ("nlt", file);
13968 fputs ("gt", file);
13971 fputs ("nle", file);
13974 fputs ("ord", file);
13977 output_operand_lossage ("operand is not a condition code, "
13978 "invalid operand code 'D'");
13984 switch (GET_CODE (x))
13988 fputs ("eq", file);
13992 fputs ("lt", file);
13996 fputs ("le", file);
13999 fputs ("unord", file);
14003 fputs ("neq", file);
14007 fputs ("nlt", file);
14011 fputs ("nle", file);
14014 fputs ("ord", file);
14017 output_operand_lossage ("operand is not a condition code, "
14018 "invalid operand code 'D'");
14024 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14025 if (ASSEMBLER_DIALECT == ASM_ATT)
14027 switch (GET_MODE (x))
14029 case HImode: putc ('w', file); break;
14031 case SFmode: putc ('l', file); break;
14033 case DFmode: putc ('q', file); break;
14034 default: gcc_unreachable ();
14041 if (!COMPARISON_P (x))
14043 output_operand_lossage ("operand is neither a constant nor a "
14044 "condition code, invalid operand code "
14048 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
14051 if (!COMPARISON_P (x))
14053 output_operand_lossage ("operand is neither a constant nor a "
14054 "condition code, invalid operand code "
14058 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14059 if (ASSEMBLER_DIALECT == ASM_ATT)
14062 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
14065 /* Like above, but reverse condition */
14067 /* Check to see if argument to %c is really a constant
14068 and not a condition code which needs to be reversed. */
14069 if (!COMPARISON_P (x))
14071 output_operand_lossage ("operand is neither a constant nor a "
14072 "condition code, invalid operand "
14076 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
14079 if (!COMPARISON_P (x))
14081 output_operand_lossage ("operand is neither a constant nor a "
14082 "condition code, invalid operand "
14086 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14087 if (ASSEMBLER_DIALECT == ASM_ATT)
14090 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
14094 /* It doesn't actually matter what mode we use here, as we're
14095 only going to use this for printing. */
14096 x = adjust_address_nv (x, DImode, 8);
14104 || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS)
14107 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
14110 int pred_val = INTVAL (XEXP (x, 0));
14112 if (pred_val < REG_BR_PROB_BASE * 45 / 100
14113 || pred_val > REG_BR_PROB_BASE * 55 / 100)
14115 int taken = pred_val > REG_BR_PROB_BASE / 2;
14116 int cputaken = final_forward_branch_p (current_output_insn) == 0;
14118 /* Emit hints only in the case the default branch prediction
14119 heuristics would fail. */
14120 if (taken != cputaken)
14122 /* We use 3e (DS) prefix for taken branches and
14123 2e (CS) prefix for not taken branches. */
14125 fputs ("ds ; ", file);
14127 fputs ("cs ; ", file);
14135 switch (GET_CODE (x))
14138 fputs ("neq", file);
14141 fputs ("eq", file);
14145 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
14149 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
14153 fputs ("le", file);
14157 fputs ("lt", file);
14160 fputs ("unord", file);
14163 fputs ("ord", file);
14166 fputs ("ueq", file);
14169 fputs ("nlt", file);
14172 fputs ("nle", file);
14175 fputs ("ule", file);
14178 fputs ("ult", file);
14181 fputs ("une", file);
14184 output_operand_lossage ("operand is not a condition code, "
14185 "invalid operand code 'Y'");
14191 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
14197 if (ASSEMBLER_DIALECT == ASM_ATT)
14200 /* The kernel uses a different segment register for performance
14201 reasons; this way a system call does not have to trash the
14202 userspace segment register, which would be expensive. */
14203 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
14204 fputs ("fs", file);
14206 fputs ("gs", file);
14210 putc (TARGET_AVX2 ? 'i' : 'f', file);
14214 output_operand_lossage ("invalid operand code '%c'", code);
14219 print_reg (x, code, file);
14221 else if (MEM_P (x))
14223 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
14224 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
14225 && GET_MODE (x) != BLKmode)
14228 switch (GET_MODE_SIZE (GET_MODE (x)))
14230 case 1: size = "BYTE"; break;
14231 case 2: size = "WORD"; break;
14232 case 4: size = "DWORD"; break;
14233 case 8: size = "QWORD"; break;
14234 case 12: size = "TBYTE"; break;
14236 if (GET_MODE (x) == XFmode)
14241 case 32: size = "YMMWORD"; break;
14243 gcc_unreachable ();
14246 /* Check for explicit size override (codes 'b', 'w', 'k',
14250 else if (code == 'w')
14252 else if (code == 'k')
14254 else if (code == 'q')
14256 else if (code == 'x')
14259 fputs (size, file);
14260 fputs (" PTR ", file);
14264 /* Avoid (%rip) for call operands. */
14265 if (CONSTANT_ADDRESS_P (x) && code == 'P'
14266 && !CONST_INT_P (x))
14267 output_addr_const (file, x);
14268 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
14269 output_operand_lossage ("invalid constraints for operand");
14271 output_address (x);
14274 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
14279 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
14280 REAL_VALUE_TO_TARGET_SINGLE (r, l);
14282 if (ASSEMBLER_DIALECT == ASM_ATT)
14284 /* Sign extend 32bit SFmode immediate to 8 bytes. */
14286 fprintf (file, "0x%08llx", (unsigned long long) (int) l);
14288 fprintf (file, "0x%08x", (unsigned int) l);
14291 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
14296 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
14297 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
14299 if (ASSEMBLER_DIALECT == ASM_ATT)
14301 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
14304 /* These float cases don't actually occur as immediate operands. */
14305 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
14309 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
14310 fputs (dstr, file);
14315 /* We have patterns that allow zero sets of memory, for instance.
14316 In 64-bit mode, we should probably support all 8-byte vectors,
14317 since we can in fact encode that into an immediate. */
14318 if (GET_CODE (x) == CONST_VECTOR)
14320 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
14324 if (code != 'P' && code != 'p')
14326 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
14328 if (ASSEMBLER_DIALECT == ASM_ATT)
14331 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
14332 || GET_CODE (x) == LABEL_REF)
14334 if (ASSEMBLER_DIALECT == ASM_ATT)
14337 fputs ("OFFSET FLAT:", file);
14340 if (CONST_INT_P (x))
14341 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14342 else if (flag_pic || MACHOPIC_INDIRECT)
14343 output_pic_addr_const (file, x, code);
14345 output_addr_const (file, x);
14350 ix86_print_operand_punct_valid_p (unsigned char code)
14352 return (code == '@' || code == '*' || code == '+'
14353 || code == '&' || code == ';' || code == '~');
14356 /* Print a memory operand whose address is ADDR. */
14359 ix86_print_operand_address (FILE *file, rtx addr)
14361 struct ix86_address parts;
14362 rtx base, index, disp;
14367 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
14369 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
14370 gcc_assert (parts.index == NULL_RTX);
14371 parts.index = XVECEXP (addr, 0, 1);
14372 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
14373 addr = XVECEXP (addr, 0, 0);
14377 ok = ix86_decompose_address (addr, &parts);
14381 if (parts.base && GET_CODE (parts.base) == SUBREG)
14383 rtx tmp = SUBREG_REG (parts.base);
14384 parts.base = simplify_subreg (GET_MODE (parts.base),
14385 tmp, GET_MODE (tmp), 0);
14388 if (parts.index && GET_CODE (parts.index) == SUBREG)
14390 rtx tmp = SUBREG_REG (parts.index);
14391 parts.index = simplify_subreg (GET_MODE (parts.index),
14392 tmp, GET_MODE (tmp), 0);
14396 index = parts.index;
14398 scale = parts.scale;
14406 if (ASSEMBLER_DIALECT == ASM_ATT)
14408 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
14411 gcc_unreachable ();
14414 /* Use the one-byte-shorter RIP-relative addressing in 64-bit mode. */
14415 if (TARGET_64BIT && !base && !index)
14419 if (GET_CODE (disp) == CONST
14420 && GET_CODE (XEXP (disp, 0)) == PLUS
14421 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
14422 symbol = XEXP (XEXP (disp, 0), 0);
14424 if (GET_CODE (symbol) == LABEL_REF
14425 || (GET_CODE (symbol) == SYMBOL_REF
14426 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
14429 if (!base && !index)
14431 /* Displacement only requires special attention. */
14433 if (CONST_INT_P (disp))
14435 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
14436 fputs ("ds:", file);
14437 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
14440 output_pic_addr_const (file, disp, 0);
14442 output_addr_const (file, disp);
14448 /* Print SImode registers for zero-extended addresses to force
14449 addr32 prefix. Otherwise print DImode registers to avoid it. */
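/* E.g. (illustrative): for a zero-extended address this prints
   "(%eax)" rather than "(%rax)", making the assembler emit the 0x67
   address-size prefix so only the low 32 bits take part.  */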
14451 code = ((GET_CODE (addr) == ZERO_EXTEND
14452 || GET_CODE (addr) == AND)
14456 if (ASSEMBLER_DIALECT == ASM_ATT)
14461 output_pic_addr_const (file, disp, 0);
14462 else if (GET_CODE (disp) == LABEL_REF)
14463 output_asm_label (disp);
14465 output_addr_const (file, disp);
14470 print_reg (base, code, file);
14474 print_reg (index, vsib ? 0 : code, file);
14475 if (scale != 1 || vsib)
14476 fprintf (file, ",%d", scale);
14482 rtx offset = NULL_RTX;
14486 /* Pull out the offset of a symbol; print any symbol itself. */
14487 if (GET_CODE (disp) == CONST
14488 && GET_CODE (XEXP (disp, 0)) == PLUS
14489 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
14491 offset = XEXP (XEXP (disp, 0), 1);
14492 disp = gen_rtx_CONST (VOIDmode,
14493 XEXP (XEXP (disp, 0), 0));
14497 output_pic_addr_const (file, disp, 0);
14498 else if (GET_CODE (disp) == LABEL_REF)
14499 output_asm_label (disp);
14500 else if (CONST_INT_P (disp))
14503 output_addr_const (file, disp);
14509 print_reg (base, code, file);
14512 if (INTVAL (offset) >= 0)
14514 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
14518 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
14525 print_reg (index, vsib ? 0 : code, file);
14526 if (scale != 1 || vsib)
14527 fprintf (file, "*%d", scale);
14534 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
14537 i386_asm_output_addr_const_extra (FILE *file, rtx x)
14541 if (GET_CODE (x) != UNSPEC)
14544 op = XVECEXP (x, 0, 0);
14545 switch (XINT (x, 1))
14547 case UNSPEC_GOTTPOFF:
14548 output_addr_const (file, op);
14549 /* FIXME: This might be @TPOFF in Sun ld. */
14550 fputs ("@gottpoff", file);
14553 output_addr_const (file, op);
14554 fputs ("@tpoff", file);
14556 case UNSPEC_NTPOFF:
14557 output_addr_const (file, op);
14559 fputs ("@tpoff", file);
14561 fputs ("@ntpoff", file);
14563 case UNSPEC_DTPOFF:
14564 output_addr_const (file, op);
14565 fputs ("@dtpoff", file);
14567 case UNSPEC_GOTNTPOFF:
14568 output_addr_const (file, op);
14570 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14571 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
14573 fputs ("@gotntpoff", file);
14575 case UNSPEC_INDNTPOFF:
14576 output_addr_const (file, op);
14577 fputs ("@indntpoff", file);
14580 case UNSPEC_MACHOPIC_OFFSET:
14581 output_addr_const (file, op);
14583 machopic_output_function_base_name (file);
14587 case UNSPEC_STACK_CHECK:
14591 gcc_assert (flag_split_stack);
14593 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
14594 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
14596 gcc_unreachable ();
14599 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
14610 /* Split one or more double-mode RTL references into pairs of half-mode
14611 references. The RTL can be REG, offsettable MEM, integer constant, or
14612 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
14613 split and "num" is its length. lo_half and hi_half are output arrays
14614 that parallel "operands". */
14617 split_double_mode (enum machine_mode mode, rtx operands[],
14618 int num, rtx lo_half[], rtx hi_half[])
14620 enum machine_mode half_mode;
14626 half_mode = DImode;
14629 half_mode = SImode;
14632 gcc_unreachable ();
14635 byte = GET_MODE_SIZE (half_mode);
14639 rtx op = operands[num];
14641 /* simplify_subreg refuses to split volatile memory addresses,
14642 but we still have to handle them. */
14645 lo_half[num] = adjust_address (op, half_mode, 0);
14646 hi_half[num] = adjust_address (op, half_mode, byte);
14650 lo_half[num] = simplify_gen_subreg (half_mode, op,
14651 GET_MODE (op) == VOIDmode
14652 ? mode : GET_MODE (op), 0);
14653 hi_half[num] = simplify_gen_subreg (half_mode, op,
14654 GET_MODE (op) == VOIDmode
14655 ? mode : GET_MODE (op), byte);
14660 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
14661 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
14662 is the expression of the binary operation. The output may either be
14663 emitted here, or returned to the caller, like all output_* functions.
14665 There is no guarantee that the operands are the same mode, as they
14666 might be within FLOAT or FLOAT_EXTEND expressions. */
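/* Illustrative only: for x + y with x in %st(0) and y in memory, the
   code below yields "fadd%Z2"-style output; for two stack registers
   it picks between the non-popping "fadd %st(r), %st" form and the
   popping "faddp" form, depending on which operand is the destination
   and whether a REG_DEAD note says an input dies.  */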
14668 #ifndef SYSV386_COMPAT
14669 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
14670 wants to fix the assemblers because that causes incompatibility
14671 with gcc. No-one wants to fix gcc because that causes
14672 incompatibility with assemblers... You can use the option of
14673 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
14674 #define SYSV386_COMPAT 1
14678 output_387_binary_op (rtx insn, rtx *operands)
14680 static char buf[40];
14683 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
14685 #ifdef ENABLE_CHECKING
14686 /* Even if we do not want to check the inputs, this documents the
14687 input constraints, which helps in understanding the following code. */
14688 if (STACK_REG_P (operands[0])
14689 && ((REG_P (operands[1])
14690 && REGNO (operands[0]) == REGNO (operands[1])
14691 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
14692 || (REG_P (operands[2])
14693 && REGNO (operands[0]) == REGNO (operands[2])
14694 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
14695 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
14698 gcc_assert (is_sse);
14701 switch (GET_CODE (operands[3]))
14704 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14705 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14713 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14714 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14722 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14723 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14731 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14732 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14740 gcc_unreachable ();
14747 strcpy (buf, ssep);
14748 if (GET_MODE (operands[0]) == SFmode)
14749 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
14751 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
14755 strcpy (buf, ssep + 1);
14756 if (GET_MODE (operands[0]) == SFmode)
14757 strcat (buf, "ss\t{%2, %0|%0, %2}");
14759 strcat (buf, "sd\t{%2, %0|%0, %2}");
14765 switch (GET_CODE (operands[3]))
14769 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
14771 rtx temp = operands[2];
14772 operands[2] = operands[1];
14773 operands[1] = temp;
14776 /* Now we know operands[0] == operands[1]. */
14778 if (MEM_P (operands[2]))
14784 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
14786 if (STACK_TOP_P (operands[0]))
14787 /* How is it that we are storing to a dead operand[2]?
14788 Well, presumably operands[1] is dead too. We can't
14789 store the result to st(0) as st(0) gets popped on this
14790 instruction. Instead store to operands[2] (which I
14791 think has to be st(1)). st(1) will be popped later.
14792 gcc <= 2.8.1 didn't have this check and generated
14793 assembly code that the Unixware assembler rejected. */
14794 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
14796 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
14800 if (STACK_TOP_P (operands[0]))
14801 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
14803 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
14808 if (MEM_P (operands[1]))
14814 if (MEM_P (operands[2]))
14820 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
14823 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
14824 derived assemblers, confusingly reverse the direction of
14825 the operation for fsub{r} and fdiv{r} when the
14826 destination register is not st(0). The Intel assembler
14827 doesn't have this brain damage. Read !SYSV386_COMPAT to
14828 figure out what the hardware really does. */
14829 if (STACK_TOP_P (operands[0]))
14830 p = "{p\t%0, %2|rp\t%2, %0}";
14832 p = "{rp\t%2, %0|p\t%0, %2}";
14834 if (STACK_TOP_P (operands[0]))
14835 /* As above for fmul/fadd, we can't store to st(0). */
14836 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
14838 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
14843 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
14846 if (STACK_TOP_P (operands[0]))
14847 p = "{rp\t%0, %1|p\t%1, %0}";
14849 p = "{p\t%1, %0|rp\t%0, %1}";
14851 if (STACK_TOP_P (operands[0]))
14852 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
14854 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
14859 if (STACK_TOP_P (operands[0]))
14861 if (STACK_TOP_P (operands[1]))
14862 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
14864 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
14867 else if (STACK_TOP_P (operands[1]))
14870 p = "{\t%1, %0|r\t%0, %1}";
14872 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
14878 p = "{r\t%2, %0|\t%0, %2}";
14880 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
14886 gcc_unreachable ();
14893 /* Return needed mode for entity in optimize_mode_switching pass. */
14896 ix86_mode_needed (int entity, rtx insn)
14898 enum attr_i387_cw mode;
14900 /* The mode UNINITIALIZED is used to store the control word after a
14901 function call or ASM pattern. The mode ANY specifies that the
14902 function has no requirements on the control word and makes no
14903 changes to the bits we are interested in. */
14906 || (NONJUMP_INSN_P (insn)
14907 && (asm_noperands (PATTERN (insn)) >= 0
14908 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
14909 return I387_CW_UNINITIALIZED;
14911 if (recog_memoized (insn) < 0)
14912 return I387_CW_ANY;
14914 mode = get_attr_i387_cw (insn);
14919 if (mode == I387_CW_TRUNC)
14924 if (mode == I387_CW_FLOOR)
14929 if (mode == I387_CW_CEIL)
14934 if (mode == I387_CW_MASK_PM)
14939 gcc_unreachable ();
14942 return I387_CW_ANY;
14945 /* Output code to initialize control word copies used by trunc?f?i and
14946 rounding patterns. CURRENT_MODE is set to the current control word,
14947 while NEW_MODE is set to the new control word. */
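/* Background on the magic constants below (x87 control word layout):
   bits 10-11 form the rounding-control field -- 00 = to nearest,
   01 = down (floor), 10 = up (ceil), 11 = toward zero (trunc) --
   hence the 0x0c00 mask and the 0x0400/0x0800 values; bit 5 (0x0020)
   is the precision-exception mask used for nearbyint.  */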
14950 emit_i387_cw_initialization (int mode)
14952 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
14955 enum ix86_stack_slot slot;
14957 rtx reg = gen_reg_rtx (HImode);
14959 emit_insn (gen_x86_fnstcw_1 (stored_mode));
14960 emit_move_insn (reg, copy_rtx (stored_mode));
14962 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
14963 || optimize_function_for_size_p (cfun))
14967 case I387_CW_TRUNC:
14968 /* round toward zero (truncate) */
14969 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
14970 slot = SLOT_CW_TRUNC;
14973 case I387_CW_FLOOR:
14974 /* round down toward -oo */
14975 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
14976 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
14977 slot = SLOT_CW_FLOOR;
14981 /* round up toward +oo */
14982 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
14983 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
14984 slot = SLOT_CW_CEIL;
14987 case I387_CW_MASK_PM:
14988 /* mask precision exception for nearbyint() */
14989 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
14990 slot = SLOT_CW_MASK_PM;
14994 gcc_unreachable ();
15001 case I387_CW_TRUNC:
15002 /* round toward zero (truncate) */
15003 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
15004 slot = SLOT_CW_TRUNC;
15007 case I387_CW_FLOOR:
15008 /* round down toward -oo */
15009 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
15010 slot = SLOT_CW_FLOOR;
15014 /* round up toward +oo */
15015 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
15016 slot = SLOT_CW_CEIL;
15019 case I387_CW_MASK_PM:
15020 /* mask precision exception for nearbyint() */
15021 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
15022 slot = SLOT_CW_MASK_PM;
15026 gcc_unreachable ();
15030 gcc_assert (slot < MAX_386_STACK_LOCALS);
15032 new_mode = assign_386_stack_local (HImode, slot);
15033 emit_move_insn (new_mode, reg);
15036 /* Output code for INSN to convert a float to a signed int. OPERANDS
15037 are the insn operands. The output may be [HSD]Imode and the input
15038 operand may be [SDX]Fmode. */
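/* Typical emitted sequence (illustrative, non-fisttp case with a
   rounding-mode change):

     fldcw  %3     ; switch to the round-toward-zero control word
     fistpl %0     ; store (and pop) as a 32-bit integer
     fldcw  %2     ; restore the previous control word  */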
15041 output_fix_trunc (rtx insn, rtx *operands, bool fisttp)
15043 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
15044 int dimode_p = GET_MODE (operands[0]) == DImode;
15045 int round_mode = get_attr_i387_cw (insn);
15047 /* Jump through a hoop or two for DImode, since the hardware has no
15048 non-popping instruction. We used to do this a different way, but
15049 that was somewhat fragile and broke with post-reload splitters. */
15050 if ((dimode_p || fisttp) && !stack_top_dies)
15051 output_asm_insn ("fld\t%y1", operands);
15053 gcc_assert (STACK_TOP_P (operands[1]));
15054 gcc_assert (MEM_P (operands[0]));
15055 gcc_assert (GET_MODE (operands[1]) != TFmode);
15058 output_asm_insn ("fisttp%Z0\t%0", operands);
15061 if (round_mode != I387_CW_ANY)
15062 output_asm_insn ("fldcw\t%3", operands);
15063 if (stack_top_dies || dimode_p)
15064 output_asm_insn ("fistp%Z0\t%0", operands);
15066 output_asm_insn ("fist%Z0\t%0", operands);
15067 if (round_mode != I387_CW_ANY)
15068 output_asm_insn ("fldcw\t%2", operands);
15074 /* Output code for x87 ffreep insn. The OPNO argument, which may only
15075 have the values zero or one, indicates the ffreep insn's operand
15076 from the OPERANDS array. */
15078 static const char *
15079 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
15081 if (TARGET_USE_FFREEP)
15082 #ifdef HAVE_AS_IX86_FFREEP
15083 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
15086 static char retval[32];
15087 int regno = REGNO (operands[opno]);
15089 gcc_assert (FP_REGNO_P (regno));
15091 regno -= FIRST_STACK_REG;
15093 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
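/* For example, REGNO 3 yields ".value 0xc3df"; stored little-endian
   these are the bytes 0xdf 0xc3, the raw encoding of "ffreep %st(3)",
   emitted by hand for assemblers that lack the mnemonic.  */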
15098 return opno ? "fstp\t%y1" : "fstp\t%y0";
15102 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
15103 should be used. UNORDERED_P is true when fucom should be used. */
15106 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
15108 int stack_top_dies;
15109 rtx cmp_op0, cmp_op1;
15110 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
15114 cmp_op0 = operands[0];
15115 cmp_op1 = operands[1];
15119 cmp_op0 = operands[1];
15120 cmp_op1 = operands[2];
15125 if (GET_MODE (operands[0]) == SFmode)
15127 return "%vucomiss\t{%1, %0|%0, %1}";
15129 return "%vcomiss\t{%1, %0|%0, %1}";
15132 return "%vucomisd\t{%1, %0|%0, %1}";
15134 return "%vcomisd\t{%1, %0|%0, %1}";
15137 gcc_assert (STACK_TOP_P (cmp_op0));
15139 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
15141 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
15143 if (stack_top_dies)
15145 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
15146 return output_387_ffreep (operands, 1);
15149 return "ftst\n\tfnstsw\t%0";
15152 if (STACK_REG_P (cmp_op1)
15154 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
15155 && REGNO (cmp_op1) != FIRST_STACK_REG)
15157 /* If the top of the 387 stack dies, and the other operand
15158 is also a stack register that dies, then this must be a
15159 `fcompp' float compare. */
15163 /* There is no double popping fcomi variant. Fortunately,
15164 eflags is immune from the fstp's cc clobbering. */
15166 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
15168 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
15169 return output_387_ffreep (operands, 0);
15174 return "fucompp\n\tfnstsw\t%0";
15176 return "fcompp\n\tfnstsw\t%0";
15181 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
15183 static const char * const alt[16] =
15185 "fcom%Z2\t%y2\n\tfnstsw\t%0",
15186 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
15187 "fucom%Z2\t%y2\n\tfnstsw\t%0",
15188 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
15190 "ficom%Z2\t%y2\n\tfnstsw\t%0",
15191 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
15195 "fcomi\t{%y1, %0|%0, %y1}",
15196 "fcomip\t{%y1, %0|%0, %y1}",
15197 "fucomi\t{%y1, %0|%0, %y1}",
15198 "fucomip\t{%y1, %0|%0, %y1}",
15209 mask = eflags_p << 3;
15210 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
15211 mask |= unordered_p << 1;
15212 mask |= stack_top_dies;
15214 gcc_assert (mask < 16);
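/* Worked example (illustrative): "fucomip" with a dying stack top
   encodes as eflags_p=1, intmode=0, unordered_p=1, stack_top_dies=1,
   i.e. mask = (1<<3) | (1<<1) | 1 = 11, selecting the "fucomip"
   template in the table above.  */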
15223 ix86_output_addr_vec_elt (FILE *file, int value)
15225 const char *directive = ASM_LONG;
15229 directive = ASM_QUAD;
15231 gcc_assert (!TARGET_64BIT);
15234 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
15238 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
15240 const char *directive = ASM_LONG;
15243 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
15244 directive = ASM_QUAD;
15246 gcc_assert (!TARGET_64BIT);
15248 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
15249 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
15250 fprintf (file, "%s%s%d-%s%d\n",
15251 directive, LPREFIX, value, LPREFIX, rel);
15252 else if (HAVE_AS_GOTOFF_IN_DATA)
15253 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
15255 else if (TARGET_MACHO)
15257 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
15258 machopic_output_function_base_name (file);
15263 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
15264 GOT_SYMBOL_NAME, LPREFIX, value);
15267 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate for the target. */
15271 ix86_expand_clear (rtx dest)
15275 /* We play register width games, which are only valid after reload. */
15276 gcc_assert (reload_completed);
15278 /* Avoid HImode and its attendant prefix byte. */
15279 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
15280 dest = gen_rtx_REG (SImode, REGNO (dest));
15281 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
15283 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
15284 if (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ())
15286 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15287 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
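/* Note (illustrative): "xor %eax, %eax" is 2 bytes against 5 for
   "mov $0, %eax", but it clobbers the flags; that is why the CLOBBER
   of FLAGS_REG is attached to the SET above when the xor form is
   chosen.  */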
15293 /* X is an unchanging MEM. If it is a constant pool reference, return
15294 the constant pool rtx, else NULL. */
15297 maybe_get_pool_constant (rtx x)
15299 x = ix86_delegitimize_address (XEXP (x, 0));
15301 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
15302 return get_pool_constant (x);
15308 ix86_expand_move (enum machine_mode mode, rtx operands[])
15311 enum tls_model model;
15316 if (GET_CODE (op1) == SYMBOL_REF)
15318 model = SYMBOL_REF_TLS_MODEL (op1);
15321 op1 = legitimize_tls_address (op1, model, true);
15322 op1 = force_operand (op1, op0);
15325 if (GET_MODE (op1) != mode)
15326 op1 = convert_to_mode (mode, op1, 1);
15328 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15329 && SYMBOL_REF_DLLIMPORT_P (op1))
15330 op1 = legitimize_dllimport_symbol (op1, false);
15332 else if (GET_CODE (op1) == CONST
15333 && GET_CODE (XEXP (op1, 0)) == PLUS
15334 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
15336 rtx addend = XEXP (XEXP (op1, 0), 1);
15337 rtx symbol = XEXP (XEXP (op1, 0), 0);
15340 model = SYMBOL_REF_TLS_MODEL (symbol);
15342 tmp = legitimize_tls_address (symbol, model, true);
15343 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15344 && SYMBOL_REF_DLLIMPORT_P (symbol))
15345 tmp = legitimize_dllimport_symbol (symbol, true);
15349 tmp = force_operand (tmp, NULL);
15350 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
15351 op0, 1, OPTAB_DIRECT);
15354 if (GET_MODE (tmp) != mode)
15355 op1 = convert_to_mode (mode, tmp, 1);
15359 if ((flag_pic || MACHOPIC_INDIRECT)
15360 && symbolic_operand (op1, mode))
15362 if (TARGET_MACHO && !TARGET_64BIT)
15365 /* dynamic-no-pic */
15366 if (MACHOPIC_INDIRECT)
15368 rtx temp = ((reload_in_progress
15369 || ((op0 && REG_P (op0))
15371 ? op0 : gen_reg_rtx (Pmode));
15372 op1 = machopic_indirect_data_reference (op1, temp);
15374 op1 = machopic_legitimize_pic_address (op1, mode,
15375 temp == op1 ? 0 : temp);
15377 if (op0 != op1 && GET_CODE (op0) != MEM)
15379 rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
15383 if (GET_CODE (op0) == MEM)
15384 op1 = force_reg (Pmode, op1);
15388 if (GET_CODE (temp) != REG)
15389 temp = gen_reg_rtx (Pmode);
15390 temp = legitimize_pic_address (op1, temp);
15395 /* dynamic-no-pic */
15401 op1 = force_reg (mode, op1);
15402 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
15404 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
15405 op1 = legitimize_pic_address (op1, reg);
15408 if (GET_MODE (op1) != mode)
15409 op1 = convert_to_mode (mode, op1, 1);
15416 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
15417 || !push_operand (op0, mode))
15419 op1 = force_reg (mode, op1);
15421 if (push_operand (op0, mode)
15422 && ! general_no_elim_operand (op1, mode))
15423 op1 = copy_to_mode_reg (mode, op1);
15425 /* Force large constants in 64bit compilation into a register
15426 to get them CSEed. */
15427 if (can_create_pseudo_p ()
15428 && (mode == DImode) && TARGET_64BIT
15429 && immediate_operand (op1, mode)
15430 && !x86_64_zext_immediate_operand (op1, VOIDmode)
15431 && !register_operand (op0, mode)
15433 op1 = copy_to_mode_reg (mode, op1);
15435 if (can_create_pseudo_p ()
15436 && FLOAT_MODE_P (mode)
15437 && GET_CODE (op1) == CONST_DOUBLE)
15439 /* If we are loading a floating point constant to a register,
15440 force the value to memory now, since we'll get better code
15441 out the back end. */
15443 op1 = validize_mem (force_const_mem (mode, op1));
15444 if (!register_operand (op0, mode))
15446 rtx temp = gen_reg_rtx (mode);
15447 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
15448 emit_move_insn (op0, temp);
15454 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
15458 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
15460 rtx op0 = operands[0], op1 = operands[1];
15461 unsigned int align = GET_MODE_ALIGNMENT (mode);
15463 /* Force constants other than zero into memory. We do not know how
15464 the instructions used to build constants modify the upper 64 bits
15465 of the register; once we have that information we may be able
15466 to handle some of them more efficiently. */
15467 if (can_create_pseudo_p ()
15468 && register_operand (op0, mode)
15469 && (CONSTANT_P (op1)
15470 || (GET_CODE (op1) == SUBREG
15471 && CONSTANT_P (SUBREG_REG (op1))))
15472 && !standard_sse_constant_p (op1))
15473 op1 = validize_mem (force_const_mem (mode, op1));
15475 /* We need to check memory alignment for SSE mode since attributes
15476 can make operands unaligned. */
15477 if (can_create_pseudo_p ()
15478 && SSE_REG_MODE_P (mode)
15479 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
15480 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
15484 /* ix86_expand_vector_move_misalign() does not like constants ... */
15485 if (CONSTANT_P (op1)
15486 || (GET_CODE (op1) == SUBREG
15487 && CONSTANT_P (SUBREG_REG (op1))))
15488 op1 = validize_mem (force_const_mem (mode, op1));
15490 /* ... nor both arguments in memory. */
15491 if (!register_operand (op0, mode)
15492 && !register_operand (op1, mode))
15493 op1 = force_reg (mode, op1);
15495 tmp[0] = op0; tmp[1] = op1;
15496 ix86_expand_vector_move_misalign (mode, tmp);
15500 /* Make operand1 a register if it isn't already. */
15501 if (can_create_pseudo_p ()
15502 && !register_operand (op0, mode)
15503 && !register_operand (op1, mode))
15505 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
15509 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
15512 /* Split 32-byte AVX unaligned load and store if needed. */
15515 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
15518 rtx (*extract) (rtx, rtx, rtx);
15519 rtx (*move_unaligned) (rtx, rtx);
15520 enum machine_mode mode;
15522 switch (GET_MODE (op0))
15525 gcc_unreachable ();
15527 extract = gen_avx_vextractf128v32qi;
15528 move_unaligned = gen_avx_movdqu256;
15532 extract = gen_avx_vextractf128v8sf;
15533 move_unaligned = gen_avx_movups256;
15537 extract = gen_avx_vextractf128v4df;
15538 move_unaligned = gen_avx_movupd256;
15543 if (MEM_P (op1) && TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
15545 rtx r = gen_reg_rtx (mode);
15546 m = adjust_address (op1, mode, 0);
15547 emit_move_insn (r, m);
15548 m = adjust_address (op1, mode, 16);
15549 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
15550 emit_move_insn (op0, r);
15552 else if (MEM_P (op0) && TARGET_AVX256_SPLIT_UNALIGNED_STORE)
15554 m = adjust_address (op0, mode, 0);
15555 emit_insn (extract (m, op1, const0_rtx));
15556 m = adjust_address (op0, mode, 16);
15557 emit_insn (extract (m, op1, const1_rtx));
15560 emit_insn (move_unaligned (op0, op1));
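/* Sketch (not GCC code) of the split above, modeled on plain memory:
   a 32-byte unaligned store becomes two 16-byte halves, matching the
   vextractf128 pair; the load direction is the mirror image.  */
#include <string.h>

static void
avx256_split_store_model (void *dst, const void *src)
{
  memcpy (dst, src, 16);                                   /* low 128 bits  */
  memcpy ((char *) dst + 16, (const char *) src + 16, 16); /* high 128 bits */
}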
15563 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
15564 straight to ix86_expand_vector_move. */
15565 /* Code generation for scalar reg-reg moves of single and double precision data:
15566 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
15570 if (x86_sse_partial_reg_dependency == true)
15575 Code generation for scalar loads of double precision data:
15576 if (x86_sse_split_regs == true)
15577 movlpd mem, reg (gas syntax)
15581 Code generation for unaligned packed loads of single precision data
15582 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
15583 if (x86_sse_unaligned_move_optimal)
15586 if (x86_sse_partial_reg_dependency == true)
15598 Code generation for unaligned packed loads of double precision data
15599 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
15600 if (x86_sse_unaligned_move_optimal)
15603 if (x86_sse_split_regs == true)
15616 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
15625 switch (GET_MODE_CLASS (mode))
15627 case MODE_VECTOR_INT:
15629 switch (GET_MODE_SIZE (mode))
15632 /* If we're optimizing for size, movups is the smallest. */
15633 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15635 op0 = gen_lowpart (V4SFmode, op0);
15636 op1 = gen_lowpart (V4SFmode, op1);
15637 emit_insn (gen_sse_movups (op0, op1));
15640 op0 = gen_lowpart (V16QImode, op0);
15641 op1 = gen_lowpart (V16QImode, op1);
15642 emit_insn (gen_sse2_movdqu (op0, op1));
15645 op0 = gen_lowpart (V32QImode, op0);
15646 op1 = gen_lowpart (V32QImode, op1);
15647 ix86_avx256_split_vector_move_misalign (op0, op1);
15650 gcc_unreachable ();
15653 case MODE_VECTOR_FLOAT:
15654 op0 = gen_lowpart (mode, op0);
15655 op1 = gen_lowpart (mode, op1);
15660 emit_insn (gen_sse_movups (op0, op1));
15663 ix86_avx256_split_vector_move_misalign (op0, op1);
15666 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15668 op0 = gen_lowpart (V4SFmode, op0);
15669 op1 = gen_lowpart (V4SFmode, op1);
15670 emit_insn (gen_sse_movups (op0, op1));
15673 emit_insn (gen_sse2_movupd (op0, op1));
15676 ix86_avx256_split_vector_move_misalign (op0, op1);
15679 gcc_unreachable ();
15684 gcc_unreachable ();
15692 /* If we're optimizing for size, movups is the smallest. */
15693 if (optimize_insn_for_size_p ()
15694 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15696 op0 = gen_lowpart (V4SFmode, op0);
15697 op1 = gen_lowpart (V4SFmode, op1);
15698 emit_insn (gen_sse_movups (op0, op1));
15702 /* ??? If we have typed data, then it would appear that using
15703 movdqu is the only way to get unaligned data loaded with integer type instructions. */
15705 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
15707 op0 = gen_lowpart (V16QImode, op0);
15708 op1 = gen_lowpart (V16QImode, op1);
15709 emit_insn (gen_sse2_movdqu (op0, op1));
15713 if (TARGET_SSE2 && mode == V2DFmode)
15717 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
15719 op0 = gen_lowpart (V2DFmode, op0);
15720 op1 = gen_lowpart (V2DFmode, op1);
15721 emit_insn (gen_sse2_movupd (op0, op1));
15725 /* When SSE registers are split into halves, we can avoid
15726 writing to the top half twice. */
15727 if (TARGET_SSE_SPLIT_REGS)
15729 emit_clobber (op0);
15734 /* ??? Not sure about the best option for the Intel chips.
15735 The following would seem to satisfy; the register is
15736 entirely cleared, breaking the dependency chain. We
15737 then store to the upper half, with a dependency depth
15738 of one. A rumor has it that Intel recommends two movsd
15739 followed by an unpacklpd, but this is unconfirmed. And
15740 given that the dependency depth of the unpacklpd would
15741 still be one, I'm not sure why this would be better. */
15742 zero = CONST0_RTX (V2DFmode);
15745 m = adjust_address (op1, DFmode, 0);
15746 emit_insn (gen_sse2_loadlpd (op0, zero, m));
15747 m = adjust_address (op1, DFmode, 8);
15748 emit_insn (gen_sse2_loadhpd (op0, op0, m));
15752 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
15754 op0 = gen_lowpart (V4SFmode, op0);
15755 op1 = gen_lowpart (V4SFmode, op1);
15756 emit_insn (gen_sse_movups (op0, op1));
15760 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
15761 emit_move_insn (op0, CONST0_RTX (mode));
15763 emit_clobber (op0);
15765 if (mode != V4SFmode)
15766 op0 = gen_lowpart (V4SFmode, op0);
15767 m = adjust_address (op1, V2SFmode, 0);
15768 emit_insn (gen_sse_loadlps (op0, op0, m));
15769 m = adjust_address (op1, V2SFmode, 8);
15770 emit_insn (gen_sse_loadhps (op0, op0, m));
15773 else if (MEM_P (op0))
15775 /* If we're optimizing for size, movups is the smallest. */
15776 if (optimize_insn_for_size_p ()
15777 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15779 op0 = gen_lowpart (V4SFmode, op0);
15780 op1 = gen_lowpart (V4SFmode, op1);
15781 emit_insn (gen_sse_movups (op0, op1));
15785 /* ??? Similar to above, only less clear because of quote
15786 typeless stores unquote. */
15787 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
15788 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
15790 op0 = gen_lowpart (V16QImode, op0);
15791 op1 = gen_lowpart (V16QImode, op1);
15792 emit_insn (gen_sse2_movdqu (op0, op1));
15796 if (TARGET_SSE2 && mode == V2DFmode)
15798 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
15800 op0 = gen_lowpart (V2DFmode, op0);
15801 op1 = gen_lowpart (V2DFmode, op1);
15802 emit_insn (gen_sse2_movupd (op0, op1));
15806 m = adjust_address (op0, DFmode, 0);
15807 emit_insn (gen_sse2_storelpd (m, op1));
15808 m = adjust_address (op0, DFmode, 8);
15809 emit_insn (gen_sse2_storehpd (m, op1));
15814 if (mode != V4SFmode)
15815 op1 = gen_lowpart (V4SFmode, op1);
15817 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
15819 op0 = gen_lowpart (V4SFmode, op0);
15820 emit_insn (gen_sse_movups (op0, op1));
15824 m = adjust_address (op0, V2SFmode, 0);
15825 emit_insn (gen_sse_storelps (m, op1));
15826 m = adjust_address (op0, V2SFmode, 8);
15827 emit_insn (gen_sse_storehps (m, op1));
15832 gcc_unreachable ();
15835 /* Expand a push in MODE. This is some mode for which we do not support
15836 proper push instructions, at least from the registers that we expect
15837 the value to live in. */
15840 ix86_expand_push (enum machine_mode mode, rtx x)
15844 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
15845 GEN_INT (-GET_MODE_SIZE (mode)),
15846 stack_pointer_rtx, 1, OPTAB_DIRECT);
15847 if (tmp != stack_pointer_rtx)
15848 emit_move_insn (stack_pointer_rtx, tmp);
15850 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
15852 /* When we push an operand onto the stack, it has to be aligned at least
15853 at the function argument boundary. However since we don't have
15854 the argument type, we can't determine the actual argument boundary. */
15856 emit_move_insn (tmp, x);
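/* Illustrative result (an assumption, for a 16-byte mode on x86_32):
     sub $16, %esp
     movups %xmm0, (%esp)
   i.e. an explicit stack-pointer adjustment followed by a plain store,
   instead of a real push instruction.  */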
15859 /* Helper function of ix86_fixup_binary_operands to canonicalize
15860 operand order. Returns true if the operands should be swapped. */
15863 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
15866 rtx dst = operands[0];
15867 rtx src1 = operands[1];
15868 rtx src2 = operands[2];
15870 /* If the operation is not commutative, we can't do anything. */
15871 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
15874 /* Highest priority is that src1 should match dst. */
15875 if (rtx_equal_p (dst, src1))
15877 if (rtx_equal_p (dst, src2))
15880 /* Next highest priority is that immediate constants come second. */
15881 if (immediate_operand (src2, mode))
15883 if (immediate_operand (src1, mode))
15886 /* Lowest priority is that memory references should come second. */
15896 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
15897 destination to use for the operation. If different from the true
15898 destination in operands[0], a copy operation will be required. */
15901 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
15904 rtx dst = operands[0];
15905 rtx src1 = operands[1];
15906 rtx src2 = operands[2];
15908 /* Canonicalize operand order. */
15909 if (ix86_swap_binary_operands_p (code, mode, operands))
15913 /* It is invalid to swap operands of different modes. */
15914 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
15921 /* Both source operands cannot be in memory. */
15922 if (MEM_P (src1) && MEM_P (src2))
15924 /* Optimization: Only read from memory once. */
15925 if (rtx_equal_p (src1, src2))
15927 src2 = force_reg (mode, src2);
15931 src2 = force_reg (mode, src2);
15934 /* If the destination is memory, and we do not have matching source
15935 operands, do things in registers. */
15936 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
15937 dst = gen_reg_rtx (mode);
15939 /* Source 1 cannot be a constant. */
15940 if (CONSTANT_P (src1))
15941 src1 = force_reg (mode, src1);
15943 /* Source 1 cannot be a non-matching memory. */
15944 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
15945 src1 = force_reg (mode, src1);
15947 /* Improve address combine. */
15949 && GET_MODE_CLASS (mode) == MODE_INT
15951 src2 = force_reg (mode, src2);
15953 operands[1] = src1;
15954 operands[2] = src2;
15958 /* Similarly, but assume that the destination has already been
15959 set up properly. */
15962 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
15963 enum machine_mode mode, rtx operands[])
15965 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
15966 gcc_assert (dst == operands[0]);
15969 /* Attempt to expand a binary operator. Make the expansion closer to the
15970 actual machine than just general_operand, which will allow 3 separate
15971 memory references (one output, two input) in a single insn. */
15974 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
15977 rtx src1, src2, dst, op, clob;
15979 dst = ix86_fixup_binary_operands (code, mode, operands);
15980 src1 = operands[1];
15981 src2 = operands[2];
15983 /* Emit the instruction. */
15985 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
15986 if (reload_in_progress)
15988 /* Reload doesn't know about the flags register, and doesn't know that
15989 it doesn't want to clobber it. We can only do this with PLUS. */
15990 gcc_assert (code == PLUS);
15993 else if (reload_completed
15995 && !rtx_equal_p (dst, src1))
15997 /* This is going to be an LEA; avoid splitting it later. */
16002 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16003 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
16006 /* Fix up the destination if needed. */
16007 if (dst != operands[0])
16008 emit_move_insn (operands[0], dst);
16011 /* Return TRUE or FALSE depending on whether the binary operator meets the
16012 appropriate constraints. */
16015 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
16018 rtx dst = operands[0];
16019 rtx src1 = operands[1];
16020 rtx src2 = operands[2];
16022 /* Both source operands cannot be in memory. */
16023 if (MEM_P (src1) && MEM_P (src2))
16026 /* Canonicalize operand order for commutative operators. */
16027 if (ix86_swap_binary_operands_p (code, mode, operands))
16034 /* If the destination is memory, we must have a matching source operand. */
16035 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
16038 /* Source 1 cannot be a constant. */
16039 if (CONSTANT_P (src1))
16042 /* Source 1 cannot be a non-matching memory. */
16043 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
16044 /* Support "andhi/andsi/anddi" as a zero-extending move. */
16045 return (code == AND
16048 || (TARGET_64BIT && mode == DImode))
16049 && satisfies_constraint_L (src2));
16054 /* Attempt to expand a unary operator. Make the expansion closer to the
16055 actual machine than just general_operand, which will allow 2 separate
16056 memory references (one output, one input) in a single insn. */
16059 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
16062 int matching_memory;
16063 rtx src, dst, op, clob;
16068 /* If the destination is memory, and we do not have matching source
16069 operands, do things in registers. */
16070 matching_memory = 0;
16073 if (rtx_equal_p (dst, src))
16074 matching_memory = 1;
16076 dst = gen_reg_rtx (mode);
16079 /* When source operand is memory, destination must match. */
16080 if (MEM_P (src) && !matching_memory)
16081 src = force_reg (mode, src);
16083 /* Emit the instruction. */
16085 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
16086 if (reload_in_progress || code == NOT)
16088 /* Reload doesn't know about the flags register, and doesn't know that
16089 it doesn't want to clobber it. */
16090 gcc_assert (code == NOT);
16095 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16096 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
16099 /* Fix up the destination if needed. */
16100 if (dst != operands[0])
16101 emit_move_insn (operands[0], dst);
16104 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
16105 divisor are within the range [0-255]. */
16108 ix86_split_idivmod (enum machine_mode mode, rtx operands[],
16111 rtx end_label, qimode_label;
16112 rtx insn, div, mod;
16113 rtx scratch, tmp0, tmp1, tmp2;
16114 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
16115 rtx (*gen_zero_extend) (rtx, rtx);
16116 rtx (*gen_test_ccno_1) (rtx, rtx);
16121 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
16122 gen_test_ccno_1 = gen_testsi_ccno_1;
16123 gen_zero_extend = gen_zero_extendqisi2;
16126 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
16127 gen_test_ccno_1 = gen_testdi_ccno_1;
16128 gen_zero_extend = gen_zero_extendqidi2;
16131 gcc_unreachable ();
16134 end_label = gen_label_rtx ();
16135 qimode_label = gen_label_rtx ();
16137 scratch = gen_reg_rtx (mode);
16139 /* Use 8bit unsigned divmod if the dividend and divisor are within
16140 the range [0-255]. */
16141 emit_move_insn (scratch, operands[2]);
16142 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
16143 scratch, 1, OPTAB_DIRECT);
16144 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
16145 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
16146 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
16147 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
16148 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
16150 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
16151 predict_jump (REG_BR_PROB_BASE * 50 / 100);
16152 JUMP_LABEL (insn) = qimode_label;
16154 /* Generate the original signed/unsigned divmod. */
16155 div = gen_divmod4_1 (operands[0], operands[1],
16156 operands[2], operands[3]);
16159 /* Branch to the end. */
16160 emit_jump_insn (gen_jump (end_label));
16163 /* Generate 8bit unsigned divide. */
16164 emit_label (qimode_label);
16165 /* Don't use operands[0] for result of 8bit divide since not all
16166 registers support QImode ZERO_EXTRACT. */
16167 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
16168 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
16169 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
16170 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
16174 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
16175 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
16179 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
16180 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
16183 /* Extract remainder from AH. */
16184 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
16185 if (REG_P (operands[1]))
16186 insn = emit_move_insn (operands[1], tmp1);
16189 /* Need a new scratch register since the old one has the result of the 8bit divide. */
16191 scratch = gen_reg_rtx (mode);
16192 emit_move_insn (scratch, tmp1);
16193 insn = emit_move_insn (operands[1], scratch);
16195 set_unique_reg_note (insn, REG_EQUAL, mod);
16197 /* Zero extend quotient from AL. */
16198 tmp1 = gen_lowpart (QImode, tmp0);
16199 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
16200 set_unique_reg_note (insn, REG_EQUAL, div);
16202 emit_label (end_label);
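/* Illustrative sketch (plain C, not GCC internals) of the fast path the
   splitter above creates, modeled on the 32-bit unsigned case: when
   dividend and divisor both fit in [0-255], one byte-wide unsigned
   divide produces quotient (AL) and remainder (AH) at once.  The signed
   variant takes the same fast path, since values in [0-255] read
   identically under both interpretations.  */
#include <stdint.h>
#include <stdio.h>

static void
divmod_model (uint32_t a, uint32_t b, uint32_t *q, uint32_t *r)
{
  if (((a | b) & ~0xffu) == 0)
    {
      /* Corresponds to the udivmodhiqi3 path after the test against
         -0x100 above: one 8bit divide yields both results.  */
      *q = (uint8_t) a / (uint8_t) b;
      *r = (uint8_t) a % (uint8_t) b;
    }
  else
    {
      /* The original full-width divmod.  */
      *q = a / b;
      *r = a % b;
    }
}

int
main (void)
{
  uint32_t q, r;
  divmod_model (200, 7, &q, &r);
  printf ("%u %u\n", q, r);   /* prints: 28 4 */
  return 0;
}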
16205 #define LEA_MAX_STALL (3)
16206 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
16208 /* Increase the given DISTANCE in half-cycles according to
16209 dependencies between PREV and NEXT instructions.
16210 Add 1 half-cycle if there is no dependency and
16211 go to the next cycle if there is some dependency. */
16213 static unsigned int
16214 increase_distance (rtx prev, rtx next, unsigned int distance)
16219 if (!prev || !next)
16220 return distance + (distance & 1) + 2;
16222 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
16223 return distance + 1;
16225 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
16226 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
16227 if (!DF_REF_IS_ARTIFICIAL (*def_rec)
16228 && DF_REF_REGNO (*use_rec) == DF_REF_REGNO (*def_rec))
16229 return distance + (distance & 1) + 2;
16231 return distance + 1;
16234 /* Function checks if instruction INSN defines register number
16235 REGNO1 or REGNO2. */
16238 insn_defines_reg (unsigned int regno1, unsigned int regno2,
16243 for (def_rec = DF_INSN_DEFS (insn); *def_rec; def_rec++)
16244 if (DF_REF_REG_DEF_P (*def_rec)
16245 && !DF_REF_IS_ARTIFICIAL (*def_rec)
16246 && (regno1 == DF_REF_REGNO (*def_rec)
16247 || regno2 == DF_REF_REGNO (*def_rec)))
16255 /* Function checks if instruction INSN uses register number
16256 REGNO as part of an address expression. */
16259 insn_uses_reg_mem (unsigned int regno, rtx insn)
16263 for (use_rec = DF_INSN_USES (insn); *use_rec; use_rec++)
16264 if (DF_REF_REG_MEM_P (*use_rec) && regno == DF_REF_REGNO (*use_rec))
16270 /* Search backward for non-agu definition of register number REGNO1
16271 or register number REGNO2 in basic block starting from instruction
16272 START up to head of basic block or instruction INSN.
16274 Function puts true value into *FOUND var if definition was found
16275 and false otherwise.
16277 Distance in half-cycles between START and found instruction or head
16278 of BB is added to DISTANCE and returned. */
16281 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
16282 rtx insn, int distance,
16283 rtx start, bool *found)
16285 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
16293 && distance < LEA_SEARCH_THRESHOLD)
16295 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
16297 distance = increase_distance (prev, next, distance);
16298 if (insn_defines_reg (regno1, regno2, prev))
16300 if (recog_memoized (prev) < 0
16301 || get_attr_type (prev) != TYPE_LEA)
16310 if (prev == BB_HEAD (bb))
16313 prev = PREV_INSN (prev);
16319 /* Search backward for non-agu definition of register number REGNO1
16320 or register number REGNO2 in INSN's basic block until
16321 1. Pass LEA_SEARCH_THRESHOLD instructions, or
16322 2. Reach a neighbouring BB boundary, or
16323 3. Reach agu definition.
16324 Returns the distance between the non-agu definition point and INSN.
16325 If no definition point, returns -1. */
16328 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
16331 basic_block bb = BLOCK_FOR_INSN (insn);
16333 bool found = false;
16335 if (insn != BB_HEAD (bb))
16336 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
16337 distance, PREV_INSN (insn),
16340 if (!found && distance < LEA_SEARCH_THRESHOLD)
16344 bool simple_loop = false;
16346 FOR_EACH_EDGE (e, ei, bb->preds)
16349 simple_loop = true;
16354 distance = distance_non_agu_define_in_bb (regno1, regno2,
16356 BB_END (bb), &found);
16359 int shortest_dist = -1;
16360 bool found_in_bb = false;
16362 FOR_EACH_EDGE (e, ei, bb->preds)
16365 = distance_non_agu_define_in_bb (regno1, regno2,
16371 if (shortest_dist < 0)
16372 shortest_dist = bb_dist;
16373 else if (bb_dist > 0)
16374 shortest_dist = MIN (bb_dist, shortest_dist);
16380 distance = shortest_dist;
16384 /* get_attr_type may modify recog data. We want to make sure
16385 that recog data is valid for instruction INSN, on which
16386 distance_non_agu_define is called. INSN is unchanged here. */
16387 extract_insn_cached (insn);
16392 return distance >> 1;
16395 /* Return the distance in half-cycles between INSN and the next
16396 insn that uses register number REGNO in a memory address, added
16397 to DISTANCE. Return -1 if REGNO is set.
16399 Put a true value into *FOUND if a register usage was found and false otherwise.
16401 Put a true value into *REDEFINED if a register redefinition was
16402 found and false otherwise. */
16405 distance_agu_use_in_bb (unsigned int regno,
16406 rtx insn, int distance, rtx start,
16407 bool *found, bool *redefined)
16409 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
16414 *redefined = false;
16418 && distance < LEA_SEARCH_THRESHOLD)
16420 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
16422 distance = increase_distance (prev, next, distance);
16423 if (insn_uses_reg_mem (regno, next))
16425 /* Return DISTANCE if OP0 is used in memory
16426 address in NEXT. */
16431 if (insn_defines_reg (regno, INVALID_REGNUM, next))
16433 /* Return -1 if OP0 is set in NEXT. */
16441 if (next == BB_END (bb))
16444 next = NEXT_INSN (next);
16450 /* Return the distance between INSN and the next insn that uses
16451 register number REGNO0 in a memory address. Return -1 if no such
16452 use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
16455 distance_agu_use (unsigned int regno0, rtx insn)
16457 basic_block bb = BLOCK_FOR_INSN (insn);
16459 bool found = false;
16460 bool redefined = false;
16462 if (insn != BB_END (bb))
16463 distance = distance_agu_use_in_bb (regno0, insn, distance,
16465 &found, &redefined);
16467 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
16471 bool simple_loop = false;
16473 FOR_EACH_EDGE (e, ei, bb->succs)
16476 simple_loop = true;
16481 distance = distance_agu_use_in_bb (regno0, insn,
16482 distance, BB_HEAD (bb),
16483 &found, &redefined);
16486 int shortest_dist = -1;
16487 bool found_in_bb = false;
16488 bool redefined_in_bb = false;
16490 FOR_EACH_EDGE (e, ei, bb->succs)
16493 = distance_agu_use_in_bb (regno0, insn,
16494 distance, BB_HEAD (e->dest),
16495 &found_in_bb, &redefined_in_bb);
16498 if (shortest_dist < 0)
16499 shortest_dist = bb_dist;
16500 else if (bb_dist > 0)
16501 shortest_dist = MIN (bb_dist, shortest_dist);
16507 distance = shortest_dist;
16511 if (!found || redefined)
16514 return distance >> 1;
16517 /* Define this macro to tune LEA priority vs ADD; it takes effect when
16518 there is a dilemma of choosing LEA or ADD.
16519 Negative value: ADD is preferred over LEA.
16521 Positive value: LEA is preferred over ADD. */
16522 #define IX86_LEA_PRIORITY 0
16524 /* Return true if use of the lea INSN has a performance advantage
16525 over a sequence of instructions. The instruction sequence has
16526 SPLIT_COST cycles higher latency than the lea latency. */
16529 ix86_lea_outperforms (rtx insn, unsigned int regno0, unsigned int regno1,
16530 unsigned int regno2, unsigned int split_cost)
16532 int dist_define, dist_use;
16534 dist_define = distance_non_agu_define (regno1, regno2, insn);
16535 dist_use = distance_agu_use (regno0, insn);
16537 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
16539 /* If there is no non-AGU operand definition, no AGU
16540 operand usage and the split cost is 0, then both the lea
16541 and non-lea variants have the same priority. Currently
16542 we prefer lea for 64bit code and non-lea for 32bit code. */
16544 if (dist_use < 0 && split_cost == 0)
16545 return TARGET_64BIT || IX86_LEA_PRIORITY;
16550 /* With a longer definition distance, lea is more preferable.
16551 Here we adjust it to take into account the splitting cost and lea priority. */
16553 dist_define += split_cost + IX86_LEA_PRIORITY;
16555 /* If there is no use in a memory address then we just check
16556 that the split cost does not exceed the AGU stall. */
16558 return dist_define >= LEA_MAX_STALL;
16560 /* If this insn has both backward non-agu dependence and forward
16561 agu dependence, the one with short distance takes effect. */
16562 return dist_define >= dist_use;
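/* Toy model (illustrative, not GCC code) of the decision above, with the
   elided branches spelled out.  Distances are as returned by
   distance_non_agu_define / distance_agu_use; -1 means "not found", and
   prefer_lea_default stands for the TARGET_64BIT || IX86_LEA_PRIORITY
   tie-break.  */
static int
lea_outperforms_model (int dist_define, int dist_use,
                       int split_cost, int prefer_lea_default)
{
  if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
    {
      /* No recent non-AGU producer: lea cannot stall, so it wins unless
         everything is neutral, in which case the default decides.  */
      if (dist_use < 0 && split_cost == 0)
        return prefer_lea_default;
      return 1;
    }

  dist_define += split_cost + IX86_LEA_PRIORITY;

  if (dist_use < 0)
    return dist_define >= LEA_MAX_STALL;

  return dist_define >= dist_use;
}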
16565 /* Return true if it is legal for INSN to clobber the flags register,
16566 and false otherwise. */
16569 ix86_ok_to_clobber_flags (rtx insn)
16571 basic_block bb = BLOCK_FOR_INSN (insn);
16577 if (NONDEBUG_INSN_P (insn))
16579 for (use = DF_INSN_USES (insn); *use; use++)
16580 if (DF_REF_REG_USE_P (*use) && DF_REF_REGNO (*use) == FLAGS_REG)
16583 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
16587 if (insn == BB_END (bb))
16590 insn = NEXT_INSN (insn);
16593 live = df_get_live_out (bb);
16594 return !REGNO_REG_SET_P (live, FLAGS_REG);
16597 /* Return true if we need to split op0 = op1 + op2 into a sequence of
16598 move and add to avoid AGU stalls. */
16601 ix86_avoid_lea_for_add (rtx insn, rtx operands[])
16603 unsigned int regno0 = true_regnum (operands[0]);
16604 unsigned int regno1 = true_regnum (operands[1]);
16605 unsigned int regno2 = true_regnum (operands[2]);
16607 /* Check if we need to optimize. */
16608 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
16611 /* Check that it is correct to split here. */
16612 if (!ix86_ok_to_clobber_flags (insn))
16615 /* We need to split only adds with a non-destructive
16616 destination operand. */
16617 if (regno0 == regno1 || regno0 == regno2)
16620 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1);
16623 /* Return true if we should emit an lea instruction instead of a mov instruction. */
16627 ix86_use_lea_for_mov (rtx insn, rtx operands[])
16629 unsigned int regno0;
16630 unsigned int regno1;
16632 /* Check if we need to optimize. */
16633 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
16636 /* Use lea for reg to reg moves only. */
16637 if (!REG_P (operands[0]) || !REG_P (operands[1]))
16640 regno0 = true_regnum (operands[0]);
16641 regno1 = true_regnum (operands[1]);
16643 return ix86_lea_outperforms (insn, regno0, regno1, -1, 0);
16646 /* Return true if we need to split lea into a sequence of
16647 instructions to avoid AGU stalls. */
16650 ix86_avoid_lea_for_addr (rtx insn, rtx operands[])
16652 unsigned int regno0 = true_regnum (operands[0]) ;
16653 unsigned int regno1 = -1;
16654 unsigned int regno2 = -1;
16655 unsigned int split_cost = 0;
16656 struct ix86_address parts;
16659 /* Check if we need to optimize. */
16660 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
16663 /* Check that it is correct to split here. */
16664 if (!ix86_ok_to_clobber_flags (insn))
16667 ok = ix86_decompose_address (operands[1], &parts);
16670 /* We should not split into an add if a non-legitimate PIC
16671 operand is used as the displacement. */
16672 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
16676 regno1 = true_regnum (parts.base);
16678 regno2 = true_regnum (parts.index);
16680 /* Compute how many cycles we will add to the execution time
16681 if we split the lea into a sequence of instructions. */
16682 if (parts.base || parts.index)
16684 /* Have to use a mov instruction if the non-destructive
16685 destination form is used. */
16686 if (regno1 != regno0 && regno2 != regno0)
16689 /* Have to add index to base if both exist. */
16690 if (parts.base && parts.index)
16693 /* Have to use shift and adds if scale is 2 or greater. */
16694 if (parts.scale > 1)
16696 if (regno0 != regno1)
16698 else if (regno2 == regno0)
16701 split_cost += parts.scale;
16704 /* Have to use an add instruction with an immediate if
16705 disp is nonzero. */
16706 if (parts.disp && parts.disp != const0_rtx)
16709 /* Subtract the price of lea. */
16713 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost);
16716 /* Emit x86 binary operand CODE in mode MODE, where the first operand
16717 matches the destination. The RTX includes a clobber of FLAGS_REG. */
16720 ix86_emit_binop (enum rtx_code code, enum machine_mode mode,
16725 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, dst, src));
16726 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16728 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
16731 /* Split lea instructions into a sequence of instructions
16732 which are executed on the ALU to avoid AGU stalls.
16733 It is assumed that it is allowed to clobber the flags register
16734 at the lea position. */
16737 ix86_split_lea_for_addr (rtx operands[], enum machine_mode mode)
16739 unsigned int regno0 = true_regnum (operands[0]) ;
16740 unsigned int regno1 = INVALID_REGNUM;
16741 unsigned int regno2 = INVALID_REGNUM;
16742 struct ix86_address parts;
16746 ok = ix86_decompose_address (operands[1], &parts);
16751 if (GET_MODE (parts.base) != mode)
16752 parts.base = gen_rtx_SUBREG (mode, parts.base, 0);
16753 regno1 = true_regnum (parts.base);
16758 if (GET_MODE (parts.index) != mode)
16759 parts.index = gen_rtx_SUBREG (mode, parts.index, 0);
16760 regno2 = true_regnum (parts.index);
16763 if (parts.scale > 1)
16765 /* Case r1 = r1 + ... */
16766 if (regno1 == regno0)
16768 /* If we have a case like r1 = r1 + C * r1 then we
16769 should use multiplication, which is very
16770 expensive. Assume the cost model is wrong if we
16771 have such a case here. */
16772 gcc_assert (regno2 != regno0);
16774 for (adds = parts.scale; adds > 0; adds--)
16775 ix86_emit_binop (PLUS, mode, operands[0], parts.index);
16779 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
16780 if (regno0 != regno2)
16781 emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.index));
16783 /* Use shift for scaling. */
16784 ix86_emit_binop (ASHIFT, mode, operands[0],
16785 GEN_INT (exact_log2 (parts.scale)));
16788 ix86_emit_binop (PLUS, mode, operands[0], parts.base);
16790 if (parts.disp && parts.disp != const0_rtx)
16791 ix86_emit_binop (PLUS, mode, operands[0], parts.disp);
16794 else if (!parts.base && !parts.index)
16796 gcc_assert (parts.disp);
16797 emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.disp));
16803 if (regno0 != regno2)
16804 emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.index));
16806 else if (!parts.index)
16808 if (regno0 != regno1)
16809 emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.base));
16813 if (regno0 == regno1)
16815 else if (regno0 == regno2)
16819 emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.base));
16823 ix86_emit_binop (PLUS, mode, operands[0], tmp);
16826 if (parts.disp && parts.disp != const0_rtx)
16827 ix86_emit_binop (PLUS, mode, operands[0], parts.disp);
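/* Worked example (illustrative, plain C): splitting
     lea 0x10(%ebx,%ecx,4), %eax
   into the ALU sequence the function above emits, for the dest != base,
   dest != index, power-of-two scale case.  */
#include <stdint.h>

static uintptr_t
lea_split_model (uintptr_t base, uintptr_t index, int log2_scale,
                 intptr_t disp)
{
  uintptr_t r = index;   /* mov  %ecx, %eax   (move index into dest)   */
  r <<= log2_scale;      /* shl  $2, %eax     (shift replaces scaling) */
  r += base;             /* add  %ebx, %eax   (add base register)      */
  r += disp;             /* add  $0x10, %eax  (add displacement)       */
  return r;
}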
16831 /* Return true if it is ok to optimize an ADD operation to an LEA
16832 operation to avoid flag register consumption. For most processors,
16833 ADD is faster than LEA. For processors like Atom, if the
16834 destination register of the LEA holds an actual address which will be
16835 used soon, LEA is better; otherwise ADD is better. */
16838 ix86_lea_for_add_ok (rtx insn, rtx operands[])
16840 unsigned int regno0 = true_regnum (operands[0]);
16841 unsigned int regno1 = true_regnum (operands[1]);
16842 unsigned int regno2 = true_regnum (operands[2]);
16844 /* If a = b + c, (a!=b && a!=c), must use lea form. */
16845 if (regno0 != regno1 && regno0 != regno2)
16848 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
16851 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0);
16854 /* Return true if the destination reg of SET_BODY is the shift count of USE_BODY. */
16858 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
16864 /* Retrieve destination of SET_BODY. */
16865 switch (GET_CODE (set_body))
16868 set_dest = SET_DEST (set_body);
16869 if (!set_dest || !REG_P (set_dest))
16873 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
16874 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
16882 /* Retrieve shift count of USE_BODY. */
16883 switch (GET_CODE (use_body))
16886 shift_rtx = XEXP (use_body, 1);
16889 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
16890 if (ix86_dep_by_shift_count_body (set_body,
16891 XVECEXP (use_body, 0, i)))
16899 && (GET_CODE (shift_rtx) == ASHIFT
16900 || GET_CODE (shift_rtx) == LSHIFTRT
16901 || GET_CODE (shift_rtx) == ASHIFTRT
16902 || GET_CODE (shift_rtx) == ROTATE
16903 || GET_CODE (shift_rtx) == ROTATERT))
16905 rtx shift_count = XEXP (shift_rtx, 1);
16907 /* Return true if shift count is dest of SET_BODY. */
16908 if (REG_P (shift_count)
16909 && true_regnum (set_dest) == true_regnum (shift_count))
16916 /* Return true if the destination reg of SET_INSN is the shift count of USE_INSN. */
16920 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
16922 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
16923 PATTERN (use_insn));
16926 /* Return TRUE or FALSE depending on whether the unary operator meets the
16927 appropriate constraints. */
16930 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
16931 enum machine_mode mode ATTRIBUTE_UNUSED,
16932 rtx operands[2] ATTRIBUTE_UNUSED)
16934 /* If one of the operands is memory, the source and destination must match. */
16935 if ((MEM_P (operands[0])
16936 || MEM_P (operands[1]))
16937 && ! rtx_equal_p (operands[0], operands[1]))
16942 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
16943 are ok, keeping in mind the possible movddup alternative. */
16946 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
16948 if (MEM_P (operands[0]))
16949 return rtx_equal_p (operands[0], operands[1 + high]);
16950 if (MEM_P (operands[1]) && MEM_P (operands[2]))
16951 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
16955 /* Post-reload splitter for converting an SF or DFmode value in an
16956 SSE register into an unsigned SImode. */
16959 ix86_split_convert_uns_si_sse (rtx operands[])
16961 enum machine_mode vecmode;
16962 rtx value, large, zero_or_two31, input, two31, x;
16964 large = operands[1];
16965 zero_or_two31 = operands[2];
16966 input = operands[3];
16967 two31 = operands[4];
16968 vecmode = GET_MODE (large);
16969 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
16971 /* Load up the value into the low element. We must ensure that the other
16972 elements are valid floats -- zero is the easiest such value. */
16975 if (vecmode == V4SFmode)
16976 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
16978 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
16982 input = gen_rtx_REG (vecmode, REGNO (input));
16983 emit_move_insn (value, CONST0_RTX (vecmode));
16984 if (vecmode == V4SFmode)
16985 emit_insn (gen_sse_movss (value, value, input));
16987 emit_insn (gen_sse2_movsd (value, value, input));
16990 emit_move_insn (large, two31);
16991 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
16993 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
16994 emit_insn (gen_rtx_SET (VOIDmode, large, x));
16996 x = gen_rtx_AND (vecmode, zero_or_two31, large);
16997 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
16999 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
17000 emit_insn (gen_rtx_SET (VOIDmode, value, x));
17002 large = gen_rtx_REG (V4SImode, REGNO (large));
17003 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
17005 x = gen_rtx_REG (V4SImode, REGNO (value));
17006 if (vecmode == V4SFmode)
17007 emit_insn (gen_fix_truncv4sfv4si2 (x, value));
17009 emit_insn (gen_sse2_cvttpd2dq (x, value));
17012 emit_insn (gen_xorv4si3 (value, value, large));
17015 /* Convert an unsigned DImode value into a DFmode, using only SSE.
17016 Expects the 64-bit DImode to be supplied in a pair of integral
17017 registers. Requires SSE2; will use SSE3 if available. For x86_32,
17018 -mfpmath=sse, !optimize_size only. */
17021 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
17023 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
17024 rtx int_xmm, fp_xmm;
17025 rtx biases, exponents;
17028 int_xmm = gen_reg_rtx (V4SImode);
17029 if (TARGET_INTER_UNIT_MOVES)
17030 emit_insn (gen_movdi_to_sse (int_xmm, input));
17031 else if (TARGET_SSE_SPLIT_REGS)
17033 emit_clobber (int_xmm);
17034 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
17038 x = gen_reg_rtx (V2DImode);
17039 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
17040 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
17043 x = gen_rtx_CONST_VECTOR (V4SImode,
17044 gen_rtvec (4, GEN_INT (0x43300000UL),
17045 GEN_INT (0x45300000UL),
17046 const0_rtx, const0_rtx));
17047 exponents = validize_mem (force_const_mem (V4SImode, x));
17049 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
17050 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
17052 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
17053 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
17054 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
17055 (0x1.0p84 + double(fp_value_hi_xmm)).
17056 Note these exponents differ by 32. */
17058 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
17060 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
17061 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
17062 real_ldexp (&bias_lo_rvt, &dconst1, 52);
17063 real_ldexp (&bias_hi_rvt, &dconst1, 84);
17064 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
17065 x = const_double_from_real_value (bias_hi_rvt, DFmode);
17066 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
17067 biases = validize_mem (force_const_mem (V2DFmode, biases));
17068 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
17070 /* Add the upper and lower DFmode values together. */
17072 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
17075 x = copy_to_mode_reg (V2DFmode, fp_xmm);
17076 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
17077 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
17080 ix86_expand_vector_extract (false, target, fp_xmm, 0);
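/* Sketch (an assumption: plain C99, not GCC code) of the
   exponent-splicing trick above: glue the 0x43300000 / 0x45300000
   exponent words onto the low and high 32-bit halves, reinterpret the
   pair as doubles, subtract the 0x1.0p52 / 0x1.0p84 biases, and add
   the two partial values.  */
#include <stdint.h>
#include <string.h>
#include <stdio.h>

static double
uns_di_to_df_model (uint64_t u)
{
  uint64_t lo_bits = ((uint64_t) 0x43300000 << 32) | (uint32_t) u;
  uint64_t hi_bits = ((uint64_t) 0x45300000 << 32) | (uint32_t) (u >> 32);
  double lo, hi;

  memcpy (&lo, &lo_bits, sizeof lo);   /* == 0x1.0p52 + low32 (u)         */
  memcpy (&hi, &hi_bits, sizeof hi);   /* == 0x1.0p84 + high32 (u) * 2^32 */

  /* Subtracting the biases is exact; the final add rounds once, just
     like the haddv2df in the real sequence.  */
  return (hi - 0x1.0p84) + (lo - 0x1.0p52);
}

int
main (void)
{
  printf ("%.1f\n", uns_di_to_df_model (123456789012345ULL));
  return 0;
}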
17083 /* Not used, but eases macroization of patterns. */
17085 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
17086 rtx input ATTRIBUTE_UNUSED)
17088 gcc_unreachable ();
17091 /* Convert an unsigned SImode value into a DFmode. Only currently used
17092 for SSE, but applicable anywhere. */
17095 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
17097 REAL_VALUE_TYPE TWO31r;
17100 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
17101 NULL, 1, OPTAB_DIRECT);
17103 fp = gen_reg_rtx (DFmode);
17104 emit_insn (gen_floatsidf2 (fp, x));
17106 real_ldexp (&TWO31r, &dconst1, 31);
17107 x = const_double_from_real_value (TWO31r, DFmode);
17109 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
17111 emit_move_insn (target, x);
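/* Scalar sketch (plain C, not GCC code) of the conversion above: adding
   -2^31 wraps the unsigned value into signed range, a plain signed
   int->double convert is then exact, and adding 0x1.0p31 back recovers
   the original value.  */
#include <stdint.h>
#include <stdio.h>

static double
uns_si_to_df_model (uint32_t u)
{
  int32_t biased = (int32_t) (u + 0x80000000u); /* the PLUS (-2147483647 - 1) */
  return (double) biased + 2147483648.0;        /* add 0x1.0p31 back */
}

int
main (void)
{
  printf ("%.1f\n", uns_si_to_df_model (4000000000u));  /* 4000000000.0 */
  return 0;
}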
17114 /* Convert a signed DImode value into a DFmode. Only used for SSE in
17115 32-bit mode; otherwise we have a direct convert instruction. */
17118 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
17120 REAL_VALUE_TYPE TWO32r;
17121 rtx fp_lo, fp_hi, x;
17123 fp_lo = gen_reg_rtx (DFmode);
17124 fp_hi = gen_reg_rtx (DFmode);
17126 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
17128 real_ldexp (&TWO32r, &dconst1, 32);
17129 x = const_double_from_real_value (TWO32r, DFmode);
17130 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
17132 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
17134 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
17137 emit_move_insn (target, x);
17140 /* Convert an unsigned SImode value into a SFmode, using only SSE.
17141 For x86_32, -mfpmath=sse, !optimize_size only. */
17143 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
17145 REAL_VALUE_TYPE ONE16r;
17146 rtx fp_hi, fp_lo, int_hi, int_lo, x;
17148 real_ldexp (&ONE16r, &dconst1, 16);
17149 x = const_double_from_real_value (ONE16r, SFmode);
17150 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
17151 NULL, 0, OPTAB_DIRECT);
17152 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
17153 NULL, 0, OPTAB_DIRECT);
17154 fp_hi = gen_reg_rtx (SFmode);
17155 fp_lo = gen_reg_rtx (SFmode);
17156 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
17157 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
17158 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
17160 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
17162 if (!rtx_equal_p (target, fp_hi))
17163 emit_move_insn (target, fp_hi);
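/* Sketch (plain C, not GCC code) of the conversion above: split the
   unsigned value into 16-bit halves, convert each exactly, and
   recombine as hi * 2^16 + lo.  */
#include <stdint.h>
#include <stdio.h>

static float
uns_si_to_sf_model (uint32_t u)
{
  float fp_hi = (float) (int32_t) (u >> 16);     /* exact: value < 2^16 */
  float fp_lo = (float) (int32_t) (u & 0xffff);  /* exact: value < 2^16 */
  return fp_hi * 65536.0f + fp_lo;               /* one rounding, at the add */
}

int
main (void)
{
  printf ("%f\n", uns_si_to_sf_model (4000000000u));
  return 0;
}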
17166 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
17167 a vector of unsigned ints VAL to vector of floats TARGET. */
17170 ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
17173 REAL_VALUE_TYPE TWO16r;
17174 enum machine_mode intmode = GET_MODE (val);
17175 enum machine_mode fltmode = GET_MODE (target);
17176 rtx (*cvt) (rtx, rtx);
17178 if (intmode == V4SImode)
17179 cvt = gen_floatv4siv4sf2;
17181 cvt = gen_floatv8siv8sf2;
17182 tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
17183 tmp[0] = force_reg (intmode, tmp[0]);
17184 tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
17186 tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
17187 NULL_RTX, 1, OPTAB_DIRECT);
17188 tmp[3] = gen_reg_rtx (fltmode);
17189 emit_insn (cvt (tmp[3], tmp[1]));
17190 tmp[4] = gen_reg_rtx (fltmode);
17191 emit_insn (cvt (tmp[4], tmp[2]));
17192 real_ldexp (&TWO16r, &dconst1, 16);
17193 tmp[5] = const_double_from_real_value (TWO16r, SFmode);
17194 tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
17195 tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
17197 tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
17199 if (tmp[7] != target)
17200 emit_move_insn (target, tmp[7]);
17203 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
17204 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
17205 This is done by doing just signed conversion if < 0x1p31, and otherwise by
17206 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
17209 ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
17211 REAL_VALUE_TYPE TWO31r;
17212 rtx two31r, tmp[4];
17213 enum machine_mode mode = GET_MODE (val);
17214 enum machine_mode scalarmode = GET_MODE_INNER (mode);
17215 enum machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
17216 rtx (*cmp) (rtx, rtx, rtx, rtx);
17219 for (i = 0; i < 3; i++)
17220 tmp[i] = gen_reg_rtx (mode);
17221 real_ldexp (&TWO31r, &dconst1, 31);
17222 two31r = const_double_from_real_value (TWO31r, scalarmode);
17223 two31r = ix86_build_const_vector (mode, 1, two31r);
17224 two31r = force_reg (mode, two31r);
17227 case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
17228 case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
17229 case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
17230 case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
17231 default: gcc_unreachable ();
17233 tmp[3] = gen_rtx_LE (mode, two31r, val);
17234 emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
17235 tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
17237 if (intmode == V4SImode || TARGET_AVX2)
17238 *xorp = expand_simple_binop (intmode, ASHIFT,
17239 gen_lowpart (intmode, tmp[0]),
17240 GEN_INT (31), NULL_RTX, 0,
17244 rtx two31 = GEN_INT ((unsigned HOST_WIDE_INT) 1 << 31);
17245 two31 = ix86_build_const_vector (intmode, 1, two31);
17246 *xorp = expand_simple_binop (intmode, AND,
17247 gen_lowpart (intmode, tmp[0]),
17248 two31, NULL_RTX, 0,
17251 return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
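/* Scalar sketch (plain C, not GCC code) of the adjustment above: values
   below 0x1.0p31 go through the signed conversion unchanged; larger
   values have 0x1.0p31 subtracted first and the sign bit xored back in
   afterwards (the vector code does this branchlessly with the compare
   mask built above).  */
#include <stdint.h>
#include <stdio.h>

static uint32_t
uns_fix_trunc_model (double d)
{
  if (d < 0x1.0p31)
    return (uint32_t) (int32_t) d;                       /* plain cvttsd2si */
  return (uint32_t) (int32_t) (d - 0x1.0p31) ^ 0x80000000u;
}

int
main (void)
{
  printf ("%u\n", uns_fix_trunc_model (4000000000.5));   /* 4000000000 */
  return 0;
}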
17255 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
17256 then replicate the value for all elements of the vector register. */
17260 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
17264 enum machine_mode scalar_mode;
17281 n_elt = GET_MODE_NUNITS (mode);
17282 v = rtvec_alloc (n_elt);
17283 scalar_mode = GET_MODE_INNER (mode);
17285 RTVEC_ELT (v, 0) = value;
17287 for (i = 1; i < n_elt; ++i)
17288 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
17290 return gen_rtx_CONST_VECTOR (mode, v);
17293 gcc_unreachable ();
17297 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
17298 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
17299 for an SSE register. If VECT is true, then replicate the mask for
17300 all elements of the vector register. If INVERT is true, then create
17301 a mask excluding the sign bit. */
17304 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
17306 enum machine_mode vec_mode, imode;
17307 HOST_WIDE_INT hi, lo;
17312 /* Find the sign bit, sign extended to 2*HWI. */
17320 mode = GET_MODE_INNER (mode);
17322 lo = 0x80000000, hi = lo < 0;
17330 mode = GET_MODE_INNER (mode);
17332 if (HOST_BITS_PER_WIDE_INT >= 64)
17333 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
17335 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
17340 vec_mode = VOIDmode;
17341 if (HOST_BITS_PER_WIDE_INT >= 64)
17344 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
17351 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
17355 lo = ~lo, hi = ~hi;
17361 mask = immed_double_const (lo, hi, imode);
17363 vec = gen_rtvec (2, v, mask);
17364 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
17365 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
17372 gcc_unreachable ();
17376 lo = ~lo, hi = ~hi;
17378 /* Force this value into the low part of an fp vector constant. */
17379 mask = immed_double_const (lo, hi, imode);
17380 mask = gen_lowpart (mode, mask);
17382 if (vec_mode == VOIDmode)
17383 return force_reg (mode, mask);
17385 v = ix86_build_const_vector (vec_mode, vect, mask);
17386 return force_reg (vec_mode, v);
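/* Editor's note (illustrative, assuming IEEE layouts): for SFmode the
   mask built here is 0x80000000 per element, or 0x7fffffff when
   INVERT, so the users of this mask reduce to single bitwise ops:

     x & 0x7fffffff   --  fabsf (x)
     x ^ 0x80000000   --  -x                                          */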
17389 /* Generate code for floating point ABS or NEG. */
17392 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
17395 rtx mask, set, dst, src;
17396 bool use_sse = false;
17397 bool vector_mode = VECTOR_MODE_P (mode);
17398 enum machine_mode vmode = mode;
17402 else if (mode == TFmode)
17404 else if (TARGET_SSE_MATH)
17406 use_sse = SSE_FLOAT_MODE_P (mode);
17407 if (mode == SFmode)
17409 else if (mode == DFmode)
17413 /* NEG and ABS performed with SSE use bitwise mask operations.
17414 Create the appropriate mask now. */
17416 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
17423 set = gen_rtx_fmt_e (code, mode, src);
17424 set = gen_rtx_SET (VOIDmode, dst, set);
17431 use = gen_rtx_USE (VOIDmode, mask);
17433 par = gen_rtvec (2, set, use);
17436 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17437 par = gen_rtvec (3, set, use, clob);
17439 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
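/* Editor's sketch of the emitted shape (from the code above, for
   orientation): the absneg insn is a PARALLEL of the
   (set (dst) (neg/abs src)) together with a (use MASK) so the
   splitter can recover the constant mask, plus a
   (clobber (reg:CC FLAGS_REG)) in the non-vector case.  */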
17445 /* Expand a copysign operation. Special case operand 0 being a constant. */
17448 ix86_expand_copysign (rtx operands[])
17450 enum machine_mode mode, vmode;
17451 rtx dest, op0, op1, mask, nmask;
17453 dest = operands[0];
17457 mode = GET_MODE (dest);
17459 if (mode == SFmode)
17461 else if (mode == DFmode)
17466 if (GET_CODE (op0) == CONST_DOUBLE)
17468 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
17470 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
17471 op0 = simplify_unary_operation (ABS, mode, op0, mode);
17473 if (mode == SFmode || mode == DFmode)
17475 if (op0 == CONST0_RTX (mode))
17476 op0 = CONST0_RTX (vmode);
17479 rtx v = ix86_build_const_vector (vmode, false, op0);
17481 op0 = force_reg (vmode, v);
17484 else if (op0 != CONST0_RTX (mode))
17485 op0 = force_reg (mode, op0);
17487 mask = ix86_build_signbit_mask (vmode, 0, 0);
17489 if (mode == SFmode)
17490 copysign_insn = gen_copysignsf3_const;
17491 else if (mode == DFmode)
17492 copysign_insn = gen_copysigndf3_const;
17494 copysign_insn = gen_copysigntf3_const;
17496 emit_insn (copysign_insn (dest, op0, op1, mask));
17500 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
17502 nmask = ix86_build_signbit_mask (vmode, 0, 1);
17503 mask = ix86_build_signbit_mask (vmode, 0, 0);
17505 if (mode == SFmode)
17506 copysign_insn = gen_copysignsf3_var;
17507 else if (mode == DFmode)
17508 copysign_insn = gen_copysigndf3_var;
17510 copysign_insn = gen_copysigntf3_var;
17512 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
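/* Editor's note (illustrative): conceptually both variants compute

     dest = (op0 & ~signmask) | (op1 & signmask)

   i.e. copysign (op0, op1); in the CONST_DOUBLE case op0 & ~signmask
   is folded at expand time into the |op0| vector constant.  */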
17516 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
17517 be a constant, and so has already been expanded into a vector constant. */
17520 ix86_split_copysign_const (rtx operands[])
17522 enum machine_mode mode, vmode;
17523 rtx dest, op0, mask, x;
17525 dest = operands[0];
17527 mask = operands[3];
17529 mode = GET_MODE (dest);
17530 vmode = GET_MODE (mask);
17532 dest = simplify_gen_subreg (vmode, dest, mode, 0);
17533 x = gen_rtx_AND (vmode, dest, mask);
17534 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17536 if (op0 != CONST0_RTX (vmode))
17538 x = gen_rtx_IOR (vmode, dest, op0);
17539 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17543 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
17544 so we have to do two masks. */
17547 ix86_split_copysign_var (rtx operands[])
17549 enum machine_mode mode, vmode;
17550 rtx dest, scratch, op0, op1, mask, nmask, x;
17552 dest = operands[0];
17553 scratch = operands[1];
17556 nmask = operands[4];
17557 mask = operands[5];
17559 mode = GET_MODE (dest);
17560 vmode = GET_MODE (mask);
17562 if (rtx_equal_p (op0, op1))
17564 /* Shouldn't happen often (it's useless, obviously), but when it does
17565 we'd generate incorrect code if we continue below. */
17566 emit_move_insn (dest, op0);
17570 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
17572 gcc_assert (REGNO (op1) == REGNO (scratch));
17574 x = gen_rtx_AND (vmode, scratch, mask);
17575 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
17578 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
17579 x = gen_rtx_NOT (vmode, dest);
17580 x = gen_rtx_AND (vmode, x, op0);
17581 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17585 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
17587 x = gen_rtx_AND (vmode, scratch, mask);
17589 else /* alternative 2,4 */
17591 gcc_assert (REGNO (mask) == REGNO (scratch));
17592 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
17593 x = gen_rtx_AND (vmode, scratch, op1);
17595 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
17597 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
17599 dest = simplify_gen_subreg (vmode, op0, mode, 0);
17600 x = gen_rtx_AND (vmode, dest, nmask);
17602 else /* alternative 3,4 */
17604 gcc_assert (REGNO (nmask) == REGNO (dest));
17606 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
17607 x = gen_rtx_AND (vmode, dest, op0);
17609 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17612 x = gen_rtx_IOR (vmode, dest, scratch);
17613 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
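/* Editor's sketch: whichever register alternative was matched, the
   net effect of the sequence above is

     scratch = op1 & mask;    -- sign bit of op1
     dest    = op0 & nmask;   -- magnitude of op0
     dest   |= scratch;

   a branch-free copysign on the SSE side.  */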
17616 /* Return TRUE or FALSE depending on whether the first SET in INSN
17617 has source and destination with matching CC modes and whether the
17618 CC mode is at least as constrained as REQ_MODE. */
17621 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
17624 enum machine_mode set_mode;
17626 set = PATTERN (insn);
17627 if (GET_CODE (set) == PARALLEL)
17628 set = XVECEXP (set, 0, 0);
17629 gcc_assert (GET_CODE (set) == SET);
17630 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
17632 set_mode = GET_MODE (SET_DEST (set));
17636 if (req_mode != CCNOmode
17637 && (req_mode != CCmode
17638 || XEXP (SET_SRC (set), 1) != const0_rtx))
17642 if (req_mode == CCGCmode)
17646 if (req_mode == CCGOCmode || req_mode == CCNOmode)
17650 if (req_mode == CCZmode)
17660 if (set_mode != req_mode)
17665 gcc_unreachable ();
17668 return GET_MODE (SET_SRC (set)) == set_mode;
17671 /* Generate insn patterns to do an integer compare of OPERANDS. */
17674 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
17676 enum machine_mode cmpmode;
17679 cmpmode = SELECT_CC_MODE (code, op0, op1);
17680 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
17682 /* This is very simple, but making the interface the same as in the
17683 FP case makes the rest of the code easier. */
17684 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
17685 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
17687 /* Return the test that should be put into the flags user, i.e.
17688 the bcc, scc, or cmov instruction. */
17689 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
17692 /* Figure out whether to use ordered or unordered fp comparisons.
17693 Return the appropriate mode to use. */
17696 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
17698 /* ??? In order to make all comparisons reversible, we do all comparisons
17699 non-trapping when compiling for IEEE. Once gcc is able to distinguish
17700 all forms of trapping and nontrapping comparisons, we can make inequality
17701 comparisons trapping again, since it results in better code when using
17702 FCOM based compares. */
17703 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
17707 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
17709 enum machine_mode mode = GET_MODE (op0);
17711 if (SCALAR_FLOAT_MODE_P (mode))
17713 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
17714 return ix86_fp_compare_mode (code);
17719 /* Only zero flag is needed. */
17720 case EQ: /* ZF=0 */
17721 case NE: /* ZF!=0 */
17723 /* Codes needing carry flag. */
17724 case GEU: /* CF=0 */
17725 case LTU: /* CF=1 */
17726 /* Detect overflow checks. They need just the carry flag. */
17727 if (GET_CODE (op0) == PLUS
17728 && rtx_equal_p (op1, XEXP (op0, 0)))
17732 case GTU: /* CF=0 & ZF=0 */
17733 case LEU: /* CF=1 | ZF=1 */
17734 /* Detect overflow checks. They need just the carry flag. */
17735 if (GET_CODE (op0) == MINUS
17736 && rtx_equal_p (op1, XEXP (op0, 0)))
17740 /* Codes possibly doable only with sign flag when
17741 comparing against zero. */
17742 case GE: /* SF=OF or SF=0 */
17743 case LT: /* SF<>OF or SF=1 */
17744 if (op1 == const0_rtx)
17747 /* For other cases the carry flag is not required. */
17749 /* Codes doable only with the sign flag when comparing
17750 against zero, but for which we lack a jump instruction,
17751 so we need to use relational tests against overflow,
17752 which thus needs to be zero. */
17753 case GT: /* ZF=0 & SF=OF */
17754 case LE: /* ZF=1 | SF<>OF */
17755 if (op1 == const0_rtx)
17759 /* strcmp patterns do (use flags) and combine may ask us for proper
17764 gcc_unreachable ();
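/* Editor's worked example (not from the original source): the
   PLUS/MINUS special cases above match overflow idioms such as

     if (a + b < a)  ...   -- unsigned add overflow, LTU on carry
     if (a - b > a)  ...   -- unsigned sub borrow, GTU

   which need only the carry flag, so a carry-only CC mode suffices.  */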
17768 /* Return the fixed registers used for condition codes. */
17771 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
17778 /* If two condition code modes are compatible, return a condition code
17779 mode which is compatible with both. Otherwise, return
17782 static enum machine_mode
17783 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
17788 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
17791 if ((m1 == CCGCmode && m2 == CCGOCmode)
17792 || (m1 == CCGOCmode && m2 == CCGCmode))
17798 gcc_unreachable ();
17828 /* These are only compatible with themselves, which we already
17835 /* Return a comparison we can do that is equivalent to
17836 swap_condition (code), apart possibly from orderedness.
17837 But never change orderedness if TARGET_IEEE_FP, returning
17838 UNKNOWN in that case if necessary. */
17840 static enum rtx_code
17841 ix86_fp_swap_condition (enum rtx_code code)
17845 case GT: /* GTU - CF=0 & ZF=0 */
17846 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
17847 case GE: /* GEU - CF=0 */
17848 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
17849 case UNLT: /* LTU - CF=1 */
17850 return TARGET_IEEE_FP ? UNKNOWN : GT;
17851 case UNLE: /* LEU - CF=1 | ZF=1 */
17852 return TARGET_IEEE_FP ? UNKNOWN : GE;
17854 return swap_condition (code);
17858 /* Return the cost of comparison CODE using the best strategy for performance.
17859 All following functions use the number of instructions as a cost metric.
17860 In the future this should be tweaked to compute bytes for optimize_size and
17861 take into account the performance of various instructions on various CPUs. */
17864 ix86_fp_comparison_cost (enum rtx_code code)
17868 /* The cost of code using bit-twiddling on %ah. */
17885 arith_cost = TARGET_IEEE_FP ? 5 : 4;
17889 arith_cost = TARGET_IEEE_FP ? 6 : 4;
17892 gcc_unreachable ();
17895 switch (ix86_fp_comparison_strategy (code))
17897 case IX86_FPCMP_COMI:
17898 return arith_cost > 4 ? 3 : 2;
17899 case IX86_FPCMP_SAHF:
17900 return arith_cost > 4 ? 4 : 3;
17906 /* Return the strategy to use for a floating-point comparison. We assume
17907 that fcomi is always preferable where available, since that is also true when
17908 looking at size (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
17910 enum ix86_fpcmp_strategy
17911 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
17913 /* Do fcomi/sahf based test when profitable. */
17916 return IX86_FPCMP_COMI;
17918 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_function_for_size_p (cfun)))
17919 return IX86_FPCMP_SAHF;
17921 return IX86_FPCMP_ARITH;
17924 /* Swap, force into registers, or otherwise massage the two operands
17925 to an fp comparison. The operands are updated in place; the new
17926 comparison code is returned. */
17928 static enum rtx_code
17929 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
17931 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
17932 rtx op0 = *pop0, op1 = *pop1;
17933 enum machine_mode op_mode = GET_MODE (op0);
17934 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
17936 /* All of the unordered compare instructions only work on registers.
17937 The same is true of the fcomi compare instructions. The XFmode
17938 compare instructions require registers except when comparing
17939 against zero or when converting operand 1 from fixed point to
17943 && (fpcmp_mode == CCFPUmode
17944 || (op_mode == XFmode
17945 && ! (standard_80387_constant_p (op0) == 1
17946 || standard_80387_constant_p (op1) == 1)
17947 && GET_CODE (op1) != FLOAT)
17948 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
17950 op0 = force_reg (op_mode, op0);
17951 op1 = force_reg (op_mode, op1);
17955 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
17956 things around if they appear profitable, otherwise force op0
17957 into a register. */
17959 if (standard_80387_constant_p (op0) == 0
17961 && ! (standard_80387_constant_p (op1) == 0
17964 enum rtx_code new_code = ix86_fp_swap_condition (code);
17965 if (new_code != UNKNOWN)
17968 tmp = op0, op0 = op1, op1 = tmp;
17974 op0 = force_reg (op_mode, op0);
17976 if (CONSTANT_P (op1))
17978 int tmp = standard_80387_constant_p (op1);
17980 op1 = validize_mem (force_const_mem (op_mode, op1));
17984 op1 = force_reg (op_mode, op1);
17987 op1 = force_reg (op_mode, op1);
17991 /* Try to rearrange the comparison to make it cheaper. */
17992 if (ix86_fp_comparison_cost (code)
17993 > ix86_fp_comparison_cost (swap_condition (code))
17994 && (REG_P (op1) || can_create_pseudo_p ()))
17997 tmp = op0, op0 = op1, op1 = tmp;
17998 code = swap_condition (code);
18000 op0 = force_reg (op_mode, op0);
18008 /* Convert the comparison codes we use to represent an FP comparison to an
18009 integer code that will result in a proper branch. Return UNKNOWN if no such code
18013 ix86_fp_compare_code_to_integer (enum rtx_code code)
18042 /* Generate insn patterns to do a floating point compare of OPERANDS. */
18045 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
18047 enum machine_mode fpcmp_mode, intcmp_mode;
18050 fpcmp_mode = ix86_fp_compare_mode (code);
18051 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
18053 /* Do fcomi/sahf based test when profitable. */
18054 switch (ix86_fp_comparison_strategy (code))
18056 case IX86_FPCMP_COMI:
18057 intcmp_mode = fpcmp_mode;
18058 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
18059 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
18064 case IX86_FPCMP_SAHF:
18065 intcmp_mode = fpcmp_mode;
18066 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
18067 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
18071 scratch = gen_reg_rtx (HImode);
18072 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
18073 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
18076 case IX86_FPCMP_ARITH:
18077 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
18078 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
18079 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
18081 scratch = gen_reg_rtx (HImode);
18082 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
18084 /* In the unordered case, we have to check C2 for NaNs, which
18085 doesn't happen to work out to anything nice combination-wise.
18086 So do some bit twiddling on the value we've got in AH to come
18087 up with an appropriate set of condition codes. */
18089 intcmp_mode = CCNOmode;
18094 if (code == GT || !TARGET_IEEE_FP)
18096 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
18101 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
18102 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
18103 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
18104 intcmp_mode = CCmode;
18110 if (code == LT && TARGET_IEEE_FP)
18112 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
18113 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
18114 intcmp_mode = CCmode;
18119 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
18125 if (code == GE || !TARGET_IEEE_FP)
18127 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
18132 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
18133 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
18139 if (code == LE && TARGET_IEEE_FP)
18141 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
18142 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
18143 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
18144 intcmp_mode = CCmode;
18149 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
18155 if (code == EQ && TARGET_IEEE_FP)
18157 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
18158 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
18159 intcmp_mode = CCmode;
18164 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
18170 if (code == NE && TARGET_IEEE_FP)
18172 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
18173 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
18179 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
18185 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
18189 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
18194 gcc_unreachable ();
18202 /* Return the test that should be put into the flags user, i.e.
18203 the bcc, scc, or cmov instruction. */
18204 return gen_rtx_fmt_ee (code, VOIDmode,
18205 gen_rtx_REG (intcmp_mode, FLAGS_REG),
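/* Editor's note on the magic constants above (x87 status word layout,
   assumed): after fnstsw the relevant bits land in %ah as C0 = 0x01,
   C2 = 0x04 and C3 = 0x40.  So testing 0x45 checks C3|C2|C0 at once
   (e.g. all clear means op0 > op1 and ordered), while 0x05 checks
   C2|C0 for the GE case and 0x40 isolates C3 for equality.  */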
18210 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
18214 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
18215 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
18217 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
18219 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
18220 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
18223 ret = ix86_expand_int_compare (code, op0, op1);
18229 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
18231 enum machine_mode mode = GET_MODE (op0);
18243 tmp = ix86_expand_compare (code, op0, op1);
18244 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
18245 gen_rtx_LABEL_REF (VOIDmode, label),
18247 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
18254 /* Expand DImode branch into multiple compare+branch. */
18256 rtx lo[2], hi[2], label2;
18257 enum rtx_code code1, code2, code3;
18258 enum machine_mode submode;
18260 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
18262 tmp = op0, op0 = op1, op1 = tmp;
18263 code = swap_condition (code);
18266 split_double_mode (mode, &op0, 1, lo+0, hi+0);
18267 split_double_mode (mode, &op1, 1, lo+1, hi+1);
18269 submode = mode == DImode ? SImode : DImode;
18271 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
18272 avoid two branches. This costs one extra insn, so disable when
18273 optimizing for size. */
18275 if ((code == EQ || code == NE)
18276 && (!optimize_insn_for_size_p ()
18277 || hi[1] == const0_rtx || lo[1] == const0_rtx))
18282 if (hi[1] != const0_rtx)
18283 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
18284 NULL_RTX, 0, OPTAB_WIDEN);
18287 if (lo[1] != const0_rtx)
18288 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
18289 NULL_RTX, 0, OPTAB_WIDEN);
18291 tmp = expand_binop (submode, ior_optab, xor1, xor0,
18292 NULL_RTX, 0, OPTAB_WIDEN);
18294 ix86_expand_branch (code, tmp, const0_rtx, label);
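/* Editor's note (illustrative): the trick above relies on

     ((hi0 ^ hi1) | (lo0 ^ lo1)) == 0  <==>  {hi0,lo0} == {hi1,lo1}

   turning a double-word EQ/NE into one single-word compare against
   zero.  */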
18298 /* Otherwise, if we are doing a less-than or greater-or-equal comparison,
18299 op1 is a constant and the low word is zero, then we can just
18300 examine the high word. Similarly for a low word of -1 and
18301 less-or-equal or greater-than. */
18303 if (CONST_INT_P (hi[1]))
18306 case LT: case LTU: case GE: case GEU:
18307 if (lo[1] == const0_rtx)
18309 ix86_expand_branch (code, hi[0], hi[1], label);
18313 case LE: case LEU: case GT: case GTU:
18314 if (lo[1] == constm1_rtx)
18316 ix86_expand_branch (code, hi[0], hi[1], label);
18324 /* Otherwise, we need two or three jumps. */
18326 label2 = gen_label_rtx ();
18329 code2 = swap_condition (code);
18330 code3 = unsigned_condition (code);
18334 case LT: case GT: case LTU: case GTU:
18337 case LE: code1 = LT; code2 = GT; break;
18338 case GE: code1 = GT; code2 = LT; break;
18339 case LEU: code1 = LTU; code2 = GTU; break;
18340 case GEU: code1 = GTU; code2 = LTU; break;
18342 case EQ: code1 = UNKNOWN; code2 = NE; break;
18343 case NE: code2 = UNKNOWN; break;
18346 gcc_unreachable ();
18351 * if (hi(a) < hi(b)) goto true;
18352 * if (hi(a) > hi(b)) goto false;
18353 * if (lo(a) < lo(b)) goto true;
18357 if (code1 != UNKNOWN)
18358 ix86_expand_branch (code1, hi[0], hi[1], label);
18359 if (code2 != UNKNOWN)
18360 ix86_expand_branch (code2, hi[0], hi[1], label2);
18362 ix86_expand_branch (code3, lo[0], lo[1], label);
18364 if (code2 != UNKNOWN)
18365 emit_label (label2);
18370 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
18375 /* Split branch based on floating point condition. */
18377 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
18378 rtx target1, rtx target2, rtx tmp, rtx pushed)
18383 if (target2 != pc_rtx)
18386 code = reverse_condition_maybe_unordered (code);
18391 condition = ix86_expand_fp_compare (code, op1, op2,
18394 /* Remove pushed operand from stack. */
18396 ix86_free_from_memory (GET_MODE (pushed));
18398 i = emit_jump_insn (gen_rtx_SET
18400 gen_rtx_IF_THEN_ELSE (VOIDmode,
18401 condition, target1, target2)));
18402 if (split_branch_probability >= 0)
18403 add_reg_note (i, REG_BR_PROB, GEN_INT (split_branch_probability));
18407 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
18411 gcc_assert (GET_MODE (dest) == QImode);
18413 ret = ix86_expand_compare (code, op0, op1);
18414 PUT_MODE (ret, QImode);
18415 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
18418 /* Expand a comparison setting or clearing the carry flag. Return true when
18419 successful and set *POP to the comparison operation. */
18421 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
18423 enum machine_mode mode =
18424 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
18426 /* Do not handle double-mode compares that go through a special path. */
18427 if (mode == (TARGET_64BIT ? TImode : DImode))
18430 if (SCALAR_FLOAT_MODE_P (mode))
18432 rtx compare_op, compare_seq;
18434 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
18436 /* Shortcut: the following common codes never translate
18437 into carry flag compares. */
18438 if (code == EQ || code == NE || code == UNEQ || code == LTGT
18439 || code == ORDERED || code == UNORDERED)
18442 /* These comparisons require the zero flag; swap operands so they won't. */
18443 if ((code == GT || code == UNLE || code == LE || code == UNGT)
18444 && !TARGET_IEEE_FP)
18449 code = swap_condition (code);
18452 /* Try to expand the comparison and verify that we end up with
18453 a carry-flag-based comparison. This fails only when we decide
18454 to expand the comparison using arithmetic, which is not a
18455 common scenario. */
18457 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
18458 compare_seq = get_insns ();
18461 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
18462 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
18463 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
18465 code = GET_CODE (compare_op);
18467 if (code != LTU && code != GEU)
18470 emit_insn (compare_seq);
18475 if (!INTEGRAL_MODE_P (mode))
18484 /* Convert a==0 into (unsigned)a<1. */
18487 if (op1 != const0_rtx)
18490 code = (code == EQ ? LTU : GEU);
18493 /* Convert a>b into b<a or a>=b+1. */
18496 if (CONST_INT_P (op1))
18498 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
18499 /* Bail out on overflow. We still can swap operands but that
18500 would force loading the constant into a register. */
18501 if (op1 == const0_rtx
18502 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
18504 code = (code == GTU ? GEU : LTU);
18511 code = (code == GTU ? LTU : GEU);
18515 /* Convert a>=0 into (unsigned)a<0x80000000. */
18518 if (mode == DImode || op1 != const0_rtx)
18520 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
18521 code = (code == LT ? GEU : LTU);
18525 if (mode == DImode || op1 != constm1_rtx)
18527 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
18528 code = (code == LE ? GEU : LTU);
18534 /* Swapping operands may cause a constant to appear as the first operand. */
18535 if (!nonimmediate_operand (op0, VOIDmode))
18537 if (!can_create_pseudo_p ())
18539 op0 = force_reg (mode, op0);
18541 *pop = ix86_expand_compare (code, op0, op1);
18542 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
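/* Editor's worked example (not from the original source): "a == 0"
   is rewritten above as "(unsigned) a < 1", i.e. an LTU that a
   cmp/sbb pair can implement from the carry flag alone; likewise
   "a >= 0" becomes "(unsigned) a < 0x80000000".  */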
18547 ix86_expand_int_movcc (rtx operands[])
18549 enum rtx_code code = GET_CODE (operands[1]), compare_code;
18550 rtx compare_seq, compare_op;
18551 enum machine_mode mode = GET_MODE (operands[0]);
18552 bool sign_bit_compare_p = false;
18553 rtx op0 = XEXP (operands[1], 0);
18554 rtx op1 = XEXP (operands[1], 1);
18557 compare_op = ix86_expand_compare (code, op0, op1);
18558 compare_seq = get_insns ();
18561 compare_code = GET_CODE (compare_op);
18563 if ((op1 == const0_rtx && (code == GE || code == LT))
18564 || (op1 == constm1_rtx && (code == GT || code == LE)))
18565 sign_bit_compare_p = true;
18567 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
18568 HImode insns, we'd be swallowed in word prefix ops. */
18570 if ((mode != HImode || TARGET_FAST_PREFIX)
18571 && (mode != (TARGET_64BIT ? TImode : DImode))
18572 && CONST_INT_P (operands[2])
18573 && CONST_INT_P (operands[3]))
18575 rtx out = operands[0];
18576 HOST_WIDE_INT ct = INTVAL (operands[2]);
18577 HOST_WIDE_INT cf = INTVAL (operands[3]);
18578 HOST_WIDE_INT diff;
18581 /* Sign bit compares are better done using shifts than by using
18583 if (sign_bit_compare_p
18584 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
18586 /* Detect overlap between destination and compare sources. */
18589 if (!sign_bit_compare_p)
18592 bool fpcmp = false;
18594 compare_code = GET_CODE (compare_op);
18596 flags = XEXP (compare_op, 0);
18598 if (GET_MODE (flags) == CCFPmode
18599 || GET_MODE (flags) == CCFPUmode)
18603 = ix86_fp_compare_code_to_integer (compare_code);
18606 /* To simplify the rest of the code, restrict to the GEU case. */
18607 if (compare_code == LTU)
18609 HOST_WIDE_INT tmp = ct;
18612 compare_code = reverse_condition (compare_code);
18613 code = reverse_condition (code);
18618 PUT_CODE (compare_op,
18619 reverse_condition_maybe_unordered
18620 (GET_CODE (compare_op)));
18622 PUT_CODE (compare_op,
18623 reverse_condition (GET_CODE (compare_op)));
18627 if (reg_overlap_mentioned_p (out, op0)
18628 || reg_overlap_mentioned_p (out, op1))
18629 tmp = gen_reg_rtx (mode);
18631 if (mode == DImode)
18632 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
18634 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
18635 flags, compare_op));
18639 if (code == GT || code == GE)
18640 code = reverse_condition (code);
18643 HOST_WIDE_INT tmp = ct;
18648 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
18661 tmp = expand_simple_binop (mode, PLUS,
18663 copy_rtx (tmp), 1, OPTAB_DIRECT);
18674 tmp = expand_simple_binop (mode, IOR,
18676 copy_rtx (tmp), 1, OPTAB_DIRECT);
18678 else if (diff == -1 && ct)
18688 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
18690 tmp = expand_simple_binop (mode, PLUS,
18691 copy_rtx (tmp), GEN_INT (cf),
18692 copy_rtx (tmp), 1, OPTAB_DIRECT);
18700 * andl cf - ct, dest
18710 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
18713 tmp = expand_simple_binop (mode, AND,
18715 gen_int_mode (cf - ct, mode),
18716 copy_rtx (tmp), 1, OPTAB_DIRECT);
18718 tmp = expand_simple_binop (mode, PLUS,
18719 copy_rtx (tmp), GEN_INT (ct),
18720 copy_rtx (tmp), 1, OPTAB_DIRECT);
18723 if (!rtx_equal_p (tmp, out))
18724 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
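/* Editor's sketch of the sbb idiom used above, in C terms:

     mask = cond ? -1 : 0;            -- x86_mov?icc_0_m1 / sbb
     res  = (mask & (cf - ct)) + ct;  -- the AND/PLUS steps above

   selecting cf when the mask is -1 and ct when it is 0, exactly in
   modular arithmetic, without any branch.  */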
18731 enum machine_mode cmp_mode = GET_MODE (op0);
18734 tmp = ct, ct = cf, cf = tmp;
18737 if (SCALAR_FLOAT_MODE_P (cmp_mode))
18739 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
18741 /* We may be reversing an unordered compare to a normal compare, which
18742 is not valid in general (we may convert a non-trapping condition
18743 to a trapping one); however, on i386 we currently emit all
18744 comparisons unordered. */
18745 compare_code = reverse_condition_maybe_unordered (compare_code);
18746 code = reverse_condition_maybe_unordered (code);
18750 compare_code = reverse_condition (compare_code);
18751 code = reverse_condition (code);
18755 compare_code = UNKNOWN;
18756 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
18757 && CONST_INT_P (op1))
18759 if (op1 == const0_rtx
18760 && (code == LT || code == GE))
18761 compare_code = code;
18762 else if (op1 == constm1_rtx)
18766 else if (code == GT)
18771 /* Optimize dest = (op0 < 0) ? -1 : cf. */
18772 if (compare_code != UNKNOWN
18773 && GET_MODE (op0) == GET_MODE (out)
18774 && (cf == -1 || ct == -1))
18776 /* If the lea code below could be used, only optimize
18777 if it results in a 2-insn sequence. */
18779 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
18780 || diff == 3 || diff == 5 || diff == 9)
18781 || (compare_code == LT && ct == -1)
18782 || (compare_code == GE && cf == -1))
18785 * notl op1 (if necessary)
18793 code = reverse_condition (code);
18796 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
18798 out = expand_simple_binop (mode, IOR,
18800 out, 1, OPTAB_DIRECT);
18801 if (out != operands[0])
18802 emit_move_insn (operands[0], out);
18809 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
18810 || diff == 3 || diff == 5 || diff == 9)
18811 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
18813 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
18819 * lea cf(dest*(ct-cf)),dest
18823 * This also catches the degenerate setcc-only case.
18829 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
18832 /* On x86_64 the lea instruction operates on Pmode, so we need
18833 to get the arithmetic done in the proper mode to match. */
18835 tmp = copy_rtx (out);
18839 out1 = copy_rtx (out);
18840 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
18844 tmp = gen_rtx_PLUS (mode, tmp, out1);
18850 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
18853 if (!rtx_equal_p (tmp, out))
18856 out = force_operand (tmp, copy_rtx (out));
18858 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
18860 if (!rtx_equal_p (out, operands[0]))
18861 emit_move_insn (operands[0], copy_rtx (out));
18867 * General case: Jumpful:
18868 * xorl dest,dest cmpl op1, op2
18869 * cmpl op1, op2 movl ct, dest
18870 * setcc dest jcc 1f
18871 * decl dest movl cf, dest
18872 * andl (cf-ct),dest 1:
18875 * Size 20. Size 14.
18877 * This is reasonably steep, but branch mispredict costs are
18878 * high on modern cpus, so consider failing only if optimizing
18882 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
18883 && BRANCH_COST (optimize_insn_for_speed_p (),
18888 enum machine_mode cmp_mode = GET_MODE (op0);
18893 if (SCALAR_FLOAT_MODE_P (cmp_mode))
18895 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
18897 /* We may be reversing an unordered compare to a normal compare,
18898 which is not valid in general (we may convert a non-trapping
18899 condition to a trapping one); however, on i386 we currently
18900 emit all comparisons unordered. */
18901 code = reverse_condition_maybe_unordered (code);
18905 code = reverse_condition (code);
18906 if (compare_code != UNKNOWN)
18907 compare_code = reverse_condition (compare_code);
18911 if (compare_code != UNKNOWN)
18913 /* notl op1 (if needed)
18918 For x < 0 (resp. x <= -1) there will be no notl,
18919 so if possible swap the constants to get rid of the
18921 True/false will be -1/0 while code below (store flag
18922 followed by decrement) is 0/-1, so the constants need
18923 to be exchanged once more. */
18925 if (compare_code == GE || !cf)
18927 code = reverse_condition (code);
18932 HOST_WIDE_INT tmp = cf;
18937 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
18941 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
18943 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
18945 copy_rtx (out), 1, OPTAB_DIRECT);
18948 out = expand_simple_binop (mode, AND, copy_rtx (out),
18949 gen_int_mode (cf - ct, mode),
18950 copy_rtx (out), 1, OPTAB_DIRECT);
18952 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
18953 copy_rtx (out), 1, OPTAB_DIRECT);
18954 if (!rtx_equal_p (out, operands[0]))
18955 emit_move_insn (operands[0], copy_rtx (out));
18961 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
18963 /* Try a few things more with specific constants and a variable. */
18966 rtx var, orig_out, out, tmp;
18968 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
18971 /* If one of the two operands is an interesting constant, load a
18972 constant with the above and mask it in with a logical operation. */
18974 if (CONST_INT_P (operands[2]))
18977 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
18978 operands[3] = constm1_rtx, op = and_optab;
18979 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
18980 operands[3] = const0_rtx, op = ior_optab;
18984 else if (CONST_INT_P (operands[3]))
18987 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
18988 operands[2] = constm1_rtx, op = and_optab;
18989 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
18990 operands[2] = const0_rtx, op = ior_optab;
18997 orig_out = operands[0];
18998 tmp = gen_reg_rtx (mode);
19001 /* Recurse to get the constant loaded. */
19002 if (ix86_expand_int_movcc (operands) == 0)
19005 /* Mask in the interesting variable. */
19006 out = expand_binop (mode, op, var, tmp, orig_out, 0,
19008 if (!rtx_equal_p (out, orig_out))
19009 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
19015 * For comparison with above,
19025 if (! nonimmediate_operand (operands[2], mode))
19026 operands[2] = force_reg (mode, operands[2]);
19027 if (! nonimmediate_operand (operands[3], mode))
19028 operands[3] = force_reg (mode, operands[3]);
19030 if (! register_operand (operands[2], VOIDmode)
19032 || ! register_operand (operands[3], VOIDmode)))
19033 operands[2] = force_reg (mode, operands[2]);
19036 && ! register_operand (operands[3], VOIDmode))
19037 operands[3] = force_reg (mode, operands[3]);
19039 emit_insn (compare_seq);
19040 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
19041 gen_rtx_IF_THEN_ELSE (mode,
19042 compare_op, operands[2],
19047 /* Swap, force into registers, or otherwise massage the two operands
19048 to an sse comparison with a mask result. Thus we differ a bit from
19049 ix86_prepare_fp_compare_args which expects to produce a flags result.
19051 The DEST operand exists to help determine whether to commute commutative
19052 operators. The POP0/POP1 operands are updated in place. The new
19053 comparison code is returned, or UNKNOWN if not implementable. */
19055 static enum rtx_code
19056 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
19057 rtx *pop0, rtx *pop1)
19065 /* AVX supports all the needed comparisons. */
19068 /* We have no LTGT as an operator. We could implement it with
19069 NE & ORDERED, but this requires an extra temporary. It's
19070 not clear that it's worth it. */
19077 /* These are supported directly. */
19084 /* AVX has 3 operand comparisons, no need to swap anything. */
19087 /* For commutative operators, try to canonicalize the destination
19088 operand to be first in the comparison - this helps reload to
19089 avoid extra moves. */
19090 if (!dest || !rtx_equal_p (dest, *pop1))
19098 /* These are not supported directly before AVX, and furthermore
19099 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
19100 comparison operands to transform into something that is
19105 code = swap_condition (code);
19109 gcc_unreachable ();
19115 /* Detect conditional moves that exactly match min/max operational
19116 semantics. Note that this is IEEE safe, as long as we don't
19117 interchange the operands.
19119 Returns FALSE if this conditional move doesn't match a MIN/MAX,
19120 and TRUE if the operation is successful and instructions are emitted. */
19123 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
19124 rtx cmp_op1, rtx if_true, rtx if_false)
19126 enum machine_mode mode;
19132 else if (code == UNGE)
19135 if_true = if_false;
19141 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
19143 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
19148 mode = GET_MODE (dest);
19150 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
19151 but MODE may be a vector mode and thus not appropriate. */
19152 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
19154 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
19157 if_true = force_reg (mode, if_true);
19158 v = gen_rtvec (2, if_true, if_false);
19159 tmp = gen_rtx_UNSPEC (mode, v, u);
19163 code = is_min ? SMIN : SMAX;
19164 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
19167 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
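/* Editor's note (assumed SSE semantics): minps/minss compute
   "a < b ? a : b" and return the second operand when the comparison
   is unordered, so the operands must not be interchanged; that is why
   only the LT/UNGE shapes are accepted above and why the UNSPEC form
   is used when NaNs or signed zeros may matter.  */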
19171 /* Expand an sse vector comparison. Return the register with the result. */
19174 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
19175 rtx op_true, rtx op_false)
19177 enum machine_mode mode = GET_MODE (dest);
19178 enum machine_mode cmp_mode = GET_MODE (cmp_op0);
19181 cmp_op0 = force_reg (cmp_mode, cmp_op0);
19182 if (!nonimmediate_operand (cmp_op1, cmp_mode))
19183 cmp_op1 = force_reg (cmp_mode, cmp_op1);
19186 || reg_overlap_mentioned_p (dest, op_true)
19187 || reg_overlap_mentioned_p (dest, op_false))
19188 dest = gen_reg_rtx (mode);
19190 x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
19191 if (cmp_mode != mode)
19193 x = force_reg (cmp_mode, x);
19194 convert_move (dest, x, false);
19197 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19202 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
19203 operations. This is used for both scalar and vector conditional moves. */
19206 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
19208 enum machine_mode mode = GET_MODE (dest);
19211 if (vector_all_ones_operand (op_true, mode)
19212 && rtx_equal_p (op_false, CONST0_RTX (mode)))
19214 emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
19216 else if (op_false == CONST0_RTX (mode))
19218 op_true = force_reg (mode, op_true);
19219 x = gen_rtx_AND (mode, cmp, op_true);
19220 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19222 else if (op_true == CONST0_RTX (mode))
19224 op_false = force_reg (mode, op_false);
19225 x = gen_rtx_NOT (mode, cmp);
19226 x = gen_rtx_AND (mode, x, op_false);
19227 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19229 else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode))
19231 op_false = force_reg (mode, op_false);
19232 x = gen_rtx_IOR (mode, cmp, op_false);
19233 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19235 else if (TARGET_XOP)
19237 op_true = force_reg (mode, op_true);
19239 if (!nonimmediate_operand (op_false, mode))
19240 op_false = force_reg (mode, op_false);
19242 emit_insn (gen_rtx_SET (mode, dest,
19243 gen_rtx_IF_THEN_ELSE (mode, cmp,
19249 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
19251 if (!nonimmediate_operand (op_true, mode))
19252 op_true = force_reg (mode, op_true);
19254 op_false = force_reg (mode, op_false);
19260 gen = gen_sse4_1_blendvps;
19264 gen = gen_sse4_1_blendvpd;
19272 gen = gen_sse4_1_pblendvb;
19273 dest = gen_lowpart (V16QImode, dest);
19274 op_false = gen_lowpart (V16QImode, op_false);
19275 op_true = gen_lowpart (V16QImode, op_true);
19276 cmp = gen_lowpart (V16QImode, cmp);
19281 gen = gen_avx_blendvps256;
19285 gen = gen_avx_blendvpd256;
19293 gen = gen_avx2_pblendvb;
19294 dest = gen_lowpart (V32QImode, dest);
19295 op_false = gen_lowpart (V32QImode, op_false);
19296 op_true = gen_lowpart (V32QImode, op_true);
19297 cmp = gen_lowpart (V32QImode, cmp);
19305 emit_insn (gen (dest, op_false, op_true, cmp));
19308 op_true = force_reg (mode, op_true);
19310 t2 = gen_reg_rtx (mode);
19312 t3 = gen_reg_rtx (mode);
19316 x = gen_rtx_AND (mode, op_true, cmp);
19317 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
19319 x = gen_rtx_NOT (mode, cmp);
19320 x = gen_rtx_AND (mode, x, op_false);
19321 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
19323 x = gen_rtx_IOR (mode, t3, t2);
19324 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
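/* Editor's sketch (illustrative): the general fallback computes

     dest = (cmp & op_true) | (~cmp & op_false)

   where CMP holds all-ones or all-zeros per element; with such masks
   this is the same selection the blendv paths above perform using the
   mask's sign bits.  */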
19329 /* Expand a floating-point conditional move. Return true if successful. */
19332 ix86_expand_fp_movcc (rtx operands[])
19334 enum machine_mode mode = GET_MODE (operands[0]);
19335 enum rtx_code code = GET_CODE (operands[1]);
19336 rtx tmp, compare_op;
19337 rtx op0 = XEXP (operands[1], 0);
19338 rtx op1 = XEXP (operands[1], 1);
19340 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
19342 enum machine_mode cmode;
19344 /* Since we have no cmove for sse registers, don't force bad register
19345 allocation just to gain access to it. Deny movcc when the
19346 comparison mode doesn't match the move mode. */
19347 cmode = GET_MODE (op0);
19348 if (cmode == VOIDmode)
19349 cmode = GET_MODE (op1);
19353 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
19354 if (code == UNKNOWN)
19357 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
19358 operands[2], operands[3]))
19361 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
19362 operands[2], operands[3]);
19363 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
19367 /* The floating point conditional move instructions don't directly
19368 support conditions resulting from a signed integer comparison. */
19370 compare_op = ix86_expand_compare (code, op0, op1);
19371 if (!fcmov_comparison_operator (compare_op, VOIDmode))
19373 tmp = gen_reg_rtx (QImode);
19374 ix86_expand_setcc (tmp, code, op0, op1);
19376 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
19379 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
19380 gen_rtx_IF_THEN_ELSE (mode, compare_op,
19381 operands[2], operands[3])));
19386 /* Expand a floating-point vector conditional move; a vcond operation
19387 rather than a movcc operation. */
19390 ix86_expand_fp_vcond (rtx operands[])
19392 enum rtx_code code = GET_CODE (operands[3]);
19395 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
19396 &operands[4], &operands[5]);
19397 if (code == UNKNOWN)
19400 switch (GET_CODE (operands[3]))
19403 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
19404 operands[5], operands[0], operands[0]);
19405 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
19406 operands[5], operands[1], operands[2]);
19410 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
19411 operands[5], operands[0], operands[0]);
19412 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
19413 operands[5], operands[1], operands[2]);
19417 gcc_unreachable ();
19419 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
19421 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
19425 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
19426 operands[5], operands[1], operands[2]))
19429 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
19430 operands[1], operands[2]);
19431 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
19435 /* Expand a signed/unsigned integral vector conditional move. */
19438 ix86_expand_int_vcond (rtx operands[])
19440 enum machine_mode data_mode = GET_MODE (operands[0]);
19441 enum machine_mode mode = GET_MODE (operands[4]);
19442 enum rtx_code code = GET_CODE (operands[3]);
19443 bool negate = false;
19446 cop0 = operands[4];
19447 cop1 = operands[5];
19449 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
19450 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
19451 if ((code == LT || code == GE)
19452 && data_mode == mode
19453 && cop1 == CONST0_RTX (mode)
19454 && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
19455 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) > 1
19456 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) <= 8
19457 && (GET_MODE_SIZE (data_mode) == 16
19458 || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
19460 rtx negop = operands[2 - (code == LT)];
19461 int shift = GET_MODE_BITSIZE (GET_MODE_INNER (data_mode)) - 1;
19462 if (negop == CONST1_RTX (data_mode))
19464 rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
19465 operands[0], 1, OPTAB_DIRECT);
19466 if (res != operands[0])
19467 emit_move_insn (operands[0], res);
19470 else if (GET_MODE_INNER (data_mode) != DImode
19471 && vector_all_ones_operand (negop, data_mode))
19473 rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
19474 operands[0], 0, OPTAB_DIRECT);
19475 if (res != operands[0])
19476 emit_move_insn (operands[0], res);
19481 if (!nonimmediate_operand (cop1, mode))
19482 cop1 = force_reg (mode, cop1);
19483 if (!general_operand (operands[1], data_mode))
19484 operands[1] = force_reg (data_mode, operands[1]);
19485 if (!general_operand (operands[2], data_mode))
19486 operands[2] = force_reg (data_mode, operands[2]);
19488 /* XOP supports all of the comparisons on all 128-bit vector int types. */
19490 && (mode == V16QImode || mode == V8HImode
19491 || mode == V4SImode || mode == V2DImode))
19495 /* Canonicalize the comparison to EQ, GT, GTU. */
19506 code = reverse_condition (code);
19512 code = reverse_condition (code);
19518 code = swap_condition (code);
19519 x = cop0, cop0 = cop1, cop1 = x;
19523 gcc_unreachable ();
19526 /* Only SSE4.1/SSE4.2 supports V2DImode. */
19527 if (mode == V2DImode)
19532 /* SSE4.1 supports EQ. */
19533 if (!TARGET_SSE4_1)
19539 /* SSE4.2 supports GT/GTU. */
19540 if (!TARGET_SSE4_2)
19545 gcc_unreachable ();
19549 /* Unsigned parallel compare is not supported by the hardware.
19550 Play some tricks to turn this into a signed comparison
19554 cop0 = force_reg (mode, cop0);
19564 rtx (*gen_sub3) (rtx, rtx, rtx);
19568 case V8SImode: gen_sub3 = gen_subv8si3; break;
19569 case V4DImode: gen_sub3 = gen_subv4di3; break;
19570 case V4SImode: gen_sub3 = gen_subv4si3; break;
19571 case V2DImode: gen_sub3 = gen_subv2di3; break;
19573 gcc_unreachable ();
19575 /* Subtract (-(INT MAX) - 1) from both operands to make
19577 mask = ix86_build_signbit_mask (mode, true, false);
19578 t1 = gen_reg_rtx (mode);
19579 emit_insn (gen_sub3 (t1, cop0, mask));
19581 t2 = gen_reg_rtx (mode);
19582 emit_insn (gen_sub3 (t2, cop1, mask));
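/* Editor's worked example (not from the original source): for 32-bit
   elements, subtracting the 0x80000000 mask is the same as xoring it
   in, and

     (unsigned) a < (unsigned) b
       <==>  (int) (a ^ 0x80000000) < (int) (b ^ 0x80000000)

   so the signed pcmpgt patterns can then do the unsigned compare.  */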
19594 /* Perform a parallel unsigned saturating subtraction. */
19595 x = gen_reg_rtx (mode);
19596 emit_insn (gen_rtx_SET (VOIDmode, x,
19597 gen_rtx_US_MINUS (mode, cop0, cop1)));
19600 cop1 = CONST0_RTX (mode);
19606 gcc_unreachable ();
19611 /* Allow the comparison to be done in one mode, but the movcc to
19612 happen in another mode. */
19613 if (data_mode == mode)
19615 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
19616 operands[1+negate], operands[2-negate]);
19620 gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
19621 x = ix86_expand_sse_cmp (gen_lowpart (mode, operands[0]),
19623 operands[1+negate], operands[2-negate]);
19624 x = gen_lowpart (data_mode, x);
19627 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
19628 operands[2-negate]);
19632 /* Expand a variable vector permutation. */
19635 ix86_expand_vec_perm (rtx operands[])
19637 rtx target = operands[0];
19638 rtx op0 = operands[1];
19639 rtx op1 = operands[2];
19640 rtx mask = operands[3];
19641 rtx t1, t2, t3, t4, vt, vt2, vec[32];
19642 enum machine_mode mode = GET_MODE (op0);
19643 enum machine_mode maskmode = GET_MODE (mask);
19645 bool one_operand_shuffle = rtx_equal_p (op0, op1);
19647 /* Number of elements in the vector. */
19648 w = GET_MODE_NUNITS (mode);
19649 e = GET_MODE_UNIT_SIZE (mode);
19650 gcc_assert (w <= 32);
19654 if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
19656 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
19657 a constant shuffle operand. With a tiny bit of effort we can
19658 use VPERMD instead. A re-interpretation stall for V4DFmode is
19659 unfortunate but there's no avoiding it.
19660 Similarly, for V16HImode we don't have instructions for variable
19661 shuffling, while for V32QImode we can use vpshufb; vpshufb;
19662 vpermq; vpor after preparing suitable masks. */
19664 if (mode == V16HImode)
19666 maskmode = mode = V32QImode;
19672 maskmode = mode = V8SImode;
19676 t1 = gen_reg_rtx (maskmode);
19678 /* Replicate the low bits of the V4DImode mask into V8SImode:
19680 t1 = { A A B B C C D D }. */
19681 for (i = 0; i < w / 2; ++i)
19682 vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
19683 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
19684 vt = force_reg (maskmode, vt);
19685 mask = gen_lowpart (maskmode, mask);
19686 if (maskmode == V8SImode)
19687 emit_insn (gen_avx2_permvarv8si (t1, vt, mask));
19689 emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
19691 /* Multiply the shuffle indices by two. */
19692 t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
19695 /* Add one to the odd shuffle indices:
19696 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
19697 for (i = 0; i < w / 2; ++i)
19699 vec[i * 2] = const0_rtx;
19700 vec[i * 2 + 1] = const1_rtx;
19702 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
19703 vt = force_const_mem (maskmode, vt);
19704 t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
19707 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
19708 operands[3] = mask = t1;
19709 target = gen_lowpart (mode, target);
19710 op0 = gen_lowpart (mode, op0);
19711 op1 = gen_lowpart (mode, op1);
19717 /* The VPERMD and VPERMPS instructions already properly ignore
19718 the high bits of the shuffle elements. No need for us to
19719 perform an AND ourselves. */
19720 if (one_operand_shuffle)
19721 emit_insn (gen_avx2_permvarv8si (target, mask, op0));
19724 t1 = gen_reg_rtx (V8SImode);
19725 t2 = gen_reg_rtx (V8SImode);
19726 emit_insn (gen_avx2_permvarv8si (t1, mask, op0));
19727 emit_insn (gen_avx2_permvarv8si (t2, mask, op1));
19733 mask = gen_lowpart (V8SFmode, mask);
19734 if (one_operand_shuffle)
19735 emit_insn (gen_avx2_permvarv8sf (target, mask, op0));
19738 t1 = gen_reg_rtx (V8SFmode);
19739 t2 = gen_reg_rtx (V8SFmode);
19740 emit_insn (gen_avx2_permvarv8sf (t1, mask, op0));
19741 emit_insn (gen_avx2_permvarv8sf (t2, mask, op1));
19747 /* By combining the two 128-bit input vectors into one 256-bit
19748 input vector, we can use VPERMD and VPERMPS for the full
19749 two-operand shuffle. */
19750 t1 = gen_reg_rtx (V8SImode);
19751 t2 = gen_reg_rtx (V8SImode);
19752 emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
19753 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
19754 emit_insn (gen_avx2_permvarv8si (t1, t2, t1));
19755 emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
19759 t1 = gen_reg_rtx (V8SFmode);
19760 t2 = gen_reg_rtx (V8SFmode);
19761 mask = gen_lowpart (V4SFmode, mask);
19762 emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
19763 emit_insn (gen_avx_vec_concatv8sf (t2, mask, mask));
19764 emit_insn (gen_avx2_permvarv8sf (t1, t2, t1));
19765 emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
19769 t1 = gen_reg_rtx (V32QImode);
19770 t2 = gen_reg_rtx (V32QImode);
19771 t3 = gen_reg_rtx (V32QImode);
19772 vt2 = GEN_INT (128);
19773 for (i = 0; i < 32; i++)
19775 vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
19776 vt = force_reg (V32QImode, vt);
19777 for (i = 0; i < 32; i++)
19778 vec[i] = i < 16 ? vt2 : const0_rtx;
19779 vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
19780 vt2 = force_reg (V32QImode, vt2);
19781 /* From mask create two adjusted masks, which contain the same
19782 bits as mask in the low 7 bits of each vector element.
19783 The first mask will have the most significant bit clear
19784 if it requests element from the same 128-bit lane
19785 and MSB set if it requests element from the other 128-bit lane.
19786 The second mask will have the opposite values of the MSB,
19787 and additionally will have its 128-bit lanes swapped.
19788 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
19789 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
19790 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
19791 stands for the other 12 bytes. */
19792 /* The bit that says whether an element is from the same lane or the
19793 other lane is bit 4, so shift it up by 3 to the MSB position. */
19794 emit_insn (gen_ashlv4di3 (gen_lowpart (V4DImode, t1),
19795 gen_lowpart (V4DImode, mask),
19797 /* Clear MSB bits from the mask just in case it had them set. */
19798 emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
19799 /* After this t1 will have MSB set for elements from other lane. */
19800 emit_insn (gen_xorv32qi3 (t1, t1, vt2));
19801 /* Clear bits other than MSB. */
19802 emit_insn (gen_andv32qi3 (t1, t1, vt));
19803 /* Or in the lower bits from mask into t3. */
19804 emit_insn (gen_iorv32qi3 (t3, t1, t2));
19805 /* And invert MSB bits in t1, so MSB is set for elements from the same
19807 emit_insn (gen_xorv32qi3 (t1, t1, vt));
19808 /* Swap 128-bit lanes in t3. */
19809 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t3),
19810 gen_lowpart (V4DImode, t3),
19811 const2_rtx, GEN_INT (3),
19812 const0_rtx, const1_rtx));
19813 /* And or in the lower bits from mask into t1. */
19814 emit_insn (gen_iorv32qi3 (t1, t1, t2));
19815 if (one_operand_shuffle)
19817 /* Each of these shuffles will put 0s in places where an
19818 element from the other 128-bit lane is needed, and otherwise
19819 will shuffle in the requested value. */
19820 emit_insn (gen_avx2_pshufbv32qi3 (t3, op0, t3));
19821 emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
19822 /* For t3 the 128-bit lanes are swapped again. */
19823 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t3),
19824 gen_lowpart (V4DImode, t3),
19825 const2_rtx, GEN_INT (3),
19826 const0_rtx, const1_rtx));
19827 /* ORing both together then yields the result. */
19828 emit_insn (gen_iorv32qi3 (target, t1, t3));
19832 t4 = gen_reg_rtx (V32QImode);
19833 /* Similar to the one_operand_shuffle code above,
19834 just repeated twice, once for each operand; the merge_two:
19835 code will merge the two results together. */
19836 emit_insn (gen_avx2_pshufbv32qi3 (t4, op0, t3));
19837 emit_insn (gen_avx2_pshufbv32qi3 (t3, op1, t3));
19838 emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
19839 emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
19840 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t4),
19841 gen_lowpart (V4DImode, t4),
19842 const2_rtx, GEN_INT (3),
19843 const0_rtx, const1_rtx));
19844 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t3),
19845 gen_lowpart (V4DImode, t3),
19846 const2_rtx, GEN_INT (3),
19847 const0_rtx, const1_rtx));
19848 emit_insn (gen_iorv32qi3 (t4, t2, t4));
19849 emit_insn (gen_iorv32qi3 (t3, t1, t3));
19855 gcc_assert (GET_MODE_SIZE (mode) <= 16);
19862 /* The XOP VPPERM insn supports three inputs. By ignoring the
19863 one_operand_shuffle special case, we avoid creating another
19864 set of constant vectors in memory. */
19865 one_operand_shuffle = false;
19867 /* mask = mask & {2*w-1, ...} */
19868 vt = GEN_INT (2*w - 1);
19872 /* mask = mask & {w-1, ...} */
19873 vt = GEN_INT (w - 1);
19876 for (i = 0; i < w; i++)
19878 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
19879 mask = expand_simple_binop (maskmode, AND, mask, vt,
19880 NULL_RTX, 0, OPTAB_DIRECT);
19882 /* For non-QImode operations, convert the word permutation control
19883 into a byte permutation control. */
19884 if (mode != V16QImode)
19886 mask = expand_simple_binop (maskmode, ASHIFT, mask,
19887 GEN_INT (exact_log2 (e)),
19888 NULL_RTX, 0, OPTAB_DIRECT);
19890 /* Convert mask to vector of chars. */
19891 mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
19893 /* Replicate each of the input bytes into byte positions:
19894 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
19895 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
19896 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
19897 for (i = 0; i < 16; ++i)
19898 vec[i] = GEN_INT (i/e * e);
19899 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
19900 vt = force_const_mem (V16QImode, vt);
19902 emit_insn (gen_xop_pperm (mask, mask, mask, vt));
19904 emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
19906 /* Convert it into the byte positions by doing
19907 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
19908 for (i = 0; i < 16; ++i)
19909 vec[i] = GEN_INT (i % e);
19910 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
19911 vt = force_const_mem (V16QImode, vt);
19912 emit_insn (gen_addv16qi3 (mask, mask, vt));
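/* Editor's worked example (hypothetical index): for a V4SI shuffle,
   e == 4, so a word index k is first shifted to 4k, replicated across
   its word as {4k,4k,4k,4k}, and the {0,1,2,3} addend then yields the
   byte positions {4k, 4k+1, 4k+2, 4k+3} that pshufb/vpperm consume.  */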
19915 /* The actual shuffle operations all operate on V16QImode. */
19916 op0 = gen_lowpart (V16QImode, op0);
19917 op1 = gen_lowpart (V16QImode, op1);
19918 target = gen_lowpart (V16QImode, target);
19922 emit_insn (gen_xop_pperm (target, op0, op1, mask));
19924 else if (one_operand_shuffle)
19926 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
19933 /* Shuffle the two input vectors independently. */
19934 t1 = gen_reg_rtx (V16QImode);
19935 t2 = gen_reg_rtx (V16QImode);
19936 emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
19937 emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
19940 /* Then merge them together. The key is whether any given control
19941 element contained a bit set that indicates the second word. */
19942 mask = operands[3];
19944 if (maskmode == V2DImode && !TARGET_SSE4_1)
19946 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
19947 more shuffle to convert the V2DI input mask into a V4SI
19948 input mask, at which point the masking done by
19949 expand_int_vcond will work as desired. */
19950 rtx t3 = gen_reg_rtx (V4SImode);
19951 emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
19952 const0_rtx, const0_rtx,
19953 const2_rtx, const2_rtx));
19955 maskmode = V4SImode;
19959 for (i = 0; i < w; i++)
19961 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
19962 vt = force_reg (maskmode, vt);
19963 mask = expand_simple_binop (maskmode, AND, mask, vt,
19964 NULL_RTX, 0, OPTAB_DIRECT);
19966 xops[0] = gen_lowpart (mode, operands[0]);
19967 xops[1] = gen_lowpart (mode, t2);
19968 xops[2] = gen_lowpart (mode, t1);
19969 xops[3] = gen_rtx_EQ (maskmode, mask, vt);
19972 ok = ix86_expand_int_vcond (xops);
19977 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
19978 true if we should do zero extension, else sign extension. HIGH_P is
19979 true if we want the N/2 high elements, else the low elements. */
19982 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
19984 enum machine_mode imode = GET_MODE (operands[1]);
19989 rtx (*unpack)(rtx, rtx);
19990 rtx (*extract)(rtx, rtx) = NULL;
19991 enum machine_mode halfmode = BLKmode;
19997 unpack = gen_avx2_zero_extendv16qiv16hi2;
19999 unpack = gen_avx2_sign_extendv16qiv16hi2;
20000 halfmode = V16QImode;
20002 = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
20006 unpack = gen_avx2_zero_extendv8hiv8si2;
20008 unpack = gen_avx2_sign_extendv8hiv8si2;
20009 halfmode = V8HImode;
20011 = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
20015 unpack = gen_avx2_zero_extendv4siv4di2;
20017 unpack = gen_avx2_sign_extendv4siv4di2;
20018 halfmode = V4SImode;
20020 = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
20024 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
20026 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
20030 unpack = gen_sse4_1_zero_extendv4hiv4si2;
20032 unpack = gen_sse4_1_sign_extendv4hiv4si2;
20036 unpack = gen_sse4_1_zero_extendv2siv2di2;
20038 unpack = gen_sse4_1_sign_extendv2siv2di2;
20041 gcc_unreachable ();
20044 if (GET_MODE_SIZE (imode) == 32)
20046 tmp = gen_reg_rtx (halfmode);
20047 emit_insn (extract (tmp, operands[1]));
20051 /* Shift the higher 8 bytes into the lower 8 bytes. */
20052 tmp = gen_reg_rtx (imode);
20053 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, tmp),
20054 gen_lowpart (V1TImode, operands[1]),
20060 emit_insn (unpack (operands[0], tmp));
20064 rtx (*unpack)(rtx, rtx, rtx);
20070 unpack = gen_vec_interleave_highv16qi;
20072 unpack = gen_vec_interleave_lowv16qi;
20076 unpack = gen_vec_interleave_highv8hi;
20078 unpack = gen_vec_interleave_lowv8hi;
20082 unpack = gen_vec_interleave_highv4si;
20084 unpack = gen_vec_interleave_lowv4si;
20087 gcc_unreachable ();
20090 dest = gen_lowpart (imode, operands[0]);
20093 tmp = force_reg (imode, CONST0_RTX (imode));
20095 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
20096 operands[1], pc_rtx, pc_rtx);
20098 emit_insn (unpack (dest, operands[1], tmp));
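/* A hedged illustration, not from the original sources: the effect of the
   expansion above in scalar C for V8HImode -> V4SImode with HIGH_P false:

     void unpack_lo (int dst[4], const short src[8], int unsigned_p)
     {
       for (int i = 0; i < 4; i++)
         dst[i] = unsigned_p ? (int) (unsigned short) src[i] : (int) src[i];
     }

   With HIGH_P true it would read src[i + 4] instead.  On pre-SSE4.1
   targets the same result comes from interleaving the source with either
   zeros or a mask of sign bits (the GT comparison above) and keeping the
   low or high half of the interleave.  */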
20102 /* Expand conditional increment or decrement using adc/sbb instructions.
20103 The default case, using setcc followed by a conditional move, can be
20104 done by generic code. */
20106 ix86_expand_int_addcc (rtx operands[])
20108 enum rtx_code code = GET_CODE (operands[1]);
20110 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
20112 rtx val = const0_rtx;
20113 bool fpcmp = false;
20114 enum machine_mode mode;
20115 rtx op0 = XEXP (operands[1], 0);
20116 rtx op1 = XEXP (operands[1], 1);
20118 if (operands[3] != const1_rtx
20119 && operands[3] != constm1_rtx)
20121 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
20123 code = GET_CODE (compare_op);
20125 flags = XEXP (compare_op, 0);
20127 if (GET_MODE (flags) == CCFPmode
20128 || GET_MODE (flags) == CCFPUmode)
20131 code = ix86_fp_compare_code_to_integer (code);
20138 PUT_CODE (compare_op,
20139 reverse_condition_maybe_unordered
20140 (GET_CODE (compare_op)));
20142 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
20145 mode = GET_MODE (operands[0]);
20147 /* Construct either adc or sbb insn. */
20148 if ((code == LTU) == (operands[3] == constm1_rtx))
20153 insn = gen_subqi3_carry;
20156 insn = gen_subhi3_carry;
20159 insn = gen_subsi3_carry;
20162 insn = gen_subdi3_carry;
20165 gcc_unreachable ();
20173 insn = gen_addqi3_carry;
20176 insn = gen_addhi3_carry;
20179 insn = gen_addsi3_carry;
20182 insn = gen_adddi3_carry;
20185 gcc_unreachable ();
20188 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
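/* Hedged example, not from the original sources: the kind of source this
   expander handles and a plausible instruction sequence for it.  For

     if (a < b) x++;

   with unsigned operands, the comparison itself leaves the carry flag
   set exactly when a < b, so the expander emits the equivalent of

     cmpl %ebx, %eax   ; CF = (a < b)
     adcl $0, %ecx     ; x += CF

   with no setcc and no conditional move.  A conditional decrement uses
   sbb the same way, which is why operands[3] must be 1 or -1.  */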
20194 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
20195 but works for floating point parameters and non-offsettable memories.
20196 For pushes, it returns just stack offsets; the values will be saved
20197 in the right order. At most four parts are generated. */
20200 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
20205 size = mode == XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
20207 size = (GET_MODE_SIZE (mode) + 4) / 8;
20209 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
20210 gcc_assert (size >= 2 && size <= 4);
20212 /* Optimize constant pool references to immediates. This is used by fp
20213 moves, which force all constants to memory to allow combining. */
20214 if (MEM_P (operand) && MEM_READONLY_P (operand))
20216 rtx tmp = maybe_get_pool_constant (operand);
20221 if (MEM_P (operand) && !offsettable_memref_p (operand))
20223 /* The only non-offsettable memories we handle are pushes. */
20224 int ok = push_operand (operand, VOIDmode);
20228 operand = copy_rtx (operand);
20229 PUT_MODE (operand, Pmode);
20230 parts[0] = parts[1] = parts[2] = parts[3] = operand;
20234 if (GET_CODE (operand) == CONST_VECTOR)
20236 enum machine_mode imode = int_mode_for_mode (mode);
20237 /* Caution: if we looked through a constant pool memory above,
20238 the operand may actually have a different mode now. That's
20239 ok, since we want to pun this all the way back to an integer. */
20240 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
20241 gcc_assert (operand != NULL);
20247 if (mode == DImode)
20248 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
20253 if (REG_P (operand))
20255 gcc_assert (reload_completed);
20256 for (i = 0; i < size; i++)
20257 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
20259 else if (offsettable_memref_p (operand))
20261 operand = adjust_address (operand, SImode, 0);
20262 parts[0] = operand;
20263 for (i = 1; i < size; i++)
20264 parts[i] = adjust_address (operand, SImode, 4 * i);
20266 else if (GET_CODE (operand) == CONST_DOUBLE)
20271 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
20275 real_to_target (l, &r, mode);
20276 parts[3] = gen_int_mode (l[3], SImode);
20277 parts[2] = gen_int_mode (l[2], SImode);
20280 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
20281 parts[2] = gen_int_mode (l[2], SImode);
20284 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
20287 gcc_unreachable ();
20289 parts[1] = gen_int_mode (l[1], SImode);
20290 parts[0] = gen_int_mode (l[0], SImode);
20293 gcc_unreachable ();
20298 if (mode == TImode)
20299 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
20300 if (mode == XFmode || mode == TFmode)
20302 enum machine_mode upper_mode = mode == XFmode ? SImode : DImode;
20303 if (REG_P (operand))
20305 gcc_assert (reload_completed);
20306 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
20307 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
20309 else if (offsettable_memref_p (operand))
20311 operand = adjust_address (operand, DImode, 0);
20312 parts[0] = operand;
20313 parts[1] = adjust_address (operand, upper_mode, 8);
20315 else if (GET_CODE (operand) == CONST_DOUBLE)
20320 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
20321 real_to_target (l, &r, mode);
20323 /* Do not use a shift by 32, to avoid a warning on 32-bit systems. */
20324 if (HOST_BITS_PER_WIDE_INT >= 64)
20327 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
20328 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
20331 parts[0] = immed_double_const (l[0], l[1], DImode);
20333 if (upper_mode == SImode)
20334 parts[1] = gen_int_mode (l[2], SImode);
20335 else if (HOST_BITS_PER_WIDE_INT >= 64)
20338 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
20339 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
20342 parts[1] = immed_double_const (l[2], l[3], DImode);
20345 gcc_unreachable ();
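/* Worked example, illustrative only: on a 32-bit target a DImode constant
   splits into two SImode words, low part first:

     0x0123456789abcdef  ->  parts[0] = 0x89abcdef  (low 32 bits)
                             parts[1] = 0x01234567  (high 32 bits)

   and an offsettable (mem:DI addr) becomes (mem:SI addr) and
   (mem:SI addr+4).  XFmode produces three SImode parts, TFmode four.  */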
20352 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
20353 Return false when normal moves are needed; true when all required
20354 insns have been emitted. Operands 2-4 contain the input values
20355 in the correct order; operands 5-7 contain the output values. */
20358 ix86_split_long_move (rtx operands[])
20363 int collisions = 0;
20364 enum machine_mode mode = GET_MODE (operands[0]);
20365 bool collisionparts[4];
20367 /* The DFmode expanders may ask us to move a double.
20368 For a 64-bit target this is a single move. By hiding the fact
20369 here we simplify the i386.md splitters. */
20370 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
20372 /* Optimize constant pool references to immediates. This is used by
20373 fp moves, which force all constants to memory to allow combining. */
20375 if (MEM_P (operands[1])
20376 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
20377 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
20378 operands[1] = get_pool_constant (XEXP (operands[1], 0));
20379 if (push_operand (operands[0], VOIDmode))
20381 operands[0] = copy_rtx (operands[0]);
20382 PUT_MODE (operands[0], Pmode);
20385 operands[0] = gen_lowpart (DImode, operands[0]);
20386 operands[1] = gen_lowpart (DImode, operands[1]);
20387 emit_move_insn (operands[0], operands[1]);
20391 /* The only non-offsettable memory we handle is a push. */
20392 if (push_operand (operands[0], VOIDmode))
20395 gcc_assert (!MEM_P (operands[0])
20396 || offsettable_memref_p (operands[0]));
20398 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
20399 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
20401 /* When emitting a push, take care with source operands on the stack. */
20402 if (push && MEM_P (operands[1])
20403 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
20405 rtx src_base = XEXP (part[1][nparts - 1], 0);
20407 /* Compensate for the stack decrement by 4. */
20408 if (!TARGET_64BIT && nparts == 3
20409 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
20410 src_base = plus_constant (src_base, 4);
20412 /* src_base refers to the stack pointer and is
20413 automatically decreased by each emitted push. */
20414 for (i = 0; i < nparts; i++)
20415 part[1][i] = change_address (part[1][i],
20416 GET_MODE (part[1][i]), src_base);
20419 /* We need to do the copy in the right order in case an address register
20420 of the source overlaps the destination. */
20421 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
20425 for (i = 0; i < nparts; i++)
20428 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
20429 if (collisionparts[i])
20433 /* Collision in the middle part can be handled by reordering. */
20434 if (collisions == 1 && nparts == 3 && collisionparts [1])
20436 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
20437 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
20439 else if (collisions == 1
20441 && (collisionparts [1] || collisionparts [2]))
20443 if (collisionparts [1])
20445 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
20446 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
20450 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
20451 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
20455 /* If there are more collisions, we can't handle them by reordering.
20456 Do an lea to the last part and use only one colliding move. */
20457 else if (collisions > 1)
20463 base = part[0][nparts - 1];
20465 /* Handle the case when the last part isn't valid for lea.
20466 This happens in 64-bit mode when storing the 12-byte XFmode. */
20467 if (GET_MODE (base) != Pmode)
20468 base = gen_rtx_REG (Pmode, REGNO (base));
20470 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
20471 part[1][0] = replace_equiv_address (part[1][0], base);
20472 for (i = 1; i < nparts; i++)
20474 tmp = plus_constant (base, UNITS_PER_WORD * i);
20475 part[1][i] = replace_equiv_address (part[1][i], tmp);
20486 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
20487 emit_insn (gen_addsi3 (stack_pointer_rtx,
20488 stack_pointer_rtx, GEN_INT (-4)));
20489 emit_move_insn (part[0][2], part[1][2]);
20491 else if (nparts == 4)
20493 emit_move_insn (part[0][3], part[1][3]);
20494 emit_move_insn (part[0][2], part[1][2]);
20499 /* In 64-bit mode we don't have a 32-bit push available. In case this is a
20500 register, it is OK - we will just use the larger counterpart. We also
20501 retype memory - this comes from an attempt to avoid a REX prefix on
20502 moving the second half of a TFmode value. */
20503 if (GET_MODE (part[1][1]) == SImode)
20505 switch (GET_CODE (part[1][1]))
20508 part[1][1] = adjust_address (part[1][1], DImode, 0);
20512 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
20516 gcc_unreachable ();
20519 if (GET_MODE (part[1][0]) == SImode)
20520 part[1][0] = part[1][1];
20523 emit_move_insn (part[0][1], part[1][1]);
20524 emit_move_insn (part[0][0], part[1][0]);
20528 /* Choose the correct order so as not to overwrite the source before it is copied. */
20529 if ((REG_P (part[0][0])
20530 && REG_P (part[1][1])
20531 && (REGNO (part[0][0]) == REGNO (part[1][1])
20533 && REGNO (part[0][0]) == REGNO (part[1][2]))
20535 && REGNO (part[0][0]) == REGNO (part[1][3]))))
20537 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
20539 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
20541 operands[2 + i] = part[0][j];
20542 operands[6 + i] = part[1][j];
20547 for (i = 0; i < nparts; i++)
20549 operands[2 + i] = part[0][i];
20550 operands[6 + i] = part[1][i];
20554 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
20555 if (optimize_insn_for_size_p ())
20557 for (j = 0; j < nparts - 1; j++)
20558 if (CONST_INT_P (operands[6 + j])
20559 && operands[6 + j] != const0_rtx
20560 && REG_P (operands[2 + j]))
20561 for (i = j; i < nparts - 1; i++)
20562 if (CONST_INT_P (operands[7 + i])
20563 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
20564 operands[7 + i] = operands[2 + j];
20567 for (i = 0; i < nparts; i++)
20568 emit_move_insn (operands[2 + i], operands[6 + i]);
20573 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
20574 left shift by a constant, either using a single shift or
20575 a sequence of add instructions. */
20578 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
20580 rtx (*insn)(rtx, rtx, rtx);
20583 || (count * ix86_cost->add <= ix86_cost->shift_const
20584 && !optimize_insn_for_size_p ()))
20586 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
20587 while (count-- > 0)
20588 emit_insn (insn (operand, operand, operand));
20592 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
20593 emit_insn (insn (operand, operand, GEN_INT (count)));
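/* A small illustration, not from the original sources: when
   count * add cost <= constant shift cost, "x <<= 2" is emitted as

     addl %eax, %eax   ; x += x, i.e. x <<= 1
     addl %eax, %eax   ; again

   rather than "sall $2, %eax"; otherwise a single shift insn is used.  */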
20598 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
20600 rtx (*gen_ashl3)(rtx, rtx, rtx);
20601 rtx (*gen_shld)(rtx, rtx, rtx);
20602 int half_width = GET_MODE_BITSIZE (mode) >> 1;
20604 rtx low[2], high[2];
20607 if (CONST_INT_P (operands[2]))
20609 split_double_mode (mode, operands, 2, low, high);
20610 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
20612 if (count >= half_width)
20614 emit_move_insn (high[0], low[1]);
20615 emit_move_insn (low[0], const0_rtx);
20617 if (count > half_width)
20618 ix86_expand_ashl_const (high[0], count - half_width, mode);
20622 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
20624 if (!rtx_equal_p (operands[0], operands[1]))
20625 emit_move_insn (operands[0], operands[1]);
20627 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
20628 ix86_expand_ashl_const (low[0], count, mode);
20633 split_double_mode (mode, operands, 1, low, high);
20635 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
20637 if (operands[1] == const1_rtx)
20639 /* Assuming we've chosen QImode-capable registers, 1 << N
20640 can be done with two 32/64-bit shifts, no branches, no cmoves. */
20641 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
20643 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
20645 ix86_expand_clear (low[0]);
20646 ix86_expand_clear (high[0]);
20647 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
20649 d = gen_lowpart (QImode, low[0]);
20650 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
20651 s = gen_rtx_EQ (QImode, flags, const0_rtx);
20652 emit_insn (gen_rtx_SET (VOIDmode, d, s));
20654 d = gen_lowpart (QImode, high[0]);
20655 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
20656 s = gen_rtx_NE (QImode, flags, const0_rtx);
20657 emit_insn (gen_rtx_SET (VOIDmode, d, s));
20660 /* Otherwise, we can get the same results by manually performing
20661 a bit extract operation on bit 5/6, and then performing the two
20662 shifts. The two methods of getting 0/1 into low/high are exactly
20663 the same size. Avoiding the shift in the bit extract case helps
20664 pentium4 a bit; no one else seems to care much either way. */
20667 enum machine_mode half_mode;
20668 rtx (*gen_lshr3)(rtx, rtx, rtx);
20669 rtx (*gen_and3)(rtx, rtx, rtx);
20670 rtx (*gen_xor3)(rtx, rtx, rtx);
20671 HOST_WIDE_INT bits;
20674 if (mode == DImode)
20676 half_mode = SImode;
20677 gen_lshr3 = gen_lshrsi3;
20678 gen_and3 = gen_andsi3;
20679 gen_xor3 = gen_xorsi3;
20684 half_mode = DImode;
20685 gen_lshr3 = gen_lshrdi3;
20686 gen_and3 = gen_anddi3;
20687 gen_xor3 = gen_xordi3;
20691 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
20692 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
20694 x = gen_lowpart (half_mode, operands[2]);
20695 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
20697 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
20698 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
20699 emit_move_insn (low[0], high[0]);
20700 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
20703 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
20704 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
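/* Worked example, illustrative only: for a 64-bit shift on a 32-bit
   target (mode == DImode, bit 5 of the count selects the half):

     high = (count >> 5) & 1;   -- 1 iff count >= 32
     low  = high ^ 1;           -- 1 iff count <  32
     low  <<= count;            -- hardware masks the count to 5 bits
     high <<= count;

   Exactly one of low/high holds 1 << (count & 31) and the other is 0,
   which is 1 << count viewed as a 64-bit register pair.  */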
20708 if (operands[1] == constm1_rtx)
20710 /* For -1 << N, we can avoid the shld instruction, because we
20711 know that we're shifting 0...31/63 ones into a -1. */
20712 emit_move_insn (low[0], constm1_rtx);
20713 if (optimize_insn_for_size_p ())
20714 emit_move_insn (high[0], low[0]);
20716 emit_move_insn (high[0], constm1_rtx);
20720 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
20722 if (!rtx_equal_p (operands[0], operands[1]))
20723 emit_move_insn (operands[0], operands[1]);
20725 split_double_mode (mode, operands, 1, low, high);
20726 emit_insn (gen_shld (high[0], low[0], operands[2]));
20729 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
20731 if (TARGET_CMOVE && scratch)
20733 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
20734 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
20736 ix86_expand_clear (scratch);
20737 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
20741 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
20742 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
20744 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
20749 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
20751 rtx (*gen_ashr3)(rtx, rtx, rtx)
20752 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
20753 rtx (*gen_shrd)(rtx, rtx, rtx);
20754 int half_width = GET_MODE_BITSIZE (mode) >> 1;
20756 rtx low[2], high[2];
20759 if (CONST_INT_P (operands[2]))
20761 split_double_mode (mode, operands, 2, low, high);
20762 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
20764 if (count == GET_MODE_BITSIZE (mode) - 1)
20766 emit_move_insn (high[0], high[1]);
20767 emit_insn (gen_ashr3 (high[0], high[0],
20768 GEN_INT (half_width - 1)));
20769 emit_move_insn (low[0], high[0]);
20772 else if (count >= half_width)
20774 emit_move_insn (low[0], high[1]);
20775 emit_move_insn (high[0], low[0]);
20776 emit_insn (gen_ashr3 (high[0], high[0],
20777 GEN_INT (half_width - 1)));
20779 if (count > half_width)
20780 emit_insn (gen_ashr3 (low[0], low[0],
20781 GEN_INT (count - half_width)));
20785 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
20787 if (!rtx_equal_p (operands[0], operands[1]))
20788 emit_move_insn (operands[0], operands[1]);
20790 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
20791 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
20796 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
20798 if (!rtx_equal_p (operands[0], operands[1]))
20799 emit_move_insn (operands[0], operands[1]);
20801 split_double_mode (mode, operands, 1, low, high);
20803 emit_insn (gen_shrd (low[0], high[0], operands[2]));
20804 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
20806 if (TARGET_CMOVE && scratch)
20808 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
20809 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
20811 emit_move_insn (scratch, high[0]);
20812 emit_insn (gen_ashr3 (scratch, scratch,
20813 GEN_INT (half_width - 1)));
20814 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
20819 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
20820 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
20822 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
20828 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
20830 rtx (*gen_lshr3)(rtx, rtx, rtx)
20831 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
20832 rtx (*gen_shrd)(rtx, rtx, rtx);
20833 int half_width = GET_MODE_BITSIZE (mode) >> 1;
20835 rtx low[2], high[2];
20838 if (CONST_INT_P (operands[2]))
20840 split_double_mode (mode, operands, 2, low, high);
20841 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
20843 if (count >= half_width)
20845 emit_move_insn (low[0], high[1]);
20846 ix86_expand_clear (high[0]);
20848 if (count > half_width)
20849 emit_insn (gen_lshr3 (low[0], low[0],
20850 GEN_INT (count - half_width)));
20854 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
20856 if (!rtx_equal_p (operands[0], operands[1]))
20857 emit_move_insn (operands[0], operands[1]);
20859 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
20860 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
20865 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
20867 if (!rtx_equal_p (operands[0], operands[1]))
20868 emit_move_insn (operands[0], operands[1]);
20870 split_double_mode (mode, operands, 1, low, high);
20872 emit_insn (gen_shrd (low[0], high[0], operands[2]));
20873 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
20875 if (TARGET_CMOVE && scratch)
20877 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
20878 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
20880 ix86_expand_clear (scratch);
20881 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
20886 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
20887 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
20889 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
20894 /* Predict the just-emitted jump instruction to be taken with probability PROB. */
20896 predict_jump (int prob)
20898 rtx insn = get_last_insn ();
20899 gcc_assert (JUMP_P (insn));
20900 add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
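/* Usage note: REG_BR_PROB probabilities are relative to REG_BR_PROB_BASE
   (10000), so for example

     predict_jump (REG_BR_PROB_BASE * 90 / 100);

   marks the branch just emitted as taken 90% of the time.  */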
20903 /* Helper function for the string operations below. Test whether VARIABLE
20904 is aligned to VALUE bytes. If so, jump to the label. */
20906 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
20908 rtx label = gen_label_rtx ();
20909 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
20910 if (GET_MODE (variable) == DImode)
20911 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
20913 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
20914 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
20917 predict_jump (REG_BR_PROB_BASE * 50 / 100);
20919 predict_jump (REG_BR_PROB_BASE * 90 / 100);
20923 /* Adjust COUNTREG by VALUE. */
20925 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
20927 rtx (*gen_add)(rtx, rtx, rtx)
20928 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
20930 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
20933 /* Zero-extend a possibly SImode EXP to a Pmode register. */
20935 ix86_zero_extend_to_Pmode (rtx exp)
20938 if (GET_MODE (exp) == VOIDmode)
20939 return force_reg (Pmode, exp);
20940 if (GET_MODE (exp) == Pmode)
20941 return copy_to_mode_reg (Pmode, exp);
20942 r = gen_reg_rtx (Pmode);
20943 emit_insn (gen_zero_extendsidi2 (r, exp));
20947 /* Divide COUNTREG by SCALE. */
20949 scale_counter (rtx countreg, int scale)
20955 if (CONST_INT_P (countreg))
20956 return GEN_INT (INTVAL (countreg) / scale);
20957 gcc_assert (REG_P (countreg));
20959 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
20960 GEN_INT (exact_log2 (scale)),
20961 NULL, 1, OPTAB_DIRECT);
20965 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
20966 DImode for constant loop counts. */
20968 static enum machine_mode
20969 counter_mode (rtx count_exp)
20971 if (GET_MODE (count_exp) != VOIDmode)
20972 return GET_MODE (count_exp);
20973 if (!CONST_INT_P (count_exp))
20975 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
20980 /* When SRCPTR is non-NULL, output a simple loop to move memory from the
20981 pointer SRCPTR to DESTPTR in chunks of MODE, unrolled UNROLL times;
20982 the overall size is COUNT, specified in bytes. When SRCPTR is NULL, output the
20983 equivalent loop to set memory to VALUE (supposed to be in MODE).
20985 The size is rounded down to a whole number of chunks moved at once.
20986 SRCMEM and DESTMEM provide MEM rtx to feed proper aliasing info. */
20990 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
20991 rtx destptr, rtx srcptr, rtx value,
20992 rtx count, enum machine_mode mode, int unroll,
20995 rtx out_label, top_label, iter, tmp;
20996 enum machine_mode iter_mode = counter_mode (count);
20997 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
20998 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
21004 top_label = gen_label_rtx ();
21005 out_label = gen_label_rtx ();
21006 iter = gen_reg_rtx (iter_mode);
21008 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
21009 NULL, 1, OPTAB_DIRECT);
21010 /* Those two should combine. */
21011 if (piece_size == const1_rtx)
21013 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
21015 predict_jump (REG_BR_PROB_BASE * 10 / 100);
21017 emit_move_insn (iter, const0_rtx);
21019 emit_label (top_label);
21021 tmp = convert_modes (Pmode, iter_mode, iter, true);
21022 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
21023 destmem = change_address (destmem, mode, x_addr);
21027 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
21028 srcmem = change_address (srcmem, mode, y_addr);
21030 /* When unrolling for chips that reorder memory reads and writes,
21031 we can save registers by using a single temporary.
21032 Also, using 4 temporaries is overkill in 32-bit mode. */
21033 if (!TARGET_64BIT && 0)
21035 for (i = 0; i < unroll; i++)
21040 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
21042 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
21044 emit_move_insn (destmem, srcmem);
21050 gcc_assert (unroll <= 4);
21051 for (i = 0; i < unroll; i++)
21053 tmpreg[i] = gen_reg_rtx (mode);
21057 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
21059 emit_move_insn (tmpreg[i], srcmem);
21061 for (i = 0; i < unroll; i++)
21066 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
21068 emit_move_insn (destmem, tmpreg[i]);
21073 for (i = 0; i < unroll; i++)
21077 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
21078 emit_move_insn (destmem, value);
21081 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
21082 true, OPTAB_LIB_WIDEN);
21084 emit_move_insn (iter, tmp);
21086 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
21088 if (expected_size != -1)
21090 expected_size /= GET_MODE_SIZE (mode) * unroll;
21091 if (expected_size == 0)
21093 else if (expected_size > REG_BR_PROB_BASE)
21094 predict_jump (REG_BR_PROB_BASE - 1);
21096 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
21099 predict_jump (REG_BR_PROB_BASE * 80 / 100);
21100 iter = ix86_zero_extend_to_Pmode (iter);
21101 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
21102 true, OPTAB_LIB_WIDEN);
21103 if (tmp != destptr)
21104 emit_move_insn (destptr, tmp);
21107 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
21108 true, OPTAB_LIB_WIDEN);
21110 emit_move_insn (srcptr, tmp);
21112 emit_label (out_label);
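/* A sketch, not part of the original sources, of what the function above
   generates for the copy case, as C (chunk == GET_MODE_SIZE (mode)):

     size = count & ~(unroll * chunk - 1);   -- bytes the loop handles
     for (iter = 0; iter < size; iter += unroll * chunk)
       memcpy (dst + iter, src + iter, unroll * chunk);  -- unroll moves
     dst += size;
     src += size;

   The epilogue code elsewhere picks up the remaining
   count & (unroll * chunk - 1) bytes.  */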
21115 /* Output a "rep; mov" instruction.
21116 Arguments have the same meaning as for the previous function. */
21118 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
21119 rtx destptr, rtx srcptr,
21121 enum machine_mode mode)
21126 HOST_WIDE_INT rounded_count;
21128 /* If the size is known, it is shorter to use rep movs. */
21129 if (mode == QImode && CONST_INT_P (count)
21130 && !(INTVAL (count) & 3))
21133 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
21134 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
21135 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
21136 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
21137 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
21138 if (mode != QImode)
21140 destexp = gen_rtx_ASHIFT (Pmode, countreg,
21141 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
21142 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
21143 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
21144 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
21145 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
21149 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
21150 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
21152 if (CONST_INT_P (count))
21154 rounded_count = (INTVAL (count)
21155 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
21156 destmem = shallow_copy_rtx (destmem);
21157 srcmem = shallow_copy_rtx (srcmem);
21158 set_mem_size (destmem, rounded_count);
21159 set_mem_size (srcmem, rounded_count);
21163 if (MEM_SIZE_KNOWN_P (destmem))
21164 clear_mem_size (destmem);
21165 if (MEM_SIZE_KNOWN_P (srcmem))
21166 clear_mem_size (srcmem);
21168 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
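/* Illustrative note, not from the original sources: "rep movs" with the
   count in %ecx behaves like

     while (ecx--) *edi++ = *esi++;   -- byte, dword or qword steps

   The DESTEXP/SRCEXP expressions built above describe the final pointer
   values (ptr + count, or ptr + (count << log2 of the element size))
   for the benefit of the RTL pattern.  */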
21172 /* Output a "rep; stos" instruction.
21173 Arguments have the same meaning as for the previous function. */
21175 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
21176 rtx count, enum machine_mode mode,
21181 HOST_WIDE_INT rounded_count;
21183 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
21184 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
21185 value = force_reg (mode, gen_lowpart (mode, value));
21186 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
21187 if (mode != QImode)
21189 destexp = gen_rtx_ASHIFT (Pmode, countreg,
21190 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
21191 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
21194 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
21195 if (orig_value == const0_rtx && CONST_INT_P (count))
21197 rounded_count = (INTVAL (count)
21198 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
21199 destmem = shallow_copy_rtx (destmem);
21200 set_mem_size (destmem, rounded_count);
21202 else if (MEM_SIZE_KNOWN_P (destmem))
21203 clear_mem_size (destmem);
21204 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
21208 emit_strmov (rtx destmem, rtx srcmem,
21209 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
21211 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
21212 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
21213 emit_insn (gen_strmov (destptr, dest, srcptr, src));
21216 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
21218 expand_movmem_epilogue (rtx destmem, rtx srcmem,
21219 rtx destptr, rtx srcptr, rtx count, int max_size)
21222 if (CONST_INT_P (count))
21224 HOST_WIDE_INT countval = INTVAL (count);
21227 if ((countval & 0x10) && max_size > 16)
21231 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
21232 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
21235 gcc_unreachable ();
21238 if ((countval & 0x08) && max_size > 8)
21241 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
21244 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
21245 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
21249 if ((countval & 0x04) && max_size > 4)
21251 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
21254 if ((countval & 0x02) && max_size > 2)
21256 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
21259 if ((countval & 0x01) && max_size > 1)
21261 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
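/* Worked example, illustrative only: for a constant tail of 13 bytes on
   a 64-bit target the bit tests above emit exactly three moves:

     8 bytes at offset 0    -- countval & 0x08
     4 bytes at offset 8    -- countval & 0x04
     1 byte  at offset 12   -- countval & 0x01

   one move per set bit of the tail size, with no loop or branches.  */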
21268 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
21269 count, 1, OPTAB_DIRECT);
21270 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
21271 count, QImode, 1, 4);
21275 /* When there are stringops, we can cheaply increase the dest and src pointers.
21276 Otherwise we save code size by maintaining an offset (zero is readily
21277 available from the preceding rep operation) and using x86 addressing modes. */
21279 if (TARGET_SINGLE_STRINGOP)
21283 rtx label = ix86_expand_aligntest (count, 4, true);
21284 src = change_address (srcmem, SImode, srcptr);
21285 dest = change_address (destmem, SImode, destptr);
21286 emit_insn (gen_strmov (destptr, dest, srcptr, src));
21287 emit_label (label);
21288 LABEL_NUSES (label) = 1;
21292 rtx label = ix86_expand_aligntest (count, 2, true);
21293 src = change_address (srcmem, HImode, srcptr);
21294 dest = change_address (destmem, HImode, destptr);
21295 emit_insn (gen_strmov (destptr, dest, srcptr, src));
21296 emit_label (label);
21297 LABEL_NUSES (label) = 1;
21301 rtx label = ix86_expand_aligntest (count, 1, true);
21302 src = change_address (srcmem, QImode, srcptr);
21303 dest = change_address (destmem, QImode, destptr);
21304 emit_insn (gen_strmov (destptr, dest, srcptr, src));
21305 emit_label (label);
21306 LABEL_NUSES (label) = 1;
21311 rtx offset = force_reg (Pmode, const0_rtx);
21316 rtx label = ix86_expand_aligntest (count, 4, true);
21317 src = change_address (srcmem, SImode, srcptr);
21318 dest = change_address (destmem, SImode, destptr);
21319 emit_move_insn (dest, src);
21320 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
21321 true, OPTAB_LIB_WIDEN);
21323 emit_move_insn (offset, tmp);
21324 emit_label (label);
21325 LABEL_NUSES (label) = 1;
21329 rtx label = ix86_expand_aligntest (count, 2, true);
21330 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
21331 src = change_address (srcmem, HImode, tmp);
21332 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
21333 dest = change_address (destmem, HImode, tmp);
21334 emit_move_insn (dest, src);
21335 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
21336 true, OPTAB_LIB_WIDEN);
21338 emit_move_insn (offset, tmp);
21339 emit_label (label);
21340 LABEL_NUSES (label) = 1;
21344 rtx label = ix86_expand_aligntest (count, 1, true);
21345 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
21346 src = change_address (srcmem, QImode, tmp);
21347 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
21348 dest = change_address (destmem, QImode, tmp);
21349 emit_move_insn (dest, src);
21350 emit_label (label);
21351 LABEL_NUSES (label) = 1;
21356 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
21358 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
21359 rtx count, int max_size)
21362 expand_simple_binop (counter_mode (count), AND, count,
21363 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
21364 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
21365 gen_lowpart (QImode, value), count, QImode,
21369 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
21371 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
21375 if (CONST_INT_P (count))
21377 HOST_WIDE_INT countval = INTVAL (count);
21380 if ((countval & 0x10) && max_size > 16)
21384 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
21385 emit_insn (gen_strset (destptr, dest, value));
21386 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
21387 emit_insn (gen_strset (destptr, dest, value));
21390 gcc_unreachable ();
21393 if ((countval & 0x08) && max_size > 8)
21397 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
21398 emit_insn (gen_strset (destptr, dest, value));
21402 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
21403 emit_insn (gen_strset (destptr, dest, value));
21404 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
21405 emit_insn (gen_strset (destptr, dest, value));
21409 if ((countval & 0x04) && max_size > 4)
21411 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
21412 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
21415 if ((countval & 0x02) && max_size > 2)
21417 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
21418 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
21421 if ((countval & 0x01) && max_size > 1)
21423 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
21424 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
21431 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
21436 rtx label = ix86_expand_aligntest (count, 16, true);
21439 dest = change_address (destmem, DImode, destptr);
21440 emit_insn (gen_strset (destptr, dest, value));
21441 emit_insn (gen_strset (destptr, dest, value));
21445 dest = change_address (destmem, SImode, destptr);
21446 emit_insn (gen_strset (destptr, dest, value));
21447 emit_insn (gen_strset (destptr, dest, value));
21448 emit_insn (gen_strset (destptr, dest, value));
21449 emit_insn (gen_strset (destptr, dest, value));
21451 emit_label (label);
21452 LABEL_NUSES (label) = 1;
21456 rtx label = ix86_expand_aligntest (count, 8, true);
21459 dest = change_address (destmem, DImode, destptr);
21460 emit_insn (gen_strset (destptr, dest, value));
21464 dest = change_address (destmem, SImode, destptr);
21465 emit_insn (gen_strset (destptr, dest, value));
21466 emit_insn (gen_strset (destptr, dest, value));
21468 emit_label (label);
21469 LABEL_NUSES (label) = 1;
21473 rtx label = ix86_expand_aligntest (count, 4, true);
21474 dest = change_address (destmem, SImode, destptr);
21475 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
21476 emit_label (label);
21477 LABEL_NUSES (label) = 1;
21481 rtx label = ix86_expand_aligntest (count, 2, true);
21482 dest = change_address (destmem, HImode, destptr);
21483 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
21484 emit_label (label);
21485 LABEL_NUSES (label) = 1;
21489 rtx label = ix86_expand_aligntest (count, 1, true);
21490 dest = change_address (destmem, QImode, destptr);
21491 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
21492 emit_label (label);
21493 LABEL_NUSES (label) = 1;
21497 /* Copy enough from SRC to DEST to align DEST, known to be aligned by ALIGN,
21498 to DESIRED_ALIGNMENT. */
21500 expand_movmem_prologue (rtx destmem, rtx srcmem,
21501 rtx destptr, rtx srcptr, rtx count,
21502 int align, int desired_alignment)
21504 if (align <= 1 && desired_alignment > 1)
21506 rtx label = ix86_expand_aligntest (destptr, 1, false);
21507 srcmem = change_address (srcmem, QImode, srcptr);
21508 destmem = change_address (destmem, QImode, destptr);
21509 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
21510 ix86_adjust_counter (count, 1);
21511 emit_label (label);
21512 LABEL_NUSES (label) = 1;
21514 if (align <= 2 && desired_alignment > 2)
21516 rtx label = ix86_expand_aligntest (destptr, 2, false);
21517 srcmem = change_address (srcmem, HImode, srcptr);
21518 destmem = change_address (destmem, HImode, destptr);
21519 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
21520 ix86_adjust_counter (count, 2);
21521 emit_label (label);
21522 LABEL_NUSES (label) = 1;
21524 if (align <= 4 && desired_alignment > 4)
21526 rtx label = ix86_expand_aligntest (destptr, 4, false);
21527 srcmem = change_address (srcmem, SImode, srcptr);
21528 destmem = change_address (destmem, SImode, destptr);
21529 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
21530 ix86_adjust_counter (count, 4);
21531 emit_label (label);
21532 LABEL_NUSES (label) = 1;
21534 gcc_assert (desired_alignment <= 8);
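/* A sketch, not part of the original sources, of the emitted prologue in
   C for align == 1 and desired_alignment == 8:

     if (dst & 1) { copy 1 byte;  dst += 1; count -= 1; }
     if (dst & 2) { copy 2 bytes; dst += 2; count -= 2; }
     if (dst & 4) { copy 4 bytes; dst += 4; count -= 4; }

   after which dst is 8-byte aligned; each "if" is one of the
   aligntest/strmov blocks above.  */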
21537 /* Copy enough from SRC to DST to align DST to DESIRED_ALIGN.
21538 ALIGN_BYTES is how many bytes need to be copied. */
21540 expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
21541 int desired_align, int align_bytes)
21544 rtx orig_dst = dst;
21545 rtx orig_src = src;
21547 int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
21548 if (src_align_bytes >= 0)
21549 src_align_bytes = desired_align - src_align_bytes;
21550 if (align_bytes & 1)
21552 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
21553 src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
21555 emit_insn (gen_strmov (destreg, dst, srcreg, src));
21557 if (align_bytes & 2)
21559 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
21560 src = adjust_automodify_address_nv (src, HImode, srcreg, off);
21561 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
21562 set_mem_align (dst, 2 * BITS_PER_UNIT);
21563 if (src_align_bytes >= 0
21564 && (src_align_bytes & 1) == (align_bytes & 1)
21565 && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
21566 set_mem_align (src, 2 * BITS_PER_UNIT);
21568 emit_insn (gen_strmov (destreg, dst, srcreg, src));
21570 if (align_bytes & 4)
21572 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
21573 src = adjust_automodify_address_nv (src, SImode, srcreg, off);
21574 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
21575 set_mem_align (dst, 4 * BITS_PER_UNIT);
21576 if (src_align_bytes >= 0)
21578 unsigned int src_align = 0;
21579 if ((src_align_bytes & 3) == (align_bytes & 3))
21581 else if ((src_align_bytes & 1) == (align_bytes & 1))
21583 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
21584 set_mem_align (src, src_align * BITS_PER_UNIT);
21587 emit_insn (gen_strmov (destreg, dst, srcreg, src));
21589 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
21590 src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
21591 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
21592 set_mem_align (dst, desired_align * BITS_PER_UNIT);
21593 if (src_align_bytes >= 0)
21595 unsigned int src_align = 0;
21596 if ((src_align_bytes & 7) == (align_bytes & 7))
21598 else if ((src_align_bytes & 3) == (align_bytes & 3))
21600 else if ((src_align_bytes & 1) == (align_bytes & 1))
21602 if (src_align > (unsigned int) desired_align)
21603 src_align = desired_align;
21604 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
21605 set_mem_align (src, src_align * BITS_PER_UNIT);
21607 if (MEM_SIZE_KNOWN_P (orig_dst))
21608 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
21609 if (MEM_SIZE_KNOWN_P (orig_src))
21610 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
21615 /* Store enough into DEST to align DEST, known to be aligned by ALIGN,
21616 to DESIRED_ALIGNMENT. */
21618 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
21619 int align, int desired_alignment)
21621 if (align <= 1 && desired_alignment > 1)
21623 rtx label = ix86_expand_aligntest (destptr, 1, false);
21624 destmem = change_address (destmem, QImode, destptr);
21625 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
21626 ix86_adjust_counter (count, 1);
21627 emit_label (label);
21628 LABEL_NUSES (label) = 1;
21630 if (align <= 2 && desired_alignment > 2)
21632 rtx label = ix86_expand_aligntest (destptr, 2, false);
21633 destmem = change_address (destmem, HImode, destptr);
21634 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
21635 ix86_adjust_counter (count, 2);
21636 emit_label (label);
21637 LABEL_NUSES (label) = 1;
21639 if (align <= 4 && desired_alignment > 4)
21641 rtx label = ix86_expand_aligntest (destptr, 4, false);
21642 destmem = change_address (destmem, SImode, destptr);
21643 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
21644 ix86_adjust_counter (count, 4);
21645 emit_label (label);
21646 LABEL_NUSES (label) = 1;
21648 gcc_assert (desired_alignment <= 8);
21651 /* Store enough into DST to align DST, known to be aligned by ALIGN,
21652 to DESIRED_ALIGN. ALIGN_BYTES is how many bytes need to be stored. */
21654 expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
21655 int desired_align, int align_bytes)
21658 rtx orig_dst = dst;
21659 if (align_bytes & 1)
21661 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
21663 emit_insn (gen_strset (destreg, dst,
21664 gen_lowpart (QImode, value)));
21666 if (align_bytes & 2)
21668 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
21669 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
21670 set_mem_align (dst, 2 * BITS_PER_UNIT);
21672 emit_insn (gen_strset (destreg, dst,
21673 gen_lowpart (HImode, value)));
21675 if (align_bytes & 4)
21677 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
21678 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
21679 set_mem_align (dst, 4 * BITS_PER_UNIT);
21681 emit_insn (gen_strset (destreg, dst,
21682 gen_lowpart (SImode, value)));
21684 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
21685 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
21686 set_mem_align (dst, desired_align * BITS_PER_UNIT);
21687 if (MEM_SIZE_KNOWN_P (orig_dst))
21688 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
21692 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
21693 static enum stringop_alg
21694 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
21695 int *dynamic_check)
21697 const struct stringop_algs * algs;
21698 bool optimize_for_speed;
21699 /* Algorithms using the rep prefix want at least edi and ecx;
21700 additionally, memset wants eax and memcpy wants esi. Don't
21701 consider such algorithms if the user has appropriated those
21702 registers for their own purposes. */
21703 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
21705 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
21707 #define ALG_USABLE_P(alg) (rep_prefix_usable \
21708 || (alg != rep_prefix_1_byte \
21709 && alg != rep_prefix_4_byte \
21710 && alg != rep_prefix_8_byte))
21711 const struct processor_costs *cost;
21713 /* Even if the string operation call is cold, we still might spend a lot
21714 of time processing large blocks. */
21715 if (optimize_function_for_size_p (cfun)
21716 || (optimize_insn_for_size_p ()
21717 && expected_size != -1 && expected_size < 256))
21718 optimize_for_speed = false;
21720 optimize_for_speed = true;
21722 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
21724 *dynamic_check = -1;
21726 algs = &cost->memset[TARGET_64BIT != 0];
21728 algs = &cost->memcpy[TARGET_64BIT != 0];
21729 if (ix86_stringop_alg != no_stringop && ALG_USABLE_P (ix86_stringop_alg))
21730 return ix86_stringop_alg;
21731 /* rep; movq or rep; movl is the smallest variant. */
21732 else if (!optimize_for_speed)
21734 if (!count || (count & 3))
21735 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
21737 return rep_prefix_usable ? rep_prefix_4_byte : loop;
21739 /* Very tiny blocks are best handled via the loop; REP is expensive to set up. */
21741 else if (expected_size != -1 && expected_size < 4)
21742 return loop_1_byte;
21743 else if (expected_size != -1)
21746 enum stringop_alg alg = libcall;
21747 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
21749 /* We get here if the algorithms that were not libcall-based
21750 were rep-prefix based and we are unable to use rep prefixes
21751 based on global register usage. Break out of the loop and
21752 use the heuristic below. */
21753 if (algs->size[i].max == 0)
21755 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
21757 enum stringop_alg candidate = algs->size[i].alg;
21759 if (candidate != libcall && ALG_USABLE_P (candidate))
21761 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
21762 last non-libcall inline algorithm. */
21763 if (TARGET_INLINE_ALL_STRINGOPS)
21765 /* When the current size is best copied by a libcall,
21766 but we are still forced to inline, run the heuristic below
21767 that will pick code for medium-sized blocks. */
21768 if (alg != libcall)
21772 else if (ALG_USABLE_P (candidate))
21776 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
21778 /* When asked to inline the call anyway, try to pick a meaningful choice.
21779 We look for the maximal size of a block that is faster to copy by hand,
21780 and take blocks of at most that size, guessing that the average size
21781 will be roughly half of the maximum.
21783 If this turns out to be bad, we might simply specify the preferred
21784 choice in ix86_costs. */
21785 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
21786 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
21789 enum stringop_alg alg;
21791 bool any_alg_usable_p = true;
21793 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
21795 enum stringop_alg candidate = algs->size[i].alg;
21796 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
21798 if (candidate != libcall && candidate
21799 && ALG_USABLE_P (candidate))
21800 max = algs->size[i].max;
21802 /* If there aren't any usable algorithms, then recursing on
21803 smaller sizes isn't going to find anything. Just return the
21804 simple byte-at-a-time copy loop. */
21805 if (!any_alg_usable_p)
21807 /* Pick something reasonable. */
21808 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
21809 *dynamic_check = 128;
21810 return loop_1_byte;
21814 alg = decide_alg (count, max / 2, memset, dynamic_check);
21815 gcc_assert (*dynamic_check == -1);
21816 gcc_assert (alg != libcall);
21817 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
21818 *dynamic_check = max;
21821 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
21822 #undef ALG_USABLE_P
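/* Illustrative note: each stringop_algs entry pairs a maximum block size
   with an algorithm; a hypothetical cost-table entry such as

     {libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}}

   would make the unknown-size case a libcall, the loop handle blocks up
   to 32 bytes, rep movsl blocks up to 8k, and a libcall everything
   larger.  decide_alg above walks exactly such a table.  */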
21825 /* Decide on alignment. We know that the operand is already aligned to ALIGN
21826 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
21828 decide_alignment (int align,
21829 enum stringop_alg alg,
21832 int desired_align = 0;
21836 gcc_unreachable ();
21838 case unrolled_loop:
21839 desired_align = GET_MODE_SIZE (Pmode);
21841 case rep_prefix_8_byte:
21844 case rep_prefix_4_byte:
21845 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
21846 copying a whole cache line at once. */
21847 if (TARGET_PENTIUMPRO)
21852 case rep_prefix_1_byte:
21853 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
21854 copying a whole cache line at once. */
21855 if (TARGET_PENTIUMPRO)
21869 if (desired_align < align)
21870 desired_align = align;
21871 if (expected_size != -1 && expected_size < 4)
21872 desired_align = align;
21873 return desired_align;
21876 /* Return the smallest power of 2 greater than VAL. */
21878 smallest_pow2_greater_than (int val)
21886 /* Expand string move (memcpy) operation. Use i386 string operations
21887 when profitable. expand_setmem contains similar code. The code
21888 depends upon architecture, block size and alignment, but always has
21889 the same overall structure:
21891 1) Prologue guard: Conditional that jumps to the epilogue for small
21892 blocks that can be handled by the epilogue alone. This is faster
21893 but also needed for correctness, since the prologue assumes the block
21894 is larger than the desired alignment.
21896 Optional dynamic check for size and libcall for large
21897 blocks is emitted here too, with -minline-stringops-dynamically.
21899 2) Prologue: copy the first few bytes in order to get the destination
21900 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
21901 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
21902 copied. We emit either a jump tree, on power-of-two sized
21903 blocks, or a byte loop.
21905 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
21906 with the specified algorithm.
21908 4) Epilogue: code copying the tail of the block that is too small to be
21909 handled by the main body (or up to the size guarded by the prologue guard). */
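/* A condensed sketch, not part of the original sources, of the emitted
   structure in C (names are stand-ins):

     if (count < epilogue_size_needed) goto epilogue;      -- 1) guard
     while (dst & (desired_align - 1))                     -- 2) prologue
       copy_one_byte (), count--;
     main_body (count & -size_needed);                     -- 3) loop/rep
   epilogue:                                               -- 4) tail
     copy_tail (count & (epilogue_size_needed - 1));

   The real emission below is in RTL and folds several special cases.  */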
21912 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
21913 rtx expected_align_exp, rtx expected_size_exp)
21919 rtx jump_around_label = NULL;
21920 HOST_WIDE_INT align = 1;
21921 unsigned HOST_WIDE_INT count = 0;
21922 HOST_WIDE_INT expected_size = -1;
21923 int size_needed = 0, epilogue_size_needed;
21924 int desired_align = 0, align_bytes = 0;
21925 enum stringop_alg alg;
21927 bool need_zero_guard = false;
21929 if (CONST_INT_P (align_exp))
21930 align = INTVAL (align_exp);
21931 /* i386 can do misaligned accesses at a reasonably increased cost. */
21932 if (CONST_INT_P (expected_align_exp)
21933 && INTVAL (expected_align_exp) > align)
21934 align = INTVAL (expected_align_exp);
21935 /* ALIGN is the minimum of destination and source alignment, but we care here
21936 just about destination alignment. */
21937 else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
21938 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
21940 if (CONST_INT_P (count_exp))
21941 count = expected_size = INTVAL (count_exp);
21942 if (CONST_INT_P (expected_size_exp) && count == 0)
21943 expected_size = INTVAL (expected_size_exp);
21945 /* Make sure we don't need to care about overflow later on. */
21946 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
21949 /* Step 0: Decide on preferred algorithm, desired alignment and
21950 size of chunks to be copied by main loop. */
21952 alg = decide_alg (count, expected_size, false, &dynamic_check);
21953 desired_align = decide_alignment (align, alg, expected_size);
21955 if (!TARGET_ALIGN_STRINGOPS)
21956 align = desired_align;
21958 if (alg == libcall)
21960 gcc_assert (alg != no_stringop);
21962 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
21963 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
21964 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
21969 gcc_unreachable ();
21971 need_zero_guard = true;
21972 size_needed = GET_MODE_SIZE (Pmode);
21974 case unrolled_loop:
21975 need_zero_guard = true;
21976 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
21978 case rep_prefix_8_byte:
21981 case rep_prefix_4_byte:
21984 case rep_prefix_1_byte:
21988 need_zero_guard = true;
21993 epilogue_size_needed = size_needed;
21995 /* Step 1: Prologue guard. */
21997 /* Alignment code needs count to be in a register. */
21998 if (CONST_INT_P (count_exp) && desired_align > align)
22000 if (INTVAL (count_exp) > desired_align
22001 && INTVAL (count_exp) > size_needed)
22004 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
22005 if (align_bytes <= 0)
22008 align_bytes = desired_align - align_bytes;
22010 if (align_bytes == 0)
22011 count_exp = force_reg (counter_mode (count_exp), count_exp);
22013 gcc_assert (desired_align >= 1 && align >= 1);
22015 /* Ensure that the alignment prologue won't copy past the end of the block. */
22016 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
22018 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
22019 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
22020 Make sure it is a power of 2. */
22021 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
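/* Illustrative numbers (editorial): with size_needed == 16,
   desired_align == 16 and align == 4 this computes
   MAX (16 - 1, 16 - 4) == 15 and rounds it up to the power of two 16,
   so the guard below sends any block shorter than 16 bytes straight
   to the epilogue.  */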
22025 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
22027 /* If main algorithm works on QImode, no epilogue is needed.
22028 For small sizes just don't align anything. */
22029 if (size_needed == 1)
22030 desired_align = align;
22037 label = gen_label_rtx ();
22038 emit_cmp_and_jump_insns (count_exp,
22039 GEN_INT (epilogue_size_needed),
22040 LTU, 0, counter_mode (count_exp), 1, label);
22041 if (expected_size == -1 || expected_size < epilogue_size_needed)
22042 predict_jump (REG_BR_PROB_BASE * 60 / 100);
22043 else
22044 predict_jump (REG_BR_PROB_BASE * 20 / 100);
22048 /* Emit code to decide at runtime whether a library call or inline code
22049 should be used. */
22050 if (dynamic_check != -1)
22052 if (CONST_INT_P (count_exp))
22054 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
22056 emit_block_move_via_libcall (dst, src, count_exp, false);
22057 count_exp = const0_rtx;
22063 rtx hot_label = gen_label_rtx ();
22064 jump_around_label = gen_label_rtx ();
22065 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
22066 LEU, 0, GET_MODE (count_exp), 1, hot_label);
22067 predict_jump (REG_BR_PROB_BASE * 90 / 100);
22068 emit_block_move_via_libcall (dst, src, count_exp, false);
22069 emit_jump (jump_around_label);
22070 emit_label (hot_label);
22074 /* Step 2: Alignment prologue. */
22076 if (desired_align > align)
22078 if (align_bytes == 0)
22080 /* Except for the first move in epilogue, we no longer know
22081 constant offset in aliasing info. It doesn't seem worth the
22082 pain to maintain it for the first move, so throw away
22083 the info early. */
22084 src = change_address (src, BLKmode, srcreg);
22085 dst = change_address (dst, BLKmode, destreg);
22086 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
22091 /* If we know how many bytes need to be stored before dst is
22092 sufficiently aligned, maintain aliasing info accurately. */
22093 dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
22094 desired_align, align_bytes);
22095 count_exp = plus_constant (count_exp, -align_bytes);
22096 count -= align_bytes;
22098 if (need_zero_guard
22099 && (count < (unsigned HOST_WIDE_INT) size_needed
22100 || (align_bytes == 0
22101 && count < ((unsigned HOST_WIDE_INT) size_needed
22102 + desired_align - align))))
22104 /* It is possible that we copied enough so the main loop will not
22105 execute. */
22106 gcc_assert (size_needed > 1);
22107 if (label == NULL_RTX)
22108 label = gen_label_rtx ();
22109 emit_cmp_and_jump_insns (count_exp,
22110 GEN_INT (size_needed),
22111 LTU, 0, counter_mode (count_exp), 1, label);
22112 if (expected_size == -1
22113 || expected_size < (desired_align - align) / 2 + size_needed)
22114 predict_jump (REG_BR_PROB_BASE * 20 / 100);
22115 else
22116 predict_jump (REG_BR_PROB_BASE * 60 / 100);
22119 if (label && size_needed == 1)
22121 emit_label (label);
22122 LABEL_NUSES (label) = 1;
22123 label = NULL;
22124 epilogue_size_needed = 1;
22126 else if (label == NULL_RTX)
22127 epilogue_size_needed = size_needed;
22129 /* Step 3: Main loop. */
22135 gcc_unreachable ();
22136 case loop_1_byte:
22137 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
22138 count_exp, QImode, 1, expected_size);
22139 break;
22140 case loop:
22141 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
22142 count_exp, Pmode, 1, expected_size);
22144 case unrolled_loop:
22145 /* Unroll only by a factor of 2 in 32-bit mode, since we don't have enough
22146 registers for 4 temporaries anyway. */
22147 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
22148 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
22151 case rep_prefix_8_byte:
22152 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
22155 case rep_prefix_4_byte:
22156 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
22159 case rep_prefix_1_byte:
22160 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
22164 /* Properly adjust the offsets of the src and dest memory for aliasing. */
22165 if (CONST_INT_P (count_exp))
22167 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
22168 (count / size_needed) * size_needed);
22169 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
22170 (count / size_needed) * size_needed);
22174 src = change_address (src, BLKmode, srcreg);
22175 dst = change_address (dst, BLKmode, destreg);
22178 /* Step 4: Epilogue to copy the remaining bytes. */
22182 /* When the main loop is done, COUNT_EXP might hold the original count,
22183 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
22184 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
22185 bytes. Compensate if needed. */
22187 if (size_needed < epilogue_size_needed)
22189 tmp =
22190 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
22191 GEN_INT (size_needed - 1), count_exp, 1,
22192 OPTAB_WIDEN);
22193 if (tmp != count_exp)
22194 emit_move_insn (count_exp, tmp);
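/* Worked example (editorial): for count == 1000 and size_needed == 16
   the main loop leaves COUNT_EXP holding 1000, yet only
   1000 & 15 == 8 tail bytes remain; the masking above fixes COUNT_EXP
   up so the epilogue copies exactly those 8 bytes.  */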
22195 if (label)
22196 emit_label (label);
22197 LABEL_NUSES (label) = 1;
22200 if (count_exp != const0_rtx && epilogue_size_needed > 1)
22201 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
22202 epilogue_size_needed);
22203 if (jump_around_label)
22204 emit_label (jump_around_label);
22208 /* Helper function for memset. For QImode value 0xXY produce
22209 0xXYXYXYXY of the width specified by MODE. This is essentially
22210 a * 0x10101010, but we can do slightly better than
22211 synth_mult by unwinding the sequence by hand on CPUs with
22212 slow multiply. */
22214 promote_duplicated_reg (enum machine_mode mode, rtx val)
22216 enum machine_mode valmode = GET_MODE (val);
22218 int nops = mode == DImode ? 3 : 2;
22220 gcc_assert (mode == SImode || mode == DImode);
22221 if (val == const0_rtx)
22222 return copy_to_mode_reg (mode, const0_rtx);
22223 if (CONST_INT_P (val))
22225 HOST_WIDE_INT v = INTVAL (val) & 255;
22227 v |= v << 8;
22228 v |= v << 16;
22229 if (mode == DImode)
22230 v |= (v << 16) << 16;
22231 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
22234 if (valmode == VOIDmode)
22235 valmode = QImode;
22236 if (valmode != QImode)
22237 val = gen_lowpart (QImode, val);
22238 if (mode == QImode)
22239 return val;
22240 if (!TARGET_PARTIAL_REG_STALL)
22241 nops--;
22242 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
22243 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
22244 <= (ix86_cost->shift_const + ix86_cost->add) * nops
22245 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
22247 rtx reg = convert_modes (mode, QImode, val, true);
22248 tmp = promote_duplicated_reg (mode, const1_rtx);
22249 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
22250 OPTAB_DIRECT);
22254 rtx reg = convert_modes (mode, QImode, val, true);
22256 if (!TARGET_PARTIAL_REG_STALL)
22257 if (mode == SImode)
22258 emit_insn (gen_movsi_insv_1 (reg, reg));
22259 else
22260 emit_insn (gen_movdi_insv_1 (reg, reg));
22263 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
22264 NULL, 1, OPTAB_DIRECT);
22265 reg =
22266 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
22268 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
22269 NULL, 1, OPTAB_DIRECT);
22270 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
22271 if (mode == SImode)
22272 return reg;
22273 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
22274 NULL, 1, OPTAB_DIRECT);
22275 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
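/* Editorial illustration (not part of GCC): a plain C model of the
   shift-and-IOR sequence emitted above; for a byte B it computes
   B * 0x0101010101010101 without a multiply:

       unsigned long long v = b;
       v |= v << 8;        -- 0x00XY -> 0xXYXY
       v |= v << 16;       -- -> 0xXYXYXYXY
       v |= v << 32;       -- -> 0xXYXYXYXYXYXYXYXY (DImode only)
   */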
22280 /* Duplicate the value VAL using promote_duplicated_reg into the maximal
22281 size needed by the main loop (copying SIZE_NEEDED chunks) and by the
22282 prologue (raising alignment from ALIGN to DESIRED_ALIGN). */
22284 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
22289 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
22290 promoted_val = promote_duplicated_reg (DImode, val);
22291 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
22292 promoted_val = promote_duplicated_reg (SImode, val);
22293 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
22294 promoted_val = promote_duplicated_reg (HImode, val);
22296 promoted_val = val;
22298 return promoted_val;
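/* Illustrative choices (editorial): on a 64-bit target,
   size_needed == 8 promotes VAL to DImode, size_needed == 4 to SImode,
   size_needed == 2 to HImode, and a pure byte algorithm leaves VAL
   unpromoted.  */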
22301 /* Expand string set (memset) and clear (bzero) operations. Use i386 string
22302 operations when profitable. See the expand_movmem comment for an
22303 explanation of the individual steps performed. */
22305 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
22306 rtx expected_align_exp, rtx expected_size_exp)
22311 rtx jump_around_label = NULL;
22312 HOST_WIDE_INT align = 1;
22313 unsigned HOST_WIDE_INT count = 0;
22314 HOST_WIDE_INT expected_size = -1;
22315 int size_needed = 0, epilogue_size_needed;
22316 int desired_align = 0, align_bytes = 0;
22317 enum stringop_alg alg;
22318 rtx promoted_val = NULL;
22319 bool force_loopy_epilogue = false;
22321 bool need_zero_guard = false;
22323 if (CONST_INT_P (align_exp))
22324 align = INTVAL (align_exp);
22325 /* i386 can do misaligned access at reasonably increased cost. */
22326 if (CONST_INT_P (expected_align_exp)
22327 && INTVAL (expected_align_exp) > align)
22328 align = INTVAL (expected_align_exp);
22329 if (CONST_INT_P (count_exp))
22330 count = expected_size = INTVAL (count_exp);
22331 if (CONST_INT_P (expected_size_exp) && count == 0)
22332 expected_size = INTVAL (expected_size_exp);
22334 /* Make sure we don't need to care about overflow later on. */
22335 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
22336 return false;
22338 /* Step 0: Decide on preferred algorithm, desired alignment and
22339 size of chunks to be copied by main loop. */
22341 alg = decide_alg (count, expected_size, true, &dynamic_check);
22342 desired_align = decide_alignment (align, alg, expected_size);
22344 if (!TARGET_ALIGN_STRINGOPS)
22345 align = desired_align;
22347 if (alg == libcall)
22348 return false;
22349 gcc_assert (alg != no_stringop);
22351 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
22352 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
22357 gcc_unreachable ();
22358 case loop:
22359 need_zero_guard = true;
22360 size_needed = GET_MODE_SIZE (Pmode);
22362 case unrolled_loop:
22363 need_zero_guard = true;
22364 size_needed = GET_MODE_SIZE (Pmode) * 4;
22366 case rep_prefix_8_byte:
22369 case rep_prefix_4_byte:
22372 case rep_prefix_1_byte:
22375 case loop_1_byte:
22376 need_zero_guard = true;
22380 epilogue_size_needed = size_needed;
22382 /* Step 1: Prologue guard. */
22384 /* Alignment code needs count to be in register. */
22385 if (CONST_INT_P (count_exp) && desired_align > align)
22387 if (INTVAL (count_exp) > desired_align
22388 && INTVAL (count_exp) > size_needed)
22390 align_bytes
22391 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
22392 if (align_bytes <= 0)
22393 align_bytes = 0;
22394 else
22395 align_bytes = desired_align - align_bytes;
22397 if (align_bytes == 0)
22399 enum machine_mode mode = SImode;
22400 if (TARGET_64BIT && (count & ~0xffffffff))
22401 mode = DImode;
22402 count_exp = force_reg (mode, count_exp);
22405 /* Do the cheap promotion to allow better CSE across the
22406 main loop and epilogue (i.e. one load of the big constant in
22407 front of all code). */
22408 if (CONST_INT_P (val_exp))
22409 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
22410 desired_align, align);
22411 /* Ensure that alignment prologue won't copy past end of block. */
22412 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
22414 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
22415 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
22416 Make sure it is a power of 2. */
22417 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
22419 /* To improve performance of small blocks, we jump around the VAL
22420 promoting mode. This means that if the promoted VAL is not constant,
22421 we might not use it in the epilogue and have to use a byte
22422 loop instead. */
22423 if (epilogue_size_needed > 2 && !promoted_val)
22424 force_loopy_epilogue = true;
22427 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
22429 /* If main algorithm works on QImode, no epilogue is needed.
22430 For small sizes just don't align anything. */
22431 if (size_needed == 1)
22432 desired_align = align;
22439 label = gen_label_rtx ();
22440 emit_cmp_and_jump_insns (count_exp,
22441 GEN_INT (epilogue_size_needed),
22442 LTU, 0, counter_mode (count_exp), 1, label);
22443 if (expected_size == -1 || expected_size <= epilogue_size_needed)
22444 predict_jump (REG_BR_PROB_BASE * 60 / 100);
22445 else
22446 predict_jump (REG_BR_PROB_BASE * 20 / 100);
22449 if (dynamic_check != -1)
22451 rtx hot_label = gen_label_rtx ();
22452 jump_around_label = gen_label_rtx ();
22453 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
22454 LEU, 0, counter_mode (count_exp), 1, hot_label);
22455 predict_jump (REG_BR_PROB_BASE * 90 / 100);
22456 set_storage_via_libcall (dst, count_exp, val_exp, false);
22457 emit_jump (jump_around_label);
22458 emit_label (hot_label);
22461 /* Step 2: Alignment prologue. */
22463 /* Do the expensive promotion once we have branched off the small blocks. */
22465 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
22466 desired_align, align);
22467 gcc_assert (desired_align >= 1 && align >= 1);
22469 if (desired_align > align)
22471 if (align_bytes == 0)
22473 /* Except for the first move in epilogue, we no longer know
22474 constant offset in aliasing info. It doesn't seem worth the
22475 pain to maintain it for the first move, so throw away
22476 the info early. */
22477 dst = change_address (dst, BLKmode, destreg);
22478 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
22483 /* If we know how many bytes need to be stored before dst is
22484 sufficiently aligned, maintain aliasing info accurately. */
22485 dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
22486 desired_align, align_bytes);
22487 count_exp = plus_constant (count_exp, -align_bytes);
22488 count -= align_bytes;
22490 if (need_zero_guard
22491 && (count < (unsigned HOST_WIDE_INT) size_needed
22492 || (align_bytes == 0
22493 && count < ((unsigned HOST_WIDE_INT) size_needed
22494 + desired_align - align))))
22496 /* It is possible that we copied enough so the main loop will not
22497 execute. */
22498 gcc_assert (size_needed > 1);
22499 if (label == NULL_RTX)
22500 label = gen_label_rtx ();
22501 emit_cmp_and_jump_insns (count_exp,
22502 GEN_INT (size_needed),
22503 LTU, 0, counter_mode (count_exp), 1, label);
22504 if (expected_size == -1
22505 || expected_size < (desired_align - align) / 2 + size_needed)
22506 predict_jump (REG_BR_PROB_BASE * 20 / 100);
22507 else
22508 predict_jump (REG_BR_PROB_BASE * 60 / 100);
22511 if (label && size_needed == 1)
22513 emit_label (label);
22514 LABEL_NUSES (label) = 1;
22515 label = NULL;
22516 promoted_val = val_exp;
22517 epilogue_size_needed = 1;
22519 else if (label == NULL_RTX)
22520 epilogue_size_needed = size_needed;
22522 /* Step 3: Main loop. */
22528 gcc_unreachable ();
22529 case loop_1_byte:
22530 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
22531 count_exp, QImode, 1, expected_size);
22532 break;
22533 case loop:
22534 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
22535 count_exp, Pmode, 1, expected_size);
22537 case unrolled_loop:
22538 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
22539 count_exp, Pmode, 4, expected_size);
22541 case rep_prefix_8_byte:
22542 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
22545 case rep_prefix_4_byte:
22546 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
22549 case rep_prefix_1_byte:
22550 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
22554 /* Properly adjust the offset of the dest memory for aliasing. */
22555 if (CONST_INT_P (count_exp))
22556 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
22557 (count / size_needed) * size_needed);
22559 dst = change_address (dst, BLKmode, destreg);
22561 /* Step 4: Epilogue to copy the remaining bytes. */
22565 /* When the main loop is done, COUNT_EXP might hold original count,
22566 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
22567 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
22568 bytes. Compensate if needed. */
22570 if (size_needed < epilogue_size_needed)
22572 tmp =
22573 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
22574 GEN_INT (size_needed - 1), count_exp, 1,
22575 OPTAB_WIDEN);
22576 if (tmp != count_exp)
22577 emit_move_insn (count_exp, tmp);
22578 if (label)
22579 emit_label (label);
22580 LABEL_NUSES (label) = 1;
22583 if (count_exp != const0_rtx && epilogue_size_needed > 1)
22585 if (force_loopy_epilogue)
22586 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
22587 epilogue_size_needed);
22588 else
22589 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
22590 epilogue_size_needed);
22592 if (jump_around_label)
22593 emit_label (jump_around_label);
22597 /* Expand the appropriate insns for doing strlen if not just doing
22598 repnz; scasb
22600 out = result, initialized with the start address
22601 align_rtx = alignment of the address.
22602 scratch = scratch register, initialized with the start address when
22603 not aligned, otherwise undefined
22605 This is just the body. It needs the initializations mentioned above and
22606 some address computation at the end. These things are done in i386.md. */
22609 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
22613 rtx align_2_label = NULL_RTX;
22614 rtx align_3_label = NULL_RTX;
22615 rtx align_4_label = gen_label_rtx ();
22616 rtx end_0_label = gen_label_rtx ();
22618 rtx tmpreg = gen_reg_rtx (SImode);
22619 rtx scratch = gen_reg_rtx (SImode);
22623 if (CONST_INT_P (align_rtx))
22624 align = INTVAL (align_rtx);
22626 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
22628 /* Is there a known alignment and is it less than 4? */
22629 if (align < 4)
22631 rtx scratch1 = gen_reg_rtx (Pmode);
22632 emit_move_insn (scratch1, out);
22633 /* Is there a known alignment and is it not 2? */
22634 if (align != 2)
22636 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
22637 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
22639 /* Leave just the 3 lower bits. */
22640 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
22641 NULL_RTX, 0, OPTAB_WIDEN);
22643 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
22644 Pmode, 1, align_4_label);
22645 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
22646 Pmode, 1, align_2_label);
22647 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
22648 Pmode, 1, align_3_label);
22652 /* Since the alignment is 2, we have to check 2 or 0 bytes;
22653 check whether it is aligned to a 4-byte boundary. */
22655 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
22656 NULL_RTX, 0, OPTAB_WIDEN);
22658 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
22659 Pmode, 1, align_4_label);
22662 mem = change_address (src, QImode, out);
22664 /* Now compare the bytes. */
22666 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
22667 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
22668 QImode, 1, end_0_label);
22670 /* Increment the address. */
22671 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
22673 /* Not needed with an alignment of 2. */
22676 emit_label (align_2_label);
22678 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
22681 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
22683 emit_label (align_3_label);
22686 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
22689 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
22692 /* Generate the loop to check 4 bytes at a time. It is not a good idea
22693 to align this loop: it only makes programs bigger and does not
22694 speed them up. */
22695 emit_label (align_4_label);
22697 mem = change_address (src, SImode, out);
22698 emit_move_insn (scratch, mem);
22699 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
22701 /* This formula yields a nonzero result iff one of the bytes is zero.
22702 This saves three branches inside the loop and many cycles. */
22704 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
22705 emit_insn (gen_one_cmplsi2 (scratch, scratch));
22706 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
22707 emit_insn (gen_andsi3 (tmpreg, tmpreg,
22708 gen_int_mode (0x80808080, SImode)));
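/* Worked example (editorial) of the zero-byte test above, the classic
   (v - 0x01010101) & ~v & 0x80808080 formula: for v == 0x12003456,
   v - 0x01010101 == 0x10FF3355 and ~v == 0xEDFFCBA9, so the AND chain
   yields 0x00800000 -- nonzero precisely because byte 2 of v is zero;
   a v with no zero byte yields 0.  */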
22709 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
22710 align_4_label);
22712 if (TARGET_CMOVE)
22714 rtx reg = gen_reg_rtx (SImode);
22715 rtx reg2 = gen_reg_rtx (Pmode);
22716 emit_move_insn (reg, tmpreg);
22717 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
22719 /* If zero is not in the first two bytes, move two bytes forward. */
22720 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
22721 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
22722 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
22723 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
22724 gen_rtx_IF_THEN_ELSE (SImode, tmp,
22727 /* Emit the lea manually to avoid clobbering the flags. */
22728 emit_insn (gen_rtx_SET (SImode, reg2,
22729 gen_rtx_PLUS (Pmode, out, const2_rtx)));
22731 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
22732 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
22733 emit_insn (gen_rtx_SET (VOIDmode, out,
22734 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
22740 rtx end_2_label = gen_label_rtx ();
22741 /* Is zero in the first two bytes? */
22743 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
22744 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
22745 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
22746 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
22747 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
22749 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
22750 JUMP_LABEL (tmp) = end_2_label;
22752 /* Not in the first two. Move two bytes forward. */
22753 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
22754 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
22756 emit_label (end_2_label);
22760 /* Avoid a branch in fixing up the final byte position. */
22761 tmpreg = gen_lowpart (QImode, tmpreg);
22762 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
22763 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
22764 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
22765 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
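/* Editorial note on the branch-free fixup above: after the optional
   two-byte step, bit 7 of TMPREG's low byte is set iff the first byte
   of the remaining pair is the zero.  The addqi3_cc doubling moves
   that bit into the carry flag, and the borrow-using subtraction
   computes out = out - 3 - CF, stepping back 4 bytes when the zero is
   the first byte of the pair and 3 bytes otherwise.  */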
22767 emit_label (end_0_label);
22770 /* Expand strlen. */
22773 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
22775 rtx addr, scratch1, scratch2, scratch3, scratch4;
22777 /* The generic case of the strlen expander is long. Avoid expanding it
22778 unless TARGET_INLINE_ALL_STRINGOPS. */
22780 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
22781 && !TARGET_INLINE_ALL_STRINGOPS
22782 && !optimize_insn_for_size_p ()
22783 && (!CONST_INT_P (align) || INTVAL (align) < 4))
22784 return false;
22786 addr = force_reg (Pmode, XEXP (src, 0));
22787 scratch1 = gen_reg_rtx (Pmode);
22789 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
22790 && !optimize_insn_for_size_p ())
22792 /* Well it seems that some optimizer does not combine a call like
22793 foo(strlen(bar), strlen(bar));
22794 when the move and the subtraction are done here. It does calculate
22795 the length just once when these instructions are done inside of
22796 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
22797 often used and I use one fewer register for the lifetime of
22798 output_strlen_unroll() this is better. */
22800 emit_move_insn (out, addr);
22802 ix86_expand_strlensi_unroll_1 (out, src, align);
22804 /* strlensi_unroll_1 returns the address of the zero at the end of
22805 the string, like memchr(), so compute the length by subtracting
22806 the start address. */
22807 emit_insn (ix86_gen_sub3 (out, out, addr));
22813 /* Can't use this if the user has appropriated eax, ecx, or edi. */
22814 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
22817 scratch2 = gen_reg_rtx (Pmode);
22818 scratch3 = gen_reg_rtx (Pmode);
22819 scratch4 = force_reg (Pmode, constm1_rtx);
22821 emit_move_insn (scratch3, addr);
22822 eoschar = force_reg (QImode, eoschar);
22824 src = replace_equiv_address_nv (src, scratch3);
22826 /* If .md starts supporting :P, this can be done in .md. */
22827 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
22828 scratch4), UNSPEC_SCAS);
22829 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
22830 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
22831 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
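/* Editorial sketch (hedged): the rep-prefixed sequence built here is
   roughly

       mov    $-1, %ecx
       repnz scasb              ; scans len+1 bytes, decrementing %ecx
       not    %ecx              ; %ecx = len + 1
       lea    -1(%ecx), %out    ; length

   since %ecx starts at -1 and ends at -(len + 2); %out stands for
   whatever register holds OUT.  */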
22836 /* For a given symbol (function), construct code to compute the address
22837 of its PLT entry in the large x86-64 PIC model. */
22839 construct_plt_address (rtx symbol)
22841 rtx tmp = gen_reg_rtx (Pmode);
22842 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
22844 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
22845 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
22847 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
22848 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
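/* Editorial sketch (hedged): for a symbol foo this emits, roughly,

       movabs $foo@PLTOFF, %tmp
       add    %pic, %tmp

   where %tmp and %pic stand for the registers holding TMP and
   pic_offset_table_rtx (the GOT base).  */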
22853 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
22855 rtx pop, bool sibcall)
22857 /* We need to represent that SI and DI registers are clobbered
22858 by SYSV calls. */
22859 static int clobbered_registers[] = {
22860 XMM6_REG, XMM7_REG, XMM8_REG,
22861 XMM9_REG, XMM10_REG, XMM11_REG,
22862 XMM12_REG, XMM13_REG, XMM14_REG,
22863 XMM15_REG, SI_REG, DI_REG
22865 rtx vec[ARRAY_SIZE (clobbered_registers) + 3];
22866 rtx use = NULL, call;
22867 unsigned int vec_len;
22869 if (pop == const0_rtx)
22870 pop = NULL;
22871 gcc_assert (!TARGET_64BIT || !pop);
22873 if (TARGET_MACHO && !TARGET_64BIT)
22876 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
22877 fnaddr = machopic_indirect_call_target (fnaddr);
22882 /* Static functions and indirect calls don't need the pic register. */
22883 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
22884 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
22885 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
22886 use_reg (&use, pic_offset_table_rtx);
22889 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
22891 rtx al = gen_rtx_REG (QImode, AX_REG);
22892 emit_move_insn (al, callarg2);
22893 use_reg (&use, al);
22896 if (ix86_cmodel == CM_LARGE_PIC
22898 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
22899 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
22900 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
22901 if (sibcall
22902 ? !sibcall_insn_operand (XEXP (fnaddr, 0), Pmode)
22903 : !call_insn_operand (XEXP (fnaddr, 0), Pmode))
22905 fnaddr = XEXP (fnaddr, 0);
22906 if (GET_MODE (fnaddr) != Pmode)
22907 fnaddr = convert_to_mode (Pmode, fnaddr, 1);
22908 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (Pmode, fnaddr));
22912 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
22913 if (retval)
22914 call = gen_rtx_SET (VOIDmode, retval, call);
22915 vec[vec_len++] = call;
22917 if (pop)
22919 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
22920 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
22921 vec[vec_len++] = pop;
22924 if (TARGET_64BIT_MS_ABI
22925 && (!callarg2 || INTVAL (callarg2) != -2))
22929 vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
22930 UNSPEC_MS_TO_SYSV_CALL);
22932 for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
22933 vec[vec_len++]
22934 = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
22935 ? TImode : DImode,
22936 gen_rtx_REG (SSE_REGNO_P (clobbered_registers[i])
22937 ? TImode : DImode,
22938 clobbered_registers[i]));
22941 /* Add UNSPEC_CALL_NEEDS_VZEROUPPER decoration. */
22942 if (TARGET_VZEROUPPER)
22945 if (cfun->machine->callee_pass_avx256_p)
22947 if (cfun->machine->callee_return_avx256_p)
22948 avx256 = callee_return_pass_avx256;
22950 avx256 = callee_pass_avx256;
22952 else if (cfun->machine->callee_return_avx256_p)
22953 avx256 = callee_return_avx256;
22955 avx256 = call_no_avx256;
22957 if (reload_completed)
22958 emit_insn (gen_avx_vzeroupper (GEN_INT (avx256)));
22959 else
22960 vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode,
22961 gen_rtvec (1, GEN_INT (avx256)),
22962 UNSPEC_CALL_NEEDS_VZEROUPPER);
22966 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
22967 call = emit_call_insn (call);
22969 CALL_INSN_FUNCTION_USAGE (call) = use;
22975 ix86_split_call_vzeroupper (rtx insn, rtx vzeroupper)
22977 rtx pat = PATTERN (insn);
22978 rtvec vec = XVEC (pat, 0);
22979 int len = GET_NUM_ELEM (vec) - 1;
22981 /* Strip off the last entry of the parallel. */
22982 gcc_assert (GET_CODE (RTVEC_ELT (vec, len)) == UNSPEC);
22983 gcc_assert (XINT (RTVEC_ELT (vec, len), 1) == UNSPEC_CALL_NEEDS_VZEROUPPER);
22984 if (len == 1)
22985 pat = RTVEC_ELT (vec, 0);
22986 else
22987 pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (len, &RTVEC_ELT (vec, 0)));
22989 emit_insn (gen_avx_vzeroupper (vzeroupper));
22990 emit_call_insn (pat);
22993 /* Output the assembly for a call instruction. */
22996 ix86_output_call_insn (rtx insn, rtx call_op)
22998 bool direct_p = constant_call_address_operand (call_op, Pmode);
22999 bool seh_nop_p = false;
23002 if (SIBLING_CALL_P (insn))
23004 if (direct_p)
23005 xasm = "jmp\t%P0";
23006 /* SEH epilogue detection requires the indirect branch case
23007 to include REX.W. */
23008 else if (TARGET_SEH)
23009 xasm = "rex.W jmp %A0";
23010 else
23011 xasm = "jmp\t%A0";
23013 output_asm_insn (xasm, &call_op);
23014 return "";
23017 /* SEH unwinding can require an extra nop to be emitted in several
23018 circumstances. Determine if we have one of those. */
23023 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
23025 /* If we get to another real insn, we don't need the nop. */
23026 if (INSN_P (i))
23027 break;
23029 /* If we get to the epilogue note, prevent a catch region from
23030 being adjacent to the standard epilogue sequence. If non-call
23031 exceptions are enabled, we'll have done this during epilogue emission. */
23032 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
23033 && !flag_non_call_exceptions
23034 && !can_throw_internal (insn))
23036 seh_nop_p = true;
23037 break;
23041 /* If we didn't find a real insn following the call, prevent the
23042 unwinder from looking into the next function. */
23043 if (i == NULL)
23044 seh_nop_p = true;
23047 if (direct_p)
23048 xasm = "call\t%P0";
23049 else
23050 xasm = "call\t%A0";
23052 output_asm_insn (xasm, &call_op);
23060 /* Clear stack slot assignments remembered from previous functions.
23061 This is called from INIT_EXPANDERS once before RTL is emitted for each
23062 function. */
23064 static struct machine_function *
23065 ix86_init_machine_status (void)
23067 struct machine_function *f;
23069 f = ggc_alloc_cleared_machine_function ();
23070 f->use_fast_prologue_epilogue_nregs = -1;
23071 f->tls_descriptor_call_expanded_p = 0;
23072 f->call_abi = ix86_abi;
23077 /* Return a MEM corresponding to a stack slot with mode MODE.
23078 Allocate a new slot if necessary.
23080 The RTL for a function can have several slots available: N is
23081 which slot to use. */
23084 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
23086 struct stack_local_entry *s;
23088 gcc_assert (n < MAX_386_STACK_LOCALS);
23090 /* Virtual slot is valid only before vregs are instantiated. */
23091 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
23093 for (s = ix86_stack_locals; s; s = s->next)
23094 if (s->mode == mode && s->n == n)
23095 return validize_mem (copy_rtx (s->rtl));
23097 s = ggc_alloc_stack_local_entry ();
23098 s->mode = mode;
23099 s->n = n;
23100 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
23102 s->next = ix86_stack_locals;
23103 ix86_stack_locals = s;
23104 return validize_mem (s->rtl);
23107 /* Calculate the length of the memory address in the instruction encoding.
23108 Includes addr32 prefix, does not include the one-byte modrm, opcode,
23109 or other prefixes. */
23112 memory_address_length (rtx addr)
23114 struct ix86_address parts;
23115 rtx base, index, disp;
23119 if (GET_CODE (addr) == PRE_DEC
23120 || GET_CODE (addr) == POST_INC
23121 || GET_CODE (addr) == PRE_MODIFY
23122 || GET_CODE (addr) == POST_MODIFY)
23123 return 0;
23125 ok = ix86_decompose_address (addr, &parts);
23126 gcc_assert (ok);
23128 if (parts.base && GET_CODE (parts.base) == SUBREG)
23129 parts.base = SUBREG_REG (parts.base);
23130 if (parts.index && GET_CODE (parts.index) == SUBREG)
23131 parts.index = SUBREG_REG (parts.index);
23133 base = parts.base;
23134 index = parts.index;
23135 disp = parts.disp;
23137 /* Add length of addr32 prefix. */
23138 len = (GET_CODE (addr) == ZERO_EXTEND
23139 || GET_CODE (addr) == AND);
23141 /* Rule of thumb:
23142 - esp as the base always wants an index,
23143 - ebp as the base always wants a displacement,
23144 - r12 as the base always wants an index,
23145 - r13 as the base always wants a displacement. */
23147 /* Register Indirect. */
23148 if (base && !index && !disp)
23150 /* esp (for its index) and ebp (for its displacement) need
23151 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
23152 code. */
23153 if (REG_P (addr)
23154 && (addr == arg_pointer_rtx
23155 || addr == frame_pointer_rtx
23156 || REGNO (addr) == SP_REG
23157 || REGNO (addr) == BP_REG
23158 || REGNO (addr) == R12_REG
23159 || REGNO (addr) == R13_REG))
23160 len = 1;
23163 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
23164 is not disp32, but disp32(%rip), so for disp32
23165 SIB byte is needed, unless print_operand_address
23166 optimizes it into disp32(%rip) or (%rip) is implied
23167 by UNSPEC. */
23168 else if (disp && !base && !index)
23175 if (GET_CODE (disp) == CONST)
23176 symbol = XEXP (disp, 0);
23177 if (GET_CODE (symbol) == PLUS
23178 && CONST_INT_P (XEXP (symbol, 1)))
23179 symbol = XEXP (symbol, 0);
23181 if (GET_CODE (symbol) != LABEL_REF
23182 && (GET_CODE (symbol) != SYMBOL_REF
23183 || SYMBOL_REF_TLS_MODEL (symbol) != 0)
23184 && (GET_CODE (symbol) != UNSPEC
23185 || (XINT (symbol, 1) != UNSPEC_GOTPCREL
23186 && XINT (symbol, 1) != UNSPEC_PCREL
23187 && XINT (symbol, 1) != UNSPEC_GOTNTPOFF)))
23194 /* Find the length of the displacement constant. */
23195 if (disp)
23197 if (base && satisfies_constraint_K (disp))
23198 len = 1;
23199 else
23200 len = 4;
23202 /* ebp always wants a displacement. Similarly r13. */
23203 else if (base && REG_P (base)
23204 && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
23205 len = 1;
23207 /* An index requires the two-byte modrm form.... */
23209 /* ...like esp (or r12), which always wants an index. */
23210 || base == arg_pointer_rtx
23211 || base == frame_pointer_rtx
23212 || (base && REG_P (base)
23213 && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
23214 len += 1;
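/* Editorial examples of the rules above (lengths exclude the modrm
   byte): (%eax) adds nothing; (%esp) and (%r12) add a SIB byte;
   (%ebp) and (%r13) add a disp8 byte; a displacement adds 4 bytes,
   or just 1 when it fits the signed 8-bit 'K' constraint and a base
   register is present.  */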
23230 /* Compute the default value for the "length_immediate" attribute. When
23231 SHORTFORM is set, expect that the insn has an 8-bit immediate alternative. */
23233 ix86_attr_length_immediate_default (rtx insn, bool shortform)
23237 extract_insn_cached (insn);
23238 for (i = recog_data.n_operands - 1; i >= 0; --i)
23239 if (CONSTANT_P (recog_data.operand[i]))
23241 enum attr_mode mode = get_attr_mode (insn);
23244 if (shortform && CONST_INT_P (recog_data.operand[i]))
23246 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
23253 ival = trunc_int_for_mode (ival, HImode);
23256 ival = trunc_int_for_mode (ival, SImode);
23261 if (IN_RANGE (ival, -128, 127))
23262 return 1;
23278 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
23283 fatal_insn ("unknown insn mode", insn);
23288 /* Compute the default value for the "length_address" attribute. */
23290 ix86_attr_length_address_default (rtx insn)
23294 if (get_attr_type (insn) == TYPE_LEA)
23296 rtx set = PATTERN (insn), addr;
23298 if (GET_CODE (set) == PARALLEL)
23299 set = XVECEXP (set, 0, 0);
23301 gcc_assert (GET_CODE (set) == SET);
23303 addr = SET_SRC (set);
23304 if (TARGET_64BIT && get_attr_mode (insn) == MODE_SI)
23306 if (GET_CODE (addr) == ZERO_EXTEND)
23307 addr = XEXP (addr, 0);
23308 if (GET_CODE (addr) == SUBREG)
23309 addr = SUBREG_REG (addr);
23312 return memory_address_length (addr);
23315 extract_insn_cached (insn);
23316 for (i = recog_data.n_operands - 1; i >= 0; --i)
23317 if (MEM_P (recog_data.operand[i]))
23319 constrain_operands_cached (reload_completed);
23320 if (which_alternative != -1)
23322 const char *constraints = recog_data.constraints[i];
23323 int alt = which_alternative;
23325 while (*constraints == '=' || *constraints == '+')
23326 constraints++;
23327 while (alt-- > 0)
23328 while (*constraints++ != ',')
23329 ;
23330 /* Skip ignored operands. */
23331 if (*constraints == 'X')
23332 continue;
23334 return memory_address_length (XEXP (recog_data.operand[i], 0));
23339 /* Compute the default value for the "length_vex" attribute. It includes
23340 the 2- or 3-byte VEX prefix and 1 opcode byte. */
23343 ix86_attr_length_vex_default (rtx insn, bool has_0f_opcode, bool has_vex_w)
23347 /* Only the 0f opcode map can use the 2-byte VEX prefix, and the VEX.W
23348 bit requires the 3-byte VEX prefix. */
23349 if (!has_0f_opcode || has_vex_w)
23350 return 3 + 1;
23352 /* We can always use the 2-byte VEX prefix in 32-bit mode. */
23353 if (!TARGET_64BIT)
23354 return 2 + 1;
23356 extract_insn_cached (insn);
23358 for (i = recog_data.n_operands - 1; i >= 0; --i)
23359 if (REG_P (recog_data.operand[i]))
23361 /* The REX.W bit requires the 3-byte VEX prefix. */
23362 if (GET_MODE (recog_data.operand[i]) == DImode
23363 && GENERAL_REG_P (recog_data.operand[i]))
23364 return 3 + 1;
23368 /* The REX.X or REX.B bits require the 3-byte VEX prefix. */
23369 if (MEM_P (recog_data.operand[i])
23370 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
23371 return 3 + 1;
23374 return 2 + 1;
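/* Editorial examples: a 0f-map insn such as vaddps %xmm1, %xmm2, %xmm3
   can use the 2-byte C5 prefix, giving 2 + 1; an insn with a DImode
   general register operand (VEX.W) or with an extended register
   (%xmm8-%xmm15, %r8-%r15) in a memory address (REX.X/REX.B) needs
   the 3-byte C4 prefix, giving 3 + 1.  */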
23377 /* Return the maximum number of instructions a cpu can issue. */
23380 ix86_issue_rate (void)
23384 case PROCESSOR_PENTIUM:
23385 case PROCESSOR_ATOM:
23389 case PROCESSOR_PENTIUMPRO:
23390 case PROCESSOR_PENTIUM4:
23391 case PROCESSOR_CORE2_32:
23392 case PROCESSOR_CORE2_64:
23393 case PROCESSOR_COREI7_32:
23394 case PROCESSOR_COREI7_64:
23395 case PROCESSOR_ATHLON:
23397 case PROCESSOR_AMDFAM10:
23398 case PROCESSOR_NOCONA:
23399 case PROCESSOR_GENERIC32:
23400 case PROCESSOR_GENERIC64:
23401 case PROCESSOR_BDVER1:
23402 case PROCESSOR_BDVER2:
23403 case PROCESSOR_BTVER1:
23411 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
23412 by DEP_INSN and nothing else set by DEP_INSN. */
23415 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
23419 /* Simplify the test for uninteresting insns. */
23420 if (insn_type != TYPE_SETCC
23421 && insn_type != TYPE_ICMOV
23422 && insn_type != TYPE_FCMOV
23423 && insn_type != TYPE_IBR)
23426 if ((set = single_set (dep_insn)) != 0)
23428 set = SET_DEST (set);
23431 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
23432 && XVECLEN (PATTERN (dep_insn), 0) == 2
23433 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
23434 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
23436 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
23437 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
23442 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
23443 return false;
23445 /* This test is true if the dependent insn reads the flags but
23446 not any other potentially set register. */
23447 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
23448 return false;
23450 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
23451 return false;
23453 return true;
23456 /* Return true iff USE_INSN has a memory address with operands set by
23457 SET_INSN. */
23460 ix86_agi_dependent (rtx set_insn, rtx use_insn)
23463 extract_insn_cached (use_insn);
23464 for (i = recog_data.n_operands - 1; i >= 0; --i)
23465 if (MEM_P (recog_data.operand[i]))
23467 rtx addr = XEXP (recog_data.operand[i], 0);
23468 return modified_in_p (addr, set_insn) != 0;
23474 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
23476 enum attr_type insn_type, dep_insn_type;
23477 enum attr_memory memory;
23479 int dep_insn_code_number;
23481 /* Anti and output dependencies have zero cost on all CPUs. */
23482 if (REG_NOTE_KIND (link) != 0)
23483 return 0;
23485 dep_insn_code_number = recog_memoized (dep_insn);
23487 /* If we can't recognize the insns, we can't really do anything. */
23488 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
23489 return cost;
23491 insn_type = get_attr_type (insn);
23492 dep_insn_type = get_attr_type (dep_insn);
23496 case PROCESSOR_PENTIUM:
23497 /* Address Generation Interlock adds a cycle of latency. */
23498 if (insn_type == TYPE_LEA)
23500 rtx addr = PATTERN (insn);
23502 if (GET_CODE (addr) == PARALLEL)
23503 addr = XVECEXP (addr, 0, 0);
23505 gcc_assert (GET_CODE (addr) == SET);
23507 addr = SET_SRC (addr);
23508 if (modified_in_p (addr, dep_insn))
23509 cost += 1;
23511 else if (ix86_agi_dependent (dep_insn, insn))
23512 cost += 1;
23514 /* ??? Compares pair with jump/setcc. */
23515 if (ix86_flags_dependent (insn, dep_insn, insn_type))
23516 cost += 1;
23518 /* Floating point stores require the value to be ready one cycle earlier. */
23519 if (insn_type == TYPE_FMOV
23520 && get_attr_memory (insn) == MEMORY_STORE
23521 && !ix86_agi_dependent (dep_insn, insn))
23522 cost += 1;
23523 break;
23525 case PROCESSOR_PENTIUMPRO:
23526 memory = get_attr_memory (insn);
23528 /* INT->FP conversion is expensive. */
23529 if (get_attr_fp_int_src (dep_insn))
23530 cost += 5;
23532 /* There is one extra cycle of latency between an FP op and a store. */
23533 if (insn_type == TYPE_FMOV
23534 && (set = single_set (dep_insn)) != NULL_RTX
23535 && (set2 = single_set (insn)) != NULL_RTX
23536 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
23537 && MEM_P (SET_DEST (set2)))
23538 cost += 1;
23540 /* Model the reorder buffer's ability to hide the latency of a load by
23541 executing it in parallel with the previous instruction when the
23542 previous instruction is not needed to compute the address. */
23543 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
23544 && !ix86_agi_dependent (dep_insn, insn))
23546 /* Claim moves to take one cycle, as the core can issue one load
23547 at a time and the next load can start a cycle later. */
23548 if (dep_insn_type == TYPE_IMOV
23549 || dep_insn_type == TYPE_FMOV)
23550 cost = 1;
23557 memory = get_attr_memory (insn);
23559 /* The esp dependency is resolved before the instruction is really
23560 finished. */
23561 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
23562 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
23565 /* INT->FP conversion is expensive. */
23566 if (get_attr_fp_int_src (dep_insn))
23567 cost += 5;
23569 /* Model the reorder buffer's ability to hide the latency of a load by
23570 executing it in parallel with the previous instruction when the
23571 previous instruction is not needed to compute the address. */
23572 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
23573 && !ix86_agi_dependent (dep_insn, insn))
23575 /* Claim moves to take one cycle, as the core can issue one load
23576 at a time and the next load can start a cycle later. */
23577 if (dep_insn_type == TYPE_IMOV
23578 || dep_insn_type == TYPE_FMOV)
23579 cost = 1;
23587 case PROCESSOR_ATHLON:
23589 case PROCESSOR_AMDFAM10:
23590 case PROCESSOR_BDVER1:
23591 case PROCESSOR_BDVER2:
23592 case PROCESSOR_BTVER1:
23593 case PROCESSOR_ATOM:
23594 case PROCESSOR_GENERIC32:
23595 case PROCESSOR_GENERIC64:
23596 memory = get_attr_memory (insn);
23598 /* Model the reorder buffer's ability to hide the latency of a load by
23599 executing it in parallel with the previous instruction when the
23600 previous instruction is not needed to compute the address. */
23601 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
23602 && !ix86_agi_dependent (dep_insn, insn))
23604 enum attr_unit unit = get_attr_unit (insn);
23605 int loadcost = 3;
23607 /* Because of the difference between the length of the integer and
23608 floating unit pipeline preparation stages, the memory operands
23609 for floating point are cheaper.
23611 ??? For Athlon the difference is most probably 2. */
23612 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
23613 loadcost = 3;
23614 else
23615 loadcost = TARGET_ATHLON ? 2 : 0;
23617 if (cost >= loadcost)
23618 cost -= loadcost;
23619 else
23620 cost = 0;
23630 /* How many alternative schedules to try. This should be as wide as the
23631 scheduling freedom in the DFA, but no wider. Making this value too
23632 large results in extra work for the scheduler. */
23635 ia32_multipass_dfa_lookahead (void)
23639 case PROCESSOR_PENTIUM:
23642 case PROCESSOR_PENTIUMPRO:
23646 case PROCESSOR_CORE2_32:
23647 case PROCESSOR_CORE2_64:
23648 case PROCESSOR_COREI7_32:
23649 case PROCESSOR_COREI7_64:
23650 /* Generally, we want haifa-sched:max_issue() to look ahead as far
23651 as the number of instructions that can be executed in a cycle, i.e.,
23652 issue_rate. I wonder why tuning for many CPUs does not do this. */
23653 return ix86_issue_rate ();
23662 /* Model decoder of Core 2/i7.
23663 Below hooks for multipass scheduling (see haifa-sched.c:max_issue)
23664 track the instruction fetch block boundaries and make sure that long
23665 (9+ bytes) instructions are assigned to D0. */
23667 /* Maximum length of an insn that can be handled by
23668 a secondary decoder unit. '8' for Core 2/i7. */
23669 static int core2i7_secondary_decoder_max_insn_size;
23671 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
23672 '16' for Core 2/i7. */
23673 static int core2i7_ifetch_block_size;
23675 /* Maximum number of instructions decoder can handle per cycle.
23676 '6' for Core 2/i7. */
23677 static int core2i7_ifetch_block_max_insns;
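/* Illustrative arithmetic (editorial) from the parameters above: the
   decoders see at most 16 bytes and 6 insns per cycle, so e.g. two
   7-byte insns plus one 2-byte insn exactly fill a fetch block, and
   any insn longer than 8 bytes can only be taken by the first
   decoder, D0.  */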
23679 typedef struct ix86_first_cycle_multipass_data_ *
23680 ix86_first_cycle_multipass_data_t;
23681 typedef const struct ix86_first_cycle_multipass_data_ *
23682 const_ix86_first_cycle_multipass_data_t;
23684 /* A variable to store target state across calls to max_issue within
23685 one cycle. */
23686 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
23687 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
23689 /* Initialize DATA. */
23691 core2i7_first_cycle_multipass_init (void *_data)
23693 ix86_first_cycle_multipass_data_t data
23694 = (ix86_first_cycle_multipass_data_t) _data;
23696 data->ifetch_block_len = 0;
23697 data->ifetch_block_n_insns = 0;
23698 data->ready_try_change = NULL;
23699 data->ready_try_change_size = 0;
23702 /* Advancing the cycle; reset ifetch block counts. */
23704 core2i7_dfa_post_advance_cycle (void)
23706 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
23708 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
23710 data->ifetch_block_len = 0;
23711 data->ifetch_block_n_insns = 0;
23714 static int min_insn_size (rtx);
23716 /* Filter out insns from ready_try that the core will not be able to issue
23717 on the current cycle due to decoder restrictions. */
23719 core2i7_first_cycle_multipass_filter_ready_try
23720 (const_ix86_first_cycle_multipass_data_t data,
23721 char *ready_try, int n_ready, bool first_cycle_insn_p)
23728 if (ready_try[n_ready])
23731 insn = get_ready_element (n_ready);
23732 insn_size = min_insn_size (insn);
23734 if (/* If this insn is too long for a secondary decoder ... */
23735 (!first_cycle_insn_p
23736 && insn_size > core2i7_secondary_decoder_max_insn_size)
23737 /* ... or it would not fit into the ifetch block ... */
23738 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
23739 /* ... or the decoder is full already ... */
23740 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
23741 /* ... mask the insn out. */
23743 ready_try[n_ready] = 1;
23745 if (data->ready_try_change)
23746 SET_BIT (data->ready_try_change, n_ready);
23751 /* Prepare for a new round of multipass lookahead scheduling. */
23753 core2i7_first_cycle_multipass_begin (void *_data, char *ready_try, int n_ready,
23754 bool first_cycle_insn_p)
23756 ix86_first_cycle_multipass_data_t data
23757 = (ix86_first_cycle_multipass_data_t) _data;
23758 const_ix86_first_cycle_multipass_data_t prev_data
23759 = ix86_first_cycle_multipass_data;
23761 /* Restore the state from the end of the previous round. */
23762 data->ifetch_block_len = prev_data->ifetch_block_len;
23763 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
23765 /* Filter instructions that cannot be issued on current cycle due to
23766 decoder restrictions. */
23767 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
23768 first_cycle_insn_p);
23771 /* INSN is being issued in current solution. Account for its impact on
23772 the decoder model. */
23774 core2i7_first_cycle_multipass_issue (void *_data, char *ready_try, int n_ready,
23775 rtx insn, const void *_prev_data)
23777 ix86_first_cycle_multipass_data_t data
23778 = (ix86_first_cycle_multipass_data_t) _data;
23779 const_ix86_first_cycle_multipass_data_t prev_data
23780 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
23782 int insn_size = min_insn_size (insn);
23784 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
23785 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
23786 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
23787 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
23789 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
23790 if (!data->ready_try_change)
23792 data->ready_try_change = sbitmap_alloc (n_ready);
23793 data->ready_try_change_size = n_ready;
23795 else if (data->ready_try_change_size < n_ready)
23797 data->ready_try_change = sbitmap_resize (data->ready_try_change,
23799 data->ready_try_change_size = n_ready;
23801 sbitmap_zero (data->ready_try_change);
23803 /* Filter out insns from ready_try that the core will not be able to issue
23804 on the current cycle due to decoder restrictions. */
23805 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
23809 /* Revert the effect on ready_try. */
23811 core2i7_first_cycle_multipass_backtrack (const void *_data,
23813 int n_ready ATTRIBUTE_UNUSED)
23815 const_ix86_first_cycle_multipass_data_t data
23816 = (const_ix86_first_cycle_multipass_data_t) _data;
23817 unsigned int i = 0;
23818 sbitmap_iterator sbi;
23820 gcc_assert (sbitmap_last_set_bit (data->ready_try_change) < n_ready);
23821 EXECUTE_IF_SET_IN_SBITMAP (data->ready_try_change, 0, i, sbi)
23827 /* Save the result of multipass lookahead scheduling for the next round. */
23829 core2i7_first_cycle_multipass_end (const void *_data)
23831 const_ix86_first_cycle_multipass_data_t data
23832 = (const_ix86_first_cycle_multipass_data_t) _data;
23833 ix86_first_cycle_multipass_data_t next_data
23834 = ix86_first_cycle_multipass_data;
23838 next_data->ifetch_block_len = data->ifetch_block_len;
23839 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
23843 /* Deallocate target data. */
23845 core2i7_first_cycle_multipass_fini (void *_data)
23847 ix86_first_cycle_multipass_data_t data
23848 = (ix86_first_cycle_multipass_data_t) _data;
23850 if (data->ready_try_change)
23852 sbitmap_free (data->ready_try_change);
23853 data->ready_try_change = NULL;
23854 data->ready_try_change_size = 0;
23858 /* Prepare for scheduling pass. */
23860 ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
23861 int verbose ATTRIBUTE_UNUSED,
23862 int max_uid ATTRIBUTE_UNUSED)
23864 /* Install scheduling hooks for current CPU. Some of these hooks are used
23865 in time-critical parts of the scheduler, so we only set them up when
23866 they are actually used. */
23869 case PROCESSOR_CORE2_32:
23870 case PROCESSOR_CORE2_64:
23871 case PROCESSOR_COREI7_32:
23872 case PROCESSOR_COREI7_64:
23873 targetm.sched.dfa_post_advance_cycle
23874 = core2i7_dfa_post_advance_cycle;
23875 targetm.sched.first_cycle_multipass_init
23876 = core2i7_first_cycle_multipass_init;
23877 targetm.sched.first_cycle_multipass_begin
23878 = core2i7_first_cycle_multipass_begin;
23879 targetm.sched.first_cycle_multipass_issue
23880 = core2i7_first_cycle_multipass_issue;
23881 targetm.sched.first_cycle_multipass_backtrack
23882 = core2i7_first_cycle_multipass_backtrack;
23883 targetm.sched.first_cycle_multipass_end
23884 = core2i7_first_cycle_multipass_end;
23885 targetm.sched.first_cycle_multipass_fini
23886 = core2i7_first_cycle_multipass_fini;
23888 /* Set decoder parameters. */
23889 core2i7_secondary_decoder_max_insn_size = 8;
23890 core2i7_ifetch_block_size = 16;
23891 core2i7_ifetch_block_max_insns = 6;
23895 targetm.sched.dfa_post_advance_cycle = NULL;
23896 targetm.sched.first_cycle_multipass_init = NULL;
23897 targetm.sched.first_cycle_multipass_begin = NULL;
23898 targetm.sched.first_cycle_multipass_issue = NULL;
23899 targetm.sched.first_cycle_multipass_backtrack = NULL;
23900 targetm.sched.first_cycle_multipass_end = NULL;
23901 targetm.sched.first_cycle_multipass_fini = NULL;
23907 /* Compute the alignment given to a constant that is being placed in memory.
23908 EXP is the constant and ALIGN is the alignment that the object would
23909 ordinarily have.
23910 The value of this function is used instead of that alignment to align
23911 the object. */
23914 ix86_constant_alignment (tree exp, int align)
23916 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
23917 || TREE_CODE (exp) == INTEGER_CST)
23919 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
23921 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
23924 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
23925 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
23926 return BITS_PER_WORD;
23931 /* Compute the alignment for a static variable.
23932 TYPE is the data type, and ALIGN is the alignment that
23933 the object would ordinarily have. The value of this function is used
23934 instead of that alignment to align the object. */
23937 ix86_data_alignment (tree type, int align)
23939 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
23941 if (AGGREGATE_TYPE_P (type)
23942 && TYPE_SIZE (type)
23943 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
23944 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
23945 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
23946 && align < max_align)
23949 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
23950 to a 16-byte boundary. */
23953 if (AGGREGATE_TYPE_P (type)
23954 && TYPE_SIZE (type)
23955 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
23956 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
23957 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
23961 if (TREE_CODE (type) == ARRAY_TYPE)
23963 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
23965 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
23968 else if (TREE_CODE (type) == COMPLEX_TYPE)
23971 if (TYPE_MODE (type) == DCmode && align < 64)
23973 if ((TYPE_MODE (type) == XCmode
23974 || TYPE_MODE (type) == TCmode) && align < 128)
23977 else if ((TREE_CODE (type) == RECORD_TYPE
23978 || TREE_CODE (type) == UNION_TYPE
23979 || TREE_CODE (type) == QUAL_UNION_TYPE)
23980 && TYPE_FIELDS (type))
23982 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
23984 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
23987 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
23988 || TREE_CODE (type) == INTEGER_TYPE)
23990 if (TYPE_MODE (type) == DFmode && align < 64)
23992 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
23999 /* Compute the alignment for a local variable or a stack slot. EXP is
24000 the data type or decl itself, MODE is the widest mode available and
24001 ALIGN is the alignment that the object would ordinarily have. The
24002 value of this macro is used instead of that alignment to align the
24006 ix86_local_alignment (tree exp, enum machine_mode mode,
24007 unsigned int align)
24011 if (exp && DECL_P (exp))
24013 type = TREE_TYPE (exp);
24022 /* Don't do dynamic stack realignment for long long objects with
24023 -mpreferred-stack-boundary=2. */
24026 && ix86_preferred_stack_boundary < 64
24027 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
24028 && (!type || !TYPE_USER_ALIGN (type))
24029 && (!decl || !DECL_USER_ALIGN (decl)))
24032 /* If TYPE is NULL, we are allocating a stack slot for a caller-save
24033 register in MODE. We will return the largest alignment of XF
24034 and DF. */
24037 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
24038 align = GET_MODE_ALIGNMENT (DFmode);
24042 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
24043 to a 16-byte boundary. The exact wording is:
24045 An array uses the same alignment as its elements, except that a local or
24046 global array variable of length at least 16 bytes or
24047 a C99 variable-length array variable always has alignment of at least 16 bytes.
24049 This was added to allow use of aligned SSE instructions on arrays. The
24050 rule is meant for static storage (where the compiler cannot do the analysis
24051 by itself). We follow it for automatic variables only when convenient.
24052 We fully control everything in the function being compiled, and functions
24053 from other units cannot rely on the alignment.
24055 Exclude the va_list type. It is the common case of a local array where
24056 we cannot benefit from the alignment. */
24057 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
24060 if (AGGREGATE_TYPE_P (type)
24061 && (va_list_type_node == NULL_TREE
24062 || (TYPE_MAIN_VARIANT (type)
24063 != TYPE_MAIN_VARIANT (va_list_type_node)))
24064 && TYPE_SIZE (type)
24065 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
24066 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
24067 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
24070 if (TREE_CODE (type) == ARRAY_TYPE)
24072 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
24074 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
24077 else if (TREE_CODE (type) == COMPLEX_TYPE)
24079 if (TYPE_MODE (type) == DCmode && align < 64)
24081 if ((TYPE_MODE (type) == XCmode
24082 || TYPE_MODE (type) == TCmode) && align < 128)
24085 else if ((TREE_CODE (type) == RECORD_TYPE
24086 || TREE_CODE (type) == UNION_TYPE
24087 || TREE_CODE (type) == QUAL_UNION_TYPE)
24088 && TYPE_FIELDS (type))
24090 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
24092 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
24095 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
24096 || TREE_CODE (type) == INTEGER_TYPE)
24099 if (TYPE_MODE (type) == DFmode && align < 64)
24101 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
24107 /* Compute the minimum required alignment for dynamic stack realignment
24108 purposes for a local variable, parameter or a stack slot. EXP is
24109 the data type or decl itself, MODE is its mode and ALIGN is the
24110 alignment that the object would ordinarily have. */
24113 ix86_minimum_alignment (tree exp, enum machine_mode mode,
24114 unsigned int align)
24118 if (exp && DECL_P (exp))
24120 type = TREE_TYPE (exp);
24129 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
24130 return align;
24132 /* Don't do dynamic stack realignment for long long objects with
24133 -mpreferred-stack-boundary=2. */
24134 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
24135 && (!type || !TYPE_USER_ALIGN (type))
24136 && (!decl || !DECL_USER_ALIGN (decl)))
24137 return 32;
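/* Illustrative example (added commentary, not part of the original file):
   with -m32 -mpreferred-stack-boundary=2, the DImode local below would
   ordinarily request 8-byte alignment and so force dynamic stack
   realignment; the hook above lowers the requirement instead.  The
   function name is hypothetical.  */

#if 0	/* Example only.  */
long long
example_minimum_alignment (const long long *p)
{
  long long tmp = *p;		/* 8-byte object on a 4-byte-aligned stack.  */
  return tmp + 1;
}
#endif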
24142 /* Find a location for the static chain incoming to a nested function.
24143 This is a register, unless all free registers are used by arguments. */
24146 ix86_static_chain (const_tree fndecl, bool incoming_p)
24150 if (!DECL_STATIC_CHAIN (fndecl))
24151 return NULL;
24155 /* We always use R10 in 64-bit mode. */
24163 /* By default in 32-bit mode we use ECX to pass the static chain. */
24166 fntype = TREE_TYPE (fndecl);
24167 ccvt = ix86_get_callcvt (fntype);
24168 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) != 0)
24170 /* Fastcall functions use ecx/edx for arguments, which leaves
24171 us with EAX for the static chain.
24172 Thiscall functions use ecx for arguments, which also
24173 leaves us with EAX for the static chain. */
24174 regno = AX_REG;
24176 else if (ix86_function_regparm (fntype, fndecl) == 3)
24178 /* For regparm 3, we have no free call-clobbered registers in
24179 which to store the static chain. In order to implement this,
24180 we have the trampoline push the static chain to the stack.
24181 However, we can't push a value below the return address when
24182 we call the nested function directly, so we have to use an
24183 alternate entry point. For this we use ESI, and have the
24184 alternate entry point push ESI, so that things appear the
24185 same once we're executing the nested function. */
24186 if (incoming_p)
24188 if (fndecl == current_function_decl)
24189 ix86_static_chain_on_stack = true;
24190 return gen_frame_mem (SImode,
24191 plus_constant (arg_pointer_rtx, -8));
24197 return gen_rtx_REG (Pmode, regno);
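/* Illustrative example (added commentary, not part of the original file):
   a GNU C nested function that reads a variable of its enclosing
   function.  Taking its address forces a trampoline, and the pointer to
   the enclosing frame travels in the static chain location chosen above.
   The function names are hypothetical.  */

#if 0	/* Example only.  */
static int
example_static_chain (int x)
{
  int inner (int y) { return x + y; }	/* X is reached via the static chain.  */
  int (*fp) (int) = inner;		/* Address taken: a trampoline is built.  */
  return fp (1);
}
#endif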
24200 /* Emit RTL insns to initialize the variable parts of a trampoline.
24201 FNDECL is the decl of the target address; M_TRAMP is a MEM for
24202 the trampoline, and CHAIN_VALUE is an RTX for the static chain
24203 to be passed to the target function. */
24206 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
24212 fnaddr = XEXP (DECL_RTL (fndecl), 0);
24218 /* Load the function address to r11. Try to load address using
24219 the shorter movl instead of movabs. We may want to support
24220 movq for kernel mode, but kernel does not use trampolines at
24221 the moment. */
24222 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
24224 fnaddr = copy_to_mode_reg (DImode, fnaddr);
24226 mem = adjust_address (m_tramp, HImode, offset);
24227 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
24229 mem = adjust_address (m_tramp, SImode, offset + 2);
24230 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
24235 mem = adjust_address (m_tramp, HImode, offset);
24236 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
24238 mem = adjust_address (m_tramp, DImode, offset + 2);
24239 emit_move_insn (mem, fnaddr);
24243 /* Load static chain using movabs to r10. Use the
24244 shorter movl instead of movabs for x32. */
24256 mem = adjust_address (m_tramp, HImode, offset);
24257 emit_move_insn (mem, gen_int_mode (opcode, HImode));
24259 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
24260 emit_move_insn (mem, chain_value);
24263 /* Jump to r11; the last (unused) byte is a nop, only there to
24264 pad the write out to a single 32-bit store. */
24265 mem = adjust_address (m_tramp, SImode, offset);
24266 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
24273 /* Depending on the static chain location, either load a register
24274 with a constant, or push the constant to the stack. All of the
24275 instructions are the same size. */
24276 chain = ix86_static_chain (fndecl, true);
24279 switch (REGNO (chain))
24281 case AX_REG:
24282 opcode = 0xb8; break;
24283 case CX_REG:
24284 opcode = 0xb9; break;
24285 default:
24286 gcc_unreachable ();
24292 mem = adjust_address (m_tramp, QImode, offset);
24293 emit_move_insn (mem, gen_int_mode (opcode, QImode));
24295 mem = adjust_address (m_tramp, SImode, offset + 1);
24296 emit_move_insn (mem, chain_value);
24299 mem = adjust_address (m_tramp, QImode, offset);
24300 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
24302 mem = adjust_address (m_tramp, SImode, offset + 1);
24304 /* Compute offset from the end of the jmp to the target function.
24305 In the case in which the trampoline stores the static chain on
24306 the stack, we need to skip the first insn which pushes the
24307 (call-saved) register static chain; this push is 1 byte. */
24309 disp = expand_binop (SImode, sub_optab, fnaddr,
24310 plus_constant (XEXP (m_tramp, 0),
24311 offset - (MEM_P (chain) ? 1 : 0)),
24312 NULL_RTX, 1, OPTAB_DIRECT);
24313 emit_move_insn (mem, disp);
24316 gcc_assert (offset <= TRAMPOLINE_SIZE);
24318 #ifdef HAVE_ENABLE_EXECUTE_STACK
24319 #ifdef CHECK_EXECUTE_STACK_ENABLED
24320 if (CHECK_EXECUTE_STACK_ENABLED)
24322 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
24323 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
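/* Illustrative sketch (added commentary, not part of the original file):
   byte for byte, the movabs-form 64-bit trampoline emitted above is

     49 bb <fnaddr:8>	movabs $fnaddr, %r11
     49 ba <chain:8>	movabs $chain,  %r10   (opcode per the comment above)
     49 ff e3		rex.WB jmp *%r11
     90			nop, padding the final 32-bit store

   A hypothetical stand-alone builder writing the same 24-byte layout
   (little-endian immediates); all names below are made up.  */

#if 0	/* Example only.  */
#include <stdint.h>
#include <string.h>

static void
example_build_tramp64 (uint8_t *tramp, uint64_t fnaddr, uint64_t chain)
{
  static const uint8_t mov_r11[2] = { 0x49, 0xbb };
  static const uint8_t mov_r10[2] = { 0x49, 0xba };
  static const uint8_t jmp_r11[4] = { 0x49, 0xff, 0xe3, 0x90 };

  memcpy (tramp, mov_r11, 2);		/* movabs $fnaddr, %r11 */
  memcpy (tramp + 2, &fnaddr, 8);
  memcpy (tramp + 10, mov_r10, 2);	/* movabs $chain, %r10 */
  memcpy (tramp + 12, &chain, 8);
  memcpy (tramp + 20, jmp_r11, 4);	/* jmp *%r11; nop */
}
#endif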
24327 /* The following file contains several enumerations and data structures
24328 built from the definitions in i386-builtin-types.def. */
24330 #include "i386-builtin-types.inc"
24332 /* Table for the ix86 builtin non-function types. */
24333 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
24335 /* Retrieve an element from the above table, building some of
24336 the types lazily. */
24339 ix86_get_builtin_type (enum ix86_builtin_type tcode)
24341 unsigned int index;
24344 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_type_tab));
24346 type = ix86_builtin_type_tab[(int) tcode];
24350 gcc_assert (tcode > IX86_BT_LAST_PRIM);
24351 if (tcode <= IX86_BT_LAST_VECT)
24353 enum machine_mode mode;
24355 index = tcode - IX86_BT_LAST_PRIM - 1;
24356 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
24357 mode = ix86_builtin_type_vect_mode[index];
24359 type = build_vector_type_for_mode (itype, mode);
24365 index = tcode - IX86_BT_LAST_VECT - 1;
24366 if (tcode <= IX86_BT_LAST_PTR)
24367 quals = TYPE_UNQUALIFIED;
24368 else
24369 quals = TYPE_QUAL_CONST;
24371 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
24372 if (quals != TYPE_UNQUALIFIED)
24373 itype = build_qualified_type (itype, quals);
24375 type = build_pointer_type (itype);
24378 ix86_builtin_type_tab[(int) tcode] = type;
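/* Illustrative sketch (added commentary, not part of the original file):
   the same build-on-demand memoization pattern as ix86_get_builtin_type
   above, reduced to a stand-alone toy.  All names are hypothetical.  */

#if 0	/* Example only.  */
enum toy_type { TOY_INT, TOY_PINT, TOY_LAST };

static const char *toy_type_tab[TOY_LAST];

static const char *
toy_get_type (enum toy_type tcode)
{
  const char *type = toy_type_tab[tcode];
  if (type)
    return type;			/* Built on an earlier call: reuse.  */
  type = (tcode == TOY_INT) ? "int" : "int *";	/* Build lazily.  */
  toy_type_tab[tcode] = type;		/* Memoize for the next request.  */
  return type;
}
#endif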
24382 /* Table for the ix86 builtin function types. */
24383 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
24385 /* Retrieve an element from the above table, building some of
24386 the types lazily. */
24389 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
24393 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
24395 type = ix86_builtin_func_type_tab[(int) tcode];
24399 if (tcode <= IX86_BT_LAST_FUNC)
24401 unsigned start = ix86_builtin_func_start[(int) tcode];
24402 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
24403 tree rtype, atype, args = void_list_node;
24406 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
24407 for (i = after - 1; i > start; --i)
24409 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
24410 args = tree_cons (NULL, atype, args);
24413 type = build_function_type (rtype, args);
24417 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
24418 enum ix86_builtin_func_type icode;
24420 icode = ix86_builtin_func_alias_base[index];
24421 type = ix86_get_builtin_func_type (icode);
24424 ix86_builtin_func_type_tab[(int) tcode] = type;
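/* Illustrative sketch (added commentary, not part of the original file):
   why the argument loop above runs from AFTER - 1 down to START + 1.
   Consing onto a singly linked list prepends, so starting from the
   "void" tail and visiting arguments last-to-first leaves them in source
   order.  All names are hypothetical.  */

#if 0	/* Example only.  */
#include <stdlib.h>

struct toy_cons { const char *head; struct toy_cons *tail; };

static struct toy_cons *
toy_cons (const char *head, struct toy_cons *tail)
{
  struct toy_cons *c = (struct toy_cons *) malloc (sizeof *c);
  c->head = head;
  c->tail = tail;
  return c;
}

static struct toy_cons *
toy_arg_list (const char **args, int n)
{
  struct toy_cons *list = toy_cons ("void", NULL);	/* Sentinel tail.  */
  int i;

  /* Walk backwards: result reads args[0] -> ... -> args[n-1] -> void.  */
  for (i = n - 1; i >= 0; --i)
    list = toy_cons (args[i], list);
  return list;
}
#endif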
24429 /* Codes for all the SSE/MMX builtins. */
24430 enum ix86_builtins
24432 IX86_BUILTIN_ADDPS,
24433 IX86_BUILTIN_ADDSS,
24434 IX86_BUILTIN_DIVPS,
24435 IX86_BUILTIN_DIVSS,
24436 IX86_BUILTIN_MULPS,
24437 IX86_BUILTIN_MULSS,
24438 IX86_BUILTIN_SUBPS,
24439 IX86_BUILTIN_SUBSS,
24441 IX86_BUILTIN_CMPEQPS,
24442 IX86_BUILTIN_CMPLTPS,
24443 IX86_BUILTIN_CMPLEPS,
24444 IX86_BUILTIN_CMPGTPS,
24445 IX86_BUILTIN_CMPGEPS,
24446 IX86_BUILTIN_CMPNEQPS,
24447 IX86_BUILTIN_CMPNLTPS,
24448 IX86_BUILTIN_CMPNLEPS,
24449 IX86_BUILTIN_CMPNGTPS,
24450 IX86_BUILTIN_CMPNGEPS,
24451 IX86_BUILTIN_CMPORDPS,
24452 IX86_BUILTIN_CMPUNORDPS,
24453 IX86_BUILTIN_CMPEQSS,
24454 IX86_BUILTIN_CMPLTSS,
24455 IX86_BUILTIN_CMPLESS,
24456 IX86_BUILTIN_CMPNEQSS,
24457 IX86_BUILTIN_CMPNLTSS,
24458 IX86_BUILTIN_CMPNLESS,
24459 IX86_BUILTIN_CMPNGTSS,
24460 IX86_BUILTIN_CMPNGESS,
24461 IX86_BUILTIN_CMPORDSS,
24462 IX86_BUILTIN_CMPUNORDSS,
24464 IX86_BUILTIN_COMIEQSS,
24465 IX86_BUILTIN_COMILTSS,
24466 IX86_BUILTIN_COMILESS,
24467 IX86_BUILTIN_COMIGTSS,
24468 IX86_BUILTIN_COMIGESS,
24469 IX86_BUILTIN_COMINEQSS,
24470 IX86_BUILTIN_UCOMIEQSS,
24471 IX86_BUILTIN_UCOMILTSS,
24472 IX86_BUILTIN_UCOMILESS,
24473 IX86_BUILTIN_UCOMIGTSS,
24474 IX86_BUILTIN_UCOMIGESS,
24475 IX86_BUILTIN_UCOMINEQSS,
24477 IX86_BUILTIN_CVTPI2PS,
24478 IX86_BUILTIN_CVTPS2PI,
24479 IX86_BUILTIN_CVTSI2SS,
24480 IX86_BUILTIN_CVTSI642SS,
24481 IX86_BUILTIN_CVTSS2SI,
24482 IX86_BUILTIN_CVTSS2SI64,
24483 IX86_BUILTIN_CVTTPS2PI,
24484 IX86_BUILTIN_CVTTSS2SI,
24485 IX86_BUILTIN_CVTTSS2SI64,
24487 IX86_BUILTIN_MAXPS,
24488 IX86_BUILTIN_MAXSS,
24489 IX86_BUILTIN_MINPS,
24490 IX86_BUILTIN_MINSS,
24492 IX86_BUILTIN_LOADUPS,
24493 IX86_BUILTIN_STOREUPS,
24494 IX86_BUILTIN_MOVSS,
24496 IX86_BUILTIN_MOVHLPS,
24497 IX86_BUILTIN_MOVLHPS,
24498 IX86_BUILTIN_LOADHPS,
24499 IX86_BUILTIN_LOADLPS,
24500 IX86_BUILTIN_STOREHPS,
24501 IX86_BUILTIN_STORELPS,
24503 IX86_BUILTIN_MASKMOVQ,
24504 IX86_BUILTIN_MOVMSKPS,
24505 IX86_BUILTIN_PMOVMSKB,
24507 IX86_BUILTIN_MOVNTPS,
24508 IX86_BUILTIN_MOVNTQ,
24510 IX86_BUILTIN_LOADDQU,
24511 IX86_BUILTIN_STOREDQU,
24513 IX86_BUILTIN_PACKSSWB,
24514 IX86_BUILTIN_PACKSSDW,
24515 IX86_BUILTIN_PACKUSWB,
24517 IX86_BUILTIN_PADDB,
24518 IX86_BUILTIN_PADDW,
24519 IX86_BUILTIN_PADDD,
24520 IX86_BUILTIN_PADDQ,
24521 IX86_BUILTIN_PADDSB,
24522 IX86_BUILTIN_PADDSW,
24523 IX86_BUILTIN_PADDUSB,
24524 IX86_BUILTIN_PADDUSW,
24525 IX86_BUILTIN_PSUBB,
24526 IX86_BUILTIN_PSUBW,
24527 IX86_BUILTIN_PSUBD,
24528 IX86_BUILTIN_PSUBQ,
24529 IX86_BUILTIN_PSUBSB,
24530 IX86_BUILTIN_PSUBSW,
24531 IX86_BUILTIN_PSUBUSB,
24532 IX86_BUILTIN_PSUBUSW,
24535 IX86_BUILTIN_PANDN,
24539 IX86_BUILTIN_PAVGB,
24540 IX86_BUILTIN_PAVGW,
24542 IX86_BUILTIN_PCMPEQB,
24543 IX86_BUILTIN_PCMPEQW,
24544 IX86_BUILTIN_PCMPEQD,
24545 IX86_BUILTIN_PCMPGTB,
24546 IX86_BUILTIN_PCMPGTW,
24547 IX86_BUILTIN_PCMPGTD,
24549 IX86_BUILTIN_PMADDWD,
24551 IX86_BUILTIN_PMAXSW,
24552 IX86_BUILTIN_PMAXUB,
24553 IX86_BUILTIN_PMINSW,
24554 IX86_BUILTIN_PMINUB,
24556 IX86_BUILTIN_PMULHUW,
24557 IX86_BUILTIN_PMULHW,
24558 IX86_BUILTIN_PMULLW,
24560 IX86_BUILTIN_PSADBW,
24561 IX86_BUILTIN_PSHUFW,
24563 IX86_BUILTIN_PSLLW,
24564 IX86_BUILTIN_PSLLD,
24565 IX86_BUILTIN_PSLLQ,
24566 IX86_BUILTIN_PSRAW,
24567 IX86_BUILTIN_PSRAD,
24568 IX86_BUILTIN_PSRLW,
24569 IX86_BUILTIN_PSRLD,
24570 IX86_BUILTIN_PSRLQ,
24571 IX86_BUILTIN_PSLLWI,
24572 IX86_BUILTIN_PSLLDI,
24573 IX86_BUILTIN_PSLLQI,
24574 IX86_BUILTIN_PSRAWI,
24575 IX86_BUILTIN_PSRADI,
24576 IX86_BUILTIN_PSRLWI,
24577 IX86_BUILTIN_PSRLDI,
24578 IX86_BUILTIN_PSRLQI,
24580 IX86_BUILTIN_PUNPCKHBW,
24581 IX86_BUILTIN_PUNPCKHWD,
24582 IX86_BUILTIN_PUNPCKHDQ,
24583 IX86_BUILTIN_PUNPCKLBW,
24584 IX86_BUILTIN_PUNPCKLWD,
24585 IX86_BUILTIN_PUNPCKLDQ,
24587 IX86_BUILTIN_SHUFPS,
24589 IX86_BUILTIN_RCPPS,
24590 IX86_BUILTIN_RCPSS,
24591 IX86_BUILTIN_RSQRTPS,
24592 IX86_BUILTIN_RSQRTPS_NR,
24593 IX86_BUILTIN_RSQRTSS,
24594 IX86_BUILTIN_RSQRTF,
24595 IX86_BUILTIN_SQRTPS,
24596 IX86_BUILTIN_SQRTPS_NR,
24597 IX86_BUILTIN_SQRTSS,
24599 IX86_BUILTIN_UNPCKHPS,
24600 IX86_BUILTIN_UNPCKLPS,
24602 IX86_BUILTIN_ANDPS,
24603 IX86_BUILTIN_ANDNPS,
24605 IX86_BUILTIN_XORPS,
24608 IX86_BUILTIN_LDMXCSR,
24609 IX86_BUILTIN_STMXCSR,
24610 IX86_BUILTIN_SFENCE,
24612 /* 3DNow! Original */
24613 IX86_BUILTIN_FEMMS,
24614 IX86_BUILTIN_PAVGUSB,
24615 IX86_BUILTIN_PF2ID,
24616 IX86_BUILTIN_PFACC,
24617 IX86_BUILTIN_PFADD,
24618 IX86_BUILTIN_PFCMPEQ,
24619 IX86_BUILTIN_PFCMPGE,
24620 IX86_BUILTIN_PFCMPGT,
24621 IX86_BUILTIN_PFMAX,
24622 IX86_BUILTIN_PFMIN,
24623 IX86_BUILTIN_PFMUL,
24624 IX86_BUILTIN_PFRCP,
24625 IX86_BUILTIN_PFRCPIT1,
24626 IX86_BUILTIN_PFRCPIT2,
24627 IX86_BUILTIN_PFRSQIT1,
24628 IX86_BUILTIN_PFRSQRT,
24629 IX86_BUILTIN_PFSUB,
24630 IX86_BUILTIN_PFSUBR,
24631 IX86_BUILTIN_PI2FD,
24632 IX86_BUILTIN_PMULHRW,
24634 /* 3DNow! Athlon Extensions */
24635 IX86_BUILTIN_PF2IW,
24636 IX86_BUILTIN_PFNACC,
24637 IX86_BUILTIN_PFPNACC,
24638 IX86_BUILTIN_PI2FW,
24639 IX86_BUILTIN_PSWAPDSI,
24640 IX86_BUILTIN_PSWAPDSF,
24643 IX86_BUILTIN_ADDPD,
24644 IX86_BUILTIN_ADDSD,
24645 IX86_BUILTIN_DIVPD,
24646 IX86_BUILTIN_DIVSD,
24647 IX86_BUILTIN_MULPD,
24648 IX86_BUILTIN_MULSD,
24649 IX86_BUILTIN_SUBPD,
24650 IX86_BUILTIN_SUBSD,
24652 IX86_BUILTIN_CMPEQPD,
24653 IX86_BUILTIN_CMPLTPD,
24654 IX86_BUILTIN_CMPLEPD,
24655 IX86_BUILTIN_CMPGTPD,
24656 IX86_BUILTIN_CMPGEPD,
24657 IX86_BUILTIN_CMPNEQPD,
24658 IX86_BUILTIN_CMPNLTPD,
24659 IX86_BUILTIN_CMPNLEPD,
24660 IX86_BUILTIN_CMPNGTPD,
24661 IX86_BUILTIN_CMPNGEPD,
24662 IX86_BUILTIN_CMPORDPD,
24663 IX86_BUILTIN_CMPUNORDPD,
24664 IX86_BUILTIN_CMPEQSD,
24665 IX86_BUILTIN_CMPLTSD,
24666 IX86_BUILTIN_CMPLESD,
24667 IX86_BUILTIN_CMPNEQSD,
24668 IX86_BUILTIN_CMPNLTSD,
24669 IX86_BUILTIN_CMPNLESD,
24670 IX86_BUILTIN_CMPORDSD,
24671 IX86_BUILTIN_CMPUNORDSD,
24673 IX86_BUILTIN_COMIEQSD,
24674 IX86_BUILTIN_COMILTSD,
24675 IX86_BUILTIN_COMILESD,
24676 IX86_BUILTIN_COMIGTSD,
24677 IX86_BUILTIN_COMIGESD,
24678 IX86_BUILTIN_COMINEQSD,
24679 IX86_BUILTIN_UCOMIEQSD,
24680 IX86_BUILTIN_UCOMILTSD,
24681 IX86_BUILTIN_UCOMILESD,
24682 IX86_BUILTIN_UCOMIGTSD,
24683 IX86_BUILTIN_UCOMIGESD,
24684 IX86_BUILTIN_UCOMINEQSD,
24686 IX86_BUILTIN_MAXPD,
24687 IX86_BUILTIN_MAXSD,
24688 IX86_BUILTIN_MINPD,
24689 IX86_BUILTIN_MINSD,
24691 IX86_BUILTIN_ANDPD,
24692 IX86_BUILTIN_ANDNPD,
24694 IX86_BUILTIN_XORPD,
24696 IX86_BUILTIN_SQRTPD,
24697 IX86_BUILTIN_SQRTSD,
24699 IX86_BUILTIN_UNPCKHPD,
24700 IX86_BUILTIN_UNPCKLPD,
24702 IX86_BUILTIN_SHUFPD,
24704 IX86_BUILTIN_LOADUPD,
24705 IX86_BUILTIN_STOREUPD,
24706 IX86_BUILTIN_MOVSD,
24708 IX86_BUILTIN_LOADHPD,
24709 IX86_BUILTIN_LOADLPD,
24711 IX86_BUILTIN_CVTDQ2PD,
24712 IX86_BUILTIN_CVTDQ2PS,
24714 IX86_BUILTIN_CVTPD2DQ,
24715 IX86_BUILTIN_CVTPD2PI,
24716 IX86_BUILTIN_CVTPD2PS,
24717 IX86_BUILTIN_CVTTPD2DQ,
24718 IX86_BUILTIN_CVTTPD2PI,
24720 IX86_BUILTIN_CVTPI2PD,
24721 IX86_BUILTIN_CVTSI2SD,
24722 IX86_BUILTIN_CVTSI642SD,
24724 IX86_BUILTIN_CVTSD2SI,
24725 IX86_BUILTIN_CVTSD2SI64,
24726 IX86_BUILTIN_CVTSD2SS,
24727 IX86_BUILTIN_CVTSS2SD,
24728 IX86_BUILTIN_CVTTSD2SI,
24729 IX86_BUILTIN_CVTTSD2SI64,
24731 IX86_BUILTIN_CVTPS2DQ,
24732 IX86_BUILTIN_CVTPS2PD,
24733 IX86_BUILTIN_CVTTPS2DQ,
24735 IX86_BUILTIN_MOVNTI,
24736 IX86_BUILTIN_MOVNTI64,
24737 IX86_BUILTIN_MOVNTPD,
24738 IX86_BUILTIN_MOVNTDQ,
24740 IX86_BUILTIN_MOVQ128,
24743 IX86_BUILTIN_MASKMOVDQU,
24744 IX86_BUILTIN_MOVMSKPD,
24745 IX86_BUILTIN_PMOVMSKB128,
24747 IX86_BUILTIN_PACKSSWB128,
24748 IX86_BUILTIN_PACKSSDW128,
24749 IX86_BUILTIN_PACKUSWB128,
24751 IX86_BUILTIN_PADDB128,
24752 IX86_BUILTIN_PADDW128,
24753 IX86_BUILTIN_PADDD128,
24754 IX86_BUILTIN_PADDQ128,
24755 IX86_BUILTIN_PADDSB128,
24756 IX86_BUILTIN_PADDSW128,
24757 IX86_BUILTIN_PADDUSB128,
24758 IX86_BUILTIN_PADDUSW128,
24759 IX86_BUILTIN_PSUBB128,
24760 IX86_BUILTIN_PSUBW128,
24761 IX86_BUILTIN_PSUBD128,
24762 IX86_BUILTIN_PSUBQ128,
24763 IX86_BUILTIN_PSUBSB128,
24764 IX86_BUILTIN_PSUBSW128,
24765 IX86_BUILTIN_PSUBUSB128,
24766 IX86_BUILTIN_PSUBUSW128,
24768 IX86_BUILTIN_PAND128,
24769 IX86_BUILTIN_PANDN128,
24770 IX86_BUILTIN_POR128,
24771 IX86_BUILTIN_PXOR128,
24773 IX86_BUILTIN_PAVGB128,
24774 IX86_BUILTIN_PAVGW128,
24776 IX86_BUILTIN_PCMPEQB128,
24777 IX86_BUILTIN_PCMPEQW128,
24778 IX86_BUILTIN_PCMPEQD128,
24779 IX86_BUILTIN_PCMPGTB128,
24780 IX86_BUILTIN_PCMPGTW128,
24781 IX86_BUILTIN_PCMPGTD128,
24783 IX86_BUILTIN_PMADDWD128,
24785 IX86_BUILTIN_PMAXSW128,
24786 IX86_BUILTIN_PMAXUB128,
24787 IX86_BUILTIN_PMINSW128,
24788 IX86_BUILTIN_PMINUB128,
24790 IX86_BUILTIN_PMULUDQ,
24791 IX86_BUILTIN_PMULUDQ128,
24792 IX86_BUILTIN_PMULHUW128,
24793 IX86_BUILTIN_PMULHW128,
24794 IX86_BUILTIN_PMULLW128,
24796 IX86_BUILTIN_PSADBW128,
24797 IX86_BUILTIN_PSHUFHW,
24798 IX86_BUILTIN_PSHUFLW,
24799 IX86_BUILTIN_PSHUFD,
24801 IX86_BUILTIN_PSLLDQI128,
24802 IX86_BUILTIN_PSLLWI128,
24803 IX86_BUILTIN_PSLLDI128,
24804 IX86_BUILTIN_PSLLQI128,
24805 IX86_BUILTIN_PSRAWI128,
24806 IX86_BUILTIN_PSRADI128,
24807 IX86_BUILTIN_PSRLDQI128,
24808 IX86_BUILTIN_PSRLWI128,
24809 IX86_BUILTIN_PSRLDI128,
24810 IX86_BUILTIN_PSRLQI128,
24812 IX86_BUILTIN_PSLLDQ128,
24813 IX86_BUILTIN_PSLLW128,
24814 IX86_BUILTIN_PSLLD128,
24815 IX86_BUILTIN_PSLLQ128,
24816 IX86_BUILTIN_PSRAW128,
24817 IX86_BUILTIN_PSRAD128,
24818 IX86_BUILTIN_PSRLW128,
24819 IX86_BUILTIN_PSRLD128,
24820 IX86_BUILTIN_PSRLQ128,
24822 IX86_BUILTIN_PUNPCKHBW128,
24823 IX86_BUILTIN_PUNPCKHWD128,
24824 IX86_BUILTIN_PUNPCKHDQ128,
24825 IX86_BUILTIN_PUNPCKHQDQ128,
24826 IX86_BUILTIN_PUNPCKLBW128,
24827 IX86_BUILTIN_PUNPCKLWD128,
24828 IX86_BUILTIN_PUNPCKLDQ128,
24829 IX86_BUILTIN_PUNPCKLQDQ128,
24831 IX86_BUILTIN_CLFLUSH,
24832 IX86_BUILTIN_MFENCE,
24833 IX86_BUILTIN_LFENCE,
24834 IX86_BUILTIN_PAUSE,
24836 IX86_BUILTIN_BSRSI,
24837 IX86_BUILTIN_BSRDI,
24838 IX86_BUILTIN_RDPMC,
24839 IX86_BUILTIN_RDTSC,
24840 IX86_BUILTIN_RDTSCP,
24841 IX86_BUILTIN_ROLQI,
24842 IX86_BUILTIN_ROLHI,
24843 IX86_BUILTIN_RORQI,
24844 IX86_BUILTIN_RORHI,
24847 IX86_BUILTIN_ADDSUBPS,
24848 IX86_BUILTIN_HADDPS,
24849 IX86_BUILTIN_HSUBPS,
24850 IX86_BUILTIN_MOVSHDUP,
24851 IX86_BUILTIN_MOVSLDUP,
24852 IX86_BUILTIN_ADDSUBPD,
24853 IX86_BUILTIN_HADDPD,
24854 IX86_BUILTIN_HSUBPD,
24855 IX86_BUILTIN_LDDQU,
24857 IX86_BUILTIN_MONITOR,
24858 IX86_BUILTIN_MWAIT,
24861 IX86_BUILTIN_PHADDW,
24862 IX86_BUILTIN_PHADDD,
24863 IX86_BUILTIN_PHADDSW,
24864 IX86_BUILTIN_PHSUBW,
24865 IX86_BUILTIN_PHSUBD,
24866 IX86_BUILTIN_PHSUBSW,
24867 IX86_BUILTIN_PMADDUBSW,
24868 IX86_BUILTIN_PMULHRSW,
24869 IX86_BUILTIN_PSHUFB,
24870 IX86_BUILTIN_PSIGNB,
24871 IX86_BUILTIN_PSIGNW,
24872 IX86_BUILTIN_PSIGND,
24873 IX86_BUILTIN_PALIGNR,
24874 IX86_BUILTIN_PABSB,
24875 IX86_BUILTIN_PABSW,
24876 IX86_BUILTIN_PABSD,
24878 IX86_BUILTIN_PHADDW128,
24879 IX86_BUILTIN_PHADDD128,
24880 IX86_BUILTIN_PHADDSW128,
24881 IX86_BUILTIN_PHSUBW128,
24882 IX86_BUILTIN_PHSUBD128,
24883 IX86_BUILTIN_PHSUBSW128,
24884 IX86_BUILTIN_PMADDUBSW128,
24885 IX86_BUILTIN_PMULHRSW128,
24886 IX86_BUILTIN_PSHUFB128,
24887 IX86_BUILTIN_PSIGNB128,
24888 IX86_BUILTIN_PSIGNW128,
24889 IX86_BUILTIN_PSIGND128,
24890 IX86_BUILTIN_PALIGNR128,
24891 IX86_BUILTIN_PABSB128,
24892 IX86_BUILTIN_PABSW128,
24893 IX86_BUILTIN_PABSD128,
24895 /* AMDFAM10 - SSE4A New Instructions. */
24896 IX86_BUILTIN_MOVNTSD,
24897 IX86_BUILTIN_MOVNTSS,
24898 IX86_BUILTIN_EXTRQI,
24899 IX86_BUILTIN_EXTRQ,
24900 IX86_BUILTIN_INSERTQI,
24901 IX86_BUILTIN_INSERTQ,
24904 IX86_BUILTIN_BLENDPD,
24905 IX86_BUILTIN_BLENDPS,
24906 IX86_BUILTIN_BLENDVPD,
24907 IX86_BUILTIN_BLENDVPS,
24908 IX86_BUILTIN_PBLENDVB128,
24909 IX86_BUILTIN_PBLENDW128,
24914 IX86_BUILTIN_INSERTPS128,
24916 IX86_BUILTIN_MOVNTDQA,
24917 IX86_BUILTIN_MPSADBW128,
24918 IX86_BUILTIN_PACKUSDW128,
24919 IX86_BUILTIN_PCMPEQQ,
24920 IX86_BUILTIN_PHMINPOSUW128,
24922 IX86_BUILTIN_PMAXSB128,
24923 IX86_BUILTIN_PMAXSD128,
24924 IX86_BUILTIN_PMAXUD128,
24925 IX86_BUILTIN_PMAXUW128,
24927 IX86_BUILTIN_PMINSB128,
24928 IX86_BUILTIN_PMINSD128,
24929 IX86_BUILTIN_PMINUD128,
24930 IX86_BUILTIN_PMINUW128,
24932 IX86_BUILTIN_PMOVSXBW128,
24933 IX86_BUILTIN_PMOVSXBD128,
24934 IX86_BUILTIN_PMOVSXBQ128,
24935 IX86_BUILTIN_PMOVSXWD128,
24936 IX86_BUILTIN_PMOVSXWQ128,
24937 IX86_BUILTIN_PMOVSXDQ128,
24939 IX86_BUILTIN_PMOVZXBW128,
24940 IX86_BUILTIN_PMOVZXBD128,
24941 IX86_BUILTIN_PMOVZXBQ128,
24942 IX86_BUILTIN_PMOVZXWD128,
24943 IX86_BUILTIN_PMOVZXWQ128,
24944 IX86_BUILTIN_PMOVZXDQ128,
24946 IX86_BUILTIN_PMULDQ128,
24947 IX86_BUILTIN_PMULLD128,
24949 IX86_BUILTIN_ROUNDSD,
24950 IX86_BUILTIN_ROUNDSS,
24952 IX86_BUILTIN_ROUNDPD,
24953 IX86_BUILTIN_ROUNDPS,
24955 IX86_BUILTIN_FLOORPD,
24956 IX86_BUILTIN_CEILPD,
24957 IX86_BUILTIN_TRUNCPD,
24958 IX86_BUILTIN_RINTPD,
24959 IX86_BUILTIN_ROUNDPD_AZ,
24961 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX,
24962 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
24963 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,
24965 IX86_BUILTIN_FLOORPS,
24966 IX86_BUILTIN_CEILPS,
24967 IX86_BUILTIN_TRUNCPS,
24968 IX86_BUILTIN_RINTPS,
24969 IX86_BUILTIN_ROUNDPS_AZ,
24971 IX86_BUILTIN_FLOORPS_SFIX,
24972 IX86_BUILTIN_CEILPS_SFIX,
24973 IX86_BUILTIN_ROUNDPS_AZ_SFIX,
24975 IX86_BUILTIN_PTESTZ,
24976 IX86_BUILTIN_PTESTC,
24977 IX86_BUILTIN_PTESTNZC,
24979 IX86_BUILTIN_VEC_INIT_V2SI,
24980 IX86_BUILTIN_VEC_INIT_V4HI,
24981 IX86_BUILTIN_VEC_INIT_V8QI,
24982 IX86_BUILTIN_VEC_EXT_V2DF,
24983 IX86_BUILTIN_VEC_EXT_V2DI,
24984 IX86_BUILTIN_VEC_EXT_V4SF,
24985 IX86_BUILTIN_VEC_EXT_V4SI,
24986 IX86_BUILTIN_VEC_EXT_V8HI,
24987 IX86_BUILTIN_VEC_EXT_V2SI,
24988 IX86_BUILTIN_VEC_EXT_V4HI,
24989 IX86_BUILTIN_VEC_EXT_V16QI,
24990 IX86_BUILTIN_VEC_SET_V2DI,
24991 IX86_BUILTIN_VEC_SET_V4SF,
24992 IX86_BUILTIN_VEC_SET_V4SI,
24993 IX86_BUILTIN_VEC_SET_V8HI,
24994 IX86_BUILTIN_VEC_SET_V4HI,
24995 IX86_BUILTIN_VEC_SET_V16QI,
24997 IX86_BUILTIN_VEC_PACK_SFIX,
24998 IX86_BUILTIN_VEC_PACK_SFIX256,
25001 IX86_BUILTIN_CRC32QI,
25002 IX86_BUILTIN_CRC32HI,
25003 IX86_BUILTIN_CRC32SI,
25004 IX86_BUILTIN_CRC32DI,
25006 IX86_BUILTIN_PCMPESTRI128,
25007 IX86_BUILTIN_PCMPESTRM128,
25008 IX86_BUILTIN_PCMPESTRA128,
25009 IX86_BUILTIN_PCMPESTRC128,
25010 IX86_BUILTIN_PCMPESTRO128,
25011 IX86_BUILTIN_PCMPESTRS128,
25012 IX86_BUILTIN_PCMPESTRZ128,
25013 IX86_BUILTIN_PCMPISTRI128,
25014 IX86_BUILTIN_PCMPISTRM128,
25015 IX86_BUILTIN_PCMPISTRA128,
25016 IX86_BUILTIN_PCMPISTRC128,
25017 IX86_BUILTIN_PCMPISTRO128,
25018 IX86_BUILTIN_PCMPISTRS128,
25019 IX86_BUILTIN_PCMPISTRZ128,
25021 IX86_BUILTIN_PCMPGTQ,
25023 /* AES instructions */
25024 IX86_BUILTIN_AESENC128,
25025 IX86_BUILTIN_AESENCLAST128,
25026 IX86_BUILTIN_AESDEC128,
25027 IX86_BUILTIN_AESDECLAST128,
25028 IX86_BUILTIN_AESIMC128,
25029 IX86_BUILTIN_AESKEYGENASSIST128,
25031 /* PCLMUL instruction */
25032 IX86_BUILTIN_PCLMULQDQ128,
25035 IX86_BUILTIN_ADDPD256,
25036 IX86_BUILTIN_ADDPS256,
25037 IX86_BUILTIN_ADDSUBPD256,
25038 IX86_BUILTIN_ADDSUBPS256,
25039 IX86_BUILTIN_ANDPD256,
25040 IX86_BUILTIN_ANDPS256,
25041 IX86_BUILTIN_ANDNPD256,
25042 IX86_BUILTIN_ANDNPS256,
25043 IX86_BUILTIN_BLENDPD256,
25044 IX86_BUILTIN_BLENDPS256,
25045 IX86_BUILTIN_BLENDVPD256,
25046 IX86_BUILTIN_BLENDVPS256,
25047 IX86_BUILTIN_DIVPD256,
25048 IX86_BUILTIN_DIVPS256,
25049 IX86_BUILTIN_DPPS256,
25050 IX86_BUILTIN_HADDPD256,
25051 IX86_BUILTIN_HADDPS256,
25052 IX86_BUILTIN_HSUBPD256,
25053 IX86_BUILTIN_HSUBPS256,
25054 IX86_BUILTIN_MAXPD256,
25055 IX86_BUILTIN_MAXPS256,
25056 IX86_BUILTIN_MINPD256,
25057 IX86_BUILTIN_MINPS256,
25058 IX86_BUILTIN_MULPD256,
25059 IX86_BUILTIN_MULPS256,
25060 IX86_BUILTIN_ORPD256,
25061 IX86_BUILTIN_ORPS256,
25062 IX86_BUILTIN_SHUFPD256,
25063 IX86_BUILTIN_SHUFPS256,
25064 IX86_BUILTIN_SUBPD256,
25065 IX86_BUILTIN_SUBPS256,
25066 IX86_BUILTIN_XORPD256,
25067 IX86_BUILTIN_XORPS256,
25068 IX86_BUILTIN_CMPSD,
25069 IX86_BUILTIN_CMPSS,
25070 IX86_BUILTIN_CMPPD,
25071 IX86_BUILTIN_CMPPS,
25072 IX86_BUILTIN_CMPPD256,
25073 IX86_BUILTIN_CMPPS256,
25074 IX86_BUILTIN_CVTDQ2PD256,
25075 IX86_BUILTIN_CVTDQ2PS256,
25076 IX86_BUILTIN_CVTPD2PS256,
25077 IX86_BUILTIN_CVTPS2DQ256,
25078 IX86_BUILTIN_CVTPS2PD256,
25079 IX86_BUILTIN_CVTTPD2DQ256,
25080 IX86_BUILTIN_CVTPD2DQ256,
25081 IX86_BUILTIN_CVTTPS2DQ256,
25082 IX86_BUILTIN_EXTRACTF128PD256,
25083 IX86_BUILTIN_EXTRACTF128PS256,
25084 IX86_BUILTIN_EXTRACTF128SI256,
25085 IX86_BUILTIN_VZEROALL,
25086 IX86_BUILTIN_VZEROUPPER,
25087 IX86_BUILTIN_VPERMILVARPD,
25088 IX86_BUILTIN_VPERMILVARPS,
25089 IX86_BUILTIN_VPERMILVARPD256,
25090 IX86_BUILTIN_VPERMILVARPS256,
25091 IX86_BUILTIN_VPERMILPD,
25092 IX86_BUILTIN_VPERMILPS,
25093 IX86_BUILTIN_VPERMILPD256,
25094 IX86_BUILTIN_VPERMILPS256,
25095 IX86_BUILTIN_VPERMIL2PD,
25096 IX86_BUILTIN_VPERMIL2PS,
25097 IX86_BUILTIN_VPERMIL2PD256,
25098 IX86_BUILTIN_VPERMIL2PS256,
25099 IX86_BUILTIN_VPERM2F128PD256,
25100 IX86_BUILTIN_VPERM2F128PS256,
25101 IX86_BUILTIN_VPERM2F128SI256,
25102 IX86_BUILTIN_VBROADCASTSS,
25103 IX86_BUILTIN_VBROADCASTSD256,
25104 IX86_BUILTIN_VBROADCASTSS256,
25105 IX86_BUILTIN_VBROADCASTPD256,
25106 IX86_BUILTIN_VBROADCASTPS256,
25107 IX86_BUILTIN_VINSERTF128PD256,
25108 IX86_BUILTIN_VINSERTF128PS256,
25109 IX86_BUILTIN_VINSERTF128SI256,
25110 IX86_BUILTIN_LOADUPD256,
25111 IX86_BUILTIN_LOADUPS256,
25112 IX86_BUILTIN_STOREUPD256,
25113 IX86_BUILTIN_STOREUPS256,
25114 IX86_BUILTIN_LDDQU256,
25115 IX86_BUILTIN_MOVNTDQ256,
25116 IX86_BUILTIN_MOVNTPD256,
25117 IX86_BUILTIN_MOVNTPS256,
25118 IX86_BUILTIN_LOADDQU256,
25119 IX86_BUILTIN_STOREDQU256,
25120 IX86_BUILTIN_MASKLOADPD,
25121 IX86_BUILTIN_MASKLOADPS,
25122 IX86_BUILTIN_MASKSTOREPD,
25123 IX86_BUILTIN_MASKSTOREPS,
25124 IX86_BUILTIN_MASKLOADPD256,
25125 IX86_BUILTIN_MASKLOADPS256,
25126 IX86_BUILTIN_MASKSTOREPD256,
25127 IX86_BUILTIN_MASKSTOREPS256,
25128 IX86_BUILTIN_MOVSHDUP256,
25129 IX86_BUILTIN_MOVSLDUP256,
25130 IX86_BUILTIN_MOVDDUP256,
25132 IX86_BUILTIN_SQRTPD256,
25133 IX86_BUILTIN_SQRTPS256,
25134 IX86_BUILTIN_SQRTPS_NR256,
25135 IX86_BUILTIN_RSQRTPS256,
25136 IX86_BUILTIN_RSQRTPS_NR256,
25138 IX86_BUILTIN_RCPPS256,
25140 IX86_BUILTIN_ROUNDPD256,
25141 IX86_BUILTIN_ROUNDPS256,
25143 IX86_BUILTIN_FLOORPD256,
25144 IX86_BUILTIN_CEILPD256,
25145 IX86_BUILTIN_TRUNCPD256,
25146 IX86_BUILTIN_RINTPD256,
25147 IX86_BUILTIN_ROUNDPD_AZ256,
25149 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
25150 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
25151 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,
25153 IX86_BUILTIN_FLOORPS256,
25154 IX86_BUILTIN_CEILPS256,
25155 IX86_BUILTIN_TRUNCPS256,
25156 IX86_BUILTIN_RINTPS256,
25157 IX86_BUILTIN_ROUNDPS_AZ256,
25159 IX86_BUILTIN_FLOORPS_SFIX256,
25160 IX86_BUILTIN_CEILPS_SFIX256,
25161 IX86_BUILTIN_ROUNDPS_AZ_SFIX256,
25163 IX86_BUILTIN_UNPCKHPD256,
25164 IX86_BUILTIN_UNPCKLPD256,
25165 IX86_BUILTIN_UNPCKHPS256,
25166 IX86_BUILTIN_UNPCKLPS256,
25168 IX86_BUILTIN_SI256_SI,
25169 IX86_BUILTIN_PS256_PS,
25170 IX86_BUILTIN_PD256_PD,
25171 IX86_BUILTIN_SI_SI256,
25172 IX86_BUILTIN_PS_PS256,
25173 IX86_BUILTIN_PD_PD256,
25175 IX86_BUILTIN_VTESTZPD,
25176 IX86_BUILTIN_VTESTCPD,
25177 IX86_BUILTIN_VTESTNZCPD,
25178 IX86_BUILTIN_VTESTZPS,
25179 IX86_BUILTIN_VTESTCPS,
25180 IX86_BUILTIN_VTESTNZCPS,
25181 IX86_BUILTIN_VTESTZPD256,
25182 IX86_BUILTIN_VTESTCPD256,
25183 IX86_BUILTIN_VTESTNZCPD256,
25184 IX86_BUILTIN_VTESTZPS256,
25185 IX86_BUILTIN_VTESTCPS256,
25186 IX86_BUILTIN_VTESTNZCPS256,
25187 IX86_BUILTIN_PTESTZ256,
25188 IX86_BUILTIN_PTESTC256,
25189 IX86_BUILTIN_PTESTNZC256,
25191 IX86_BUILTIN_MOVMSKPD256,
25192 IX86_BUILTIN_MOVMSKPS256,
25195 IX86_BUILTIN_MPSADBW256,
25196 IX86_BUILTIN_PABSB256,
25197 IX86_BUILTIN_PABSW256,
25198 IX86_BUILTIN_PABSD256,
25199 IX86_BUILTIN_PACKSSDW256,
25200 IX86_BUILTIN_PACKSSWB256,
25201 IX86_BUILTIN_PACKUSDW256,
25202 IX86_BUILTIN_PACKUSWB256,
25203 IX86_BUILTIN_PADDB256,
25204 IX86_BUILTIN_PADDW256,
25205 IX86_BUILTIN_PADDD256,
25206 IX86_BUILTIN_PADDQ256,
25207 IX86_BUILTIN_PADDSB256,
25208 IX86_BUILTIN_PADDSW256,
25209 IX86_BUILTIN_PADDUSB256,
25210 IX86_BUILTIN_PADDUSW256,
25211 IX86_BUILTIN_PALIGNR256,
25212 IX86_BUILTIN_AND256I,
25213 IX86_BUILTIN_ANDNOT256I,
25214 IX86_BUILTIN_PAVGB256,
25215 IX86_BUILTIN_PAVGW256,
25216 IX86_BUILTIN_PBLENDVB256,
25217 IX86_BUILTIN_PBLENDVW256,
25218 IX86_BUILTIN_PCMPEQB256,
25219 IX86_BUILTIN_PCMPEQW256,
25220 IX86_BUILTIN_PCMPEQD256,
25221 IX86_BUILTIN_PCMPEQQ256,
25222 IX86_BUILTIN_PCMPGTB256,
25223 IX86_BUILTIN_PCMPGTW256,
25224 IX86_BUILTIN_PCMPGTD256,
25225 IX86_BUILTIN_PCMPGTQ256,
25226 IX86_BUILTIN_PHADDW256,
25227 IX86_BUILTIN_PHADDD256,
25228 IX86_BUILTIN_PHADDSW256,
25229 IX86_BUILTIN_PHSUBW256,
25230 IX86_BUILTIN_PHSUBD256,
25231 IX86_BUILTIN_PHSUBSW256,
25232 IX86_BUILTIN_PMADDUBSW256,
25233 IX86_BUILTIN_PMADDWD256,
25234 IX86_BUILTIN_PMAXSB256,
25235 IX86_BUILTIN_PMAXSW256,
25236 IX86_BUILTIN_PMAXSD256,
25237 IX86_BUILTIN_PMAXUB256,
25238 IX86_BUILTIN_PMAXUW256,
25239 IX86_BUILTIN_PMAXUD256,
25240 IX86_BUILTIN_PMINSB256,
25241 IX86_BUILTIN_PMINSW256,
25242 IX86_BUILTIN_PMINSD256,
25243 IX86_BUILTIN_PMINUB256,
25244 IX86_BUILTIN_PMINUW256,
25245 IX86_BUILTIN_PMINUD256,
25246 IX86_BUILTIN_PMOVMSKB256,
25247 IX86_BUILTIN_PMOVSXBW256,
25248 IX86_BUILTIN_PMOVSXBD256,
25249 IX86_BUILTIN_PMOVSXBQ256,
25250 IX86_BUILTIN_PMOVSXWD256,
25251 IX86_BUILTIN_PMOVSXWQ256,
25252 IX86_BUILTIN_PMOVSXDQ256,
25253 IX86_BUILTIN_PMOVZXBW256,
25254 IX86_BUILTIN_PMOVZXBD256,
25255 IX86_BUILTIN_PMOVZXBQ256,
25256 IX86_BUILTIN_PMOVZXWD256,
25257 IX86_BUILTIN_PMOVZXWQ256,
25258 IX86_BUILTIN_PMOVZXDQ256,
25259 IX86_BUILTIN_PMULDQ256,
25260 IX86_BUILTIN_PMULHRSW256,
25261 IX86_BUILTIN_PMULHUW256,
25262 IX86_BUILTIN_PMULHW256,
25263 IX86_BUILTIN_PMULLW256,
25264 IX86_BUILTIN_PMULLD256,
25265 IX86_BUILTIN_PMULUDQ256,
25266 IX86_BUILTIN_POR256,
25267 IX86_BUILTIN_PSADBW256,
25268 IX86_BUILTIN_PSHUFB256,
25269 IX86_BUILTIN_PSHUFD256,
25270 IX86_BUILTIN_PSHUFHW256,
25271 IX86_BUILTIN_PSHUFLW256,
25272 IX86_BUILTIN_PSIGNB256,
25273 IX86_BUILTIN_PSIGNW256,
25274 IX86_BUILTIN_PSIGND256,
25275 IX86_BUILTIN_PSLLDQI256,
25276 IX86_BUILTIN_PSLLWI256,
25277 IX86_BUILTIN_PSLLW256,
25278 IX86_BUILTIN_PSLLDI256,
25279 IX86_BUILTIN_PSLLD256,
25280 IX86_BUILTIN_PSLLQI256,
25281 IX86_BUILTIN_PSLLQ256,
25282 IX86_BUILTIN_PSRAWI256,
25283 IX86_BUILTIN_PSRAW256,
25284 IX86_BUILTIN_PSRADI256,
25285 IX86_BUILTIN_PSRAD256,
25286 IX86_BUILTIN_PSRLDQI256,
25287 IX86_BUILTIN_PSRLWI256,
25288 IX86_BUILTIN_PSRLW256,
25289 IX86_BUILTIN_PSRLDI256,
25290 IX86_BUILTIN_PSRLD256,
25291 IX86_BUILTIN_PSRLQI256,
25292 IX86_BUILTIN_PSRLQ256,
25293 IX86_BUILTIN_PSUBB256,
25294 IX86_BUILTIN_PSUBW256,
25295 IX86_BUILTIN_PSUBD256,
25296 IX86_BUILTIN_PSUBQ256,
25297 IX86_BUILTIN_PSUBSB256,
25298 IX86_BUILTIN_PSUBSW256,
25299 IX86_BUILTIN_PSUBUSB256,
25300 IX86_BUILTIN_PSUBUSW256,
25301 IX86_BUILTIN_PUNPCKHBW256,
25302 IX86_BUILTIN_PUNPCKHWD256,
25303 IX86_BUILTIN_PUNPCKHDQ256,
25304 IX86_BUILTIN_PUNPCKHQDQ256,
25305 IX86_BUILTIN_PUNPCKLBW256,
25306 IX86_BUILTIN_PUNPCKLWD256,
25307 IX86_BUILTIN_PUNPCKLDQ256,
25308 IX86_BUILTIN_PUNPCKLQDQ256,
25309 IX86_BUILTIN_PXOR256,
25310 IX86_BUILTIN_MOVNTDQA256,
25311 IX86_BUILTIN_VBROADCASTSS_PS,
25312 IX86_BUILTIN_VBROADCASTSS_PS256,
25313 IX86_BUILTIN_VBROADCASTSD_PD256,
25314 IX86_BUILTIN_VBROADCASTSI256,
25315 IX86_BUILTIN_PBLENDD256,
25316 IX86_BUILTIN_PBLENDD128,
25317 IX86_BUILTIN_PBROADCASTB256,
25318 IX86_BUILTIN_PBROADCASTW256,
25319 IX86_BUILTIN_PBROADCASTD256,
25320 IX86_BUILTIN_PBROADCASTQ256,
25321 IX86_BUILTIN_PBROADCASTB128,
25322 IX86_BUILTIN_PBROADCASTW128,
25323 IX86_BUILTIN_PBROADCASTD128,
25324 IX86_BUILTIN_PBROADCASTQ128,
25325 IX86_BUILTIN_VPERMVARSI256,
25326 IX86_BUILTIN_VPERMDF256,
25327 IX86_BUILTIN_VPERMVARSF256,
25328 IX86_BUILTIN_VPERMDI256,
25329 IX86_BUILTIN_VPERMTI256,
25330 IX86_BUILTIN_VEXTRACT128I256,
25331 IX86_BUILTIN_VINSERT128I256,
25332 IX86_BUILTIN_MASKLOADD,
25333 IX86_BUILTIN_MASKLOADQ,
25334 IX86_BUILTIN_MASKLOADD256,
25335 IX86_BUILTIN_MASKLOADQ256,
25336 IX86_BUILTIN_MASKSTORED,
25337 IX86_BUILTIN_MASKSTOREQ,
25338 IX86_BUILTIN_MASKSTORED256,
25339 IX86_BUILTIN_MASKSTOREQ256,
25340 IX86_BUILTIN_PSLLVV4DI,
25341 IX86_BUILTIN_PSLLVV2DI,
25342 IX86_BUILTIN_PSLLVV8SI,
25343 IX86_BUILTIN_PSLLVV4SI,
25344 IX86_BUILTIN_PSRAVV8SI,
25345 IX86_BUILTIN_PSRAVV4SI,
25346 IX86_BUILTIN_PSRLVV4DI,
25347 IX86_BUILTIN_PSRLVV2DI,
25348 IX86_BUILTIN_PSRLVV8SI,
25349 IX86_BUILTIN_PSRLVV4SI,
25351 IX86_BUILTIN_GATHERSIV2DF,
25352 IX86_BUILTIN_GATHERSIV4DF,
25353 IX86_BUILTIN_GATHERDIV2DF,
25354 IX86_BUILTIN_GATHERDIV4DF,
25355 IX86_BUILTIN_GATHERSIV4SF,
25356 IX86_BUILTIN_GATHERSIV8SF,
25357 IX86_BUILTIN_GATHERDIV4SF,
25358 IX86_BUILTIN_GATHERDIV8SF,
25359 IX86_BUILTIN_GATHERSIV2DI,
25360 IX86_BUILTIN_GATHERSIV4DI,
25361 IX86_BUILTIN_GATHERDIV2DI,
25362 IX86_BUILTIN_GATHERDIV4DI,
25363 IX86_BUILTIN_GATHERSIV4SI,
25364 IX86_BUILTIN_GATHERSIV8SI,
25365 IX86_BUILTIN_GATHERDIV4SI,
25366 IX86_BUILTIN_GATHERDIV8SI,
25368 /* Alternate 4-element gather for the vectorizer where
25369 all operands are 32-byte wide. */
25370 IX86_BUILTIN_GATHERALTSIV4DF,
25371 IX86_BUILTIN_GATHERALTDIV8SF,
25372 IX86_BUILTIN_GATHERALTSIV4DI,
25373 IX86_BUILTIN_GATHERALTDIV8SI,
25375 /* TFmode support builtins. */
25377 IX86_BUILTIN_HUGE_VALQ,
25378 IX86_BUILTIN_FABSQ,
25379 IX86_BUILTIN_COPYSIGNQ,
25381 /* Vectorizer support builtins. */
25382 IX86_BUILTIN_CPYSGNPS,
25383 IX86_BUILTIN_CPYSGNPD,
25384 IX86_BUILTIN_CPYSGNPS256,
25385 IX86_BUILTIN_CPYSGNPD256,
25387 /* FMA4 instructions. */
25388 IX86_BUILTIN_VFMADDSS,
25389 IX86_BUILTIN_VFMADDSD,
25390 IX86_BUILTIN_VFMADDPS,
25391 IX86_BUILTIN_VFMADDPD,
25392 IX86_BUILTIN_VFMADDPS256,
25393 IX86_BUILTIN_VFMADDPD256,
25394 IX86_BUILTIN_VFMADDSUBPS,
25395 IX86_BUILTIN_VFMADDSUBPD,
25396 IX86_BUILTIN_VFMADDSUBPS256,
25397 IX86_BUILTIN_VFMADDSUBPD256,
25399 /* FMA3 instructions. */
25400 IX86_BUILTIN_VFMADDSS3,
25401 IX86_BUILTIN_VFMADDSD3,
25403 /* XOP instructions. */
25404 IX86_BUILTIN_VPCMOV,
25405 IX86_BUILTIN_VPCMOV_V2DI,
25406 IX86_BUILTIN_VPCMOV_V4SI,
25407 IX86_BUILTIN_VPCMOV_V8HI,
25408 IX86_BUILTIN_VPCMOV_V16QI,
25409 IX86_BUILTIN_VPCMOV_V4SF,
25410 IX86_BUILTIN_VPCMOV_V2DF,
25411 IX86_BUILTIN_VPCMOV256,
25412 IX86_BUILTIN_VPCMOV_V4DI256,
25413 IX86_BUILTIN_VPCMOV_V8SI256,
25414 IX86_BUILTIN_VPCMOV_V16HI256,
25415 IX86_BUILTIN_VPCMOV_V32QI256,
25416 IX86_BUILTIN_VPCMOV_V8SF256,
25417 IX86_BUILTIN_VPCMOV_V4DF256,
25419 IX86_BUILTIN_VPPERM,
25421 IX86_BUILTIN_VPMACSSWW,
25422 IX86_BUILTIN_VPMACSWW,
25423 IX86_BUILTIN_VPMACSSWD,
25424 IX86_BUILTIN_VPMACSWD,
25425 IX86_BUILTIN_VPMACSSDD,
25426 IX86_BUILTIN_VPMACSDD,
25427 IX86_BUILTIN_VPMACSSDQL,
25428 IX86_BUILTIN_VPMACSSDQH,
25429 IX86_BUILTIN_VPMACSDQL,
25430 IX86_BUILTIN_VPMACSDQH,
25431 IX86_BUILTIN_VPMADCSSWD,
25432 IX86_BUILTIN_VPMADCSWD,
25434 IX86_BUILTIN_VPHADDBW,
25435 IX86_BUILTIN_VPHADDBD,
25436 IX86_BUILTIN_VPHADDBQ,
25437 IX86_BUILTIN_VPHADDWD,
25438 IX86_BUILTIN_VPHADDWQ,
25439 IX86_BUILTIN_VPHADDDQ,
25440 IX86_BUILTIN_VPHADDUBW,
25441 IX86_BUILTIN_VPHADDUBD,
25442 IX86_BUILTIN_VPHADDUBQ,
25443 IX86_BUILTIN_VPHADDUWD,
25444 IX86_BUILTIN_VPHADDUWQ,
25445 IX86_BUILTIN_VPHADDUDQ,
25446 IX86_BUILTIN_VPHSUBBW,
25447 IX86_BUILTIN_VPHSUBWD,
25448 IX86_BUILTIN_VPHSUBDQ,
25450 IX86_BUILTIN_VPROTB,
25451 IX86_BUILTIN_VPROTW,
25452 IX86_BUILTIN_VPROTD,
25453 IX86_BUILTIN_VPROTQ,
25454 IX86_BUILTIN_VPROTB_IMM,
25455 IX86_BUILTIN_VPROTW_IMM,
25456 IX86_BUILTIN_VPROTD_IMM,
25457 IX86_BUILTIN_VPROTQ_IMM,
25459 IX86_BUILTIN_VPSHLB,
25460 IX86_BUILTIN_VPSHLW,
25461 IX86_BUILTIN_VPSHLD,
25462 IX86_BUILTIN_VPSHLQ,
25463 IX86_BUILTIN_VPSHAB,
25464 IX86_BUILTIN_VPSHAW,
25465 IX86_BUILTIN_VPSHAD,
25466 IX86_BUILTIN_VPSHAQ,
25468 IX86_BUILTIN_VFRCZSS,
25469 IX86_BUILTIN_VFRCZSD,
25470 IX86_BUILTIN_VFRCZPS,
25471 IX86_BUILTIN_VFRCZPD,
25472 IX86_BUILTIN_VFRCZPS256,
25473 IX86_BUILTIN_VFRCZPD256,
25475 IX86_BUILTIN_VPCOMEQUB,
25476 IX86_BUILTIN_VPCOMNEUB,
25477 IX86_BUILTIN_VPCOMLTUB,
25478 IX86_BUILTIN_VPCOMLEUB,
25479 IX86_BUILTIN_VPCOMGTUB,
25480 IX86_BUILTIN_VPCOMGEUB,
25481 IX86_BUILTIN_VPCOMFALSEUB,
25482 IX86_BUILTIN_VPCOMTRUEUB,
25484 IX86_BUILTIN_VPCOMEQUW,
25485 IX86_BUILTIN_VPCOMNEUW,
25486 IX86_BUILTIN_VPCOMLTUW,
25487 IX86_BUILTIN_VPCOMLEUW,
25488 IX86_BUILTIN_VPCOMGTUW,
25489 IX86_BUILTIN_VPCOMGEUW,
25490 IX86_BUILTIN_VPCOMFALSEUW,
25491 IX86_BUILTIN_VPCOMTRUEUW,
25493 IX86_BUILTIN_VPCOMEQUD,
25494 IX86_BUILTIN_VPCOMNEUD,
25495 IX86_BUILTIN_VPCOMLTUD,
25496 IX86_BUILTIN_VPCOMLEUD,
25497 IX86_BUILTIN_VPCOMGTUD,
25498 IX86_BUILTIN_VPCOMGEUD,
25499 IX86_BUILTIN_VPCOMFALSEUD,
25500 IX86_BUILTIN_VPCOMTRUEUD,
25502 IX86_BUILTIN_VPCOMEQUQ,
25503 IX86_BUILTIN_VPCOMNEUQ,
25504 IX86_BUILTIN_VPCOMLTUQ,
25505 IX86_BUILTIN_VPCOMLEUQ,
25506 IX86_BUILTIN_VPCOMGTUQ,
25507 IX86_BUILTIN_VPCOMGEUQ,
25508 IX86_BUILTIN_VPCOMFALSEUQ,
25509 IX86_BUILTIN_VPCOMTRUEUQ,
25511 IX86_BUILTIN_VPCOMEQB,
25512 IX86_BUILTIN_VPCOMNEB,
25513 IX86_BUILTIN_VPCOMLTB,
25514 IX86_BUILTIN_VPCOMLEB,
25515 IX86_BUILTIN_VPCOMGTB,
25516 IX86_BUILTIN_VPCOMGEB,
25517 IX86_BUILTIN_VPCOMFALSEB,
25518 IX86_BUILTIN_VPCOMTRUEB,
25520 IX86_BUILTIN_VPCOMEQW,
25521 IX86_BUILTIN_VPCOMNEW,
25522 IX86_BUILTIN_VPCOMLTW,
25523 IX86_BUILTIN_VPCOMLEW,
25524 IX86_BUILTIN_VPCOMGTW,
25525 IX86_BUILTIN_VPCOMGEW,
25526 IX86_BUILTIN_VPCOMFALSEW,
25527 IX86_BUILTIN_VPCOMTRUEW,
25529 IX86_BUILTIN_VPCOMEQD,
25530 IX86_BUILTIN_VPCOMNED,
25531 IX86_BUILTIN_VPCOMLTD,
25532 IX86_BUILTIN_VPCOMLED,
25533 IX86_BUILTIN_VPCOMGTD,
25534 IX86_BUILTIN_VPCOMGED,
25535 IX86_BUILTIN_VPCOMFALSED,
25536 IX86_BUILTIN_VPCOMTRUED,
25538 IX86_BUILTIN_VPCOMEQQ,
25539 IX86_BUILTIN_VPCOMNEQ,
25540 IX86_BUILTIN_VPCOMLTQ,
25541 IX86_BUILTIN_VPCOMLEQ,
25542 IX86_BUILTIN_VPCOMGTQ,
25543 IX86_BUILTIN_VPCOMGEQ,
25544 IX86_BUILTIN_VPCOMFALSEQ,
25545 IX86_BUILTIN_VPCOMTRUEQ,
25547 /* LWP instructions. */
25548 IX86_BUILTIN_LLWPCB,
25549 IX86_BUILTIN_SLWPCB,
25550 IX86_BUILTIN_LWPVAL32,
25551 IX86_BUILTIN_LWPVAL64,
25552 IX86_BUILTIN_LWPINS32,
25553 IX86_BUILTIN_LWPINS64,
25557 /* BMI instructions. */
25558 IX86_BUILTIN_BEXTR32,
25559 IX86_BUILTIN_BEXTR64,
25562 /* TBM instructions. */
25563 IX86_BUILTIN_BEXTRI32,
25564 IX86_BUILTIN_BEXTRI64,
25566 /* BMI2 instructions. */
25567 IX86_BUILTIN_BZHI32,
25568 IX86_BUILTIN_BZHI64,
25569 IX86_BUILTIN_PDEP32,
25570 IX86_BUILTIN_PDEP64,
25571 IX86_BUILTIN_PEXT32,
25572 IX86_BUILTIN_PEXT64,
25574 /* FSGSBASE instructions. */
25575 IX86_BUILTIN_RDFSBASE32,
25576 IX86_BUILTIN_RDFSBASE64,
25577 IX86_BUILTIN_RDGSBASE32,
25578 IX86_BUILTIN_RDGSBASE64,
25579 IX86_BUILTIN_WRFSBASE32,
25580 IX86_BUILTIN_WRFSBASE64,
25581 IX86_BUILTIN_WRGSBASE32,
25582 IX86_BUILTIN_WRGSBASE64,
25584 /* RDRND instructions. */
25585 IX86_BUILTIN_RDRAND16_STEP,
25586 IX86_BUILTIN_RDRAND32_STEP,
25587 IX86_BUILTIN_RDRAND64_STEP,
25589 /* F16C instructions. */
25590 IX86_BUILTIN_CVTPH2PS,
25591 IX86_BUILTIN_CVTPH2PS256,
25592 IX86_BUILTIN_CVTPS2PH,
25593 IX86_BUILTIN_CVTPS2PH256,
25595 /* CFString built-in for darwin */
25596 IX86_BUILTIN_CFSTRING,
25598 IX86_BUILTIN_MAX
25601 /* Table for the ix86 builtin decls. */
25602 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
25604 /* Table of all of the builtin functions that are possible with different ISAs
25605 but are waiting to be built until a function is declared to use that
25606 ISA. */
25607 struct builtin_isa {
25608 const char *name; /* function name */
25609 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
25610 HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
25611 bool const_p; /* true if the declaration is constant */
25612 bool set_and_not_built_p; /* true if the builtin is recorded here but its decl is not yet built */
25615 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
25618 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
25619 of which isa_flags to use in the ix86_builtins_isa array. Stores the
25620 function decl in the ix86_builtins array. Returns the function decl or
25621 NULL_TREE if the builtin was not added.
25623 If the front end has a special hook for builtin functions, delay adding
25624 builtin functions that aren't in the current ISA until the ISA is changed
25625 with function specific optimization. Doing so can save about 300K for the
25626 default compiler. When the builtin is expanded, check at that time whether
25627 it is valid.
25629 If the front end doesn't have a special hook, record all builtins, even if
25630 they aren't in the current ISA, in case the user uses function specific
25631 options for a different ISA, so that we don't get scope errors if a
25632 builtin is added in the middle of a function scope. */
25635 def_builtin (HOST_WIDE_INT mask, const char *name,
25636 enum ix86_builtin_func_type tcode,
25637 enum ix86_builtins code)
25639 tree decl = NULL_TREE;
25641 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
25643 ix86_builtins_isa[(int) code].isa = mask;
25645 mask &= ~OPTION_MASK_ISA_64BIT;
25646 if (mask == 0
25647 || (mask & ix86_isa_flags) != 0
25648 || (lang_hooks.builtin_function
25649 == lang_hooks.builtin_function_ext_scope))
25652 tree type = ix86_get_builtin_func_type (tcode);
25653 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
25654 NULL, NULL_TREE);
25655 ix86_builtins[(int) code] = decl;
25656 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
25660 ix86_builtins[(int) code] = NULL_TREE;
25661 ix86_builtins_isa[(int) code].tcode = tcode;
25662 ix86_builtins_isa[(int) code].name = name;
25663 ix86_builtins_isa[(int) code].const_p = false;
25664 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
25671 /* Like def_builtin, but also marks the function decl "const". */
25674 def_builtin_const (HOST_WIDE_INT mask, const char *name,
25675 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
25677 tree decl = def_builtin (mask, name, tcode, code);
25678 if (decl)
25679 TREE_READONLY (decl) = 1;
25680 else
25681 ix86_builtins_isa[(int) code].const_p = true;
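/* Illustrative usage (added commentary, not part of the original file):
   a representative registration in the style of the calls that populate
   the builtin tables later in this file, e.g. from an init routine such
   as ix86_init_mmx_sse_builtins.  The exact pairing shown here is for
   illustration only.  */

#if 0	/* Example only.  */
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_paddw128",
		     V8HI_FTYPE_V8HI_V8HI, IX86_BUILTIN_PADDW128);
#endif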
25686 /* Add any new builtin functions for a given ISA that may not have been
25687 declared. This saves a bit of space compared to adding all of the
25688 declarations to the tree, even if we didn't use them. */
25691 ix86_add_new_builtins (HOST_WIDE_INT isa)
25695 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
25697 if ((ix86_builtins_isa[i].isa & isa) != 0
25698 && ix86_builtins_isa[i].set_and_not_built_p)
25702 /* Don't define the builtin again. */
25703 ix86_builtins_isa[i].set_and_not_built_p = false;
25705 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
25706 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
25707 type, i, BUILT_IN_MD, NULL,
25708 NULL_TREE);
25710 ix86_builtins[i] = decl;
25711 if (ix86_builtins_isa[i].const_p)
25712 TREE_READONLY (decl) = 1;
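/* Illustrative sketch (added commentary, not part of the original file):
   the register-or-defer scheme implemented by def_builtin and
   ix86_add_new_builtins above, reduced to a toy.  All names are
   hypothetical.  */

#if 0	/* Example only.  */
struct toy_builtin { const char *name; unsigned isa; int deferred; };

static struct toy_builtin toy_builtins[] = {
  { "b_sse2", 0x1, 1 },
  { "b_avx",  0x2, 1 },
};

/* When the active ISA set grows (e.g. via a target attribute),
   materialize anything that was waiting on a newly enabled ISA bit.  */
static void
toy_add_new_builtins (unsigned enabled_isa)
{
  unsigned i;

  for (i = 0; i < sizeof toy_builtins / sizeof *toy_builtins; i++)
    if (toy_builtins[i].deferred
	&& (toy_builtins[i].isa & enabled_isa) != 0)
      toy_builtins[i].deferred = 0;	/* Build the decl here.  */
}
#endif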
25717 /* Bits for builtin_description.flag. */
25719 /* Set when we don't support the comparison natively, and should
25720 swap_comparison in order to support it. */
25721 #define BUILTIN_DESC_SWAP_OPERANDS 1
25723 struct builtin_description
25725 const HOST_WIDE_INT mask;
25726 const enum insn_code icode;
25727 const char *const name;
25728 const enum ix86_builtins code;
25729 const enum rtx_code comparison;
25730 const int flag;
25733 static const struct builtin_description bdesc_comi[] =
25735 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
25736 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
25737 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
25738 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
25739 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
25740 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
25741 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
25742 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
25743 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
25744 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
25745 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
25746 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
25747 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
25748 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
25749 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
25750 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
25751 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
25752 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
25753 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
25754 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
25755 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
25756 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
25757 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
25758 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
25761 static const struct builtin_description bdesc_pcmpestr[] =
25764 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
25765 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
25766 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
25767 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
25768 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
25769 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
25770 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
25773 static const struct builtin_description bdesc_pcmpistr[] =
25776 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
25777 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
25778 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
25779 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
25780 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
25781 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
25782 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
25785 /* Special builtins with variable number of arguments. */
25786 static const struct builtin_description bdesc_special_args[] =
25788 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtsc, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
25789 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtscp, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
25790 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
25793 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
25796 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
25799 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
25800 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
25801 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
25803 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
25804 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
25805 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
25806 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
25808 /* SSE or 3DNow!A */
25809 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
25810 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
25813 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
25814 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
25815 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
25816 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
25817 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
25818 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
25819 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
25820 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
25821 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
25822 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
25824 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
25825 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
25828 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
25831 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
25834 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
25835 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
25838 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
25839 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
25841 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
25842 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
25843 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
25844 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
25845 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
25847 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
25848 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
25849 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
25850 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
25851 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
25852 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
25853 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
25855 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
25856 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
25857 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
25859 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
25860 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
25861 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
25862 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
25863 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
25864 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
25865 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
25866 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
25869 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
25870 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
25871 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
25872 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
25873 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
25874 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
25875 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
25876 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
25877 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },

  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },

  /* FSGSBASE */
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
};

/* Builtins with variable number of arguments.  */
static const struct builtin_description bdesc_args[] =
{
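  /* Each row follows the struct builtin_description layout used for
     these tables: the ISA option mask gating availability, the
     insn_code of the expander pattern, the user-visible builtin name
     (0 when the builtin is internal-only), the IX86_BUILTIN_* code,
     an rtx comparison code (UNKNOWN when the pattern needs none), and
     a flag encoding the function prototype.  */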
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
  { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdpmc, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
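
  /* Note: a complemented mask such as ~OPTION_MASK_ISA_64BIT appears
     to mean "no specific ISA required", so the builtin is enabled on
     any target, whereas a plain OPTION_MASK_ISA_64BIT entry is only
     registered for TARGET_64BIT (see def_builtin).  */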

  /* MMX */
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
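
  /* The *_COUNT prototypes mark shift builtins whose last operand is
     a shift count; the expander appears to accept either a constant
     or a register there and widens or copies it into the mode the
     pattern wants before emitting the insn.  */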

  /* 3DNow! */
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },

  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  /* 3DNow!A */
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },

  /* SSE */
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
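
  /* The *_SWAP prototypes expand with the two input operands
     exchanged, which is how the "greater" comparisons above reuse the
     LT/LE (and UNGE/UNGT) mask-compare patterns: SSE itself only
     encodes the eq/lt/le/unord predicates and their negations.  */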

  { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },

  { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },

  /* SSE MMX or 3Dnow!A */
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },

  /* SSE2 */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
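
  /* The *_INT_CONVERT prototypes (the *dqi128 whole-register shifts
     above) appear to mark operands that must be reinterpreted in
     another mode: the V2DI arguments are rewrapped as V1TI for the
     ashl/lshr TImode patterns, and the immediate is a bit count for
     the full 128-bit value.  */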

  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
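
  /* A zero in the name slot means no __builtin_ia32_* identifier is
     registered for the entry; FABSQ and COPYSIGNQ are reachable only
     through their IX86_BUILTIN_* codes and presumably back the
     __float128 fabsq/copysignq support.  */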

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },

  /* SSE2 MMX */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },

  /* SSE3 */
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  /* SSSE3 */
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },

  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },

  /* SSE4.1 */
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },
26385 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
26386 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
26387 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
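
  /* SSE4.2 */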
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
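
  /* SSE4A */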
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
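
  /* AES */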
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
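
  /* PCLMUL */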
  { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
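
  /* AVX */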
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },

  { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },

  { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
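
  /* AVX2 */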
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3, "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3, "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3, "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3, "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2, "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2, "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2, "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2, "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2, "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2, "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2, "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2, "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2, "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2, "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mulv4siv4di3, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_umulhrswv16hi3, "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256", IX86_BUILTIN_PMULHUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256", IX86_BUILTIN_PMULHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256", IX86_BUILTIN_PMULLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256", IX86_BUILTIN_PMULLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_umulv4siv4di3, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3, "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_extracti128, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_inserti128, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
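
  /* LZCNT */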
  { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
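
  /* BMI */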
  { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
  { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
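
  /* TBM */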
  { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
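
  /* F16C */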
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
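
  /* BMI2 */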
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
};
/* FMA4 and XOP.  */
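/* Shorthand for the function type codes stored in the flag field of
   the bdesc_multi_arg entries below: e.g. MULTI_ARG_3_SF denotes a
   three-operand V4SF builtin (V4SF_FTYPE_V4SF_V4SF_V4SF).  */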
#define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
#define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
#define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
#define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
#define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
#define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
#define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
#define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
#define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
#define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
#define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
#define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
#define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
#define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
#define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
#define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
#define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
#define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
#define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
#define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
#define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
#define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
#define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
#define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
#define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
#define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
#define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
#define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
#define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
#define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
#define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
#define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
#define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
#define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
#define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
#define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
#define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
#define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
#define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
#define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
#define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
#define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
#define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
#define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
#define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
#define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
#define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
#define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
#define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
#define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
#define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
#define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
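
/* Table entries: ISA option mask, insn code, builtin name, builtin
   enum value, rtx comparison code (UNKNOWN when unused), and the
   MULTI_ARG_* function type from above.  */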
static const struct builtin_description bdesc_multi_arg[] =
{
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
    "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
    UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
    "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
    UNKNOWN, (int)MULTI_ARG_3_DF },

  { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
    "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
    UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
    "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
    UNKNOWN, (int)MULTI_ARG_3_DF },

  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
    "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
    UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
    "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
    UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
    "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
    UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
    "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
    UNKNOWN, (int)MULTI_ARG_3_DF2 },

  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
    "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
    UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
    "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
    UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
    "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
    UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
    "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
    UNKNOWN, (int)MULTI_ARG_3_DF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
26890 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
26891 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
26892 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
26893 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
26894 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
26896 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
26897 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
26898 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
26899 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
26900 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
26901 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
26902 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
26904 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
26905 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
26906 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
26907 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
26908 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
26909 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
26910 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
26912 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
26913 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
26914 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
26915 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
26916 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
26917 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
26918 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
26920 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
26921 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
26922 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
26923 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
26924 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
26925 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
26926 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
26928 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
26929 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
26930 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
26931 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
26932 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
26933 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
26934 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
26936 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
26937 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
26938 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
26939 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
26940 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
26941 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
26942 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
26944 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
26945 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
26946 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
26947 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
26948 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
26949 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
26950 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
26952 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
26953 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
26954 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
26955 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
26956 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
26957 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
26958 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
26960 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
26961 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
26962 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
26963 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
26964 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
26965 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
26966 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
26967 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
26969 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
26970 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
26971 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
26972 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
26973 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
26974 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
26975 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
26976 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
26978 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
26979 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
26980 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
26981 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
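
/* For illustration, a minimal user-level sketch (not part of this table)
   of how one entry above is reached: compiled with -mxop, the call below
   maps to IX86_BUILTIN_VPROTQ_IMM / CODE_FOR_xop_rotlv2di3 and is expanded
   by ix86_expand_multi_arg_builtin with MULTI_ARG_2_DI_IMM, which masks the
   immediate to the element width.  The typedef is an assumption made for
   the example only.

     typedef long long v2di __attribute__ ((vector_size (16)));

     v2di
     rotate_each_by_5 (v2di x)
     {
       return __builtin_ia32_vprotqi (x, 5);
     }
*/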
/* TM vector builtins.  */

/* Reuse the existing x86-specific `struct builtin_description' because
   we're lazy.  Add casts to make them fit.  */
static const struct builtin_description bdesc_tm[] =
{
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },

  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },

  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
};
/* TM callbacks.  */

/* Return the builtin decl needed to load a vector of TYPE.  */

static tree
ix86_builtin_tm_load (tree type)
{
  if (TREE_CODE (type) == VECTOR_TYPE)
    {
      switch (tree_low_cst (TYPE_SIZE (type), 1))
	{
	case 64:
	  return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
	case 128:
	  return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
	case 256:
	  return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
	}
    }
  return NULL_TREE;
}
/* Return the builtin decl needed to store a vector of TYPE.  */

static tree
ix86_builtin_tm_store (tree type)
{
  if (TREE_CODE (type) == VECTOR_TYPE)
    {
      switch (tree_low_cst (TYPE_SIZE (type), 1))
	{
	case 64:
	  return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
	case 128:
	  return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
	case 256:
	  return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
	}
    }
  return NULL_TREE;
}
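
/* A sketch of the intended use (hypothetical caller, for illustration
   only): the TM lowering pass asks the target for a vector-sized
   transactional access and falls back to element-wise accesses when
   NULL_TREE comes back, e.g.

     tree fndecl = ix86_builtin_tm_load (TREE_TYPE (lhs));
     if (fndecl)
       call = gimple_build_call (fndecl, 1, addr);

   where `lhs', `addr' and `call' stand in for the pass's actual
   operands.  */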
/* Initialize the transactional memory vector load/store builtins.  */

static void
ix86_init_tm_builtins (void)
{
  enum ix86_builtin_func_type ftype;
  const struct builtin_description *d;
  size_t i;
  tree decl;
  tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
  tree attrs_log, attrs_type_log;

  if (!flag_tm)
    return;

  /* If there are no builtins defined, we must be compiling in a
     language without trans-mem support.  */
  if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
    return;

  /* Use whatever attributes a normal TM load has.  */
  decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
  attrs_load = DECL_ATTRIBUTES (decl);
  attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
  /* Use whatever attributes a normal TM store has.  */
  decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
  attrs_store = DECL_ATTRIBUTES (decl);
  attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
  /* Use whatever attributes a normal TM log has.  */
  decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
  attrs_log = DECL_ATTRIBUTES (decl);
  attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));

  for (i = 0, d = bdesc_tm;
       i < ARRAY_SIZE (bdesc_tm);
       i++, d++)
    {
      if ((d->mask & ix86_isa_flags) != 0
	  || (lang_hooks.builtin_function
	      == lang_hooks.builtin_function_ext_scope))
	{
	  tree type, attrs, attrs_type;
	  enum built_in_function code = (enum built_in_function) d->code;

	  ftype = (enum ix86_builtin_func_type) d->flag;
	  type = ix86_get_builtin_func_type (ftype);

	  if (BUILTIN_TM_LOAD_P (code))
	    {
	      attrs = attrs_load;
	      attrs_type = attrs_type_load;
	    }
	  else if (BUILTIN_TM_STORE_P (code))
	    {
	      attrs = attrs_store;
	      attrs_type = attrs_type_store;
	    }
	  else
	    {
	      attrs = attrs_log;
	      attrs_type = attrs_type_log;
	    }
	  decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
				       /* The builtin without the prefix for
					  calling it directly.  */
				       d->name + strlen ("__builtin_"),
				       attrs);
	  /* add_builtin_function() will set the DECL_ATTRIBUTES, now
	     set the TYPE_ATTRIBUTES.  */
	  decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);

	  set_builtin_decl (code, decl, false);
	}
    }
}
/* Set up all the MMX/SSE builtins, even builtins for instructions that are not
   in the current target ISA to allow the user to compile particular modules
   with different target specific options that differ from the command line
   options.  */
static void
ix86_init_mmx_sse_builtins (void)
{
  const struct builtin_description * d;
  enum ix86_builtin_func_type ftype;
  size_t i;

  /* Add all special builtins with variable number of operands.  */
  for (i = 0, d = bdesc_special_args;
       i < ARRAY_SIZE (bdesc_special_args);
       i++, d++)
    {
      if (d->name == 0)
	continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin (d->mask, d->name, ftype, d->code);
    }

  /* Add all builtins with variable number of operands.  */
  for (i = 0, d = bdesc_args;
       i < ARRAY_SIZE (bdesc_args);
       i++, d++)
    {
      if (d->name == 0)
	continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* pcmpestr[im] insns.  */
  for (i = 0, d = bdesc_pcmpestr;
       i < ARRAY_SIZE (bdesc_pcmpestr);
       i++, d++)
    {
      if (d->code == IX86_BUILTIN_PCMPESTRM128)
	ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
      else
	ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* pcmpistr[im] insns.  */
  for (i = 0, d = bdesc_pcmpistr;
       i < ARRAY_SIZE (bdesc_pcmpistr);
       i++, d++)
    {
      if (d->code == IX86_BUILTIN_PCMPISTRM128)
	ftype = V16QI_FTYPE_V16QI_V16QI_INT;
      else
	ftype = INT_FTYPE_V16QI_V16QI_INT;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* comi/ucomi insns.  */
  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    {
      if (d->mask == OPTION_MASK_ISA_SSE2)
	ftype = INT_FTYPE_V2DF_V2DF;
      else
	ftype = INT_FTYPE_V4SF_V4SF;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* SSE */
  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
	       VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
	       UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);

  /* SSE or 3DNow!A */
  def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
	       "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
	       IX86_BUILTIN_MASKMOVQ);

  /* SSE2 */
  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
	       VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);

  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
	       VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
  x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
			    VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);

  /* SSE3.  */
  def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
	       VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
  def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
	       VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);

  /* AES */
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
		     V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
		     V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);

  /* PCLMUL */
  def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
		     V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);

  /* RDRND */
  def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
	       INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
  def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
	       INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
  def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
	       "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
	       IX86_BUILTIN_RDRAND64_STEP);
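
  /* Usage sketch (illustrative only): the *_step builtins return the
     RDRAND carry flag, so user code retries until the hardware delivers
     entropy:

       unsigned int v;
       while (!__builtin_ia32_rdrand32_step (&v))
	 ;
  */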
  /* AVX2 */
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
	       V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
	       IX86_BUILTIN_GATHERSIV2DF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
	       V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
	       IX86_BUILTIN_GATHERSIV4DF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
	       V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
	       IX86_BUILTIN_GATHERDIV2DF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
	       V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
	       IX86_BUILTIN_GATHERDIV4DF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
	       V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
	       IX86_BUILTIN_GATHERSIV4SF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
	       V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
	       IX86_BUILTIN_GATHERSIV8SF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
	       V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
	       IX86_BUILTIN_GATHERDIV4SF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
	       V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
	       IX86_BUILTIN_GATHERDIV8SF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
	       V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
	       IX86_BUILTIN_GATHERSIV2DI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
	       V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
	       IX86_BUILTIN_GATHERSIV4DI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
	       V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
	       IX86_BUILTIN_GATHERDIV2DI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
	       V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
	       IX86_BUILTIN_GATHERDIV4DI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
	       V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
	       IX86_BUILTIN_GATHERSIV4SI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
	       V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
	       IX86_BUILTIN_GATHERSIV8SI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
	       V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
	       IX86_BUILTIN_GATHERDIV4SI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
	       V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
	       IX86_BUILTIN_GATHERDIV8SI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df ",
	       V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
	       IX86_BUILTIN_GATHERALTSIV4DF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256 ",
	       V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
	       IX86_BUILTIN_GATHERALTDIV8SF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di ",
	       V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
	       IX86_BUILTIN_GATHERALTSIV4DI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256 ",
	       V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
	       IX86_BUILTIN_GATHERALTDIV8SI);
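
  /* Illustrative mapping (a sketch, not verified header text): the
     avx2intrin.h gather intrinsics bottom out here, e.g.

       _mm256_i32gather_ps (base, idx, 4)

     becomes __builtin_ia32_gathersiv8sf with an all-ones mask, loading
     eight floats from base + idx[i]*4 under control of the mask.  */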
  /* MMX access to the vec_init patterns.  */
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
		     V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
		     V4HI_FTYPE_HI_HI_HI_HI,
		     IX86_BUILTIN_VEC_INIT_V4HI);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
		     V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
		     IX86_BUILTIN_VEC_INIT_V8QI);

  /* Access to the vec_extract patterns.  */
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
		     DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
		     DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
  def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
		     FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
		     SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
		     HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);

  def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
		     "__builtin_ia32_vec_ext_v4hi",
		     HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
		     SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);

  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
		     QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);

  /* Access to the vec_set patterns.  */
  def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
		     "__builtin_ia32_vec_set_v2di",
		     V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);

  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
		     V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);

  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
		     V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);

  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
		     V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);

  def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
		     "__builtin_ia32_vec_set_v4hi",
		     V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);

  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
		     V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
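
  /* For illustration (a sketch): the intrinsics headers use these to
     open-code element access, e.g. _mm_extract_epi16 in emmintrin.h is
     defined in terms of __builtin_ia32_vec_ext_v8hi.  A direct call:

       typedef short v8hi __attribute__ ((vector_size (16)));

       short
       third_element (v8hi x)
       {
	 return __builtin_ia32_vec_ext_v8hi (x, 2);
       }
  */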
  /* Add FMA4 multi-arg argument instructions */
  for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
    {
      if (d->name == 0)
	continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }
}
/* Internal method for ix86_init_builtins.  */

static void
ix86_init_builtins_va_builtins_abi (void)
{
  tree ms_va_ref, sysv_va_ref;
  tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
  tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
  tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
  tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;

  if (!TARGET_64BIT)
    return;
  fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
  fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
  ms_va_ref = build_reference_type (ms_va_list_type_node);
  sysv_va_ref =
    build_pointer_type (TREE_TYPE (sysv_va_list_type_node));

  fnvoid_va_end_ms =
    build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
  fnvoid_va_start_ms =
    build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
  fnvoid_va_end_sysv =
    build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
  fnvoid_va_start_sysv =
    build_varargs_function_type_list (void_type_node, sysv_va_ref,
				      NULL_TREE);
  fnvoid_va_copy_ms =
    build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
			      NULL_TREE);
  fnvoid_va_copy_sysv =
    build_function_type_list (void_type_node, sysv_va_ref,
			      sysv_va_ref, NULL_TREE);

  add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
			BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
			BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
			BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
			BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
  add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
			BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
  add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
			BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
}
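
/* Usage sketch (illustrative): in 64-bit code a function declared with
   __attribute__ ((ms_abi)) walks its variadic arguments with the MS
   flavor of the va_* builtins registered above:

     __builtin_ms_va_list ap;
     __builtin_ms_va_start (ap, last);
     int x = __builtin_va_arg (ap, int);
     __builtin_ms_va_end (ap);

   where `last' names the final fixed parameter; an assumption made for
   the example.  */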
static void
ix86_init_builtin_types (void)
{
  tree float128_type_node, float80_type_node;

  /* The __float80 type.  */
  float80_type_node = long_double_type_node;
  if (TYPE_MODE (float80_type_node) != XFmode)
    {
      /* The __float80 type.  */
      float80_type_node = make_node (REAL_TYPE);

      TYPE_PRECISION (float80_type_node) = 80;
      layout_type (float80_type_node);
    }
  lang_hooks.types.register_builtin_type (float80_type_node, "__float80");

  /* The __float128 type.  */
  float128_type_node = make_node (REAL_TYPE);
  TYPE_PRECISION (float128_type_node) = 128;
  layout_type (float128_type_node);
  lang_hooks.types.register_builtin_type (float128_type_node, "__float128");

  /* This macro is built by i386-builtin-types.awk.  */
  DEFINE_BUILTIN_PRIMITIVE_TYPES;
}
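
/* For illustration: once registered, both types are usable directly in
   user code (a sketch; `e' gets XFmode extended precision, `q' gets
   TFmode quad precision, and the Q literal suffix denotes __float128):

     __float80  e = 1.0L;
     __float128 q = 1.0Q;
*/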
static void
ix86_init_builtins (void)
{
  tree t;

  ix86_init_builtin_types ();

  /* TFmode support builtins.  */
  def_builtin_const (0, "__builtin_infq",
		     FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
  def_builtin_const (0, "__builtin_huge_valq",
		     FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);

  /* We will expand them to normal call if SSE2 isn't available since
     they are used by libgcc.  */
  t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
  t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
			    BUILT_IN_MD, "__fabstf2", NULL_TREE);
  TREE_READONLY (t) = 1;
  ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;

  t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
  t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
			    BUILT_IN_MD, "__copysigntf3", NULL_TREE);
  TREE_READONLY (t) = 1;
  ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;

  ix86_init_tm_builtins ();
  ix86_init_mmx_sse_builtins ();

  if (TARGET_64BIT)
    ix86_init_builtins_va_builtins_abi ();

#ifdef SUBTARGET_INIT_BUILTINS
  SUBTARGET_INIT_BUILTINS;
#endif
}
/* Return the ix86 builtin for CODE.  */

static tree
ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
{
  if (code >= IX86_BUILTIN_MAX)
    return error_mark_node;

  return ix86_builtins[code];
}
/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */

static rtx
safe_vector_operand (rtx x, enum machine_mode mode)
{
  if (x == const0_rtx)
    x = CONST0_RTX (mode);
  return x;
}
/* Subroutine of ix86_expand_builtin to take care of binop insns.  */

static rtx
ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if (GET_MODE (op1) == SImode && mode1 == TImode)
    {
      rtx x = gen_reg_rtx (V4SImode);
      emit_insn (gen_sse2_loadd (x, op1));
      op1 = gen_lowpart (TImode, x);
    }

  if (!insn_data[icode].operand[1].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (!insn_data[icode].operand[2].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;

  emit_insn (pat);

  return target;
}
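
/* Flow sketch (illustrative): for a two-operand builtin routed here the
   insn pattern's own predicates drive legitimization; an operand the
   pattern cannot accept is copied into a register first.  The
   SImode/TImode special case above widens a 32-bit shift count into the
   TImode operand that the SSE2 shift patterns expect.  */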
/* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns.  */

static rtx
ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
			       enum ix86_builtin_func_type m_type,
			       enum rtx_code sub_code)
{
  rtx pat;
  int i;
  int nargs;
  bool comparison_p = false;
  bool tf_p = false;
  bool last_arg_constant = false;
  int num_memory = 0;
  struct {
    rtx op;
    enum machine_mode mode;
  } args[4];

  enum machine_mode tmode = insn_data[icode].operand[0].mode;

  switch (m_type)
    {
    case MULTI_ARG_4_DF2_DI_I:
    case MULTI_ARG_4_DF2_DI_I1:
    case MULTI_ARG_4_SF2_SI_I:
    case MULTI_ARG_4_SF2_SI_I1:
      nargs = 4;
      last_arg_constant = true;
      break;

    case MULTI_ARG_3_SF:
    case MULTI_ARG_3_DF:
    case MULTI_ARG_3_SF2:
    case MULTI_ARG_3_DF2:
    case MULTI_ARG_3_DI:
    case MULTI_ARG_3_SI:
    case MULTI_ARG_3_SI_DI:
    case MULTI_ARG_3_HI:
    case MULTI_ARG_3_HI_SI:
    case MULTI_ARG_3_QI:
    case MULTI_ARG_3_DI2:
    case MULTI_ARG_3_SI2:
    case MULTI_ARG_3_HI2:
    case MULTI_ARG_3_QI2:
      nargs = 3;
      break;

    case MULTI_ARG_2_SF:
    case MULTI_ARG_2_DF:
    case MULTI_ARG_2_DI:
    case MULTI_ARG_2_SI:
    case MULTI_ARG_2_HI:
    case MULTI_ARG_2_QI:
      nargs = 2;
      break;

    case MULTI_ARG_2_DI_IMM:
    case MULTI_ARG_2_SI_IMM:
    case MULTI_ARG_2_HI_IMM:
    case MULTI_ARG_2_QI_IMM:
      nargs = 2;
      last_arg_constant = true;
      break;

    case MULTI_ARG_1_SF:
    case MULTI_ARG_1_DF:
    case MULTI_ARG_1_SF2:
    case MULTI_ARG_1_DF2:
    case MULTI_ARG_1_DI:
    case MULTI_ARG_1_SI:
    case MULTI_ARG_1_HI:
    case MULTI_ARG_1_QI:
    case MULTI_ARG_1_SI_DI:
    case MULTI_ARG_1_HI_DI:
    case MULTI_ARG_1_HI_SI:
    case MULTI_ARG_1_QI_DI:
    case MULTI_ARG_1_QI_SI:
    case MULTI_ARG_1_QI_HI:
      nargs = 1;
      break;

    case MULTI_ARG_2_DI_CMP:
    case MULTI_ARG_2_SI_CMP:
    case MULTI_ARG_2_HI_CMP:
    case MULTI_ARG_2_QI_CMP:
      nargs = 2;
      comparison_p = true;
      break;

    case MULTI_ARG_2_SF_TF:
    case MULTI_ARG_2_DF_TF:
    case MULTI_ARG_2_DI_TF:
    case MULTI_ARG_2_SI_TF:
    case MULTI_ARG_2_HI_TF:
    case MULTI_ARG_2_QI_TF:
      nargs = 2;
      tf_p = true;
      break;

    default:
      gcc_unreachable ();
    }

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  gcc_assert (nargs <= 4);

  for (i = 0; i < nargs; i++)
    {
      tree arg = CALL_EXPR_ARG (exp, i);
      rtx op = expand_normal (arg);
      int adjust = (comparison_p) ? 1 : 0;
      enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;

      if (last_arg_constant && i == nargs - 1)
	{
	  if (!insn_data[icode].operand[i + 1].predicate (op, mode))
	    {
	      enum insn_code new_icode = icode;
	      switch (icode)
		{
		case CODE_FOR_xop_vpermil2v2df3:
		case CODE_FOR_xop_vpermil2v4sf3:
		case CODE_FOR_xop_vpermil2v4df3:
		case CODE_FOR_xop_vpermil2v8sf3:
		  error ("the last argument must be a 2-bit immediate");
		  return gen_reg_rtx (tmode);
		case CODE_FOR_xop_rotlv2di3:
		  new_icode = CODE_FOR_rotlv2di3;
		  goto xop_rotl;
		case CODE_FOR_xop_rotlv4si3:
		  new_icode = CODE_FOR_rotlv4si3;
		  goto xop_rotl;
		case CODE_FOR_xop_rotlv8hi3:
		  new_icode = CODE_FOR_rotlv8hi3;
		  goto xop_rotl;
		case CODE_FOR_xop_rotlv16qi3:
		  new_icode = CODE_FOR_rotlv16qi3;
		xop_rotl:
		  if (CONST_INT_P (op))
		    {
		      int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
		      op = GEN_INT (INTVAL (op) & mask);
		      gcc_checking_assert
			(insn_data[icode].operand[i + 1].predicate (op, mode));
		    }
		  else
		    {
		      gcc_checking_assert
			(nargs == 2
			 && insn_data[new_icode].operand[0].mode == tmode
			 && insn_data[new_icode].operand[1].mode == tmode
			 && insn_data[new_icode].operand[2].mode == mode
			 && insn_data[new_icode].operand[0].predicate
			    == insn_data[icode].operand[0].predicate
			 && insn_data[new_icode].operand[1].predicate
			    == insn_data[icode].operand[1].predicate);
		      icode = new_icode;
		    }
		  break;

		default:
		  gcc_unreachable ();
		}
	    }
	}
      else
	{
	  if (VECTOR_MODE_P (mode))
	    op = safe_vector_operand (op, mode);

	  /* If we aren't optimizing, only allow one memory operand to be
	     generated.  */
	  if (memory_operand (op, mode))
	    num_memory++;

	  gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);

	  if (optimize
	      || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
	      || num_memory > 1)
	    op = force_reg (mode, op);
	}

      args[i].op = op;
      args[i].mode = mode;
    }

  switch (nargs)
    {
    case 1:
      pat = GEN_FCN (icode) (target, args[0].op);
      break;

    case 2:
      if (tf_p)
	pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
			       GEN_INT ((int)sub_code));
      else if (! comparison_p)
	pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
      else
	{
	  rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
				       args[0].op,
				       args[1].op);

	  pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
	}
      break;

    case 3:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
      break;

    case 4:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
			     args[2].op, args[3].op);
      break;

    default:
      gcc_unreachable ();
    }

  if (! pat)
    return 0;

  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_args_builtin to take care of scalar unop
   insns with vec_merge.  */

static rtx
ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
				    rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op1, op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[icode].operand[1].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  op1 = op0;
  if (!insn_data[icode].operand[2].predicate (op1, mode0))
    op1 = copy_to_mode_reg (mode0, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_builtin to take care of comparison insns.  */

static rtx
ix86_expand_sse_compare (const struct builtin_description *d,
			 tree exp, rtx target, bool swap)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (swap)
    {
      rtx tmp = gen_reg_rtx (mode1);
      emit_move_insn (tmp, op1);
      op1 = op0;
      op0 = tmp;
    }

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[d->icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[1].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[2].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
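
/* For example (illustrative): SSE has no direct greater-than compare,
   so a builtin such as __builtin_ia32_cmpgtss is registered with its
   SWAP variant and reaches this function with swap set, emitting the
   available less-than pattern with the operands exchanged.  */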
/* Subroutine of ix86_expand_builtin to take care of comi insns.  */

static rtx
ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
		      rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
    {
      rtx tmp = op1;
      op1 = op0;
      op0 = tmp;
    }

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[1].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
			  gen_rtx_STRICT_LOW_PART (VOIDmode, target),
			  gen_rtx_fmt_ee (comparison, QImode,
					  SET_DEST (pat),
					  const0_rtx)));

  return SUBREG_REG (target);
}
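
/* Usage sketch (illustrative, output not verified): a comi builtin
   compiles to comiss/comisd followed by a setcc of the tested flag,
   e.g.

     typedef float v4sf __attribute__ ((vector_size (16)));

     int
     less (v4sf a, v4sf b)
     {
       return __builtin_ia32_comilt (a, b);
     }
*/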
/* Subroutines of ix86_expand_args_builtin to take care of round insns.  */

static rtx
ix86_expand_sse_round (const struct builtin_description *d, tree exp,
		       rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op1, op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;

  if (optimize || target == 0
      || GET_MODE (target) != tmode
      || !insn_data[d->icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  op1 = GEN_INT (d->comparison);

  pat = GEN_FCN (d->icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
static rtx
ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
				     tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;

  if (optimize || target == 0
      || GET_MODE (target) != tmode
      || !insn_data[d->icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  op0 = safe_vector_operand (op0, mode0);
  op1 = safe_vector_operand (op1, mode1);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[1].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = GEN_INT (d->comparison);

  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_builtin to take care of ptest insns.  */

static rtx
ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
		       rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[1].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
			  gen_rtx_STRICT_LOW_PART (VOIDmode, target),
			  gen_rtx_fmt_ee (comparison, QImode,
					  SET_DEST (pat),
					  const0_rtx)));

  return SUBREG_REG (target);
}
/* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns.  */

static rtx
ix86_expand_sse_pcmpestr (const struct builtin_description *d,
			  tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  tree arg2 = CALL_EXPR_ARG (exp, 2);
  tree arg3 = CALL_EXPR_ARG (exp, 3);
  tree arg4 = CALL_EXPR_ARG (exp, 4);
  rtx scratch0, scratch1;
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2 = expand_normal (arg2);
  rtx op3 = expand_normal (arg3);
  rtx op4 = expand_normal (arg4);
  enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;

  tmode0 = insn_data[d->icode].operand[0].mode;
  tmode1 = insn_data[d->icode].operand[1].mode;
  modev2 = insn_data[d->icode].operand[2].mode;
  modei3 = insn_data[d->icode].operand[3].mode;
  modev4 = insn_data[d->icode].operand[4].mode;
  modei5 = insn_data[d->icode].operand[5].mode;
  modeimm = insn_data[d->icode].operand[6].mode;

  if (VECTOR_MODE_P (modev2))
    op0 = safe_vector_operand (op0, modev2);
  if (VECTOR_MODE_P (modev4))
    op2 = safe_vector_operand (op2, modev4);

  if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
    op0 = copy_to_mode_reg (modev2, op0);
  if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
    op1 = copy_to_mode_reg (modei3, op1);
  if ((optimize && !register_operand (op2, modev4))
      || !insn_data[d->icode].operand[4].predicate (op2, modev4))
    op2 = copy_to_mode_reg (modev4, op2);
  if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
    op3 = copy_to_mode_reg (modei5, op3);

  if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
    {
      error ("the fifth argument must be an 8-bit immediate");
      return const0_rtx;
    }

  if (d->code == IX86_BUILTIN_PCMPESTRI128)
    {
      if (optimize || !target
	  || GET_MODE (target) != tmode0
	  || !insn_data[d->icode].operand[0].predicate (target, tmode0))
	target = gen_reg_rtx (tmode0);

      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
    }
  else if (d->code == IX86_BUILTIN_PCMPESTRM128)
    {
      if (optimize || !target
	  || GET_MODE (target) != tmode1
	  || !insn_data[d->icode].operand[1].predicate (target, tmode1))
	target = gen_reg_rtx (tmode1);

      scratch0 = gen_reg_rtx (tmode0);

      pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
    }
  else
    {
      gcc_assert (d->flag);

      scratch0 = gen_reg_rtx (tmode0);
      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
    }

  if (! pat)
    return 0;

  emit_insn (pat);

  if (d->flag)
    {
      target = gen_reg_rtx (SImode);
      emit_move_insn (target, const0_rtx);
      target = gen_rtx_SUBREG (QImode, target, 0);

      emit_insn
	(gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
		      gen_rtx_fmt_ee (EQ, QImode,
				      gen_rtx_REG ((enum machine_mode) d->flag,
						   FLAGS_REG),
				      const0_rtx)));
      return SUBREG_REG (target);
    }
  else
    return target;
}
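
/* Usage sketch (illustrative): the explicit-length string compare can
   be called directly; the fifth argument is the immediate control byte
   (0x0c here selects unsigned bytes with an equal-ordered comparison):

     typedef char v16qi __attribute__ ((vector_size (16)));

     int
     first_match (v16qi a, int la, v16qi b, int lb)
     {
       return __builtin_ia32_pcmpestri128 (a, la, b, lb, 0x0c);
     }
*/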
/* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns.  */

static rtx
ix86_expand_sse_pcmpistr (const struct builtin_description *d,
			  tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  tree arg2 = CALL_EXPR_ARG (exp, 2);
  rtx scratch0, scratch1;
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2 = expand_normal (arg2);
  enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;

  tmode0 = insn_data[d->icode].operand[0].mode;
  tmode1 = insn_data[d->icode].operand[1].mode;
  modev2 = insn_data[d->icode].operand[2].mode;
  modev3 = insn_data[d->icode].operand[3].mode;
  modeimm = insn_data[d->icode].operand[4].mode;

  if (VECTOR_MODE_P (modev2))
    op0 = safe_vector_operand (op0, modev2);
  if (VECTOR_MODE_P (modev3))
    op1 = safe_vector_operand (op1, modev3);

  if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
    op0 = copy_to_mode_reg (modev2, op0);
  if ((optimize && !register_operand (op1, modev3))
      || !insn_data[d->icode].operand[3].predicate (op1, modev3))
    op1 = copy_to_mode_reg (modev3, op1);

  if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
    {
      error ("the third argument must be an 8-bit immediate");
      return const0_rtx;
    }

  if (d->code == IX86_BUILTIN_PCMPISTRI128)
    {
      if (optimize || !target
	  || GET_MODE (target) != tmode0
	  || !insn_data[d->icode].operand[0].predicate (target, tmode0))
	target = gen_reg_rtx (tmode0);

      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
    }
  else if (d->code == IX86_BUILTIN_PCMPISTRM128)
    {
      if (optimize || !target
	  || GET_MODE (target) != tmode1
	  || !insn_data[d->icode].operand[1].predicate (target, tmode1))
	target = gen_reg_rtx (tmode1);

      scratch0 = gen_reg_rtx (tmode0);

      pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
    }
  else
    {
      gcc_assert (d->flag);

      scratch0 = gen_reg_rtx (tmode0);
      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
    }

  if (! pat)
    return 0;

  emit_insn (pat);

  if (d->flag)
    {
      target = gen_reg_rtx (SImode);
      emit_move_insn (target, const0_rtx);
      target = gen_rtx_SUBREG (QImode, target, 0);

      emit_insn
	(gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
		      gen_rtx_fmt_ee (EQ, QImode,
				      gen_rtx_REG ((enum machine_mode) d->flag,
						   FLAGS_REG),
				      const0_rtx)));
      return SUBREG_REG (target);
    }
  else
    return target;
}
/* Subroutine of ix86_expand_builtin to take care of insns with
   variable number of operands.  */

static rtx
ix86_expand_args_builtin (const struct builtin_description *d,
			  tree exp, rtx target)
{
  rtx pat, real_target;
  unsigned int i, nargs;
  unsigned int nargs_constant = 0;
  int num_memory = 0;
  struct
    {
      rtx op;
      enum machine_mode mode;
    } args[4];
  bool last_arg_count = false;
  enum insn_code icode = d->icode;
  const struct insn_data_d *insn_p = &insn_data[icode];
  enum machine_mode tmode = insn_p->operand[0].mode;
  enum machine_mode rmode = VOIDmode;
  bool swap = false;
  enum rtx_code comparison = d->comparison;

  switch ((enum ix86_builtin_func_type) d->flag)
    {
    case V2DF_FTYPE_V2DF_ROUND:
    case V4DF_FTYPE_V4DF_ROUND:
    case V4SF_FTYPE_V4SF_ROUND:
    case V8SF_FTYPE_V8SF_ROUND:
    case V4SI_FTYPE_V4SF_ROUND:
    case V8SI_FTYPE_V8SF_ROUND:
      return ix86_expand_sse_round (d, exp, target);
    case V4SI_FTYPE_V2DF_V2DF_ROUND:
    case V8SI_FTYPE_V4DF_V4DF_ROUND:
      return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
    case INT_FTYPE_V8SF_V8SF_PTEST:
    case INT_FTYPE_V4DI_V4DI_PTEST:
    case INT_FTYPE_V4DF_V4DF_PTEST:
    case INT_FTYPE_V4SF_V4SF_PTEST:
    case INT_FTYPE_V2DI_V2DI_PTEST:
    case INT_FTYPE_V2DF_V2DF_PTEST:
      return ix86_expand_sse_ptest (d, exp, target);
    case FLOAT128_FTYPE_FLOAT128:
    case FLOAT_FTYPE_FLOAT:
    case INT_FTYPE_INT:
    case UINT64_FTYPE_INT:
    case UINT16_FTYPE_UINT16:
    case INT64_FTYPE_INT64:
    case INT64_FTYPE_V4SF:
    case INT64_FTYPE_V2DF:
    case INT_FTYPE_V16QI:
    case INT_FTYPE_V8QI:
    case INT_FTYPE_V8SF:
    case INT_FTYPE_V4DF:
    case INT_FTYPE_V4SF:
    case INT_FTYPE_V2DF:
    case INT_FTYPE_V32QI:
    case V16QI_FTYPE_V16QI:
    case V8SI_FTYPE_V8SF:
    case V8SI_FTYPE_V4SI:
    case V8HI_FTYPE_V8HI:
    case V8HI_FTYPE_V16QI:
    case V8QI_FTYPE_V8QI:
    case V8SF_FTYPE_V8SF:
    case V8SF_FTYPE_V8SI:
    case V8SF_FTYPE_V4SF:
    case V8SF_FTYPE_V8HI:
    case V4SI_FTYPE_V4SI:
    case V4SI_FTYPE_V16QI:
    case V4SI_FTYPE_V4SF:
    case V4SI_FTYPE_V8SI:
    case V4SI_FTYPE_V8HI:
    case V4SI_FTYPE_V4DF:
    case V4SI_FTYPE_V2DF:
    case V4HI_FTYPE_V4HI:
    case V4DF_FTYPE_V4DF:
    case V4DF_FTYPE_V4SI:
    case V4DF_FTYPE_V4SF:
    case V4DF_FTYPE_V2DF:
    case V4SF_FTYPE_V4SF:
    case V4SF_FTYPE_V4SI:
    case V4SF_FTYPE_V8SF:
    case V4SF_FTYPE_V4DF:
    case V4SF_FTYPE_V8HI:
    case V4SF_FTYPE_V2DF:
    case V2DI_FTYPE_V2DI:
    case V2DI_FTYPE_V16QI:
    case V2DI_FTYPE_V8HI:
    case V2DI_FTYPE_V4SI:
    case V2DF_FTYPE_V2DF:
    case V2DF_FTYPE_V4SI:
    case V2DF_FTYPE_V4DF:
    case V2DF_FTYPE_V4SF:
    case V2DF_FTYPE_V2SI:
    case V2SI_FTYPE_V2SI:
    case V2SI_FTYPE_V4SF:
    case V2SI_FTYPE_V2SF:
    case V2SI_FTYPE_V2DF:
    case V2SF_FTYPE_V2SF:
    case V2SF_FTYPE_V2SI:
    case V32QI_FTYPE_V32QI:
    case V32QI_FTYPE_V16QI:
    case V16HI_FTYPE_V16HI:
    case V16HI_FTYPE_V8HI:
    case V8SI_FTYPE_V8SI:
    case V16HI_FTYPE_V16QI:
    case V8SI_FTYPE_V16QI:
    case V4DI_FTYPE_V16QI:
    case V8SI_FTYPE_V8HI:
    case V4DI_FTYPE_V8HI:
    case V4DI_FTYPE_V4SI:
    case V4DI_FTYPE_V2DI:
      nargs = 1;
      break;
    case V4SF_FTYPE_V4SF_VEC_MERGE:
    case V2DF_FTYPE_V2DF_VEC_MERGE:
      return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
    case FLOAT128_FTYPE_FLOAT128_FLOAT128:
    case V16QI_FTYPE_V16QI_V16QI:
    case V16QI_FTYPE_V8HI_V8HI:
    case V8QI_FTYPE_V8QI_V8QI:
    case V8QI_FTYPE_V4HI_V4HI:
    case V8HI_FTYPE_V8HI_V8HI:
    case V8HI_FTYPE_V16QI_V16QI:
    case V8HI_FTYPE_V4SI_V4SI:
    case V8SF_FTYPE_V8SF_V8SF:
    case V8SF_FTYPE_V8SF_V8SI:
    case V4SI_FTYPE_V4SI_V4SI:
    case V4SI_FTYPE_V8HI_V8HI:
    case V4SI_FTYPE_V4SF_V4SF:
    case V4SI_FTYPE_V2DF_V2DF:
    case V4HI_FTYPE_V4HI_V4HI:
    case V4HI_FTYPE_V8QI_V8QI:
    case V4HI_FTYPE_V2SI_V2SI:
    case V4DF_FTYPE_V4DF_V4DF:
    case V4DF_FTYPE_V4DF_V4DI:
    case V4SF_FTYPE_V4SF_V4SF:
    case V4SF_FTYPE_V4SF_V4SI:
    case V4SF_FTYPE_V4SF_V2SI:
    case V4SF_FTYPE_V4SF_V2DF:
    case V4SF_FTYPE_V4SF_DI:
    case V4SF_FTYPE_V4SF_SI:
    case V2DI_FTYPE_V2DI_V2DI:
    case V2DI_FTYPE_V16QI_V16QI:
    case V2DI_FTYPE_V4SI_V4SI:
    case V2DI_FTYPE_V2DI_V16QI:
    case V2DI_FTYPE_V2DF_V2DF:
    case V2SI_FTYPE_V2SI_V2SI:
    case V2SI_FTYPE_V4HI_V4HI:
    case V2SI_FTYPE_V2SF_V2SF:
    case V2DF_FTYPE_V2DF_V2DF:
    case V2DF_FTYPE_V2DF_V4SF:
    case V2DF_FTYPE_V2DF_V2DI:
    case V2DF_FTYPE_V2DF_DI:
    case V2DF_FTYPE_V2DF_SI:
    case V2SF_FTYPE_V2SF_V2SF:
    case V1DI_FTYPE_V1DI_V1DI:
    case V1DI_FTYPE_V8QI_V8QI:
    case V1DI_FTYPE_V2SI_V2SI:
28437 case V32QI_FTYPE_V16HI_V16HI:
28438 case V16HI_FTYPE_V8SI_V8SI:
28439 case V32QI_FTYPE_V32QI_V32QI:
28440 case V16HI_FTYPE_V32QI_V32QI:
28441 case V16HI_FTYPE_V16HI_V16HI:
28442 case V8SI_FTYPE_V4DF_V4DF:
28443 case V8SI_FTYPE_V8SI_V8SI:
28444 case V8SI_FTYPE_V16HI_V16HI:
28445 case V4DI_FTYPE_V4DI_V4DI:
28446 case V4DI_FTYPE_V8SI_V8SI:
28447 if (comparison == UNKNOWN)
28448 return ix86_expand_binop_builtin (icode, exp, target);
28451 case V4SF_FTYPE_V4SF_V4SF_SWAP:
28452 case V2DF_FTYPE_V2DF_V2DF_SWAP:
28453 gcc_assert (comparison != UNKNOWN);
28457 case V16HI_FTYPE_V16HI_V8HI_COUNT:
28458 case V16HI_FTYPE_V16HI_SI_COUNT:
28459 case V8SI_FTYPE_V8SI_V4SI_COUNT:
28460 case V8SI_FTYPE_V8SI_SI_COUNT:
28461 case V4DI_FTYPE_V4DI_V2DI_COUNT:
28462 case V4DI_FTYPE_V4DI_INT_COUNT:
28463 case V8HI_FTYPE_V8HI_V8HI_COUNT:
28464 case V8HI_FTYPE_V8HI_SI_COUNT:
28465 case V4SI_FTYPE_V4SI_V4SI_COUNT:
28466 case V4SI_FTYPE_V4SI_SI_COUNT:
28467 case V4HI_FTYPE_V4HI_V4HI_COUNT:
28468 case V4HI_FTYPE_V4HI_SI_COUNT:
28469 case V2DI_FTYPE_V2DI_V2DI_COUNT:
28470 case V2DI_FTYPE_V2DI_SI_COUNT:
28471 case V2SI_FTYPE_V2SI_V2SI_COUNT:
28472 case V2SI_FTYPE_V2SI_SI_COUNT:
28473 case V1DI_FTYPE_V1DI_V1DI_COUNT:
28474 case V1DI_FTYPE_V1DI_SI_COUNT:
28476 last_arg_count = true;
28478 case UINT64_FTYPE_UINT64_UINT64:
28479 case UINT_FTYPE_UINT_UINT:
28480 case UINT_FTYPE_UINT_USHORT:
28481 case UINT_FTYPE_UINT_UCHAR:
28482 case UINT16_FTYPE_UINT16_INT:
28483 case UINT8_FTYPE_UINT8_INT:
28486 case V2DI_FTYPE_V2DI_INT_CONVERT:
28489 nargs_constant = 1;
28491 case V4DI_FTYPE_V4DI_INT_CONVERT:
28494 nargs_constant = 1;
28496 case V8HI_FTYPE_V8HI_INT:
28497 case V8HI_FTYPE_V8SF_INT:
28498 case V8HI_FTYPE_V4SF_INT:
28499 case V8SF_FTYPE_V8SF_INT:
28500 case V4SI_FTYPE_V4SI_INT:
28501 case V4SI_FTYPE_V8SI_INT:
28502 case V4HI_FTYPE_V4HI_INT:
28503 case V4DF_FTYPE_V4DF_INT:
28504 case V4SF_FTYPE_V4SF_INT:
28505 case V4SF_FTYPE_V8SF_INT:
28506 case V2DI_FTYPE_V2DI_INT:
28507 case V2DF_FTYPE_V2DF_INT:
28508 case V2DF_FTYPE_V4DF_INT:
28509 case V16HI_FTYPE_V16HI_INT:
28510 case V8SI_FTYPE_V8SI_INT:
28511 case V4DI_FTYPE_V4DI_INT:
28512 case V2DI_FTYPE_V4DI_INT:
28514 nargs_constant = 1;
28516 case V16QI_FTYPE_V16QI_V16QI_V16QI:
28517 case V8SF_FTYPE_V8SF_V8SF_V8SF:
28518 case V4DF_FTYPE_V4DF_V4DF_V4DF:
28519 case V4SF_FTYPE_V4SF_V4SF_V4SF:
28520 case V2DF_FTYPE_V2DF_V2DF_V2DF:
28521 case V32QI_FTYPE_V32QI_V32QI_V32QI:
28524 case V32QI_FTYPE_V32QI_V32QI_INT:
28525 case V16HI_FTYPE_V16HI_V16HI_INT:
28526 case V16QI_FTYPE_V16QI_V16QI_INT:
28527 case V4DI_FTYPE_V4DI_V4DI_INT:
28528 case V8HI_FTYPE_V8HI_V8HI_INT:
28529 case V8SI_FTYPE_V8SI_V8SI_INT:
28530 case V8SI_FTYPE_V8SI_V4SI_INT:
28531 case V8SF_FTYPE_V8SF_V8SF_INT:
28532 case V8SF_FTYPE_V8SF_V4SF_INT:
28533 case V4SI_FTYPE_V4SI_V4SI_INT:
28534 case V4DF_FTYPE_V4DF_V4DF_INT:
28535 case V4DF_FTYPE_V4DF_V2DF_INT:
28536 case V4SF_FTYPE_V4SF_V4SF_INT:
28537 case V2DI_FTYPE_V2DI_V2DI_INT:
28538 case V4DI_FTYPE_V4DI_V2DI_INT:
28539 case V2DF_FTYPE_V2DF_V2DF_INT:
28541 nargs_constant = 1;
28543 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
28546 nargs_constant = 1;
28548 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
28551 nargs_constant = 1;
28553 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
28556 nargs_constant = 1;
28558 case V2DI_FTYPE_V2DI_UINT_UINT:
28560 nargs_constant = 2;
28562 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
28563 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
28564 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
28565 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
28567 nargs_constant = 1;
28569 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
28571 nargs_constant = 2;
28574 gcc_unreachable ();
28577 gcc_assert (nargs <= ARRAY_SIZE (args));
28579 if (comparison != UNKNOWN)
28581 gcc_assert (nargs == 2);
28582 return ix86_expand_sse_compare (d, exp, target, swap);
28585 if (rmode == VOIDmode || rmode == tmode)
28589 || GET_MODE (target) != tmode
28590 || !insn_p->operand[0].predicate (target, tmode))
28591 target = gen_reg_rtx (tmode);
28592 real_target = target;
28596 target = gen_reg_rtx (rmode);
28597 real_target = simplify_gen_subreg (tmode, target, rmode, 0);
28600 for (i = 0; i < nargs; i++)
28602 tree arg = CALL_EXPR_ARG (exp, i);
28603 rtx op = expand_normal (arg);
28604 enum machine_mode mode = insn_p->operand[i + 1].mode;
28605 bool match = insn_p->operand[i + 1].predicate (op, mode);
28607 if (last_arg_count && (i + 1) == nargs)
28609 /* SIMD shift insns take either an 8-bit immediate or a
28610 register as the count, but the builtin functions take an int
28611 as the count. If the count doesn't match, we put it in a register. */
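/* E.g. (illustration): __builtin_ia32_psllwi128, behind _mm_slli_epi16,
   is registered as V8HI_FTYPE_V8HI_SI_COUNT; the intrinsic passes an
   int count, and when it is not a valid immediate the code below
   narrows it and, failing the predicate, copies it into a register. */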
28614 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
28615 if (!insn_p->operand[i + 1].predicate (op, mode))
28616 op = copy_to_reg (op);
28619 else if ((nargs - i) <= nargs_constant)
28624 case CODE_FOR_avx2_inserti128:
28625 case CODE_FOR_avx2_extracti128:
28626 error ("the last argument must be a 1-bit immediate");
28629 case CODE_FOR_sse4_1_roundsd:
28630 case CODE_FOR_sse4_1_roundss:
28632 case CODE_FOR_sse4_1_roundpd:
28633 case CODE_FOR_sse4_1_roundps:
28634 case CODE_FOR_avx_roundpd256:
28635 case CODE_FOR_avx_roundps256:
28637 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
28638 case CODE_FOR_sse4_1_roundps_sfix:
28639 case CODE_FOR_avx_roundpd_vec_pack_sfix256:
28640 case CODE_FOR_avx_roundps_sfix256:
28642 case CODE_FOR_sse4_1_blendps:
28643 case CODE_FOR_avx_blendpd256:
28644 case CODE_FOR_avx_vpermilv4df:
28645 error ("the last argument must be a 4-bit immediate");
28648 case CODE_FOR_sse4_1_blendpd:
28649 case CODE_FOR_avx_vpermilv2df:
28650 case CODE_FOR_xop_vpermil2v2df3:
28651 case CODE_FOR_xop_vpermil2v4sf3:
28652 case CODE_FOR_xop_vpermil2v4df3:
28653 case CODE_FOR_xop_vpermil2v8sf3:
28654 error ("the last argument must be a 2-bit immediate");
28657 case CODE_FOR_avx_vextractf128v4df:
28658 case CODE_FOR_avx_vextractf128v8sf:
28659 case CODE_FOR_avx_vextractf128v8si:
28660 case CODE_FOR_avx_vinsertf128v4df:
28661 case CODE_FOR_avx_vinsertf128v8sf:
28662 case CODE_FOR_avx_vinsertf128v8si:
28663 error ("the last argument must be a 1-bit immediate");
28666 case CODE_FOR_avx_vmcmpv2df3:
28667 case CODE_FOR_avx_vmcmpv4sf3:
28668 case CODE_FOR_avx_cmpv2df3:
28669 case CODE_FOR_avx_cmpv4sf3:
28670 case CODE_FOR_avx_cmpv4df3:
28671 case CODE_FOR_avx_cmpv8sf3:
28672 error ("the last argument must be a 5-bit immediate");
28676 switch (nargs_constant)
28679 if ((nargs - i) == nargs_constant)
28681 error ("the next to last argument must be an 8-bit immediate");
28685 error ("the last argument must be an 8-bit immediate");
28688 gcc_unreachable ();
28695 if (VECTOR_MODE_P (mode))
28696 op = safe_vector_operand (op, mode);
28698 /* If we aren't optimizing, only allow one memory operand to be generated. */
28700 if (memory_operand (op, mode))
28703 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
28705 if (optimize || !match || num_memory > 1)
28706 op = copy_to_mode_reg (mode, op);
28710 op = copy_to_reg (op);
28711 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
28716 args[i].mode = mode;
28722 pat = GEN_FCN (icode) (real_target, args[0].op);
28725 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
28728 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
28732 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
28733 args[2].op, args[3].op);
28736 gcc_unreachable ();
28746 /* Subroutine of ix86_expand_builtin to take care of special insns
28747 with a variable number of operands. */
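/* E.g. (illustration): __builtin_ia32_movntps, behind _mm_stream_ps,
   is registered as VOID_FTYPE_PFLOAT_V4SF and goes down the
   klass == store path below. */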
28750 ix86_expand_special_args_builtin (const struct builtin_description *d,
28751 tree exp, rtx target)
28755 unsigned int i, nargs, arg_adjust, memory;
28759 enum machine_mode mode;
28761 enum insn_code icode = d->icode;
28762 bool last_arg_constant = false;
28763 const struct insn_data_d *insn_p = &insn_data[icode];
28764 enum machine_mode tmode = insn_p->operand[0].mode;
28765 enum { load, store } klass;
28767 switch ((enum ix86_builtin_func_type) d->flag)
28769 case VOID_FTYPE_VOID:
28770 if (icode == CODE_FOR_avx_vzeroupper)
28771 target = GEN_INT (vzeroupper_intrinsic);
28772 emit_insn (GEN_FCN (icode) (target));
28774 case VOID_FTYPE_UINT64:
28775 case VOID_FTYPE_UNSIGNED:
28780 case UINT64_FTYPE_VOID:
28781 case UNSIGNED_FTYPE_VOID:
28786 case UINT64_FTYPE_PUNSIGNED:
28787 case V2DI_FTYPE_PV2DI:
28788 case V4DI_FTYPE_PV4DI:
28789 case V32QI_FTYPE_PCCHAR:
28790 case V16QI_FTYPE_PCCHAR:
28791 case V8SF_FTYPE_PCV4SF:
28792 case V8SF_FTYPE_PCFLOAT:
28793 case V4SF_FTYPE_PCFLOAT:
28794 case V4DF_FTYPE_PCV2DF:
28795 case V4DF_FTYPE_PCDOUBLE:
28796 case V2DF_FTYPE_PCDOUBLE:
28797 case VOID_FTYPE_PVOID:
28802 case VOID_FTYPE_PV2SF_V4SF:
28803 case VOID_FTYPE_PV4DI_V4DI:
28804 case VOID_FTYPE_PV2DI_V2DI:
28805 case VOID_FTYPE_PCHAR_V32QI:
28806 case VOID_FTYPE_PCHAR_V16QI:
28807 case VOID_FTYPE_PFLOAT_V8SF:
28808 case VOID_FTYPE_PFLOAT_V4SF:
28809 case VOID_FTYPE_PDOUBLE_V4DF:
28810 case VOID_FTYPE_PDOUBLE_V2DF:
28811 case VOID_FTYPE_PLONGLONG_LONGLONG:
28812 case VOID_FTYPE_PULONGLONG_ULONGLONG:
28813 case VOID_FTYPE_PINT_INT:
28816 /* Reserve memory operand for target. */
28817 memory = ARRAY_SIZE (args);
28819 case V4SF_FTYPE_V4SF_PCV2SF:
28820 case V2DF_FTYPE_V2DF_PCDOUBLE:
28825 case V8SF_FTYPE_PCV8SF_V8SI:
28826 case V4DF_FTYPE_PCV4DF_V4DI:
28827 case V4SF_FTYPE_PCV4SF_V4SI:
28828 case V2DF_FTYPE_PCV2DF_V2DI:
28829 case V8SI_FTYPE_PCV8SI_V8SI:
28830 case V4DI_FTYPE_PCV4DI_V4DI:
28831 case V4SI_FTYPE_PCV4SI_V4SI:
28832 case V2DI_FTYPE_PCV2DI_V2DI:
28837 case VOID_FTYPE_PV8SF_V8SI_V8SF:
28838 case VOID_FTYPE_PV4DF_V4DI_V4DF:
28839 case VOID_FTYPE_PV4SF_V4SI_V4SF:
28840 case VOID_FTYPE_PV2DF_V2DI_V2DF:
28841 case VOID_FTYPE_PV8SI_V8SI_V8SI:
28842 case VOID_FTYPE_PV4DI_V4DI_V4DI:
28843 case VOID_FTYPE_PV4SI_V4SI_V4SI:
28844 case VOID_FTYPE_PV2DI_V2DI_V2DI:
28847 /* Reserve memory operand for target. */
28848 memory = ARRAY_SIZE (args);
28850 case VOID_FTYPE_UINT_UINT_UINT:
28851 case VOID_FTYPE_UINT64_UINT_UINT:
28852 case UCHAR_FTYPE_UINT_UINT_UINT:
28853 case UCHAR_FTYPE_UINT64_UINT_UINT:
28856 memory = ARRAY_SIZE (args);
28857 last_arg_constant = true;
28860 gcc_unreachable ();
28863 gcc_assert (nargs <= ARRAY_SIZE (args));
28865 if (klass == store)
28867 arg = CALL_EXPR_ARG (exp, 0);
28868 op = expand_normal (arg);
28869 gcc_assert (target == 0);
28872 if (GET_MODE (op) != Pmode)
28873 op = convert_to_mode (Pmode, op, 1);
28874 target = gen_rtx_MEM (tmode, force_reg (Pmode, op));
28877 target = force_reg (tmode, op);
28885 || GET_MODE (target) != tmode
28886 || !insn_p->operand[0].predicate (target, tmode))
28887 target = gen_reg_rtx (tmode);
28890 for (i = 0; i < nargs; i++)
28892 enum machine_mode mode = insn_p->operand[i + 1].mode;
28895 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
28896 op = expand_normal (arg);
28897 match = insn_p->operand[i + 1].predicate (op, mode);
28899 if (last_arg_constant && (i + 1) == nargs)
28903 if (icode == CODE_FOR_lwp_lwpvalsi3
28904 || icode == CODE_FOR_lwp_lwpinssi3
28905 || icode == CODE_FOR_lwp_lwpvaldi3
28906 || icode == CODE_FOR_lwp_lwpinsdi3)
28907 error ("the last argument must be a 32-bit immediate");
28909 error ("the last argument must be an 8-bit immediate");
28917 /* This must be the memory operand. */
28918 if (GET_MODE (op) != Pmode)
28919 op = convert_to_mode (Pmode, op, 1);
28920 op = gen_rtx_MEM (mode, force_reg (Pmode, op));
28921 gcc_assert (GET_MODE (op) == mode
28922 || GET_MODE (op) == VOIDmode);
28926 /* This must be a register. */
28927 if (VECTOR_MODE_P (mode))
28928 op = safe_vector_operand (op, mode);
28930 gcc_assert (GET_MODE (op) == mode
28931 || GET_MODE (op) == VOIDmode);
28932 op = copy_to_mode_reg (mode, op);
28937 args[i].mode = mode;
28943 pat = GEN_FCN (icode) (target);
28946 pat = GEN_FCN (icode) (target, args[0].op);
28949 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
28952 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
28955 gcc_unreachable ();
28961 return klass == store ? 0 : target;
28964 /* Return the integer constant in ARG. Constrain it to be in the range
28965 of the subparts of VEC_TYPE; issue an error if not. */
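/* E.g. (illustration): for a __v4sf argument TYPE_VECTOR_SUBPARTS is 4,
   so the selector must be a constant in 0..3, and something like
   __builtin_ia32_vec_ext_v4sf (x, 4) is diagnosed here. */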
28968 get_element_number (tree vec_type, tree arg)
28970 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
28972 if (!host_integerp (arg, 1)
28973 || (elt = tree_low_cst (arg, 1), elt > max))
28975 error ("selector must be an integer constant in the range 0..%wi", max);
28982 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
28983 ix86_expand_vector_init. We DO have language-level syntax for this, in
28984 the form of (type){ init-list }. Except that since we can't place emms
28985 instructions from inside the compiler, we can't allow the use of MMX
28986 registers unless the user explicitly asks for it. So we do *not* define
28987 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
28988 we have builtins invoked by mmintrin.h that give us license to emit
28989 these sorts of instructions. */
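/* E.g. (illustration): mmintrin.h implements _mm_set_pi32 via
   __builtin_ia32_vec_init_v2si, which reaches this code as
   IX86_BUILTIN_VEC_INIT_V2SI. */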
28992 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
28994 enum machine_mode tmode = TYPE_MODE (type);
28995 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
28996 int i, n_elt = GET_MODE_NUNITS (tmode);
28997 rtvec v = rtvec_alloc (n_elt);
28999 gcc_assert (VECTOR_MODE_P (tmode));
29000 gcc_assert (call_expr_nargs (exp) == n_elt);
29002 for (i = 0; i < n_elt; ++i)
29004 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
29005 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
29008 if (!target || !register_operand (target, tmode))
29009 target = gen_reg_rtx (tmode);
29011 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
29015 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
29016 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
29017 had a language-level syntax for referencing vector elements. */
29020 ix86_expand_vec_ext_builtin (tree exp, rtx target)
29022 enum machine_mode tmode, mode0;
29027 arg0 = CALL_EXPR_ARG (exp, 0);
29028 arg1 = CALL_EXPR_ARG (exp, 1);
29030 op0 = expand_normal (arg0);
29031 elt = get_element_number (TREE_TYPE (arg0), arg1);
29033 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
29034 mode0 = TYPE_MODE (TREE_TYPE (arg0));
29035 gcc_assert (VECTOR_MODE_P (mode0));
29037 op0 = force_reg (mode0, op0);
29039 if (optimize || !target || !register_operand (target, tmode))
29040 target = gen_reg_rtx (tmode);
29042 ix86_expand_vector_extract (true, target, op0, elt);
29047 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
29048 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
29049 a language-level syntax for referencing vector elements. */
29052 ix86_expand_vec_set_builtin (tree exp)
29054 enum machine_mode tmode, mode1;
29055 tree arg0, arg1, arg2;
29057 rtx op0, op1, target;
29059 arg0 = CALL_EXPR_ARG (exp, 0);
29060 arg1 = CALL_EXPR_ARG (exp, 1);
29061 arg2 = CALL_EXPR_ARG (exp, 2);
29063 tmode = TYPE_MODE (TREE_TYPE (arg0));
29064 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
29065 gcc_assert (VECTOR_MODE_P (tmode));
29067 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
29068 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
29069 elt = get_element_number (TREE_TYPE (arg0), arg2);
29071 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
29072 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
29074 op0 = force_reg (tmode, op0);
29075 op1 = force_reg (mode1, op1);
29077 /* OP0 is the source of these builtin functions and shouldn't be
29078 modified. Create a copy, use it and return it as target. */
29079 target = gen_reg_rtx (tmode);
29080 emit_move_insn (target, op0);
29081 ix86_expand_vector_set (true, target, op1, elt);
29086 /* Expand an expression EXP that calls a built-in function,
29087 with result going to TARGET if that's convenient
29088 (and in mode MODE if that's convenient).
29089 SUBTARGET may be used as the target for computing one of EXP's operands.
29090 IGNORE is nonzero if the value is to be ignored. */
29093 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
29094 enum machine_mode mode ATTRIBUTE_UNUSED,
29095 int ignore ATTRIBUTE_UNUSED)
29097 const struct builtin_description *d;
29099 enum insn_code icode;
29100 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
29101 tree arg0, arg1, arg2, arg3, arg4;
29102 rtx op0, op1, op2, op3, op4, pat;
29103 enum machine_mode mode0, mode1, mode2, mode3, mode4;
29104 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
29106 /* Determine whether the builtin function is available under the current ISA.
29107 Originally the builtin was not created if it wasn't applicable to the
29108 current ISA based on the command line switches. With function specific
29109 options, we need to check in the context of the function making the call
29110 whether it is supported. */
29111 if (ix86_builtins_isa[fcode].isa
29112 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
29114 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
29115 NULL, (enum fpmath_unit) 0, false);
29118 error ("%qE needs unknown isa option", fndecl);
29121 gcc_assert (opts != NULL);
29122 error ("%qE needs isa option %s", fndecl, opts);
29130 case IX86_BUILTIN_MASKMOVQ:
29131 case IX86_BUILTIN_MASKMOVDQU:
29132 icode = (fcode == IX86_BUILTIN_MASKMOVQ
29133 ? CODE_FOR_mmx_maskmovq
29134 : CODE_FOR_sse2_maskmovdqu);
29135 /* Note the arg order is different from the operand order. */
29136 arg1 = CALL_EXPR_ARG (exp, 0);
29137 arg2 = CALL_EXPR_ARG (exp, 1);
29138 arg0 = CALL_EXPR_ARG (exp, 2);
29139 op0 = expand_normal (arg0);
29140 op1 = expand_normal (arg1);
29141 op2 = expand_normal (arg2);
29142 mode0 = insn_data[icode].operand[0].mode;
29143 mode1 = insn_data[icode].operand[1].mode;
29144 mode2 = insn_data[icode].operand[2].mode;
29146 if (GET_MODE (op0) != Pmode)
29147 op0 = convert_to_mode (Pmode, op0, 1);
29148 op0 = gen_rtx_MEM (mode1, force_reg (Pmode, op0));
29150 if (!insn_data[icode].operand[0].predicate (op0, mode0))
29151 op0 = copy_to_mode_reg (mode0, op0);
29152 if (!insn_data[icode].operand[1].predicate (op1, mode1))
29153 op1 = copy_to_mode_reg (mode1, op1);
29154 if (!insn_data[icode].operand[2].predicate (op2, mode2))
29155 op2 = copy_to_mode_reg (mode2, op2);
29156 pat = GEN_FCN (icode) (op0, op1, op2);
29162 case IX86_BUILTIN_LDMXCSR:
29163 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
29164 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
29165 emit_move_insn (target, op0);
29166 emit_insn (gen_sse_ldmxcsr (target));
29169 case IX86_BUILTIN_STMXCSR:
29170 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
29171 emit_insn (gen_sse_stmxcsr (target));
29172 return copy_to_mode_reg (SImode, target);
29174 case IX86_BUILTIN_CLFLUSH:
29175 arg0 = CALL_EXPR_ARG (exp, 0);
29176 op0 = expand_normal (arg0);
29177 icode = CODE_FOR_sse2_clflush;
29178 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
29180 if (GET_MODE (op0) != Pmode)
29181 op0 = convert_to_mode (Pmode, op0, 1);
29182 op0 = force_reg (Pmode, op0);
29185 emit_insn (gen_sse2_clflush (op0));
29188 case IX86_BUILTIN_MONITOR:
29189 arg0 = CALL_EXPR_ARG (exp, 0);
29190 arg1 = CALL_EXPR_ARG (exp, 1);
29191 arg2 = CALL_EXPR_ARG (exp, 2);
29192 op0 = expand_normal (arg0);
29193 op1 = expand_normal (arg1);
29194 op2 = expand_normal (arg2);
29197 if (GET_MODE (op0) != Pmode)
29198 op0 = convert_to_mode (Pmode, op0, 1);
29199 op0 = force_reg (Pmode, op0);
29202 op1 = copy_to_mode_reg (SImode, op1);
29204 op2 = copy_to_mode_reg (SImode, op2);
29205 emit_insn (ix86_gen_monitor (op0, op1, op2));
29208 case IX86_BUILTIN_MWAIT:
29209 arg0 = CALL_EXPR_ARG (exp, 0);
29210 arg1 = CALL_EXPR_ARG (exp, 1);
29211 op0 = expand_normal (arg0);
29212 op1 = expand_normal (arg1);
29214 op0 = copy_to_mode_reg (SImode, op0);
29216 op1 = copy_to_mode_reg (SImode, op1);
29217 emit_insn (gen_sse3_mwait (op0, op1));
29220 case IX86_BUILTIN_VEC_INIT_V2SI:
29221 case IX86_BUILTIN_VEC_INIT_V4HI:
29222 case IX86_BUILTIN_VEC_INIT_V8QI:
29223 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
29225 case IX86_BUILTIN_VEC_EXT_V2DF:
29226 case IX86_BUILTIN_VEC_EXT_V2DI:
29227 case IX86_BUILTIN_VEC_EXT_V4SF:
29228 case IX86_BUILTIN_VEC_EXT_V4SI:
29229 case IX86_BUILTIN_VEC_EXT_V8HI:
29230 case IX86_BUILTIN_VEC_EXT_V2SI:
29231 case IX86_BUILTIN_VEC_EXT_V4HI:
29232 case IX86_BUILTIN_VEC_EXT_V16QI:
29233 return ix86_expand_vec_ext_builtin (exp, target);
29235 case IX86_BUILTIN_VEC_SET_V2DI:
29236 case IX86_BUILTIN_VEC_SET_V4SF:
29237 case IX86_BUILTIN_VEC_SET_V4SI:
29238 case IX86_BUILTIN_VEC_SET_V8HI:
29239 case IX86_BUILTIN_VEC_SET_V4HI:
29240 case IX86_BUILTIN_VEC_SET_V16QI:
29241 return ix86_expand_vec_set_builtin (exp);
29243 case IX86_BUILTIN_INFQ:
29244 case IX86_BUILTIN_HUGE_VALQ:
29246 REAL_VALUE_TYPE inf;
29250 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
29252 tmp = validize_mem (force_const_mem (mode, tmp));
29255 target = gen_reg_rtx (mode);
29257 emit_move_insn (target, tmp);
29261 case IX86_BUILTIN_LLWPCB:
29262 arg0 = CALL_EXPR_ARG (exp, 0);
29263 op0 = expand_normal (arg0);
29264 icode = CODE_FOR_lwp_llwpcb;
29265 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
29267 if (GET_MODE (op0) != Pmode)
29268 op0 = convert_to_mode (Pmode, op0, 1);
29269 op0 = force_reg (Pmode, op0);
29271 emit_insn (gen_lwp_llwpcb (op0));
29274 case IX86_BUILTIN_SLWPCB:
29275 icode = CODE_FOR_lwp_slwpcb;
29277 || !insn_data[icode].operand[0].predicate (target, Pmode))
29278 target = gen_reg_rtx (Pmode);
29279 emit_insn (gen_lwp_slwpcb (target));
29282 case IX86_BUILTIN_BEXTRI32:
29283 case IX86_BUILTIN_BEXTRI64:
29284 arg0 = CALL_EXPR_ARG (exp, 0);
29285 arg1 = CALL_EXPR_ARG (exp, 1);
29286 op0 = expand_normal (arg0);
29287 op1 = expand_normal (arg1);
29288 icode = (fcode == IX86_BUILTIN_BEXTRI32
29289 ? CODE_FOR_tbm_bextri_si
29290 : CODE_FOR_tbm_bextri_di);
29291 if (!CONST_INT_P (op1))
29293 error ("the last argument must be an immediate");
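/* The BEXTRI control word packs the starting bit position in bits 7:0
   and the field length in bits 15:8; e.g. (illustrative values) a
   control of 0x0804 extracts an 8-bit field starting at bit 4. */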
29298 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
29299 unsigned char lsb_index = INTVAL (op1) & 0xFF;
29300 op1 = GEN_INT (length);
29301 op2 = GEN_INT (lsb_index);
29302 pat = GEN_FCN (icode) (target, op0, op1, op2);
29308 case IX86_BUILTIN_RDRAND16_STEP:
29309 icode = CODE_FOR_rdrandhi_1;
29313 case IX86_BUILTIN_RDRAND32_STEP:
29314 icode = CODE_FOR_rdrandsi_1;
29318 case IX86_BUILTIN_RDRAND64_STEP:
29319 icode = CODE_FOR_rdranddi_1;
29323 op0 = gen_reg_rtx (mode0);
29324 emit_insn (GEN_FCN (icode) (op0));
29326 arg0 = CALL_EXPR_ARG (exp, 0);
29327 op1 = expand_normal (arg0);
29328 if (!address_operand (op1, VOIDmode))
29330 op1 = convert_memory_address (Pmode, op1);
29331 op1 = copy_addr_to_reg (op1);
29333 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
29335 op1 = gen_reg_rtx (SImode);
29336 emit_move_insn (op1, CONST1_RTX (SImode));
29338 /* Emit SImode conditional move. */
29339 if (mode0 == HImode)
29341 op2 = gen_reg_rtx (SImode);
29342 emit_insn (gen_zero_extendhisi2 (op2, op0));
29344 else if (mode0 == SImode)
29347 op2 = gen_rtx_SUBREG (SImode, op0, 0);
29350 target = gen_reg_rtx (SImode);
29352 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
29354 emit_insn (gen_rtx_SET (VOIDmode, target,
29355 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
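/* Sketch of the resulting contract, using the immintrin.h intrinsic:

     unsigned int r;
     if (_rdrand32_step (&r))
       ...use r...;

   i.e. the builtin stores the random value and returns 1 when the
   hardware sets CF, or returns 0 on failure. */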
29358 case IX86_BUILTIN_GATHERSIV2DF:
29359 icode = CODE_FOR_avx2_gathersiv2df;
29361 case IX86_BUILTIN_GATHERSIV4DF:
29362 icode = CODE_FOR_avx2_gathersiv4df;
29364 case IX86_BUILTIN_GATHERDIV2DF:
29365 icode = CODE_FOR_avx2_gatherdiv2df;
29367 case IX86_BUILTIN_GATHERDIV4DF:
29368 icode = CODE_FOR_avx2_gatherdiv4df;
29370 case IX86_BUILTIN_GATHERSIV4SF:
29371 icode = CODE_FOR_avx2_gathersiv4sf;
29373 case IX86_BUILTIN_GATHERSIV8SF:
29374 icode = CODE_FOR_avx2_gathersiv8sf;
29376 case IX86_BUILTIN_GATHERDIV4SF:
29377 icode = CODE_FOR_avx2_gatherdiv4sf;
29379 case IX86_BUILTIN_GATHERDIV8SF:
29380 icode = CODE_FOR_avx2_gatherdiv8sf;
29382 case IX86_BUILTIN_GATHERSIV2DI:
29383 icode = CODE_FOR_avx2_gathersiv2di;
29385 case IX86_BUILTIN_GATHERSIV4DI:
29386 icode = CODE_FOR_avx2_gathersiv4di;
29388 case IX86_BUILTIN_GATHERDIV2DI:
29389 icode = CODE_FOR_avx2_gatherdiv2di;
29391 case IX86_BUILTIN_GATHERDIV4DI:
29392 icode = CODE_FOR_avx2_gatherdiv4di;
29394 case IX86_BUILTIN_GATHERSIV4SI:
29395 icode = CODE_FOR_avx2_gathersiv4si;
29397 case IX86_BUILTIN_GATHERSIV8SI:
29398 icode = CODE_FOR_avx2_gathersiv8si;
29400 case IX86_BUILTIN_GATHERDIV4SI:
29401 icode = CODE_FOR_avx2_gatherdiv4si;
29403 case IX86_BUILTIN_GATHERDIV8SI:
29404 icode = CODE_FOR_avx2_gatherdiv8si;
29406 case IX86_BUILTIN_GATHERALTSIV4DF:
29407 icode = CODE_FOR_avx2_gathersiv4df;
29409 case IX86_BUILTIN_GATHERALTDIV8SF:
29410 icode = CODE_FOR_avx2_gatherdiv8sf;
29412 case IX86_BUILTIN_GATHERALTSIV4DI:
29413 icode = CODE_FOR_avx2_gathersiv4di;
29415 case IX86_BUILTIN_GATHERALTDIV8SI:
29416 icode = CODE_FOR_avx2_gatherdiv8si;
29420 arg0 = CALL_EXPR_ARG (exp, 0);
29421 arg1 = CALL_EXPR_ARG (exp, 1);
29422 arg2 = CALL_EXPR_ARG (exp, 2);
29423 arg3 = CALL_EXPR_ARG (exp, 3);
29424 arg4 = CALL_EXPR_ARG (exp, 4);
29425 op0 = expand_normal (arg0);
29426 op1 = expand_normal (arg1);
29427 op2 = expand_normal (arg2);
29428 op3 = expand_normal (arg3);
29429 op4 = expand_normal (arg4);
29430 /* Note the arg order is different from the operand order. */
29431 mode0 = insn_data[icode].operand[1].mode;
29432 mode2 = insn_data[icode].operand[3].mode;
29433 mode3 = insn_data[icode].operand[4].mode;
29434 mode4 = insn_data[icode].operand[5].mode;
29436 if (target == NULL_RTX
29437 || GET_MODE (target) != insn_data[icode].operand[0].mode)
29438 subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
29440 subtarget = target;
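/* The GATHERALT* variants reuse the plain gather patterns with an
   index vector wider than the data vector: e.g. GATHERALTSIV4DF
   gathers V4DF through a V8SI index of which only the low four
   elements matter, so the low half of the index is extracted below. */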
29442 if (fcode == IX86_BUILTIN_GATHERALTSIV4DF
29443 || fcode == IX86_BUILTIN_GATHERALTSIV4DI)
29445 rtx half = gen_reg_rtx (V4SImode);
29446 if (!nonimmediate_operand (op2, V8SImode))
29447 op2 = copy_to_mode_reg (V8SImode, op2);
29448 emit_insn (gen_vec_extract_lo_v8si (half, op2));
29451 else if (fcode == IX86_BUILTIN_GATHERALTDIV8SF
29452 || fcode == IX86_BUILTIN_GATHERALTDIV8SI)
29454 rtx (*gen) (rtx, rtx);
29455 rtx half = gen_reg_rtx (mode0);
29456 if (mode0 == V4SFmode)
29457 gen = gen_vec_extract_lo_v8sf;
29459 gen = gen_vec_extract_lo_v8si;
29460 if (!nonimmediate_operand (op0, GET_MODE (op0)))
29461 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
29462 emit_insn (gen (half, op0));
29464 if (!nonimmediate_operand (op3, GET_MODE (op3)))
29465 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
29466 emit_insn (gen (half, op3));
29470 /* Force memory operand only with base register here. But we
29471 don't want to do it on memory operands for other builtin functions. */
29473 if (GET_MODE (op1) != Pmode)
29474 op1 = convert_to_mode (Pmode, op1, 1);
29475 op1 = force_reg (Pmode, op1);
29477 if (!insn_data[icode].operand[1].predicate (op0, mode0))
29478 op0 = copy_to_mode_reg (mode0, op0);
29479 if (!insn_data[icode].operand[2].predicate (op1, Pmode))
29480 op1 = copy_to_mode_reg (Pmode, op1);
29481 if (!insn_data[icode].operand[3].predicate (op2, mode2))
29482 op2 = copy_to_mode_reg (mode2, op2);
29483 if (!insn_data[icode].operand[4].predicate (op3, mode3))
29484 op3 = copy_to_mode_reg (mode3, op3);
29485 if (!insn_data[icode].operand[5].predicate (op4, mode4))
29487 error ("the last argument must be scale 1, 2, 4 or 8");
29491 /* Optimize. If the mask is known to have all high bits set,
29492 replace op0 with pc_rtx to signal that the instruction
29493 overwrites the whole destination and doesn't use its
29494 previous contents. */
29497 if (TREE_CODE (arg3) == VECTOR_CST)
29500 unsigned int negative = 0;
29501 for (elt = TREE_VECTOR_CST_ELTS (arg3);
29502 elt; elt = TREE_CHAIN (elt))
29504 tree cst = TREE_VALUE (elt);
29505 if (TREE_CODE (cst) == INTEGER_CST
29506 && tree_int_cst_sign_bit (cst))
29508 else if (TREE_CODE (cst) == REAL_CST
29509 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
29512 if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
29515 else if (TREE_CODE (arg3) == SSA_NAME)
29517 /* Also recognize when the mask is like:
29518 __v2df src = _mm_setzero_pd ();
29519 __v2df mask = _mm_cmpeq_pd (src, src);
29521 __v8sf src = _mm256_setzero_ps ();
29522 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
29523 as that is a cheaper way to load all ones into
29524 a register than having to load a constant from memory. */
29526 gimple def_stmt = SSA_NAME_DEF_STMT (arg3);
29527 if (is_gimple_call (def_stmt))
29529 tree fndecl = gimple_call_fndecl (def_stmt);
29531 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
29532 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
29534 case IX86_BUILTIN_CMPPD:
29535 case IX86_BUILTIN_CMPPS:
29536 case IX86_BUILTIN_CMPPD256:
29537 case IX86_BUILTIN_CMPPS256:
29538 if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
29541 case IX86_BUILTIN_CMPEQPD:
29542 case IX86_BUILTIN_CMPEQPS:
29543 if (initializer_zerop (gimple_call_arg (def_stmt, 0))
29544 && initializer_zerop (gimple_call_arg (def_stmt,
29555 pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
29560 if (fcode == IX86_BUILTIN_GATHERDIV8SF
29561 || fcode == IX86_BUILTIN_GATHERDIV8SI)
29563 enum machine_mode tmode = GET_MODE (subtarget) == V8SFmode
29564 ? V4SFmode : V4SImode;
29565 if (target == NULL_RTX)
29566 target = gen_reg_rtx (tmode);
29567 if (tmode == V4SFmode)
29568 emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
29570 emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
29573 target = subtarget;
29581 for (i = 0, d = bdesc_special_args;
29582 i < ARRAY_SIZE (bdesc_special_args);
29584 if (d->code == fcode)
29585 return ix86_expand_special_args_builtin (d, exp, target);
29587 for (i = 0, d = bdesc_args;
29588 i < ARRAY_SIZE (bdesc_args);
29590 if (d->code == fcode)
29593 case IX86_BUILTIN_FABSQ:
29594 case IX86_BUILTIN_COPYSIGNQ:
29596 /* Emit a normal call if SSE2 isn't available. */
29597 return expand_call (exp, target, ignore);
29599 return ix86_expand_args_builtin (d, exp, target);
29602 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
29603 if (d->code == fcode)
29604 return ix86_expand_sse_comi (d, exp, target);
29606 for (i = 0, d = bdesc_pcmpestr;
29607 i < ARRAY_SIZE (bdesc_pcmpestr);
29609 if (d->code == fcode)
29610 return ix86_expand_sse_pcmpestr (d, exp, target);
29612 for (i = 0, d = bdesc_pcmpistr;
29613 i < ARRAY_SIZE (bdesc_pcmpistr);
29615 if (d->code == fcode)
29616 return ix86_expand_sse_pcmpistr (d, exp, target);
29618 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
29619 if (d->code == fcode)
29620 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
29621 (enum ix86_builtin_func_type)
29622 d->flag, d->comparison);
29624 gcc_unreachable ();
29627 /* Returns a function decl for a vectorized version of the builtin function
29628 with builtin function code FN and the result vector type TYPE, or NULL_TREE
29629 if it is not available. */
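/* E.g. (illustration): a loop such as

     for (i = 0; i < n; i++)
       b[i] = sqrt (a[i]);

   vectorized for V2DF arrives with fn == BUILT_IN_SQRT, DFmode in and
   out, out_n == in_n == 2, and is handed IX86_BUILTIN_SQRTPD below. */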
29632 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
29635 enum machine_mode in_mode, out_mode;
29637 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
29639 if (TREE_CODE (type_out) != VECTOR_TYPE
29640 || TREE_CODE (type_in) != VECTOR_TYPE
29641 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
29644 out_mode = TYPE_MODE (TREE_TYPE (type_out));
29645 out_n = TYPE_VECTOR_SUBPARTS (type_out);
29646 in_mode = TYPE_MODE (TREE_TYPE (type_in));
29647 in_n = TYPE_VECTOR_SUBPARTS (type_in);
29651 case BUILT_IN_SQRT:
29652 if (out_mode == DFmode && in_mode == DFmode)
29654 if (out_n == 2 && in_n == 2)
29655 return ix86_builtins[IX86_BUILTIN_SQRTPD];
29656 else if (out_n == 4 && in_n == 4)
29657 return ix86_builtins[IX86_BUILTIN_SQRTPD256];
29661 case BUILT_IN_SQRTF:
29662 if (out_mode == SFmode && in_mode == SFmode)
29664 if (out_n == 4 && in_n == 4)
29665 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
29666 else if (out_n == 8 && in_n == 8)
29667 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR256];
29671 case BUILT_IN_IFLOOR:
29672 case BUILT_IN_LFLOOR:
29673 case BUILT_IN_LLFLOOR:
29674 /* The round insn does not trap on denormals. */
29675 if (flag_trapping_math || !TARGET_ROUND)
29678 if (out_mode == SImode && in_mode == DFmode)
29680 if (out_n == 4 && in_n == 2)
29681 return ix86_builtins[IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX];
29682 else if (out_n == 8 && in_n == 4)
29683 return ix86_builtins[IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256];
29687 case BUILT_IN_IFLOORF:
29688 case BUILT_IN_LFLOORF:
29689 case BUILT_IN_LLFLOORF:
29690 /* The round insn does not trap on denormals. */
29691 if (flag_trapping_math || !TARGET_ROUND)
29694 if (out_mode == SImode && in_mode == SFmode)
29696 if (out_n == 4 && in_n == 4)
29697 return ix86_builtins[IX86_BUILTIN_FLOORPS_SFIX];
29698 else if (out_n == 8 && in_n == 8)
29699 return ix86_builtins[IX86_BUILTIN_FLOORPS_SFIX256];
29703 case BUILT_IN_ICEIL:
29704 case BUILT_IN_LCEIL:
29705 case BUILT_IN_LLCEIL:
29706 /* The round insn does not trap on denormals. */
29707 if (flag_trapping_math || !TARGET_ROUND)
29710 if (out_mode == SImode && in_mode == DFmode)
29712 if (out_n == 4 && in_n == 2)
29713 return ix86_builtins[IX86_BUILTIN_CEILPD_VEC_PACK_SFIX];
29714 else if (out_n == 8 && in_n == 4)
29715 return ix86_builtins[IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256];
29719 case BUILT_IN_ICEILF:
29720 case BUILT_IN_LCEILF:
29721 case BUILT_IN_LLCEILF:
29722 /* The round insn does not trap on denormals. */
29723 if (flag_trapping_math || !TARGET_ROUND)
29726 if (out_mode == SImode && in_mode == SFmode)
29728 if (out_n == 4 && in_n == 4)
29729 return ix86_builtins[IX86_BUILTIN_CEILPS_SFIX];
29730 else if (out_n == 8 && in_n == 8)
29731 return ix86_builtins[IX86_BUILTIN_CEILPS_SFIX256];
29735 case BUILT_IN_IRINT:
29736 case BUILT_IN_LRINT:
29737 case BUILT_IN_LLRINT:
29738 if (out_mode == SImode && in_mode == DFmode)
29740 if (out_n == 4 && in_n == 2)
29741 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
29742 else if (out_n == 8 && in_n == 4)
29743 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX256];
29747 case BUILT_IN_IRINTF:
29748 case BUILT_IN_LRINTF:
29749 case BUILT_IN_LLRINTF:
29750 if (out_mode == SImode && in_mode == SFmode)
29752 if (out_n == 4 && in_n == 4)
29753 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
29754 else if (out_n == 8 && in_n == 8)
29755 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ256];
29759 case BUILT_IN_IROUND:
29760 case BUILT_IN_LROUND:
29761 case BUILT_IN_LLROUND:
29762 /* The round insn does not trap on denormals. */
29763 if (flag_trapping_math || !TARGET_ROUND)
29766 if (out_mode == SImode && in_mode == DFmode)
29768 if (out_n == 4 && in_n == 2)
29769 return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX];
29770 else if (out_n == 8 && in_n == 4)
29771 return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256];
29775 case BUILT_IN_IROUNDF:
29776 case BUILT_IN_LROUNDF:
29777 case BUILT_IN_LLROUNDF:
29778 /* The round insn does not trap on denormals. */
29779 if (flag_trapping_math || !TARGET_ROUND)
29782 if (out_mode == SImode && in_mode == SFmode)
29784 if (out_n == 4 && in_n == 4)
29785 return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ_SFIX];
29786 else if (out_n == 8 && in_n == 8)
29787 return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ_SFIX256];
29791 case BUILT_IN_COPYSIGN:
29792 if (out_mode == DFmode && in_mode == DFmode)
29794 if (out_n == 2 && in_n == 2)
29795 return ix86_builtins[IX86_BUILTIN_CPYSGNPD];
29796 else if (out_n == 4 && in_n == 4)
29797 return ix86_builtins[IX86_BUILTIN_CPYSGNPD256];
29801 case BUILT_IN_COPYSIGNF:
29802 if (out_mode == SFmode && in_mode == SFmode)
29804 if (out_n == 4 && in_n == 4)
29805 return ix86_builtins[IX86_BUILTIN_CPYSGNPS];
29806 else if (out_n == 8 && in_n == 8)
29807 return ix86_builtins[IX86_BUILTIN_CPYSGNPS256];
29811 case BUILT_IN_FLOOR:
29812 /* The round insn does not trap on denormals. */
29813 if (flag_trapping_math || !TARGET_ROUND)
29816 if (out_mode == DFmode && in_mode == DFmode)
29818 if (out_n == 2 && in_n == 2)
29819 return ix86_builtins[IX86_BUILTIN_FLOORPD];
29820 else if (out_n == 4 && in_n == 4)
29821 return ix86_builtins[IX86_BUILTIN_FLOORPD256];
29825 case BUILT_IN_FLOORF:
29826 /* The round insn does not trap on denormals. */
29827 if (flag_trapping_math || !TARGET_ROUND)
29830 if (out_mode == SFmode && in_mode == SFmode)
29832 if (out_n == 4 && in_n == 4)
29833 return ix86_builtins[IX86_BUILTIN_FLOORPS];
29834 else if (out_n == 8 && in_n == 8)
29835 return ix86_builtins[IX86_BUILTIN_FLOORPS256];
29839 case BUILT_IN_CEIL:
29840 /* The round insn does not trap on denormals. */
29841 if (flag_trapping_math || !TARGET_ROUND)
29844 if (out_mode == DFmode && in_mode == DFmode)
29846 if (out_n == 2 && in_n == 2)
29847 return ix86_builtins[IX86_BUILTIN_CEILPD];
29848 else if (out_n == 4 && in_n == 4)
29849 return ix86_builtins[IX86_BUILTIN_CEILPD256];
29853 case BUILT_IN_CEILF:
29854 /* The round insn does not trap on denormals. */
29855 if (flag_trapping_math || !TARGET_ROUND)
29858 if (out_mode == SFmode && in_mode == SFmode)
29860 if (out_n == 4 && in_n == 4)
29861 return ix86_builtins[IX86_BUILTIN_CEILPS];
29862 else if (out_n == 8 && in_n == 8)
29863 return ix86_builtins[IX86_BUILTIN_CEILPS256];
29867 case BUILT_IN_TRUNC:
29868 /* The round insn does not trap on denormals. */
29869 if (flag_trapping_math || !TARGET_ROUND)
29872 if (out_mode == DFmode && in_mode == DFmode)
29874 if (out_n == 2 && in_n == 2)
29875 return ix86_builtins[IX86_BUILTIN_TRUNCPD];
29876 else if (out_n == 4 && in_n == 4)
29877 return ix86_builtins[IX86_BUILTIN_TRUNCPD256];
29881 case BUILT_IN_TRUNCF:
29882 /* The round insn does not trap on denormals. */
29883 if (flag_trapping_math || !TARGET_ROUND)
29886 if (out_mode == SFmode && in_mode == SFmode)
29888 if (out_n == 4 && in_n == 4)
29889 return ix86_builtins[IX86_BUILTIN_TRUNCPS];
29890 else if (out_n == 8 && in_n == 8)
29891 return ix86_builtins[IX86_BUILTIN_TRUNCPS256];
29895 case BUILT_IN_RINT:
29896 /* The round insn does not trap on denormals. */
29897 if (flag_trapping_math || !TARGET_ROUND)
29900 if (out_mode == DFmode && in_mode == DFmode)
29902 if (out_n == 2 && in_n == 2)
29903 return ix86_builtins[IX86_BUILTIN_RINTPD];
29904 else if (out_n == 4 && in_n == 4)
29905 return ix86_builtins[IX86_BUILTIN_RINTPD256];
29909 case BUILT_IN_RINTF:
29910 /* The round insn does not trap on denormals. */
29911 if (flag_trapping_math || !TARGET_ROUND)
29914 if (out_mode == SFmode && in_mode == SFmode)
29916 if (out_n == 4 && in_n == 4)
29917 return ix86_builtins[IX86_BUILTIN_RINTPS];
29918 else if (out_n == 8 && in_n == 8)
29919 return ix86_builtins[IX86_BUILTIN_RINTPS256];
29923 case BUILT_IN_ROUND:
29924 /* The round insn does not trap on denormals. */
29925 if (flag_trapping_math || !TARGET_ROUND)
29928 if (out_mode == DFmode && in_mode == DFmode)
29930 if (out_n == 2 && in_n == 2)
29931 return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ];
29932 else if (out_n == 4 && in_n == 4)
29933 return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ256];
29937 case BUILT_IN_ROUNDF:
29938 /* The round insn does not trap on denormals. */
29939 if (flag_trapping_math || !TARGET_ROUND)
29942 if (out_mode == SFmode && in_mode == SFmode)
29944 if (out_n == 4 && in_n == 4)
29945 return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ];
29946 else if (out_n == 8 && in_n == 8)
29947 return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ256];
29952 if (out_mode == DFmode && in_mode == DFmode)
29954 if (out_n == 2 && in_n == 2)
29955 return ix86_builtins[IX86_BUILTIN_VFMADDPD];
29956 if (out_n == 4 && in_n == 4)
29957 return ix86_builtins[IX86_BUILTIN_VFMADDPD256];
29961 case BUILT_IN_FMAF:
29962 if (out_mode == SFmode && in_mode == SFmode)
29964 if (out_n == 4 && in_n == 4)
29965 return ix86_builtins[IX86_BUILTIN_VFMADDPS];
29966 if (out_n == 8 && in_n == 8)
29967 return ix86_builtins[IX86_BUILTIN_VFMADDPS256];
29975 /* Dispatch to a handler for a vectorization library. */
29976 if (ix86_veclib_handler)
29977 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
29983 /* Handler for an SVML-style interface to
29984 a library with vectorized intrinsics. */
29987 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
29990 tree fntype, new_fndecl, args;
29993 enum machine_mode el_mode, in_mode;
29996 /* The SVML library is suitable for unsafe math only. */
29997 if (!flag_unsafe_math_optimizations)
30000 el_mode = TYPE_MODE (TREE_TYPE (type_out));
30001 n = TYPE_VECTOR_SUBPARTS (type_out);
30002 in_mode = TYPE_MODE (TREE_TYPE (type_in));
30003 in_n = TYPE_VECTOR_SUBPARTS (type_in);
30004 if (el_mode != in_mode
30012 case BUILT_IN_LOG10:
30014 case BUILT_IN_TANH:
30016 case BUILT_IN_ATAN:
30017 case BUILT_IN_ATAN2:
30018 case BUILT_IN_ATANH:
30019 case BUILT_IN_CBRT:
30020 case BUILT_IN_SINH:
30022 case BUILT_IN_ASINH:
30023 case BUILT_IN_ASIN:
30024 case BUILT_IN_COSH:
30026 case BUILT_IN_ACOSH:
30027 case BUILT_IN_ACOS:
30028 if (el_mode != DFmode || n != 2)
30032 case BUILT_IN_EXPF:
30033 case BUILT_IN_LOGF:
30034 case BUILT_IN_LOG10F:
30035 case BUILT_IN_POWF:
30036 case BUILT_IN_TANHF:
30037 case BUILT_IN_TANF:
30038 case BUILT_IN_ATANF:
30039 case BUILT_IN_ATAN2F:
30040 case BUILT_IN_ATANHF:
30041 case BUILT_IN_CBRTF:
30042 case BUILT_IN_SINHF:
30043 case BUILT_IN_SINF:
30044 case BUILT_IN_ASINHF:
30045 case BUILT_IN_ASINF:
30046 case BUILT_IN_COSHF:
30047 case BUILT_IN_COSF:
30048 case BUILT_IN_ACOSHF:
30049 case BUILT_IN_ACOSF:
30050 if (el_mode != SFmode || n != 4)
30058 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
30060 if (fn == BUILT_IN_LOGF)
30061 strcpy (name, "vmlsLn4");
30062 else if (fn == BUILT_IN_LOG)
30063 strcpy (name, "vmldLn2");
30066 sprintf (name, "vmls%s", bname+10);
30067 name[strlen (name)-1] = '4';
30070 sprintf (name, "vmld%s2", bname+10);
30072 /* Convert to uppercase. */
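/* E.g., following the mangling above, BUILT_IN_SINF ("__builtin_sinf")
   becomes "vmlsSin4" and BUILT_IN_SIN becomes "vmldSin2". */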
30076 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
30078 args = TREE_CHAIN (args))
30082 fntype = build_function_type_list (type_out, type_in, NULL);
30084 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
30086 /* Build a function declaration for the vectorized function. */
30087 new_fndecl = build_decl (BUILTINS_LOCATION,
30088 FUNCTION_DECL, get_identifier (name), fntype);
30089 TREE_PUBLIC (new_fndecl) = 1;
30090 DECL_EXTERNAL (new_fndecl) = 1;
30091 DECL_IS_NOVOPS (new_fndecl) = 1;
30092 TREE_READONLY (new_fndecl) = 1;
30097 /* Handler for an ACML-style interface to
30098 a library with vectorized intrinsics. */
30101 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
30103 char name[20] = "__vr.._";
30104 tree fntype, new_fndecl, args;
30107 enum machine_mode el_mode, in_mode;
30110 /* The ACML library is 64-bit only and suitable for unsafe math
30111 only, as it does not correctly support parts of IEEE arithmetic
30112 with the required precision, such as denormals. */
30114 || !flag_unsafe_math_optimizations)
30117 el_mode = TYPE_MODE (TREE_TYPE (type_out));
30118 n = TYPE_VECTOR_SUBPARTS (type_out);
30119 in_mode = TYPE_MODE (TREE_TYPE (type_in));
30120 in_n = TYPE_VECTOR_SUBPARTS (type_in);
30121 if (el_mode != in_mode
30131 case BUILT_IN_LOG2:
30132 case BUILT_IN_LOG10:
30135 if (el_mode != DFmode
30140 case BUILT_IN_SINF:
30141 case BUILT_IN_COSF:
30142 case BUILT_IN_EXPF:
30143 case BUILT_IN_POWF:
30144 case BUILT_IN_LOGF:
30145 case BUILT_IN_LOG2F:
30146 case BUILT_IN_LOG10F:
30149 if (el_mode != SFmode
30158 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
30159 sprintf (name + 7, "%s", bname+10);
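/* E.g., once the precision/width characters are filled in,
   BUILT_IN_SIN should map to "__vrd2_sin" and BUILT_IN_SINF to
   "__vrs4_sinf". */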
30162 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
30164 args = TREE_CHAIN (args))
30168 fntype = build_function_type_list (type_out, type_in, NULL);
30170 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
30172 /* Build a function declaration for the vectorized function. */
30173 new_fndecl = build_decl (BUILTINS_LOCATION,
30174 FUNCTION_DECL, get_identifier (name), fntype);
30175 TREE_PUBLIC (new_fndecl) = 1;
30176 DECL_EXTERNAL (new_fndecl) = 1;
30177 DECL_IS_NOVOPS (new_fndecl) = 1;
30178 TREE_READONLY (new_fndecl) = 1;
30183 /* Returns a decl of a function that implements gather load with
30184 memory type MEM_VECTYPE, index type INDEX_TYPE and scale SCALE.
30185 Return NULL_TREE if it is not available. */
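/* E.g. (illustration): gathering doubles through int indices gives
   TYPE_MODE (mem_vectype) == V2DFmode with an SImode index type, which
   selects IX86_BUILTIN_GATHERSIV2DF (the vgatherdpd pattern) below. */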
30188 ix86_vectorize_builtin_gather (const_tree mem_vectype,
30189 const_tree index_type, int scale)
30192 enum ix86_builtins code;
30197 if ((TREE_CODE (index_type) != INTEGER_TYPE
30198 && !POINTER_TYPE_P (index_type))
30199 || (TYPE_MODE (index_type) != SImode
30200 && TYPE_MODE (index_type) != DImode))
30203 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
30206 /* The v*gather* insns sign-extend the index to pointer mode. */
30207 if (TYPE_PRECISION (index_type) < POINTER_SIZE
30208 && TYPE_UNSIGNED (index_type))
30213 || (scale & (scale - 1)) != 0)
30216 si = TYPE_MODE (index_type) == SImode;
30217 switch (TYPE_MODE (mem_vectype))
30220 code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
30223 code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
30226 code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
30229 code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
30232 code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
30235 code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
30238 code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
30241 code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
30247 return ix86_builtins[code];
30250 /* Returns the decl of a target-specific builtin that implements
30251 the reciprocal of the function, or NULL_TREE if not available. */
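/* E.g. (illustration): with finite, non-trapping, unsafe math enabled,
   a 1.0f / sqrtf (x) computation can be expanded through
   IX86_BUILTIN_RSQRTF, as selected below. */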
30254 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
30255 bool sqrt ATTRIBUTE_UNUSED)
30257 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
30258 && flag_finite_math_only && !flag_trapping_math
30259 && flag_unsafe_math_optimizations))
30263 /* Machine dependent builtins. */
30266 /* Vectorized version of sqrt to rsqrt conversion. */
30267 case IX86_BUILTIN_SQRTPS_NR:
30268 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
30270 case IX86_BUILTIN_SQRTPS_NR256:
30271 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR256];
30277 /* Normal builtins. */
30280 /* Sqrt to rsqrt conversion. */
30281 case BUILT_IN_SQRTF:
30282 return ix86_builtins[IX86_BUILTIN_RSQRTF];
30289 /* Helper for avx_vpermilps256_operand et al. This is also used by
30290 the expansion functions to turn the parallel back into a mask.
30291 The return value is 0 for no match, or imm8+1 for a match. */
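/* Worked example (V4SF, 128-bit case): the parallel [3 2 1 0] yields
   mask = 3 | 2<<2 | 1<<4 | 0<<6 = 0x1b, so the return value is 0x1c. */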
30294 avx_vpermilp_parallel (rtx par, enum machine_mode mode)
30296 unsigned i, nelt = GET_MODE_NUNITS (mode);
30298 unsigned char ipar[8];
30300 if (XVECLEN (par, 0) != (int) nelt)
30303 /* Validate that all of the elements are constants, and not totally
30304 out of range. Copy the data into an integral array to make the
30305 subsequent checks easier. */
30306 for (i = 0; i < nelt; ++i)
30308 rtx er = XVECEXP (par, 0, i);
30309 unsigned HOST_WIDE_INT ei;
30311 if (!CONST_INT_P (er))
30322 /* In the 256-bit DFmode case, we can only move elements within a 128-bit lane. */
30324 for (i = 0; i < 2; ++i)
30328 mask |= ipar[i] << i;
30330 for (i = 2; i < 4; ++i)
30334 mask |= (ipar[i] - 2) << i;
30339 /* In the 256-bit SFmode case, we have full freedom of movement
30340 within the low 128-bit lane, but the high 128-bit lane must
30341 mirror the exact same pattern. */
30342 for (i = 0; i < 4; ++i)
30343 if (ipar[i] + 4 != ipar[i + 4])
30350 /* In the 128-bit case, we have full freedom in the placement of
30351 the elements from the source operand. */
30352 for (i = 0; i < nelt; ++i)
30353 mask |= ipar[i] << (i * (nelt / 2));
30357 gcc_unreachable ();
30360 /* Make sure success has a non-zero value by adding one. */
30364 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
30365 the expansion functions to turn the parallel back into a mask.
30366 The return value is 0 for no match, or imm8+1 for a match. */
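/* Worked example (V4DF): the parallel [0 1 4 5] selects the low half of
   each source, giving mask = 0 | 2<<4 = 0x20 and a return value of 0x21. */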
30369 avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
30371 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
30373 unsigned char ipar[8];
30375 if (XVECLEN (par, 0) != (int) nelt)
30378 /* Validate that all of the elements are constants, and not totally
30379 out of range. Copy the data into an integral array to make the
30380 subsequent checks easier. */
30381 for (i = 0; i < nelt; ++i)
30383 rtx er = XVECEXP (par, 0, i);
30384 unsigned HOST_WIDE_INT ei;
30386 if (!CONST_INT_P (er))
30389 if (ei >= 2 * nelt)
30394 /* Validate that each half of the permute selects a run of consecutive elements. */
30395 for (i = 0; i < nelt2 - 1; ++i)
30396 if (ipar[i] + 1 != ipar[i + 1])
30398 for (i = nelt2; i < nelt - 1; ++i)
30399 if (ipar[i] + 1 != ipar[i + 1])
30402 /* Reconstruct the mask. */
30403 for (i = 0; i < 2; ++i)
30405 unsigned e = ipar[i * nelt2];
30409 mask |= e << (i * 4);
30412 /* Make sure success has a non-zero value by adding one. */
30416 /* Store OPERAND to memory after reload has completed. This means
30417 that we can't easily use assign_stack_local. */
30419 ix86_force_to_memory (enum machine_mode mode, rtx operand)
30423 gcc_assert (reload_completed);
30424 if (ix86_using_red_zone ())
30426 result = gen_rtx_MEM (mode,
30427 gen_rtx_PLUS (Pmode,
30429 GEN_INT (-RED_ZONE_SIZE)));
30430 emit_move_insn (result, operand);
30432 else if (TARGET_64BIT)
30438 operand = gen_lowpart (DImode, operand);
30442 gen_rtx_SET (VOIDmode,
30443 gen_rtx_MEM (DImode,
30444 gen_rtx_PRE_DEC (DImode,
30445 stack_pointer_rtx)),
30449 gcc_unreachable ();
30451 result = gen_rtx_MEM (mode, stack_pointer_rtx);
30460 split_double_mode (mode, &operand, 1, operands, operands + 1);
30462 gen_rtx_SET (VOIDmode,
30463 gen_rtx_MEM (SImode,
30464 gen_rtx_PRE_DEC (Pmode,
30465 stack_pointer_rtx)),
30468 gen_rtx_SET (VOIDmode,
30469 gen_rtx_MEM (SImode,
30470 gen_rtx_PRE_DEC (Pmode,
30471 stack_pointer_rtx)),
30476 /* Store HImode values as SImode. */
30477 operand = gen_lowpart (SImode, operand);
30481 gen_rtx_SET (VOIDmode,
30482 gen_rtx_MEM (GET_MODE (operand),
30483 gen_rtx_PRE_DEC (SImode,
30484 stack_pointer_rtx)),
30488 gcc_unreachable ();
30490 result = gen_rtx_MEM (mode, stack_pointer_rtx);
30495 /* Free the operand from memory. */
30497 ix86_free_from_memory (enum machine_mode mode)
30499 if (!ix86_using_red_zone ())
30503 if (mode == DImode || TARGET_64BIT)
30507 /* Use LEA to deallocate stack space. In peephole2 it will be converted
30508 to a pop or add instruction if registers are available. */
30509 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
30510 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
30515 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
30517 Put float CONST_DOUBLE in the constant pool instead of fp regs.
30518 QImode must go into class Q_REGS.
30519 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
30520 movdf to do mem-to-mem moves through integer regs. */
30523 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
30525 enum machine_mode mode = GET_MODE (x);
30527 /* We're only allowed to return a subclass of CLASS. Many of the
30528 following checks fail for NO_REGS, so eliminate that early. */
30529 if (regclass == NO_REGS)
30532 /* All classes can load zeros. */
30533 if (x == CONST0_RTX (mode))
30536 /* Force constants into memory if we are loading a (nonzero) constant into
30537 an MMX or SSE register. This is because there are no MMX/SSE instructions
30538 to load from a constant. */
30540 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
30543 /* Prefer SSE regs only, if we can use them for math. */
30544 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
30545 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
30547 /* Floating-point constants need more complex checks. */
30548 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
30550 /* General regs can load everything. */
30551 if (reg_class_subset_p (regclass, GENERAL_REGS))
30554 /* Floats can load 0 and 1 plus some others. Note that we eliminated
30555 zero above. We only want to wind up preferring 80387 registers if
30556 we plan on doing computation with them. */
30558 && standard_80387_constant_p (x) > 0)
30560 /* Limit class to non-sse. */
30561 if (regclass == FLOAT_SSE_REGS)
30563 if (regclass == FP_TOP_SSE_REGS)
30565 if (regclass == FP_SECOND_SSE_REGS)
30566 return FP_SECOND_REG;
30567 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
30574 /* Generally when we see PLUS here, it's the function invariant
30575 (plus soft-fp const_int), which can only be computed into general regs. */
30577 if (GET_CODE (x) == PLUS)
30578 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
30580 /* QImode constants are easy to load, but non-constant QImode data
30581 must go into Q_REGS. */
30582 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
30584 if (reg_class_subset_p (regclass, Q_REGS))
30586 if (reg_class_subset_p (Q_REGS, regclass))
30594 /* Discourage putting floating-point values in SSE registers unless
30595 SSE math is being used, and likewise for the 387 registers. */
30597 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
30599 enum machine_mode mode = GET_MODE (x);
30601 /* Restrict the output reload class to the register bank that we are doing
30602 math on. If we would like not to return a subset of CLASS, reject this
30603 alternative: if reload cannot do this, it will still use its choice. */
30605 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
30606 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
30608 if (X87_FLOAT_MODE_P (mode))
30610 if (regclass == FP_TOP_SSE_REGS)
30612 else if (regclass == FP_SECOND_SSE_REGS)
30613 return FP_SECOND_REG;
30615 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
30622 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
30623 enum machine_mode mode, secondary_reload_info *sri)
30625 /* Double-word spills from general registers to non-offsettable memory
30626 references (zero-extended addresses) require special handling. */
30629 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
30630 && rclass == GENERAL_REGS
30631 && !offsettable_memref_p (x))
30634 ? CODE_FOR_reload_noff_load
30635 : CODE_FOR_reload_noff_store);
30636 /* Add the cost of moving address to a temporary. */
30637 sri->extra_cost = 1;
30642 /* QImode spills from non-QI registers require
30643 intermediate register on 32bit targets. */
30645 && !in_p && mode == QImode
30646 && (rclass == GENERAL_REGS
30647 || rclass == LEGACY_REGS
30648 || rclass == INDEX_REGS))
30657 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
30658 regno = true_regnum (x);
30660 /* Return Q_REGS if the operand is in memory. */
30665 /* This condition handles the corner case where an expression involving
30666 pointers gets vectorized. We're trying to use the address of a
30667 stack slot as a vector initializer.
30669 (set (reg:V2DI 74 [ vect_cst_.2 ])
30670 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
30672 Eventually frame gets turned into sp+offset like this:
30674 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
30675 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
30676 (const_int 392 [0x188]))))
30678 That later gets turned into:
30680 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
30681 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
30682 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
30684 We'll have the following reload recorded:
30686 Reload 0: reload_in (DI) =
30687 (plus:DI (reg/f:DI 7 sp)
30688 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
30689 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
30690 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
30691 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
30692 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
30693 reload_reg_rtx: (reg:V2DI 22 xmm1)
30695 Which isn't going to work since SSE instructions can't handle scalar
30696 additions. Returning GENERAL_REGS forces the addition into an integer
30697 register, and reload can then handle subsequent reloads without problems. */
30699 if (in_p && GET_CODE (x) == PLUS
30700 && SSE_CLASS_P (rclass)
30701 && SCALAR_INT_MODE_P (mode))
30702 return GENERAL_REGS;
30707 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
30710 ix86_class_likely_spilled_p (reg_class_t rclass)
30721 case SSE_FIRST_REG:
30723 case FP_SECOND_REG:
30733 /* If we are copying between general and FP registers, we need a memory
30734 location. The same is true for SSE and MMX registers.
30736 To optimize register_move_cost performance, allow the inline variant.
30738 The macro can't work reliably when one of the CLASSES is a class containing
30739 registers from multiple units (SSE, MMX, integer). We avoid this by never
30740 combining those units in a single alternative in the machine description.
30741 Ensure that this constraint holds to avoid unexpected surprises.
30743 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
30744 enforce these sanity checks. */
30747 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
30748 enum machine_mode mode, int strict)
30750 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
30751 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
30752 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
30753 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
30754 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
30755 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
30757 gcc_assert (!strict);
30761 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
30764 /* ??? This is a lie. We do have moves between mmx/general, and for
30765 mmx/sse2. But by saying we need secondary memory we discourage the
30766 register allocator from using the mmx registers unless needed. */
30767 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
30770 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
30772 /* SSE1 doesn't have any direct moves from other classes. */
30776 /* If the target says that inter-unit moves are more expensive
30777 than moving through memory, then don't generate them. */
30778 if (!TARGET_INTER_UNIT_MOVES)
30781 /* Between SSE and general, we have moves no larger than word size. */
30782 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
30790 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
30791 enum machine_mode mode, int strict)
30793 return inline_secondary_memory_needed (class1, class2, mode, strict);
30796 /* Implement the TARGET_CLASS_MAX_NREGS hook.
30798 On the 80386, this is the size of MODE in words,
30799 except in the FP regs, where a single reg is always enough. */
30801 static unsigned char
30802 ix86_class_max_nregs (reg_class_t rclass, enum machine_mode mode)
30804 if (MAYBE_INTEGER_CLASS_P (rclass))
30806 if (mode == XFmode)
30807 return (TARGET_64BIT ? 2 : 3);
30808 else if (mode == XCmode)
30809 return (TARGET_64BIT ? 4 : 6);
30811 return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
30815 if (COMPLEX_MODE_P (mode))
30822 /* Return true if the registers in CLASS cannot represent the change from
30823 modes FROM to TO. */
30826 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
30827 enum reg_class regclass)
30832 /* x87 registers can't do subreg at all, as all values are reformatted
30833 to extended precision. */
30834 if (MAYBE_FLOAT_CLASS_P (regclass))
30837 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
30839 /* Vector registers do not support QI or HImode loads. If we don't
30840 disallow a change to these modes, reload will assume it's ok to
30841 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
30842 the vec_dupv4hi pattern. */
30843 if (GET_MODE_SIZE (from) < 4)
30846 /* Vector registers do not support subreg with nonzero offsets, which
30847 are otherwise valid for integer registers. Since we can't see
30848 whether we have a nonzero offset from here, prohibit all
30849 nonparadoxical subregs changing size. */
30850 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
30857 /* Return the cost of moving data of mode M between a
30858 register and memory. A value of 2 is the default; this cost is
30859 relative to those in `REGISTER_MOVE_COST'.
30861 This function is used extensively by register_move_cost, which is used to
30862 build tables at startup. Make it inline in this case.
30863 When IN is 2, return maximum of in and out move cost.
30865 If moving between registers and memory is more expensive than
30866 between two registers, you should define this macro to express the relative cost.
30869 Model also the increased cost of moving QImode registers in non Q_REGS classes. */
30873 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
30877 if (FLOAT_CLASS_P (regclass))
30895 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
30896 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
30898 if (SSE_CLASS_P (regclass))
30901 switch (GET_MODE_SIZE (mode))
30916 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
30917 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
30919 if (MMX_CLASS_P (regclass))
30922 switch (GET_MODE_SIZE (mode))
30934 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
30935 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
30937 switch (GET_MODE_SIZE (mode))
30940 if (Q_CLASS_P (regclass) || TARGET_64BIT)
30943 return ix86_cost->int_store[0];
30944 if (TARGET_PARTIAL_REG_DEPENDENCY
30945 && optimize_function_for_speed_p (cfun))
30946 cost = ix86_cost->movzbl_load;
30948 cost = ix86_cost->int_load[0];
30950 return MAX (cost, ix86_cost->int_store[0]);
30956 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
30958 return ix86_cost->movzbl_load;
30960 return ix86_cost->int_store[0] + 4;
30965 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
30966 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
30968 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
30969 if (mode == TFmode)
30972 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
30974 cost = ix86_cost->int_load[2];
30976 cost = ix86_cost->int_store[2];
30977 return (cost * (((int) GET_MODE_SIZE (mode)
30978 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
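/* For illustration only: the word-count scaling used above, as a
   standalone sketch. A multi-word move is charged the per-word cost
   once for every word-sized chunk, rounded up; e.g. with 4-byte words,
   a 10-byte XFmode value needs 3 chunks. The names here are
   illustrative and not used elsewhere in this file. */

static inline int
sketch_multiword_move_cost (int per_word_cost, int mode_size_bytes,
                            int word_size_bytes)
{
  int nwords = (mode_size_bytes + word_size_bytes - 1) / word_size_bytes;
  return per_word_cost * nwords;
}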
30983 ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
30986 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
30990 /* Return the cost of moving data from a register in class CLASS1 to
30991 one in class CLASS2.
30993 It is not required that the cost always equal 2 when FROM is the same as TO;
30994 on some machines it is expensive to move between registers if they are not
30995 general registers. */
30998 ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
30999 reg_class_t class2_i)
31001 enum reg_class class1 = (enum reg_class) class1_i;
31002 enum reg_class class2 = (enum reg_class) class2_i;
31004 /* In case we require secondary memory, compute cost of the store followed
31005 by load. In order to avoid bad register allocation choices, we need
31006 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
31008 if (inline_secondary_memory_needed (class1, class2, mode, 0))
31012 cost += inline_memory_move_cost (mode, class1, 2);
31013 cost += inline_memory_move_cost (mode, class2, 2);
31015 /* In case of copying from a general purpose register we may emit multiple
31016 stores followed by a single load, causing a memory size mismatch stall.
31017 Count this as an arbitrarily high cost of 20. */
31018 if (targetm.class_max_nregs (class1, mode)
31019 > targetm.class_max_nregs (class2, mode))
31022 /* In the case of FP/MMX moves, the registers actually overlap, and we
31023 have to switch modes in order to treat them differently. */
31024 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
31025 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
31031 /* Moves between SSE/MMX and integer unit are expensive. */
31032 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
31033 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
31035 /* ??? By keeping the returned value relatively high, we limit the number
31036 of moves between integer and MMX/SSE registers for all targets.
31037 Additionally, the high value prevents a problem with x86_modes_tieable_p(),
31038 where integer modes in MMX/SSE registers are not tieable
31039 because of missing QImode and HImode moves to, from or between
31040 MMX/SSE registers. */
31041 return MAX (8, ix86_cost->mmxsse_to_integer);
31043 if (MAYBE_FLOAT_CLASS_P (class1))
31044 return ix86_cost->fp_move;
31045 if (MAYBE_SSE_CLASS_P (class1))
31046 return ix86_cost->sse_move;
31047 if (MAYBE_MMX_CLASS_P (class1))
31048 return ix86_cost->mmx_move;
31052 /* Return TRUE if hard register REGNO can hold a value of machine-mode
31056 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
31058 /* Flags, and only flags, can hold CCmode values. */
31059 if (CC_REGNO_P (regno))
31060 return GET_MODE_CLASS (mode) == MODE_CC;
31061 if (GET_MODE_CLASS (mode) == MODE_CC
31062 || GET_MODE_CLASS (mode) == MODE_RANDOM
31063 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
31065 if (FP_REGNO_P (regno))
31066 return VALID_FP_MODE_P (mode);
31067 if (SSE_REGNO_P (regno))
31069 /* We implement the move patterns for all vector modes into and
31070 out of SSE registers, even when no operation instructions
31071 are available. OImode move is available only when AVX is enabled. */
31073 return ((TARGET_AVX && mode == OImode)
31074 || VALID_AVX256_REG_MODE (mode)
31075 || VALID_SSE_REG_MODE (mode)
31076 || VALID_SSE2_REG_MODE (mode)
31077 || VALID_MMX_REG_MODE (mode)
31078 || VALID_MMX_REG_MODE_3DNOW (mode));
31080 if (MMX_REGNO_P (regno))
31082 /* We implement the move patterns for 3DNOW modes even in MMX mode,
31083 so if the register is available at all, then we can move data of
31084 the given mode into or out of it. */
31085 return (VALID_MMX_REG_MODE (mode)
31086 || VALID_MMX_REG_MODE_3DNOW (mode));
31089 if (mode == QImode)
31091 /* Take care with QImode values - they can be in non-QI regs,
31092 but then they do cause partial register stalls. */
31093 if (regno <= BX_REG || TARGET_64BIT)
31095 if (!TARGET_PARTIAL_REG_STALL)
31097 return !can_create_pseudo_p ();
31099 /* We handle both integers and floats in the general purpose registers. */
31100 else if (VALID_INT_MODE_P (mode))
31102 else if (VALID_FP_MODE_P (mode))
31104 else if (VALID_DFP_MODE_P (mode))
31106 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
31107 on to use that value in smaller contexts, this can easily force a
31108 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
31109 supporting DImode, allow it. */
31110 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
31116 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
31117 tieable integer mode. */
31120 ix86_tieable_integer_mode_p (enum machine_mode mode)
31129 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
31132 return TARGET_64BIT;
31139 /* Return true if MODE1 is accessible in a register that can hold MODE2
31140 without copying. That is, all register classes that can hold MODE2
31141 can also hold MODE1. */
31144 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
31146 if (mode1 == mode2)
31149 if (ix86_tieable_integer_mode_p (mode1)
31150 && ix86_tieable_integer_mode_p (mode2))
31153 /* MODE2 being XFmode implies fp stack or general regs, which means we
31154 can tie any smaller floating point modes to it. Note that we do not
31155 tie this with TFmode. */
31156 if (mode2 == XFmode)
31157 return mode1 == SFmode || mode1 == DFmode;
31159 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
31160 that we can tie it with SFmode. */
31161 if (mode2 == DFmode)
31162 return mode1 == SFmode;
31164 /* If MODE2 is only appropriate for an SSE register, then tie with
31165 any other mode acceptable to SSE registers. */
31166 if (GET_MODE_SIZE (mode2) == 16
31167 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
31168 return (GET_MODE_SIZE (mode1) == 16
31169 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
31171 /* If MODE2 is appropriate for an MMX register, then tie
31172 with any other mode acceptable to MMX registers. */
31173 if (GET_MODE_SIZE (mode2) == 8
31174 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
31175 return (GET_MODE_SIZE (mode1) == 8
31176 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
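/* For illustration only: a condensed standalone sketch of the vector
   size-class rule above. Two modes tie when every class that can hold
   the second can also hold the first; for the vector cases this
   reduces to both modes having the same size class (16-byte SSE or
   8-byte MMX). Names are illustrative; real queries must go through
   ix86_hard_regno_mode_ok as above. */

static int
sketch_vector_sizes_tie_p (int size1_bytes, int size2_bytes)
{
  return (size1_bytes == 16 && size2_bytes == 16)
         || (size1_bytes == 8 && size2_bytes == 8);
}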
31181 /* Compute a (partial) cost for rtx X. Return true if the complete
31182 cost has been computed, and false if subexpressions should be
31183 scanned. In either case, *TOTAL contains the cost result. */
31186 ix86_rtx_costs (rtx x, int code, int outer_code_i, int opno, int *total,
31189 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
31190 enum machine_mode mode = GET_MODE (x);
31191 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
31199 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
31201 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
31203 else if (flag_pic && SYMBOLIC_CONST (x)
31205 || (GET_CODE (x) != LABEL_REF
31206 && (GET_CODE (x) != SYMBOL_REF
31207 || !SYMBOL_REF_LOCAL_P (x)))))
31214 if (mode == VOIDmode)
31217 switch (standard_80387_constant_p (x))
31222 default: /* Other constants */
31227 /* Start with (MEM (SYMBOL_REF)), since that's where
31228 it'll probably end up. Add a penalty for size. */
31229 *total = (COSTS_N_INSNS (1)
31230 + (flag_pic != 0 && !TARGET_64BIT)
31231 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
31237 /* The zero extension is often completely free on x86_64, so make
31238 it as cheap as possible. */
31239 if (TARGET_64BIT && mode == DImode
31240 && GET_MODE (XEXP (x, 0)) == SImode)
31242 else if (TARGET_ZERO_EXTEND_WITH_AND)
31243 *total = cost->add;
31245 *total = cost->movzx;
31249 *total = cost->movsx;
31253 if (CONST_INT_P (XEXP (x, 1))
31254 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
31256 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
31259 *total = cost->add;
31262 if ((value == 2 || value == 3)
31263 && cost->lea <= cost->shift_const)
31265 *total = cost->lea;
31275 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
31277 if (CONST_INT_P (XEXP (x, 1)))
31279 if (INTVAL (XEXP (x, 1)) > 32)
31280 *total = cost->shift_const + COSTS_N_INSNS (2);
31282 *total = cost->shift_const * 2;
31286 if (GET_CODE (XEXP (x, 1)) == AND)
31287 *total = cost->shift_var * 2;
31289 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
31294 if (CONST_INT_P (XEXP (x, 1)))
31295 *total = cost->shift_const;
31297 *total = cost->shift_var;
31305 gcc_assert (FLOAT_MODE_P (mode));
31306 gcc_assert (TARGET_FMA || TARGET_FMA4);
31308 /* ??? SSE scalar/vector cost should be used here. */
31309 /* ??? Bald assumption that fma has the same cost as fmul. */
31310 *total = cost->fmul;
31311 *total += rtx_cost (XEXP (x, 1), FMA, 1, speed);
31313 /* Negation in op0 or op2 is free: FMS, FNMA, FNMS. */
31315 if (GET_CODE (sub) == NEG)
31316 sub = XEXP (sub, 0);
31317 *total += rtx_cost (sub, FMA, 0, speed);
31320 if (GET_CODE (sub) == NEG)
31321 sub = XEXP (sub, 0);
31322 *total += rtx_cost (sub, FMA, 2, speed);
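/* For illustration, the four fused forms costed identically above;
   negating operand 0 or 2 merely selects a different instruction, so
   the NEG is stripped before recursing into the operand costs. As a
   sketch (not the emitted code):

     fma  (a, b, c) =  a * b + c
     fms  (a, b, c) =  a * b - c     -- NEG on operand 2
     fnma (a, b, c) = -a * b + c     -- NEG on operand 0
     fnms (a, b, c) = -a * b - c     -- NEG on operands 0 and 2  */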
31327 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
31329 /* ??? SSE scalar cost should be used here. */
31330 *total = cost->fmul;
31333 else if (X87_FLOAT_MODE_P (mode))
31335 *total = cost->fmul;
31338 else if (FLOAT_MODE_P (mode))
31340 /* ??? SSE vector cost should be used here. */
31341 *total = cost->fmul;
31346 rtx op0 = XEXP (x, 0);
31347 rtx op1 = XEXP (x, 1);
31349 if (CONST_INT_P (XEXP (x, 1)))
31351 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
31352 for (nbits = 0; value != 0; value &= value - 1)
nbits++;
31356 /* This is arbitrary. */
31359 /* Compute costs correctly for widening multiplication. */
31360 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
31361 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
31362 == GET_MODE_SIZE (mode))
31364 int is_mulwiden = 0;
31365 enum machine_mode inner_mode = GET_MODE (op0);
31367 if (GET_CODE (op0) == GET_CODE (op1))
31368 is_mulwiden = 1, op1 = XEXP (op1, 0);
31369 else if (CONST_INT_P (op1))
31371 if (GET_CODE (op0) == SIGN_EXTEND)
31372 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
31375 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
31379 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
31382 *total = (cost->mult_init[MODE_INDEX (mode)]
31383 + nbits * cost->mult_bit
31384 + rtx_cost (op0, outer_code, opno, speed)
31385 + rtx_cost (op1, outer_code, opno, speed));
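/* For illustration, the NBITS loop above is the classic
   clear-lowest-set-bit population count; the multiply cost model then
   charges one MULT_BIT unit per set bit of the constant. A plain-C
   sketch of the idiom:

     int nbits = 0;
     while (value != 0)
       {
         value &= value - 1;    -- clears the lowest set bit
         nbits++;
       }

   e.g. value == 0x29 (binary 101001) gives nbits == 3.  */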
31394 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
31395 /* ??? SSE cost should be used here. */
31396 *total = cost->fdiv;
31397 else if (X87_FLOAT_MODE_P (mode))
31398 *total = cost->fdiv;
31399 else if (FLOAT_MODE_P (mode))
31400 /* ??? SSE vector cost should be used here. */
31401 *total = cost->fdiv;
31403 *total = cost->divide[MODE_INDEX (mode)];
31407 if (GET_MODE_CLASS (mode) == MODE_INT
31408 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
31410 if (GET_CODE (XEXP (x, 0)) == PLUS
31411 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
31412 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
31413 && CONSTANT_P (XEXP (x, 1)))
31415 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
31416 if (val == 2 || val == 4 || val == 8)
31418 *total = cost->lea;
31419 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
31420 outer_code, opno, speed);
31421 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
31422 outer_code, opno, speed);
31423 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
31427 else if (GET_CODE (XEXP (x, 0)) == MULT
31428 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
31430 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
31431 if (val == 2 || val == 4 || val == 8)
31433 *total = cost->lea;
31434 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
31435 outer_code, opno, speed);
31436 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
31440 else if (GET_CODE (XEXP (x, 0)) == PLUS)
31442 *total = cost->lea;
31443 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
31444 outer_code, opno, speed);
31445 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
31446 outer_code, opno, speed);
31447 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
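/* For illustration, all three shapes recognized above fit the x86
   addressing form computable by a single LEA, base + index * scale +
   displacement, with scale restricted to 2, 4 or 8. What LEA
   evaluates, as a plain-C sketch:

     long lea (long base, long index, long scale, long disp)
     {
       return base + index * scale + disp;   -- lea disp(base,index,scale)
     }  */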
31454 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
31456 /* ??? SSE cost should be used here. */
31457 *total = cost->fadd;
31460 else if (X87_FLOAT_MODE_P (mode))
31462 *total = cost->fadd;
31465 else if (FLOAT_MODE_P (mode))
31467 /* ??? SSE vector cost should be used here. */
31468 *total = cost->fadd;
31476 if (!TARGET_64BIT && mode == DImode)
31478 *total = (cost->add * 2
31479 + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
31480 << (GET_MODE (XEXP (x, 0)) != DImode))
31481 + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
31482 << (GET_MODE (XEXP (x, 1)) != DImode)));
31488 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
31490 /* ??? SSE cost should be used here. */
31491 *total = cost->fchs;
31494 else if (X87_FLOAT_MODE_P (mode))
31496 *total = cost->fchs;
31499 else if (FLOAT_MODE_P (mode))
31501 /* ??? SSE vector cost should be used here. */
31502 *total = cost->fchs;
31508 if (!TARGET_64BIT && mode == DImode)
31509 *total = cost->add * 2;
31511 *total = cost->add;
31515 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
31516 && XEXP (XEXP (x, 0), 1) == const1_rtx
31517 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
31518 && XEXP (x, 1) == const0_rtx)
31520 /* This kind of construct is implemented using test[bwl].
31521 Treat it as if we had an AND. */
31522 *total = (cost->add
31523 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
31524 + rtx_cost (const1_rtx, outer_code, opno, speed));
31530 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
31535 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
31536 /* ??? SSE cost should be used here. */
31537 *total = cost->fabs;
31538 else if (X87_FLOAT_MODE_P (mode))
31539 *total = cost->fabs;
31540 else if (FLOAT_MODE_P (mode))
31541 /* ??? SSE vector cost should be used here. */
31542 *total = cost->fabs;
31546 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
31547 /* ??? SSE cost should be used here. */
31548 *total = cost->fsqrt;
31549 else if (X87_FLOAT_MODE_P (mode))
31550 *total = cost->fsqrt;
31551 else if (FLOAT_MODE_P (mode))
31552 /* ??? SSE vector cost should be used here. */
31553 *total = cost->fsqrt;
31557 if (XINT (x, 1) == UNSPEC_TP)
31564 case VEC_DUPLICATE:
31565 /* ??? Assume all of these vector manipulation patterns are
31566 recognizable, in which case they all pretty much have the same cost. */
31568 *total = COSTS_N_INSNS (1);
31578 static int current_machopic_label_num;
31580 /* Given a symbol name and its associated stub, write out the
31581 definition of the stub. */
31584 machopic_output_stub (FILE *file, const char *symb, const char *stub)
31586 unsigned int length;
31587 char *binder_name, *symbol_name, lazy_ptr_name[32];
31588 int label = ++current_machopic_label_num;
31590 /* For 64-bit we shouldn't get here. */
31591 gcc_assert (!TARGET_64BIT);
31593 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
31594 symb = targetm.strip_name_encoding (symb);
31596 length = strlen (stub);
31597 binder_name = XALLOCAVEC (char, length + 32);
31598 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
31600 length = strlen (symb);
31601 symbol_name = XALLOCAVEC (char, length + 32);
31602 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
31604 sprintf (lazy_ptr_name, "L%d$lz", label);
31606 if (MACHOPIC_ATT_STUB)
31607 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
31608 else if (MACHOPIC_PURE)
31609 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
31611 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
31613 fprintf (file, "%s:\n", stub);
31614 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
31616 if (MACHOPIC_ATT_STUB)
31618 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
31620 else if (MACHOPIC_PURE)
31623 /* 25-byte PIC stub using "CALL get_pc_thunk". */
31624 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
31625 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
31626 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
31627 label, lazy_ptr_name, label);
31628 fprintf (file, "\tjmp\t*%%ecx\n");
31631 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
31633 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
31634 it needs no stub-binding-helper. */
31635 if (MACHOPIC_ATT_STUB)
31638 fprintf (file, "%s:\n", binder_name);
31642 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
31643 fprintf (file, "\tpushl\t%%ecx\n");
31646 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
31648 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
31650 /* N.B. Keep the correspondence of these
31651 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
31652 old-pic/new-pic/non-pic stubs; altering this will break
31653 compatibility with existing dylibs. */
31656 /* 25-byte PIC stub using "CALL get_pc_thunk". */
31657 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
31660 /* 16-byte -mdynamic-no-pic stub. */
31661 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr3_section]);
31663 fprintf (file, "%s:\n", lazy_ptr_name);
31664 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
31665 fprintf (file, ASM_LONG "%s\n", binder_name);
31667 #endif /* TARGET_MACHO */
31669 /* Order the registers for the register allocator. */
31672 x86_order_regs_for_local_alloc (void)
31677 /* First allocate the local general purpose registers. */
31678 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
31679 if (GENERAL_REGNO_P (i) && call_used_regs[i])
31680 reg_alloc_order [pos++] = i;
31682 /* Global general purpose registers. */
31683 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
31684 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
31685 reg_alloc_order [pos++] = i;
31687 /* x87 registers come first in case we are doing FP math using them. */
31689 if (!TARGET_SSE_MATH)
31690 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
31691 reg_alloc_order [pos++] = i;
31693 /* SSE registers. */
31694 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
31695 reg_alloc_order [pos++] = i;
31696 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
31697 reg_alloc_order [pos++] = i;
31699 /* x87 registers. */
31700 if (TARGET_SSE_MATH)
31701 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
31702 reg_alloc_order [pos++] = i;
31704 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
31705 reg_alloc_order [pos++] = i;
31707 /* Initialize the rest of the array, as we do not allocate some registers at all. */
31709 while (pos < FIRST_PSEUDO_REGISTER)
31710 reg_alloc_order [pos++] = 0;
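/* For illustration only: the partition performed above, as a
   standalone sketch over a hypothetical register table. Caller-saved
   registers are handed to the allocator first, since using them costs
   no prologue save/restore; callee-saved ones follow. */

static void
sketch_order_by_call_usage (const int call_used[], int nregs, int order[])
{
  int pos = 0, i;

  for (i = 0; i < nregs; i++)   /* caller-saved first */
    if (call_used[i])
      order[pos++] = i;
  for (i = 0; i < nregs; i++)   /* then callee-saved */
    if (!call_used[i])
      order[pos++] = i;
}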
31713 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
31714 in struct attribute_spec.handler. */
31716 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
31718 int flags ATTRIBUTE_UNUSED,
31719 bool *no_add_attrs)
31721 if (TREE_CODE (*node) != FUNCTION_TYPE
31722 && TREE_CODE (*node) != METHOD_TYPE
31723 && TREE_CODE (*node) != FIELD_DECL
31724 && TREE_CODE (*node) != TYPE_DECL)
31726 warning (OPT_Wattributes, "%qE attribute only applies to functions",
31728 *no_add_attrs = true;
31733 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
31735 *no_add_attrs = true;
31738 if (is_attribute_p ("callee_pop_aggregate_return", name))
31742 cst = TREE_VALUE (args);
31743 if (TREE_CODE (cst) != INTEGER_CST)
31745 warning (OPT_Wattributes,
31746 "%qE attribute requires an integer constant argument",
31748 *no_add_attrs = true;
31750 else if (compare_tree_int (cst, 0) != 0
31751 && compare_tree_int (cst, 1) != 0)
31753 warning (OPT_Wattributes,
31754 "argument to %qE attribute is neither zero, nor one",
31756 *no_add_attrs = true;
31765 /* Handle a "ms_abi" or "sysv_abi" attribute; arguments as in
31766 struct attribute_spec.handler. */
31768 ix86_handle_abi_attribute (tree *node, tree name,
31769 tree args ATTRIBUTE_UNUSED,
31770 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
31772 if (TREE_CODE (*node) != FUNCTION_TYPE
31773 && TREE_CODE (*node) != METHOD_TYPE
31774 && TREE_CODE (*node) != FIELD_DECL
31775 && TREE_CODE (*node) != TYPE_DECL)
31777 warning (OPT_Wattributes, "%qE attribute only applies to functions",
31779 *no_add_attrs = true;
31783 /* Can combine regparm with all attributes but fastcall. */
31784 if (is_attribute_p ("ms_abi", name))
31786 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
31788 error ("ms_abi and sysv_abi attributes are not compatible");
31793 else if (is_attribute_p ("sysv_abi", name))
31795 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
31797 error ("ms_abi and sysv_abi attributes are not compatible");
31806 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
31807 struct attribute_spec.handler. */
31809 ix86_handle_struct_attribute (tree *node, tree name,
31810 tree args ATTRIBUTE_UNUSED,
31811 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
31814 if (DECL_P (*node))
31816 if (TREE_CODE (*node) == TYPE_DECL)
31817 type = &TREE_TYPE (*node);
31822 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
31823 || TREE_CODE (*type) == UNION_TYPE)))
31825 warning (OPT_Wattributes, "%qE attribute ignored",
31827 *no_add_attrs = true;
31830 else if ((is_attribute_p ("ms_struct", name)
31831 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
31832 || ((is_attribute_p ("gcc_struct", name)
31833 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
31835 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
31837 *no_add_attrs = true;
31844 ix86_handle_fndecl_attribute (tree *node, tree name,
31845 tree args ATTRIBUTE_UNUSED,
31846 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
31848 if (TREE_CODE (*node) != FUNCTION_DECL)
31850 warning (OPT_Wattributes, "%qE attribute only applies to functions",
31852 *no_add_attrs = true;
31858 ix86_ms_bitfield_layout_p (const_tree record_type)
31860 return ((TARGET_MS_BITFIELD_LAYOUT
31861 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
31862 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
31865 /* Returns an expression indicating where the this parameter is
31866 located on entry to the FUNCTION. */
31869 x86_this_parameter (tree function)
31871 tree type = TREE_TYPE (function);
31872 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
31877 const int *parm_regs;
31879 if (ix86_function_type_abi (type) == MS_ABI)
31880 parm_regs = x86_64_ms_abi_int_parameter_registers;
31882 parm_regs = x86_64_int_parameter_registers;
31883 return gen_rtx_REG (DImode, parm_regs[aggr]);
31886 nregs = ix86_function_regparm (type, function);
31888 if (nregs > 0 && !stdarg_p (type))
31891 unsigned int ccvt = ix86_get_callcvt (type);
31893 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
31894 regno = aggr ? DX_REG : CX_REG;
31895 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
31899 return gen_rtx_MEM (SImode,
31900 plus_constant (stack_pointer_rtx, 4));
31909 return gen_rtx_MEM (SImode,
31910 plus_constant (stack_pointer_rtx, 4));
31913 return gen_rtx_REG (SImode, regno);
31916 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
31919 /* Determine whether x86_output_mi_thunk can succeed. */
31922 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
31923 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
31924 HOST_WIDE_INT vcall_offset, const_tree function)
31926 /* 64-bit can handle anything. */
31930 /* For 32-bit, everything's fine if we have one free register. */
31931 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
31934 /* Need a free register for vcall_offset. */
31938 /* Need a free register for GOT references. */
31939 if (flag_pic && !targetm.binds_local_p (function))
31942 /* Otherwise ok. */
31946 /* Output the assembler code for a thunk function. THUNK_DECL is the
31947 declaration for the thunk function itself, FUNCTION is the decl for
31948 the target function. DELTA is an immediate constant offset to be
31949 added to THIS. If VCALL_OFFSET is nonzero, the word at
31950 *(*this + vcall_offset) should be added to THIS. */
31953 x86_output_mi_thunk (FILE *file,
31954 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
31955 HOST_WIDE_INT vcall_offset, tree function)
31957 rtx this_param = x86_this_parameter (function);
31958 rtx this_reg, tmp, fnaddr;
31960 emit_note (NOTE_INSN_PROLOGUE_END);
31962 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
31963 pull it in now and let DELTA benefit. */
31964 if (REG_P (this_param))
31965 this_reg = this_param;
31966 else if (vcall_offset)
31968 /* Put the this parameter into %eax. */
31969 this_reg = gen_rtx_REG (Pmode, AX_REG);
31970 emit_move_insn (this_reg, this_param);
31973 this_reg = NULL_RTX;
31975 /* Adjust the this parameter by a fixed constant. */
31978 rtx delta_rtx = GEN_INT (delta);
31979 rtx delta_dst = this_reg ? this_reg : this_param;
31983 if (!x86_64_general_operand (delta_rtx, Pmode))
31985 tmp = gen_rtx_REG (Pmode, R10_REG);
31986 emit_move_insn (tmp, delta_rtx);
31991 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
31994 /* Adjust the this parameter by a value stored in the vtable. */
31997 rtx vcall_addr, vcall_mem, this_mem;
31998 unsigned int tmp_regno;
32001 tmp_regno = R10_REG;
32004 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
32005 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) != 0)
32006 tmp_regno = AX_REG;
32008 tmp_regno = CX_REG;
32010 tmp = gen_rtx_REG (Pmode, tmp_regno);
32012 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
32013 if (Pmode != ptr_mode)
32014 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
32015 emit_move_insn (tmp, this_mem);
32017 /* Adjust the this parameter. */
32018 vcall_addr = plus_constant (tmp, vcall_offset);
32020 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
32022 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
32023 emit_move_insn (tmp2, GEN_INT (vcall_offset));
32024 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
32027 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
32028 if (Pmode != ptr_mode)
32029 emit_insn (gen_addsi_1_zext (this_reg,
32030 gen_rtx_REG (ptr_mode,
32034 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
32037 /* If necessary, drop THIS back to its stack slot. */
32038 if (this_reg && this_reg != this_param)
32039 emit_move_insn (this_param, this_reg);
32041 fnaddr = XEXP (DECL_RTL (function), 0);
32044 if (!flag_pic || targetm.binds_local_p (function)
32045 || cfun->machine->call_abi == MS_ABI)
32049 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
32050 tmp = gen_rtx_CONST (Pmode, tmp);
32051 fnaddr = gen_rtx_MEM (Pmode, tmp);
32056 if (!flag_pic || targetm.binds_local_p (function))
32059 else if (TARGET_MACHO)
32061 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
32062 fnaddr = XEXP (fnaddr, 0);
32064 #endif /* TARGET_MACHO */
32067 tmp = gen_rtx_REG (Pmode, CX_REG);
32068 output_set_got (tmp, NULL_RTX);
32070 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
32071 fnaddr = gen_rtx_PLUS (Pmode, fnaddr, tmp);
32072 fnaddr = gen_rtx_MEM (Pmode, fnaddr);
32076 /* Our sibling call patterns do not allow memories, because we have no
32077 predicate that can distinguish between frame and non-frame memory.
32078 For our purposes here, we can get away with (ab)using a jump pattern,
32079 because we're going to do no optimization. */
32080 if (MEM_P (fnaddr))
32081 emit_jump_insn (gen_indirect_jump (fnaddr));
32084 tmp = gen_rtx_MEM (QImode, fnaddr);
32085 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
32086 tmp = emit_call_insn (tmp);
32087 SIBLING_CALL_P (tmp) = 1;
32091 /* Emit just enough of rest_of_compilation to get the insns emitted.
32092 Note that use_thunk calls assemble_start_function et al. */
32093 tmp = get_insns ();
32094 insn_locators_alloc ();
32095 shorten_branches (tmp);
32096 final_start_function (tmp, file, 1);
32097 final (tmp, file, 1);
32098 final_end_function ();
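/* For illustration: what the thunk emitted above computes before the
   tail call, as a plain-C sketch. The pointer casts are illustrative
   and assume the vtable pointer sits at offset 0 of the object. */

static char *
sketch_thunk_adjust_this (char *this_ptr, long delta, long vcall_offset)
{
  this_ptr += delta;                    /* fixed DELTA adjustment */
  if (vcall_offset != 0)
    {
      char *vtable = *(char **) this_ptr;
      /* Add the word at *(*this + vcall_offset).  */
      this_ptr += *(long *) (vtable + vcall_offset);
    }
  return this_ptr;                      /* then jump to FUNCTION */
}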
32102 x86_file_start (void)
32104 default_file_start ();
32106 darwin_file_start ();
32108 if (X86_FILE_START_VERSION_DIRECTIVE)
32109 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
32110 if (X86_FILE_START_FLTUSED)
32111 fputs ("\t.global\t__fltused\n", asm_out_file);
32112 if (ix86_asm_dialect == ASM_INTEL)
32113 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
32117 x86_field_alignment (tree field, int computed)
32119 enum machine_mode mode;
32120 tree type = TREE_TYPE (field);
32122 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
32124 mode = TYPE_MODE (strip_array_types (type));
32125 if (mode == DFmode || mode == DCmode
32126 || GET_MODE_CLASS (mode) == MODE_INT
32127 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
32128 return MIN (32, computed);
32132 /* Output assembler code to FILE to increment profiler label # LABELNO
32133 for profiling a function entry. */
32135 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
32137 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
32142 #ifndef NO_PROFILE_COUNTERS
32143 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
32146 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
32147 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
32149 fprintf (file, "\tcall\t%s\n", mcount_name);
32153 #ifndef NO_PROFILE_COUNTERS
32154 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
32157 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
32161 #ifndef NO_PROFILE_COUNTERS
32162 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
32165 fprintf (file, "\tcall\t%s\n", mcount_name);
32169 /* We don't have exact information about the insn sizes, but we may assume
32170 quite safely that we are informed about all 1 byte insns and memory
32171 address sizes. This is enough to eliminate unnecessary padding in 99% of cases. */
32175 min_insn_size (rtx insn)
32179 if (!INSN_P (insn) || !active_insn_p (insn))
32182 /* Discard alignments we've emitted, and jump instructions. */
32183 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
32184 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
32186 if (JUMP_TABLE_DATA_P (insn))
32189 /* Important case - calls are always 5 bytes.
32190 It is common to have many calls in a row. */
32192 && symbolic_reference_mentioned_p (PATTERN (insn))
32193 && !SIBLING_CALL_P (insn))
32195 len = get_attr_length (insn);
32199 /* For normal instructions we rely on get_attr_length being exact,
32200 with a few exceptions. */
32201 if (!JUMP_P (insn))
32203 enum attr_type type = get_attr_type (insn);
32208 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
32209 || asm_noperands (PATTERN (insn)) >= 0)
32216 /* Otherwise trust get_attr_length. */
32220 l = get_attr_length_address (insn);
32221 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
32230 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
32232 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in a 16 byte window. */
32236 ix86_avoid_jump_mispredicts (void)
32238 rtx insn, start = get_insns ();
32239 int nbytes = 0, njumps = 0;
32242 /* Look for all minimal intervals of instructions containing 4 jumps.
32243 The intervals are bounded by START and INSN. NBYTES is the total
32244 size of instructions in the interval including INSN and not including
32245 START. When the NBYTES is smaller than 16 bytes, it is possible
32246 that the end of START and INSN ends up in the same 16byte page.
32248 The smallest offset in the page INSN can start is the case where START
32249 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
32250 We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
32252 for (insn = start; insn; insn = NEXT_INSN (insn))
32256 if (LABEL_P (insn))
32258 int align = label_to_alignment (insn);
32259 int max_skip = label_to_max_skip (insn);
32263 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
32264 already in the current 16 byte page, because otherwise
32265 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
32266 bytes to reach 16 byte boundary. */
32268 || (align <= 3 && max_skip != (1 << align) - 1))
32271 fprintf (dump_file, "Label %i with max_skip %i\n",
32272 INSN_UID (insn), max_skip);
32275 while (nbytes + max_skip >= 16)
32277 start = NEXT_INSN (start);
32278 if ((JUMP_P (start)
32279 && GET_CODE (PATTERN (start)) != ADDR_VEC
32280 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
32282 njumps--, isjump = 1;
32285 nbytes -= min_insn_size (start);
32291 min_size = min_insn_size (insn);
32292 nbytes += min_size;
32294 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
32295 INSN_UID (insn), min_size);
32297 && GET_CODE (PATTERN (insn)) != ADDR_VEC
32298 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
32306 start = NEXT_INSN (start);
32307 if ((JUMP_P (start)
32308 && GET_CODE (PATTERN (start)) != ADDR_VEC
32309 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
32311 njumps--, isjump = 1;
32314 nbytes -= min_insn_size (start);
32316 gcc_assert (njumps >= 0);
32318 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
32319 INSN_UID (start), INSN_UID (insn), nbytes);
32321 if (njumps == 3 && isjump && nbytes < 16)
32323 int padsize = 15 - nbytes + min_insn_size (insn);
32326 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
32327 INSN_UID (insn), padsize);
32328 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
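/* For illustration, the padding arithmetic above as a sketch: with
   START ending at offset 0 of a 16-byte window and NBYTES bytes up to
   and including INSN, INSN starts at offset NBYTES - size (INSN), and

     padsize = 15 - nbytes + insn_size

   filler bytes push INSN so that it no longer fits entirely in the
   same window as the previous three jumps; e.g. nbytes == 12 with a
   2-byte INSN yields 5 bytes of padding, moving INSN to straddle the
   window boundary at offsets 15..16.  */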
32334 /* AMD Athlon works faster
32335 when RET is not the destination of a conditional jump or directly preceded
32336 by another jump instruction. We avoid the penalty by inserting a NOP just
32337 before the RET instructions in such cases. */
32339 ix86_pad_returns (void)
32344 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
32346 basic_block bb = e->src;
32347 rtx ret = BB_END (bb);
32349 bool replace = false;
32351 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
32352 || optimize_bb_for_size_p (bb))
32354 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
32355 if (active_insn_p (prev) || LABEL_P (prev))
32357 if (prev && LABEL_P (prev))
32362 FOR_EACH_EDGE (e, ei, bb->preds)
32363 if (EDGE_FREQUENCY (e) && e->src->index >= 0
32364 && !(e->flags & EDGE_FALLTHRU))
32369 prev = prev_active_insn (ret);
32371 && ((JUMP_P (prev) && any_condjump_p (prev))
32374 /* Empty functions get branch mispredict even when
32375 the jump destination is not visible to us. */
32376 if (!prev && !optimize_function_for_size_p (cfun))
32381 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
32387 /* Count the minimum number of instructions in BB. Return 4 if the
32388 number of instructions >= 4. */
32391 ix86_count_insn_bb (basic_block bb)
32394 int insn_count = 0;
32396 /* Count number of instructions in this block. Return 4 if the number
32397 of instructions >= 4. */
32398 FOR_BB_INSNS (bb, insn)
32400 /* Only happens in exit blocks. */
32402 && ANY_RETURN_P (PATTERN (insn)))
32405 if (NONDEBUG_INSN_P (insn)
32406 && GET_CODE (PATTERN (insn)) != USE
32407 && GET_CODE (PATTERN (insn)) != CLOBBER)
32410 if (insn_count >= 4)
32419 /* Count the minimum number of instructions in code path in BB.
32420 Return 4 if the number of instructions >= 4. */
32423 ix86_count_insn (basic_block bb)
32427 int min_prev_count;
32429 /* Only bother counting instructions along paths with no
32430 more than 2 basic blocks between entry and exit. Given
32431 that BB has an edge to exit, determine if a predecessor
32432 of BB has an edge from entry. If so, compute the number
32433 of instructions in the predecessor block. If there
32434 happen to be multiple such blocks, compute the minimum. */
32435 min_prev_count = 4;
32436 FOR_EACH_EDGE (e, ei, bb->preds)
32439 edge_iterator prev_ei;
32441 if (e->src == ENTRY_BLOCK_PTR)
32443 min_prev_count = 0;
32446 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
32448 if (prev_e->src == ENTRY_BLOCK_PTR)
32450 int count = ix86_count_insn_bb (e->src);
32451 if (count < min_prev_count)
32452 min_prev_count = count;
32458 if (min_prev_count < 4)
32459 min_prev_count += ix86_count_insn_bb (bb);
32461 return min_prev_count;
32464 /* Pad short functions to 4 instructions. */
32467 ix86_pad_short_function (void)
32472 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
32474 rtx ret = BB_END (e->src);
32475 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
32477 int insn_count = ix86_count_insn (e->src);
32479 /* Pad short function. */
32480 if (insn_count < 4)
32484 /* Find epilogue. */
32487 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
32488 insn = PREV_INSN (insn);
32493 /* Two NOPs count as one instruction. */
32494 insn_count = 2 * (4 - insn_count);
32495 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
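/* For illustration, the arithmetic above as a sketch: each missing
   instruction is made up with two NOPs, since a NOP pair counts as a
   single instruction here; e.g. a function with one instruction gets
   2 * (4 - 1) = 6 NOPs inserted before its epilogue.  */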
32501 /* Implement machine specific optimizations. We implement padding of returns
32502 for K8 CPUs and a pass to avoid 4 jumps in a single 16 byte window. */
32506 /* We are freeing block_for_insn in the toplev to keep compatibility
32507 with old MDEP_REORGS that are not CFG based. Recompute it now. */
32508 compute_bb_for_insn ();
32510 /* Run the vzeroupper optimization if needed. */
32511 if (TARGET_VZEROUPPER)
32512 move_or_delete_vzeroupper ();
32514 if (optimize && optimize_function_for_speed_p (cfun))
32516 if (TARGET_PAD_SHORT_FUNCTION)
32517 ix86_pad_short_function ();
32518 else if (TARGET_PAD_RETURNS)
32519 ix86_pad_returns ();
32520 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
32521 if (TARGET_FOUR_JUMP_LIMIT)
32522 ix86_avoid_jump_mispredicts ();
32527 /* Return nonzero when a QImode register that must be represented via a REX prefix is used. */
32530 x86_extended_QIreg_mentioned_p (rtx insn)
32533 extract_insn_cached (insn);
32534 for (i = 0; i < recog_data.n_operands; i++)
32535 if (REG_P (recog_data.operand[i])
32536 && REGNO (recog_data.operand[i]) > BX_REG)
32541 /* Return nonzero when P points to a register encoded via a REX prefix.
32542 Called via for_each_rtx. */
32544 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
32546 unsigned int regno;
32549 regno = REGNO (*p);
32550 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
32553 /* Return true when INSN mentions a register that must be encoded using a REX prefix. */
32556 x86_extended_reg_mentioned_p (rtx insn)
32558 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
32559 extended_reg_mentioned_1, NULL);
32562 /* If profitable, negate (without causing overflow) integer constant
32563 of mode MODE at location LOC. Return true in this case. */
32565 x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
32569 if (!CONST_INT_P (*loc))
32575 /* DImode x86_64 constants must fit in 32 bits. */
32576 gcc_assert (x86_64_immediate_operand (*loc, mode));
32587 gcc_unreachable ();
32590 /* Avoid overflows. */
32591 if (mode_signbit_p (mode, *loc))
32594 val = INTVAL (*loc);
32596 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
32597 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
32598 if ((val < 0 && val != -128)
32601 *loc = GEN_INT (-val);
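/* For illustration, why -128 is excluded above: sign-extended 8-bit
   immediates cover -128..127, so a sketch of the encoding test is

     fits_imm8 (v) = (v >= -128 && v <= 127)

   and negating -128 to +128 would grow the immediate from one byte to
   four, while e.g. `addl $-4' still profitably becomes `subl $4'.  */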
32608 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
32609 optabs would emit if we didn't have TFmode patterns. */
32612 x86_emit_floatuns (rtx operands[2])
32614 rtx neglab, donelab, i0, i1, f0, in, out;
32615 enum machine_mode mode, inmode;
32617 inmode = GET_MODE (operands[1]);
32618 gcc_assert (inmode == SImode || inmode == DImode);
32621 in = force_reg (inmode, operands[1]);
32622 mode = GET_MODE (out);
32623 neglab = gen_label_rtx ();
32624 donelab = gen_label_rtx ();
32625 f0 = gen_reg_rtx (mode);
32627 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
32629 expand_float (out, in, 0);
32631 emit_jump_insn (gen_jump (donelab));
32634 emit_label (neglab);
32636 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
32638 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
32640 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
32642 expand_float (f0, i0, 0);
32644 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
32646 emit_label (donelab);
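/* For illustration only: the same conversion in plain C for 64-bit
   inputs, a sketch of the halve-and-double trick used above. Folding
   bit 0 into bit 1 (round to odd) before the signed convert keeps the
   final doubling correctly rounded. */

static double
sketch_floatuns_di (unsigned long long u)
{
  unsigned long long half;
  double f;

  if ((long long) u >= 0)
    return (double) (long long) u;      /* signed conversion suffices */

  half = (u >> 1) | (u & 1);            /* halve, rounding to odd */
  f = (double) (long long) half;
  return f + f;                         /* double back to full magnitude */
}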
32649 /* AVX2 does support 32-byte integer vector operations,
32650 thus the longest vector we are faced with is V32QImode. */
32651 #define MAX_VECT_LEN 32
32653 struct expand_vec_perm_d
32655 rtx target, op0, op1;
32656 unsigned char perm[MAX_VECT_LEN];
32657 enum machine_mode vmode;
32658 unsigned char nelt;
32662 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
32663 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
32665 /* Get a vector mode of the same size as the original but with elements
32666 twice as wide. This is only guaranteed to apply to integral vectors. */
32668 static inline enum machine_mode
32669 get_mode_wider_vector (enum machine_mode o)
32671 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
32672 enum machine_mode n = GET_MODE_WIDER_MODE (o);
32673 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
32674 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
32678 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
32679 with all elements equal to VAR. Return true if successful. */
32682 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
32683 rtx target, rtx val)
32706 /* First attempt to recognize VAL as-is. */
32707 dup = gen_rtx_VEC_DUPLICATE (mode, val);
32708 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
32709 if (recog_memoized (insn) < 0)
32712 /* If that fails, force VAL into a register. */
32715 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
32716 seq = get_insns ();
32719 emit_insn_before (seq, insn);
32721 ok = recog_memoized (insn) >= 0;
32730 if (TARGET_SSE || TARGET_3DNOW_A)
32734 val = gen_lowpart (SImode, val);
32735 x = gen_rtx_TRUNCATE (HImode, val);
32736 x = gen_rtx_VEC_DUPLICATE (mode, x);
32737 emit_insn (gen_rtx_SET (VOIDmode, target, x));
32750 struct expand_vec_perm_d dperm;
32754 memset (&dperm, 0, sizeof (dperm));
32755 dperm.target = target;
32756 dperm.vmode = mode;
32757 dperm.nelt = GET_MODE_NUNITS (mode);
32758 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
32760 /* Extend to SImode using a paradoxical SUBREG. */
32761 tmp1 = gen_reg_rtx (SImode);
32762 emit_move_insn (tmp1, gen_lowpart (SImode, val));
32764 /* Insert the SImode value as low element of a V4SImode vector. */
32765 tmp2 = gen_lowpart (V4SImode, dperm.op0);
32766 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
32768 ok = (expand_vec_perm_1 (&dperm)
32769 || expand_vec_perm_broadcast_1 (&dperm));
32781 /* Replicate the value once into the next wider mode and recurse. */
32783 enum machine_mode smode, wsmode, wvmode;
32786 smode = GET_MODE_INNER (mode);
32787 wvmode = get_mode_wider_vector (mode);
32788 wsmode = GET_MODE_INNER (wvmode);
32790 val = convert_modes (wsmode, smode, val, true);
32791 x = expand_simple_binop (wsmode, ASHIFT, val,
32792 GEN_INT (GET_MODE_BITSIZE (smode)),
32793 NULL_RTX, 1, OPTAB_LIB_WIDEN);
32794 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
32796 x = gen_lowpart (wvmode, target);
32797 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
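/* For illustration, the shift-and-IOR above replicates the narrow
   value across a scalar of twice its width before recursing; iterated,
   it broadcasts a byte through a word as in this plain-C sketch:

     x  = b;           -- 0x000000ab
     x |= x << 8;      -- 0x0000abab
     x |= x << 16;     -- 0xabababab  */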
32805 enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
32806 rtx x = gen_reg_rtx (hvmode);
32808 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
32811 x = gen_rtx_VEC_CONCAT (mode, x, x);
32812 emit_insn (gen_rtx_SET (VOIDmode, target, x));
32821 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
32822 whose ONE_VAR element is VAR, and other elements are zero. Return true if successful. */
32826 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
32827 rtx target, rtx var, int one_var)
32829 enum machine_mode vsimode;
32832 bool use_vector_set = false;
32837 /* For SSE4.1, we normally use vector set. But if the second
32838 element is zero and inter-unit moves are OK, we use movq instead. */
32840 use_vector_set = (TARGET_64BIT
32842 && !(TARGET_INTER_UNIT_MOVES
32848 use_vector_set = TARGET_SSE4_1;
32851 use_vector_set = TARGET_SSE2;
32854 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
32861 use_vector_set = TARGET_AVX;
32864 /* Use ix86_expand_vector_set in 64bit mode only. */
32865 use_vector_set = TARGET_AVX && TARGET_64BIT;
32871 if (use_vector_set)
32873 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
32874 var = force_reg (GET_MODE_INNER (mode), var);
32875 ix86_expand_vector_set (mmx_ok, target, var, one_var);
32891 var = force_reg (GET_MODE_INNER (mode), var);
32892 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
32893 emit_insn (gen_rtx_SET (VOIDmode, target, x));
32898 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
32899 new_target = gen_reg_rtx (mode);
32901 new_target = target;
32902 var = force_reg (GET_MODE_INNER (mode), var);
32903 x = gen_rtx_VEC_DUPLICATE (mode, var);
32904 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
32905 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
32908 /* We need to shuffle the value to the correct position, so
32909 create a new pseudo to store the intermediate result. */
32911 /* With SSE2, we can use the integer shuffle insns. */
32912 if (mode != V4SFmode && TARGET_SSE2)
32914 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
32916 GEN_INT (one_var == 1 ? 0 : 1),
32917 GEN_INT (one_var == 2 ? 0 : 1),
32918 GEN_INT (one_var == 3 ? 0 : 1)));
32919 if (target != new_target)
32920 emit_move_insn (target, new_target);
32924 /* Otherwise convert the intermediate result to V4SFmode and
32925 use the SSE1 shuffle instructions. */
32926 if (mode != V4SFmode)
32928 tmp = gen_reg_rtx (V4SFmode);
32929 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
32934 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
32936 GEN_INT (one_var == 1 ? 0 : 1),
32937 GEN_INT (one_var == 2 ? 0+4 : 1+4),
32938 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
32940 if (mode != V4SFmode)
32941 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
32942 else if (tmp != target)
32943 emit_move_insn (target, tmp);
32945 else if (target != new_target)
32946 emit_move_insn (target, new_target);
32951 vsimode = V4SImode;
32957 vsimode = V2SImode;
32963 /* Zero extend the variable element to SImode and recurse. */
32964 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
32966 x = gen_reg_rtx (vsimode);
32967 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
32969 gcc_unreachable ();
32971 emit_move_insn (target, gen_lowpart (mode, x));
32979 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
32980 consisting of the values in VALS. It is known that all elements
32981 except ONE_VAR are constants. Return true if successful. */
32984 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
32985 rtx target, rtx vals, int one_var)
32987 rtx var = XVECEXP (vals, 0, one_var);
32988 enum machine_mode wmode;
32991 const_vec = copy_rtx (vals);
32992 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
32993 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
33001 /* For the two element vectors, it's just as easy to use
33002 the general case. */
33006 /* Use ix86_expand_vector_set in 64bit mode only. */
33029 /* There's no way to set one QImode entry easily. Combine
33030 the variable value with its adjacent constant value, and
33031 promote to an HImode set. */
33032 x = XVECEXP (vals, 0, one_var ^ 1);
33035 var = convert_modes (HImode, QImode, var, true);
33036 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
33037 NULL_RTX, 1, OPTAB_LIB_WIDEN);
33038 x = GEN_INT (INTVAL (x) & 0xff);
33042 var = convert_modes (HImode, QImode, var, true);
33043 x = gen_int_mode (INTVAL (x) << 8, HImode);
33045 if (x != const0_rtx)
33046 var = expand_simple_binop (HImode, IOR, var, x, var,
33047 1, OPTAB_LIB_WIDEN);
33049 x = gen_reg_rtx (wmode);
33050 emit_move_insn (x, gen_lowpart (wmode, const_vec));
33051 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
33053 emit_move_insn (target, gen_lowpart (mode, x));
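/* For illustration, the packing above in plain C (a sketch, assuming
   little-endian byte order within the halfword):

     pack (var, cst, var_is_high) = var_is_high
                                    ? (var << 8) | cst
                                    : (cst << 8) | var  */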
33060 emit_move_insn (target, const_vec);
33061 ix86_expand_vector_set (mmx_ok, target, var, one_var);
33065 /* A subroutine of ix86_expand_vector_init_general. Use vector
33066 concatenate to handle the most general case: all values variable,
33067 and none identical. */
33070 ix86_expand_vector_init_concat (enum machine_mode mode,
33071 rtx target, rtx *ops, int n)
33073 enum machine_mode cmode, hmode = VOIDmode;
33074 rtx first[8], second[4];
33114 gcc_unreachable ();
33117 if (!register_operand (ops[1], cmode))
33118 ops[1] = force_reg (cmode, ops[1]);
33119 if (!register_operand (ops[0], cmode))
33120 ops[0] = force_reg (cmode, ops[0]);
33121 emit_insn (gen_rtx_SET (VOIDmode, target,
33122 gen_rtx_VEC_CONCAT (mode, ops[0],
33142 gcc_unreachable ();
33158 gcc_unreachable ();
33163 /* FIXME: We process inputs backward to help RA. PR 36222. */
33166 for (; i > 0; i -= 2, j--)
33168 first[j] = gen_reg_rtx (cmode);
33169 v = gen_rtvec (2, ops[i - 1], ops[i]);
33170 ix86_expand_vector_init (false, first[j],
33171 gen_rtx_PARALLEL (cmode, v));
33177 gcc_assert (hmode != VOIDmode);
33178 for (i = j = 0; i < n; i += 2, j++)
33180 second[j] = gen_reg_rtx (hmode);
33181 ix86_expand_vector_init_concat (hmode, second [j],
33185 ix86_expand_vector_init_concat (mode, target, second, n);
33188 ix86_expand_vector_init_concat (mode, target, first, n);
33192 gcc_unreachable ();
33196 /* A subroutine of ix86_expand_vector_init_general. Use vector
33197 interleave to handle the most general case: all values variable,
33198 and none identical. */
33201 ix86_expand_vector_init_interleave (enum machine_mode mode,
33202 rtx target, rtx *ops, int n)
33204 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
33207 rtx (*gen_load_even) (rtx, rtx, rtx);
33208 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
33209 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
33214 gen_load_even = gen_vec_setv8hi;
33215 gen_interleave_first_low = gen_vec_interleave_lowv4si;
33216 gen_interleave_second_low = gen_vec_interleave_lowv2di;
33217 inner_mode = HImode;
33218 first_imode = V4SImode;
33219 second_imode = V2DImode;
33220 third_imode = VOIDmode;
33223 gen_load_even = gen_vec_setv16qi;
33224 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
33225 gen_interleave_second_low = gen_vec_interleave_lowv4si;
33226 inner_mode = QImode;
33227 first_imode = V8HImode;
33228 second_imode = V4SImode;
33229 third_imode = V2DImode;
33232 gcc_unreachable ();
33235 for (i = 0; i < n; i++)
33237 /* Extend the odd element to SImode using a paradoxical SUBREG. */
33238 op0 = gen_reg_rtx (SImode);
33239 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
33241 /* Insert the SImode value as low element of V4SImode vector. */
33242 op1 = gen_reg_rtx (V4SImode);
33243 op0 = gen_rtx_VEC_MERGE (V4SImode,
33244 gen_rtx_VEC_DUPLICATE (V4SImode,
33246 CONST0_RTX (V4SImode),
33248 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
33250 /* Cast the V4SImode vector back to a vector in the original mode. */
33251 op0 = gen_reg_rtx (mode);
33252 emit_move_insn (op0, gen_lowpart (mode, op1));
33254 /* Load even elements into the second position. */
33255 emit_insn (gen_load_even (op0,
33256 force_reg (inner_mode,
33260 /* Cast vector to FIRST_IMODE vector. */
33261 ops[i] = gen_reg_rtx (first_imode);
33262 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
33265 /* Interleave low FIRST_IMODE vectors. */
33266 for (i = j = 0; i < n; i += 2, j++)
33268 op0 = gen_reg_rtx (first_imode);
33269 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
33271 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
33272 ops[j] = gen_reg_rtx (second_imode);
33273 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
33276 /* Interleave low SECOND_IMODE vectors. */
33277 switch (second_imode)
33280 for (i = j = 0; i < n / 2; i += 2, j++)
33282 op0 = gen_reg_rtx (second_imode);
33283 emit_insn (gen_interleave_second_low (op0, ops[i],
33286 /* Cast the SECOND_IMODE vector to the THIRD_IMODE vector. */
33288 ops[j] = gen_reg_rtx (third_imode);
33289 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
33291 second_imode = V2DImode;
33292 gen_interleave_second_low = gen_vec_interleave_lowv2di;
33296 op0 = gen_reg_rtx (second_imode);
33297 emit_insn (gen_interleave_second_low (op0, ops[0],
33300 /* Cast the SECOND_IMODE vector back to a vector in the original mode. */
33302 emit_insn (gen_rtx_SET (VOIDmode, target,
33303 gen_lowpart (mode, op0)));
33307 gcc_unreachable ();
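/* An editor sketch (not GCC code) of the interleave strategy on plain
   integers, for eight 16-bit elements e[0..7] on a little-endian
   target; init_v8hi_by_interleave is a hypothetical name:

     #include <stdint.h>
     #include <string.h>

     static void
     init_v8hi_by_interleave (uint16_t out[8], const uint16_t e[8])
     {
       uint32_t v[4];
       uint64_t w0, w1;
       int i;
       for (i = 0; i < 4; ++i)       scalar inserts of each element pair
         v[i] = (uint32_t) e[2 * i] | ((uint32_t) e[2 * i + 1] << 16);
       w0 = (uint64_t) v[0] | ((uint64_t) v[1] << 32);   punpckldq
       w1 = (uint64_t) v[2] | ((uint64_t) v[3] << 32);   punpckldq
       memcpy (out, &w0, 8);                             punpcklqdq
       memcpy (out + 4, &w1, 8);
     }

   Each interleave-low level doubles the number of elements packed
   contiguously, so n scalars need log2(n) - 1 shuffle levels after
   the initial inserts.  */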
33311 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
33312 all values variable, and none identical. */
33315 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
33316 rtx target, rtx vals)
33318 rtx ops[32], op0, op1;
33319 enum machine_mode half_mode = VOIDmode;
33326 if (!mmx_ok && !TARGET_SSE)
33338 n = GET_MODE_NUNITS (mode);
33339 for (i = 0; i < n; i++)
33340 ops[i] = XVECEXP (vals, 0, i);
33341 ix86_expand_vector_init_concat (mode, target, ops, n);
33345 half_mode = V16QImode;
33349 half_mode = V8HImode;
33353 n = GET_MODE_NUNITS (mode);
33354 for (i = 0; i < n; i++)
33355 ops[i] = XVECEXP (vals, 0, i);
33356 op0 = gen_reg_rtx (half_mode);
33357 op1 = gen_reg_rtx (half_mode);
33358 ix86_expand_vector_init_interleave (half_mode, op0, ops,
33360 ix86_expand_vector_init_interleave (half_mode, op1,
33361 &ops [n >> 1], n >> 2);
33362 emit_insn (gen_rtx_SET (VOIDmode, target,
33363 gen_rtx_VEC_CONCAT (mode, op0, op1)));
33367 if (!TARGET_SSE4_1)
33375 /* Don't use ix86_expand_vector_init_interleave if we can't
33376 move from GPR to SSE register directly. */
33377 if (!TARGET_INTER_UNIT_MOVES)
33380 n = GET_MODE_NUNITS (mode);
33381 for (i = 0; i < n; i++)
33382 ops[i] = XVECEXP (vals, 0, i);
33383 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
33391 gcc_unreachable ();
33395 int i, j, n_elts, n_words, n_elt_per_word;
33396 enum machine_mode inner_mode;
33397 rtx words[4], shift;
33399 inner_mode = GET_MODE_INNER (mode);
33400 n_elts = GET_MODE_NUNITS (mode);
33401 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
33402 n_elt_per_word = n_elts / n_words;
33403 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
33405 for (i = 0; i < n_words; ++i)
33407 rtx word = NULL_RTX;
33409 for (j = 0; j < n_elt_per_word; ++j)
33411 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
33412 elt = convert_modes (word_mode, inner_mode, elt, true);
33418 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
33419 word, 1, OPTAB_LIB_WIDEN);
33420 word = expand_simple_binop (word_mode, IOR, word, elt,
33421 word, 1, OPTAB_LIB_WIDEN);
33429 emit_move_insn (target, gen_lowpart (mode, words[0]));
33430 else if (n_words == 2)
33432 rtx tmp = gen_reg_rtx (mode);
33433 emit_clobber (tmp);
33434 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
33435 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
33436 emit_move_insn (target, tmp);
33438 else if (n_words == 4)
33440 rtx tmp = gen_reg_rtx (V4SImode);
33441 gcc_assert (word_mode == SImode);
33442 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
33443 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
33444 emit_move_insn (target, gen_lowpart (mode, tmp));
33447 gcc_unreachable ();
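/* An editor sketch (not GCC code) of the word-building loop above,
   for one SImode word holding two HImode elements on a little-endian
   target; pack_word is a hypothetical name:

     #include <stdint.h>

     static uint32_t
     pack_word (const uint16_t elt[2])
     {
       uint32_t word = elt[1];            highest element first
       word = (word << 16) | elt[0];      ASHIFT then IOR, as above
       return word;                       memory image of { elt[0], elt[1] }
     }

   The words are then assembled into the vector with lowpart/highpart
   moves (n_words == 2) or a recursive V4SImode init (n_words == 4).  */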
33451 /* Initialize vector TARGET via VALS. Suppress the use of MMX
33452 instructions unless MMX_OK is true. */
33455 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
33457 enum machine_mode mode = GET_MODE (target);
33458 enum machine_mode inner_mode = GET_MODE_INNER (mode);
33459 int n_elts = GET_MODE_NUNITS (mode);
33460 int n_var = 0, one_var = -1;
33461 bool all_same = true, all_const_zero = true;
33465 for (i = 0; i < n_elts; ++i)
33467 x = XVECEXP (vals, 0, i);
33468 if (!(CONST_INT_P (x)
33469 || GET_CODE (x) == CONST_DOUBLE
33470 || GET_CODE (x) == CONST_FIXED))
33471 n_var++, one_var = i;
33472 else if (x != CONST0_RTX (inner_mode))
33473 all_const_zero = false;
33474 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
33478 /* Constants are best loaded from the constant pool. */
33481 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
33485 /* If all values are identical, broadcast the value. */
33487 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
33488 XVECEXP (vals, 0, 0)))
33491 /* Values where only one field is non-constant are best loaded from
33492 the pool and overwritten via move later. */
33496 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
33497 XVECEXP (vals, 0, one_var),
33501 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
33505 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
33509 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
33511 enum machine_mode mode = GET_MODE (target);
33512 enum machine_mode inner_mode = GET_MODE_INNER (mode);
33513 enum machine_mode half_mode;
33514 bool use_vec_merge = false;
33516 static rtx (*gen_extract[6][2]) (rtx, rtx)
33518 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
33519 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
33520 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
33521 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
33522 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
33523 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
33525 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
33527 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
33528 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
33529 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
33530 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
33531 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
33532 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
33542 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
33543 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
33545 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
33547 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
33548 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
33554 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
33558 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
33559 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
33561 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
33563 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
33564 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
33571 /* For the two element vectors, we implement a VEC_CONCAT with
33572 the extraction of the other element. */
33574 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
33575 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
33578 op0 = val, op1 = tmp;
33580 op0 = tmp, op1 = val;
33582 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
33583 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
33588 use_vec_merge = TARGET_SSE4_1;
33595 use_vec_merge = true;
33599 /* tmp = target = A B C D */
33600 tmp = copy_to_reg (target);
33601 /* target = A A B B */
33602 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
33603 /* target = X A B B */
33604 ix86_expand_vector_set (false, target, val, 0);
33605 /* target = A X C D */
33606 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
33607 const1_rtx, const0_rtx,
33608 GEN_INT (2+4), GEN_INT (3+4)));
33612 /* tmp = target = A B C D */
33613 tmp = copy_to_reg (target);
33614 /* tmp = X B C D */
33615 ix86_expand_vector_set (false, tmp, val, 0);
33616 /* target = A B X D */
33617 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
33618 const0_rtx, const1_rtx,
33619 GEN_INT (0+4), GEN_INT (3+4)));
33623 /* tmp = target = A B C D */
33624 tmp = copy_to_reg (target);
33625 /* tmp = X B C D */
33626 ix86_expand_vector_set (false, tmp, val, 0);
33628 /* target = A B C X */
33628 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
33629 const0_rtx, const1_rtx,
33630 GEN_INT (2+4), GEN_INT (0+4)));
33634 gcc_unreachable ();
33639 use_vec_merge = TARGET_SSE4_1;
33643 /* Element 0 handled by vec_merge below. */
33646 use_vec_merge = true;
33652 /* With SSE2, use integer shuffles to swap element 0 and ELT,
33653 store into element 0, then shuffle them back. */
33657 order[0] = GEN_INT (elt);
33658 order[1] = const1_rtx;
33659 order[2] = const2_rtx;
33660 order[3] = GEN_INT (3);
33661 order[elt] = const0_rtx;
33663 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
33664 order[1], order[2], order[3]));
33666 ix86_expand_vector_set (false, target, val, 0);
33668 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
33669 order[1], order[2], order[3]));
33673 /* For SSE1, we have to reuse the V4SF code. */
33674 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
33675 gen_lowpart (SFmode, val), elt);
33680 use_vec_merge = TARGET_SSE2;
33683 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
33687 use_vec_merge = TARGET_SSE4_1;
33694 half_mode = V16QImode;
33700 half_mode = V8HImode;
33706 half_mode = V4SImode;
33712 half_mode = V2DImode;
33718 half_mode = V4SFmode;
33724 half_mode = V2DFmode;
33730 /* Compute offset. */
33734 gcc_assert (i <= 1);
33736 /* Extract the half. */
33737 tmp = gen_reg_rtx (half_mode);
33738 emit_insn (gen_extract[j][i] (tmp, target));
33740 /* Put val in tmp at elt. */
33741 ix86_expand_vector_set (false, tmp, val, elt);
33744 emit_insn (gen_insert[j][i] (target, target, tmp));
33753 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
33754 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
33755 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
33759 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
33761 emit_move_insn (mem, target);
33763 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
33764 emit_move_insn (tmp, val);
33766 emit_move_insn (target, mem);
33771 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
33773 enum machine_mode mode = GET_MODE (vec);
33774 enum machine_mode inner_mode = GET_MODE_INNER (mode);
33775 bool use_vec_extr = false;
33788 use_vec_extr = true;
33792 use_vec_extr = TARGET_SSE4_1;
33804 tmp = gen_reg_rtx (mode);
33805 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
33806 GEN_INT (elt), GEN_INT (elt),
33807 GEN_INT (elt+4), GEN_INT (elt+4)));
33811 tmp = gen_reg_rtx (mode);
33812 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
33816 gcc_unreachable ();
33819 use_vec_extr = true;
33824 use_vec_extr = TARGET_SSE4_1;
33838 tmp = gen_reg_rtx (mode);
33839 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
33840 GEN_INT (elt), GEN_INT (elt),
33841 GEN_INT (elt), GEN_INT (elt)));
33845 tmp = gen_reg_rtx (mode);
33846 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
33850 gcc_unreachable ();
33853 use_vec_extr = true;
33858 /* For SSE1, we have to reuse the V4SF code. */
33859 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
33860 gen_lowpart (V4SFmode, vec), elt);
33866 use_vec_extr = TARGET_SSE2;
33869 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
33873 use_vec_extr = TARGET_SSE4_1;
33879 tmp = gen_reg_rtx (V4SFmode);
33881 emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
33883 emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
33884 ix86_expand_vector_extract (false, target, tmp, elt & 3);
33892 tmp = gen_reg_rtx (V2DFmode);
33894 emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
33896 emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
33897 ix86_expand_vector_extract (false, target, tmp, elt & 1);
33905 tmp = gen_reg_rtx (V16QImode);
33907 emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
33909 emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
33910 ix86_expand_vector_extract (false, target, tmp, elt & 15);
33918 tmp = gen_reg_rtx (V8HImode);
33920 emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
33922 emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
33923 ix86_expand_vector_extract (false, target, tmp, elt & 7);
33931 tmp = gen_reg_rtx (V4SImode);
33933 emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
33935 emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
33936 ix86_expand_vector_extract (false, target, tmp, elt & 3);
33944 tmp = gen_reg_rtx (V2DImode);
33946 emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
33948 emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
33949 ix86_expand_vector_extract (false, target, tmp, elt & 1);
33955 /* ??? Could extract the appropriate HImode element and shift. */
33962 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
33963 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
33965 /* Let the rtl optimizers know about the zero extension performed. */
33966 if (inner_mode == QImode || inner_mode == HImode)
33968 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
33969 target = gen_lowpart (SImode, target);
33972 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
33976 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
33978 emit_move_insn (mem, vec);
33980 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
33981 emit_move_insn (target, tmp);
33985 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
33986 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
33987 The upper bits of DEST are undefined, though they shouldn't cause
33988 exceptions (some bits from src or all zeros are ok). */
33991 emit_reduc_half (rtx dest, rtx src, int i)
33994 switch (GET_MODE (src))
33998 tem = gen_sse_movhlps (dest, src, src);
34000 tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
34001 GEN_INT (1 + 4), GEN_INT (1 + 4));
34004 tem = gen_vec_interleave_highv2df (dest, src, src);
34010 tem = gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, dest),
34011 gen_lowpart (V1TImode, src),
34016 tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
34018 tem = gen_avx_shufps256 (dest, src, src,
34019 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
34023 tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
34025 tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
34032 tem = gen_avx2_permv2ti (gen_lowpart (V4DImode, dest),
34033 gen_lowpart (V4DImode, src),
34034 gen_lowpart (V4DImode, src),
34037 tem = gen_avx2_lshrv2ti3 (gen_lowpart (V2TImode, dest),
34038 gen_lowpart (V2TImode, src),
34042 gcc_unreachable ();
34047 /* Expand a vector reduction. FN is the binary pattern to reduce;
34048 DEST is the destination; IN is the input vector. */
34051 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
34053 rtx half, dst, vec = in;
34054 enum machine_mode mode = GET_MODE (in);
34057 /* SSE4 has a special instruction for V8HImode UMIN reduction. */
34059 && mode == V8HImode
34060 && fn == gen_uminv8hi3)
34062 emit_insn (gen_sse4_1_phminposuw (dest, in));
34066 for (i = GET_MODE_BITSIZE (mode);
34067 i > GET_MODE_BITSIZE (GET_MODE_INNER (mode));
34070 half = gen_reg_rtx (mode);
34071 emit_reduc_half (half, vec, i);
34072 if (i == GET_MODE_BITSIZE (GET_MODE_INNER (mode)) * 2)
34075 dst = gen_reg_rtx (mode);
34076 emit_insn (fn (dst, half, vec));
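/* An editor sketch (not GCC code) of the halving schedule above,
   modelled for a max reduction over V4SFmode; reduce_max4 is a
   hypothetical name:

     static float
     reduce_max4 (const float v[4])
     {
       float h0[4], m[4], h1[4];
       int i;
       for (i = 0; i < 4; ++i)            emit_reduc_half, i == 128:
         h0[i] = v[(i + 2) & 3];          movhlps moves bits 64..127 down
       for (i = 0; i < 4; ++i)            fn: elementwise max
         m[i] = v[i] > h0[i] ? v[i] : h0[i];
       for (i = 0; i < 4; ++i)            emit_reduc_half, i == 64:
         h1[i] = m[(i + 1) & 3];          shufps moves bits 32..63 down
       return m[0] > h1[0] ? m[0] : h1[0];   element 0 holds the result
     }
*/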
34081 /* Target hook for scalar_mode_supported_p. */
34083 ix86_scalar_mode_supported_p (enum machine_mode mode)
34085 if (DECIMAL_FLOAT_MODE_P (mode))
34086 return default_decimal_float_supported_p ();
34087 else if (mode == TFmode)
34090 return default_scalar_mode_supported_p (mode);
34093 /* Implements target hook vector_mode_supported_p. */
34095 ix86_vector_mode_supported_p (enum machine_mode mode)
34097 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
34099 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
34101 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
34103 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
34105 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
34110 /* Target hook for c_mode_for_suffix. */
34111 static enum machine_mode
34112 ix86_c_mode_for_suffix (char suffix)
34122 /* Worker function for TARGET_MD_ASM_CLOBBERS.
34124 We do this in the new i386 backend to maintain source compatibility
34125 with the old cc0-based compiler. */
34128 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
34129 tree inputs ATTRIBUTE_UNUSED,
34132 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
34134 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
34139 /* Implements the target hook targetm.encode_section_info. */
34141 static void ATTRIBUTE_UNUSED
34142 ix86_encode_section_info (tree decl, rtx rtl, int first)
34144 default_encode_section_info (decl, rtl, first);
34146 if (TREE_CODE (decl) == VAR_DECL
34147 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
34148 && ix86_in_large_data_p (decl))
34149 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
34152 /* Worker function for REVERSE_CONDITION. */
34155 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
34157 return (mode != CCFPmode && mode != CCFPUmode
34158 ? reverse_condition (code)
34159 : reverse_condition_maybe_unordered (code));
34162 /* Output code to perform an x87 FP register move, from OPERANDS[1] to OPERANDS[0]. */
34166 output_387_reg_move (rtx insn, rtx *operands)
34168 if (REG_P (operands[0]))
34170 if (REG_P (operands[1])
34171 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
34173 if (REGNO (operands[0]) == FIRST_STACK_REG)
34174 return output_387_ffreep (operands, 0);
34175 return "fstp\t%y0";
34177 if (STACK_TOP_P (operands[0]))
34178 return "fld%Z1\t%y1";
34181 else if (MEM_P (operands[0]))
34183 gcc_assert (REG_P (operands[1]));
34184 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
34185 return "fstp%Z0\t%y0";
34188 /* There is no non-popping store to memory for XFmode.
34189 So if we need one, follow the store with a load. */
34190 if (GET_MODE (operands[0]) == XFmode)
34191 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
34193 return "fst%Z0\t%y0";
34200 /* Output code to perform a conditional jump to LABEL, if C2 flag in
34201 FP status register is set. */
34204 ix86_emit_fp_unordered_jump (rtx label)
34206 rtx reg = gen_reg_rtx (HImode);
34209 emit_insn (gen_x86_fnstsw_1 (reg));
34211 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
34213 emit_insn (gen_x86_sahf_1 (reg));
34215 temp = gen_rtx_REG (CCmode, FLAGS_REG);
34216 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
34220 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
34222 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
34223 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
34226 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
34227 gen_rtx_LABEL_REF (VOIDmode, label),
34229 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
34231 emit_jump_insn (temp);
34232 predict_jump (REG_BR_PROB_BASE * 10 / 100);
34235 /* Output code to perform a log1p XFmode calculation. */
34237 void ix86_emit_i387_log1p (rtx op0, rtx op1)
34239 rtx label1 = gen_label_rtx ();
34240 rtx label2 = gen_label_rtx ();
34242 rtx tmp = gen_reg_rtx (XFmode);
34243 rtx tmp2 = gen_reg_rtx (XFmode);
34246 emit_insn (gen_absxf2 (tmp, op1));
34247 test = gen_rtx_GE (VOIDmode, tmp,
34248 CONST_DOUBLE_FROM_REAL_VALUE (
34249 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
34251 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
34253 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
34254 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
34255 emit_jump (label2);
34257 emit_label (label1);
34258 emit_move_insn (tmp, CONST1_RTX (XFmode));
34259 emit_insn (gen_addxf3 (tmp, op1, tmp));
34260 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
34261 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
34263 emit_label (label2);
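/* An editor sketch (not GCC code) of the split above, with libm calls
   standing in for fyl2xp1/fyl2x; the threshold is 1 - sqrt(2)/2, the
   documented validity bound of fyl2xp1, and model_log1p is a
   hypothetical name:

     #include <math.h>

     static long double
     model_log1p (long double x)
     {
       if (fabsl (x) < 0.29289321881345247561810596348408353L)
         return M_LN2 * log2l (1.0L + x);      fyl2xp1 path
       long double t = 1.0L + x;               explicit 1+x first,
       return M_LN2 * log2l (t);               then the fyl2x path
     }
*/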
34266 /* Emit code for round calculation. */
34267 void ix86_emit_i387_round (rtx op0, rtx op1)
34269 enum machine_mode inmode = GET_MODE (op1);
34270 enum machine_mode outmode = GET_MODE (op0);
34271 rtx e1, e2, res, tmp, tmp1, half;
34272 rtx scratch = gen_reg_rtx (HImode);
34273 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
34274 rtx jump_label = gen_label_rtx ();
34276 rtx (*gen_abs) (rtx, rtx);
34277 rtx (*gen_neg) (rtx, rtx);
34282 gen_abs = gen_abssf2;
34285 gen_abs = gen_absdf2;
34288 gen_abs = gen_absxf2;
34291 gcc_unreachable ();
34297 gen_neg = gen_negsf2;
34300 gen_neg = gen_negdf2;
34303 gen_neg = gen_negxf2;
34306 gen_neg = gen_neghi2;
34309 gen_neg = gen_negsi2;
34312 gen_neg = gen_negdi2;
34315 gcc_unreachable ();
34318 e1 = gen_reg_rtx (inmode);
34319 e2 = gen_reg_rtx (inmode);
34320 res = gen_reg_rtx (outmode);
34322 half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);
34324 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
34326 /* scratch = fxam(op1) */
34327 emit_insn (gen_rtx_SET (VOIDmode, scratch,
34328 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
34330 /* e1 = fabs(op1) */
34331 emit_insn (gen_abs (e1, op1));
34333 /* e2 = e1 + 0.5 */
34334 half = force_reg (inmode, half);
34335 emit_insn (gen_rtx_SET (VOIDmode, e2,
34336 gen_rtx_PLUS (inmode, e1, half)));
34338 /* res = floor(e2) */
34339 if (inmode != XFmode)
34341 tmp1 = gen_reg_rtx (XFmode);
34343 emit_insn (gen_rtx_SET (VOIDmode, tmp1,
34344 gen_rtx_FLOAT_EXTEND (XFmode, e2)));
34354 rtx tmp0 = gen_reg_rtx (XFmode);
34356 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
34358 emit_insn (gen_rtx_SET (VOIDmode, res,
34359 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
34360 UNSPEC_TRUNC_NOOP)));
34364 emit_insn (gen_frndintxf2_floor (res, tmp1));
34367 emit_insn (gen_lfloorxfhi2 (res, tmp1));
34370 emit_insn (gen_lfloorxfsi2 (res, tmp1));
34373 emit_insn (gen_lfloorxfdi2 (res, tmp1));
34376 gcc_unreachable ();
34379 /* flags = signbit(a) */
34380 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));
34382 /* if (flags) then res = -res */
34383 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
34384 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
34385 gen_rtx_LABEL_REF (VOIDmode, jump_label),
34387 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
34388 predict_jump (REG_BR_PROB_BASE * 50 / 100);
34389 JUMP_LABEL (insn) = jump_label;
34391 emit_insn (gen_neg (res, res));
34393 emit_label (jump_label);
34394 LABEL_NUSES (jump_label) = 1;
34396 emit_move_insn (op0, res);
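/* An editor sketch (not GCC code) of the sequence above; model_round
   is a hypothetical name:

     #include <math.h>

     static double
     model_round (double a)
     {
       double r = floor (fabs (a) + 0.5);   e2 = |a| + 0.5, res = floor (e2)
       return signbit (a) ? -r : r;         fxam sign bit (C1) drives
     }                                      the conditional negate
*/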
34399 /* Output code to perform a Newton-Raphson approximation of a single precision
34400 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
34402 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
34404 rtx x0, x1, e0, e1;
34406 x0 = gen_reg_rtx (mode);
34407 e0 = gen_reg_rtx (mode);
34408 e1 = gen_reg_rtx (mode);
34409 x1 = gen_reg_rtx (mode);
34411 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
34413 b = force_reg (mode, b);
34415 /* x0 = rcp(b) estimate */
34416 emit_insn (gen_rtx_SET (VOIDmode, x0,
34417 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
34420 emit_insn (gen_rtx_SET (VOIDmode, e0,
34421 gen_rtx_MULT (mode, x0, b)));
34424 emit_insn (gen_rtx_SET (VOIDmode, e0,
34425 gen_rtx_MULT (mode, x0, e0)));
34428 emit_insn (gen_rtx_SET (VOIDmode, e1,
34429 gen_rtx_PLUS (mode, x0, x0)));
34432 emit_insn (gen_rtx_SET (VOIDmode, x1,
34433 gen_rtx_MINUS (mode, e1, e0)));
34436 emit_insn (gen_rtx_SET (VOIDmode, res,
34437 gen_rtx_MULT (mode, a, x1)));
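/* An editor sketch (not GCC code) of the sequence above, with 1.0f/b
   standing in for the ~12-bit rcpss estimate; model_swdiv is a
   hypothetical name:

     static float
     model_swdiv (float a, float b)
     {
       float x0 = 1.0f / b;                rcp(b) estimate
       float e0 = x0 * b;                  b * rcp(b)
       float e1;
       e0 = x0 * e0;                       b * rcp(b) * rcp(b)
       e1 = x0 + x0;                       rcp(b) + rcp(b)
       return a * (e1 - e0);               one Newton-Raphson step
     }
*/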
34440 /* Output code to perform a Newton-Raphson approximation of a
34441 single precision floating point [reciprocal] square root. */
34443 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
34446 rtx x0, e0, e1, e2, e3, mthree, mhalf;
34449 x0 = gen_reg_rtx (mode);
34450 e0 = gen_reg_rtx (mode);
34451 e1 = gen_reg_rtx (mode);
34452 e2 = gen_reg_rtx (mode);
34453 e3 = gen_reg_rtx (mode);
34455 real_from_integer (&r, VOIDmode, -3, -1, 0);
34456 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
34458 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
34459 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
34461 if (VECTOR_MODE_P (mode))
34463 mthree = ix86_build_const_vector (mode, true, mthree);
34464 mhalf = ix86_build_const_vector (mode, true, mhalf);
34467 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
34468 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
34470 a = force_reg (mode, a);
34472 /* x0 = rsqrt(a) estimate */
34473 emit_insn (gen_rtx_SET (VOIDmode, x0,
34474 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
34477 /* If a == 0.0, filter out the infinite rsqrt estimate to prevent NaN for sqrt(0.0). */
34482 zero = gen_reg_rtx (mode);
34483 mask = gen_reg_rtx (mode);
34485 zero = force_reg (mode, CONST0_RTX(mode));
34486 emit_insn (gen_rtx_SET (VOIDmode, mask,
34487 gen_rtx_NE (mode, zero, a)));
34489 emit_insn (gen_rtx_SET (VOIDmode, x0,
34490 gen_rtx_AND (mode, x0, mask)));
34494 emit_insn (gen_rtx_SET (VOIDmode, e0,
34495 gen_rtx_MULT (mode, x0, a)));
34497 emit_insn (gen_rtx_SET (VOIDmode, e1,
34498 gen_rtx_MULT (mode, e0, x0)));
34501 mthree = force_reg (mode, mthree);
34502 emit_insn (gen_rtx_SET (VOIDmode, e2,
34503 gen_rtx_PLUS (mode, e1, mthree)));
34505 mhalf = force_reg (mode, mhalf);
34507 /* e3 = -.5 * x0 */
34508 emit_insn (gen_rtx_SET (VOIDmode, e3,
34509 gen_rtx_MULT (mode, x0, mhalf)));
34511 /* e3 = -.5 * e0 */
34512 emit_insn (gen_rtx_SET (VOIDmode, e3,
34513 gen_rtx_MULT (mode, e0, mhalf)));
34514 /* ret = e2 * e3 */
34515 emit_insn (gen_rtx_SET (VOIDmode, res,
34516 gen_rtx_MULT (mode, e2, e3)));
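/* An editor sketch (not GCC code) of the sequence above, with
   1/sqrtf standing in for the rsqrtss estimate; RECIP selects the
   reciprocal flavour and model_swsqrt is a hypothetical name:

     #include <math.h>

     static float
     model_swsqrt (float a, int recip)
     {
       float x0 = 1.0f / sqrtf (a);        rsqrt(a) estimate
       float e0 = x0 * a;
       float e1 = e0 * x0;                 a * x0 * x0
       float e2 = e1 - 3.0f;               e1 + mthree
       float e3 = (recip ? x0 : e0) * -0.5f;
       return e2 * e3;
     }

   The masking step above additionally zeroes x0 when a == 0.0 so that
   the infinite estimate does not turn sqrt (0.0) into a NaN.  */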
34519 #ifdef TARGET_SOLARIS
34520 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
34523 i386_solaris_elf_named_section (const char *name, unsigned int flags,
34526 /* With Binutils 2.15, the "@unwind" marker must be specified on
34527 every occurrence of the ".eh_frame" section, not just the first one. */
34530 && strcmp (name, ".eh_frame") == 0)
34532 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
34533 flags & SECTION_WRITE ? "aw" : "a");
34538 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
34540 solaris_elf_asm_comdat_section (name, flags, decl);
34545 default_elf_asm_named_section (name, flags, decl);
34547 #endif /* TARGET_SOLARIS */
34549 /* Return the mangling of TYPE if it is an extended fundamental type. */
34551 static const char *
34552 ix86_mangle_type (const_tree type)
34554 type = TYPE_MAIN_VARIANT (type);
34556 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
34557 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
34560 switch (TYPE_MODE (type))
34563 /* __float128 is "g". */
34566 /* "long double" or __float80 is "e". */
34573 /* For 32-bit code we can save PIC register setup by using
34574 __stack_chk_fail_local hidden function instead of calling
34575 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
34576 register, so it is better to call __stack_chk_fail directly. */
34578 static tree ATTRIBUTE_UNUSED
34579 ix86_stack_protect_fail (void)
34581 return TARGET_64BIT
34582 ? default_external_stack_protect_fail ()
34583 : default_hidden_stack_protect_fail ();
34586 /* Select a format to encode pointers in exception handling data. CODE
34587 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
34588 true if the symbol may be affected by dynamic relocations.
34590 ??? All x86 object file formats are capable of representing this.
34591 After all, the relocation needed is the same as for the call insn.
34592 Whether or not a particular assembler allows us to enter such, I
34593 guess we'll have to see. */
34595 asm_preferred_eh_data_format (int code, int global)
34599 int type = DW_EH_PE_sdata8;
34601 || ix86_cmodel == CM_SMALL_PIC
34602 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
34603 type = DW_EH_PE_sdata4;
34604 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
34606 if (ix86_cmodel == CM_SMALL
34607 || (ix86_cmodel == CM_MEDIUM && code))
34608 return DW_EH_PE_udata4;
34609 return DW_EH_PE_absptr;
34612 /* Expand copysign from SIGN to the positive value ABS_VALUE
34613 storing in RESULT. If MASK is non-null, it shall be a mask to mask out the sign bit. */
34616 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
34618 enum machine_mode mode = GET_MODE (sign);
34619 rtx sgn = gen_reg_rtx (mode);
34620 if (mask == NULL_RTX)
34622 enum machine_mode vmode;
34624 if (mode == SFmode)
34626 else if (mode == DFmode)
34631 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
34632 if (!VECTOR_MODE_P (mode))
34634 /* We need to generate a scalar mode mask in this case. */
34635 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
34636 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
34637 mask = gen_reg_rtx (mode);
34638 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
34642 mask = gen_rtx_NOT (mode, mask);
34643 emit_insn (gen_rtx_SET (VOIDmode, sgn,
34644 gen_rtx_AND (mode, mask, sign)));
34645 emit_insn (gen_rtx_SET (VOIDmode, result,
34646 gen_rtx_IOR (mode, abs_value, sgn)));
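/* An editor sketch (not GCC code) of the bit manipulation above for
   scalar SFmode; model_copysign_pos is a hypothetical name:

     #include <stdint.h>
     #include <string.h>

     static float
     model_copysign_pos (float abs_value, float sign)
     {
       uint32_t a, s;
       memcpy (&a, &abs_value, sizeof a);
       memcpy (&s, &sign, sizeof s);
       a |= s & (UINT32_C (1) << 31);      sgn = mask & sign; res |= sgn
       memcpy (&abs_value, &a, sizeof a);
       return abs_value;
     }
*/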
34649 /* Expand fabs (OP0) and return a new rtx that holds the result. The
34650 mask for masking out the sign-bit is stored in *SMASK, if that is non-null. */
34653 ix86_expand_sse_fabs (rtx op0, rtx *smask)
34655 enum machine_mode vmode, mode = GET_MODE (op0);
34658 xa = gen_reg_rtx (mode);
34659 if (mode == SFmode)
34661 else if (mode == DFmode)
34665 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
34666 if (!VECTOR_MODE_P (mode))
34668 /* We need to generate a scalar mode mask in this case. */
34669 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
34670 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
34671 mask = gen_reg_rtx (mode);
34672 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
34674 emit_insn (gen_rtx_SET (VOIDmode, xa,
34675 gen_rtx_AND (mode, op0, mask)));
34683 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
34684 swapping the operands if SWAP_OPERANDS is true. The expanded
34685 code is a forward jump to a newly created label in case the
34686 comparison is true. The generated label rtx is returned. */
34688 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
34689 bool swap_operands)
34700 label = gen_label_rtx ();
34701 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
34702 emit_insn (gen_rtx_SET (VOIDmode, tmp,
34703 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
34704 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
34705 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
34706 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
34707 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
34708 JUMP_LABEL (tmp) = label;
34713 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
34714 using comparison code CODE. Operands are swapped for the comparison if
34715 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
34717 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
34718 bool swap_operands)
34720 rtx (*insn)(rtx, rtx, rtx, rtx);
34721 enum machine_mode mode = GET_MODE (op0);
34722 rtx mask = gen_reg_rtx (mode);
34731 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
34733 emit_insn (insn (mask, op0, op1,
34734 gen_rtx_fmt_ee (code, mode, op0, op1)));
34738 /* Generate and return a rtx of mode MODE for 2**n where n is the number
34739 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
34741 ix86_gen_TWO52 (enum machine_mode mode)
34743 REAL_VALUE_TYPE TWO52r;
34746 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
34747 TWO52 = const_double_from_real_value (TWO52r, mode);
34748 TWO52 = force_reg (mode, TWO52);
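/* Editor's note (a sketch, not GCC code): adding and then subtracting
   2**52 rounds any DFmode magnitude below 2**52 to an integer in the
   current rounding mode, since the sum has no mantissa bits left for
   the fraction:

     static double
     two52_trick (double x)               assumes 0 <= x < 2**52
     {
       const double TWO52 = 4503599627370496.0;    2**52
       return (x + TWO52) - TWO52;
     }

   Inputs of 2**52 or more are already integral, which is why the
   expanders below first test isless (xa, TWO52).  */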
34753 /* Expand SSE sequence for computing lround from OP1 storing into OP0. */
34756 ix86_expand_lround (rtx op0, rtx op1)
34758 /* C code for the stuff we're doing below:
34759 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
34762 enum machine_mode mode = GET_MODE (op1);
34763 const struct real_format *fmt;
34764 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
34767 /* load nextafter (0.5, 0.0) */
34768 fmt = REAL_MODE_FORMAT (mode);
34769 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
34770 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
34772 /* adj = copysign (0.5, op1) */
34773 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
34774 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
34776 /* adj = op1 + adj */
34777 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
34779 /* op0 = (imode)adj */
34780 expand_fix (op0, adj, 0);
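/* Editor's note (not GCC code): nextafter (0.5, 0.0) is used instead
   of 0.5 so that arguments just below one half do not round up: for
   x = nextafter (0.5, 0.0), x + 0.5 rounds to 1.0 and would truncate
   to 1, while x + nextafter (0.5, 0.0) stays below 1.0 and truncates
   to 0, matching lround.  */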
34783 /* Expand SSE2 sequence for computing lfloor or lceil (depending on DO_FLOOR) from OPERAND1, storing into OPERAND0. */
34786 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
34788 /* C code for the stuff we're doing below (for do_floor):
34790 xi -= (double)xi > op1 ? 1 : 0;
34793 enum machine_mode fmode = GET_MODE (op1);
34794 enum machine_mode imode = GET_MODE (op0);
34795 rtx ireg, freg, label, tmp;
34797 /* reg = (long)op1 */
34798 ireg = gen_reg_rtx (imode);
34799 expand_fix (ireg, op1, 0);
34801 /* freg = (double)reg */
34802 freg = gen_reg_rtx (fmode);
34803 expand_float (freg, ireg, 0);
34805 /* ireg = (freg > op1) ? ireg - 1 : ireg */
34806 label = ix86_expand_sse_compare_and_jump (UNLE,
34807 freg, op1, !do_floor);
34808 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
34809 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
34810 emit_move_insn (ireg, tmp);
34812 emit_label (label);
34813 LABEL_NUSES (label) = 1;
34815 emit_move_insn (op0, ireg);
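/* An editor sketch (not GCC code) of the floor flavour above;
   model_lfloor is a hypothetical name:

     static long
     model_lfloor (double x)              assumes |x| < 2**63
     {
       long i = (long) x;                 cvttsd2si truncates toward zero
       return i - ((double) i > x);       compensate for negative inputs
     }
*/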
34818 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
34819 result in OPERAND0. */
34821 ix86_expand_rint (rtx operand0, rtx operand1)
34823 /* C code for the stuff we're doing below:
34824 xa = fabs (operand1);
34825 if (!isless (xa, 2**52))
34827 xa = xa + 2**52 - 2**52;
34828 return copysign (xa, operand1);
34830 enum machine_mode mode = GET_MODE (operand0);
34831 rtx res, xa, label, TWO52, mask;
34833 res = gen_reg_rtx (mode);
34834 emit_move_insn (res, operand1);
34836 /* xa = abs (operand1) */
34837 xa = ix86_expand_sse_fabs (res, &mask);
34839 /* if (!isless (xa, TWO52)) goto label; */
34840 TWO52 = ix86_gen_TWO52 (mode);
34841 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
34843 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
34844 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
34846 ix86_sse_copysign_to_positive (res, xa, res, mask);
34848 emit_label (label);
34849 LABEL_NUSES (label) = 1;
34851 emit_move_insn (operand0, res);
34854 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1, storing into OPERAND0. */
34857 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
34859 /* C code for the stuff we expand below.
34860 double xa = fabs (x), x2;
34861 if (!isless (xa, TWO52))
34863 xa = xa + TWO52 - TWO52;
34864 x2 = copysign (xa, x);
34873 enum machine_mode mode = GET_MODE (operand0);
34874 rtx xa, TWO52, tmp, label, one, res, mask;
34876 TWO52 = ix86_gen_TWO52 (mode);
34878 /* Temporary for holding the result, initialized to the input
34879 operand to ease control flow. */
34880 res = gen_reg_rtx (mode);
34881 emit_move_insn (res, operand1);
34883 /* xa = abs (operand1) */
34884 xa = ix86_expand_sse_fabs (res, &mask);
34886 /* if (!isless (xa, TWO52)) goto label; */
34887 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
34889 /* xa = xa + TWO52 - TWO52; */
34890 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
34891 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
34893 /* xa = copysign (xa, operand1) */
34894 ix86_sse_copysign_to_positive (xa, xa, res, mask);
34896 /* generate 1.0 or -1.0 */
34897 one = force_reg (mode,
34898 const_double_from_real_value (do_floor
34899 ? dconst1 : dconstm1, mode));
34901 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
34902 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
34903 emit_insn (gen_rtx_SET (VOIDmode, tmp,
34904 gen_rtx_AND (mode, one, tmp)));
34905 /* We always need to subtract here to preserve signed zero. */
34906 tmp = expand_simple_binop (mode, MINUS,
34907 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
34908 emit_move_insn (res, tmp);
34910 emit_label (label);
34911 LABEL_NUSES (label) = 1;
34913 emit_move_insn (operand0, res);
34916 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1, storing into OPERAND0. */
34919 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
34921 /* C code for the stuff we expand below.
34922 double xa = fabs (x), x2;
34923 if (!isless (xa, TWO52))
34925 x2 = (double)(long)x;
34932 if (HONOR_SIGNED_ZEROS (mode))
34933 return copysign (x2, x);
34936 enum machine_mode mode = GET_MODE (operand0);
34937 rtx xa, xi, TWO52, tmp, label, one, res, mask;
34939 TWO52 = ix86_gen_TWO52 (mode);
34941 /* Temporary for holding the result, initialized to the input
34942 operand to ease control flow. */
34943 res = gen_reg_rtx (mode);
34944 emit_move_insn (res, operand1);
34946 /* xa = abs (operand1) */
34947 xa = ix86_expand_sse_fabs (res, &mask);
34949 /* if (!isless (xa, TWO52)) goto label; */
34950 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
34952 /* xa = (double)(long)x */
34953 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
34954 expand_fix (xi, res, 0);
34955 expand_float (xa, xi, 0);
34958 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
34960 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
34961 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
34962 emit_insn (gen_rtx_SET (VOIDmode, tmp,
34963 gen_rtx_AND (mode, one, tmp)));
34964 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
34965 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
34966 emit_move_insn (res, tmp);
34968 if (HONOR_SIGNED_ZEROS (mode))
34969 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
34971 emit_label (label);
34972 LABEL_NUSES (label) = 1;
34974 emit_move_insn (operand0, res);
34977 /* Expand SSE sequence for computing round from OPERAND1 storing
34978 into OPERAND0. The sequence works without relying on DImode truncation
34979 via cvttsd2siq, which is only available on 64bit targets. */
34981 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
34983 /* C code for the stuff we expand below.
34984 double xa = fabs (x), xa2, x2;
34985 if (!isless (xa, TWO52))
34987 Using the absolute value and copying back sign makes
34988 -0.0 -> -0.0 correct.
34989 xa2 = xa + TWO52 - TWO52;
34994 else if (dxa > 0.5)
34996 x2 = copysign (xa2, x);
34999 enum machine_mode mode = GET_MODE (operand0);
35000 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
35002 TWO52 = ix86_gen_TWO52 (mode);
35004 /* Temporary for holding the result, initialized to the input
35005 operand to ease control flow. */
35006 res = gen_reg_rtx (mode);
35007 emit_move_insn (res, operand1);
35009 /* xa = abs (operand1) */
35010 xa = ix86_expand_sse_fabs (res, &mask);
35012 /* if (!isless (xa, TWO52)) goto label; */
35013 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
35015 /* xa2 = xa + TWO52 - TWO52; */
35016 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
35017 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
35019 /* dxa = xa2 - xa; */
35020 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
35022 /* generate 0.5, 1.0 and -0.5 */
35023 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
35024 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
35025 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
35029 tmp = gen_reg_rtx (mode);
35030 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
35031 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
35032 emit_insn (gen_rtx_SET (VOIDmode, tmp,
35033 gen_rtx_AND (mode, one, tmp)));
35034 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
35035 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
35036 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
35037 emit_insn (gen_rtx_SET (VOIDmode, tmp,
35038 gen_rtx_AND (mode, one, tmp)));
35039 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
35041 /* res = copysign (xa2, operand1) */
35042 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
35044 emit_label (label);
35045 LABEL_NUSES (label) = 1;
35047 emit_move_insn (operand0, res);
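/* An editor sketch (not GCC code) of the correction above: the TWO52
   trick rounds half-to-even, and dxa = xa2 - xa is the rounding error
   used to convert that to round-half-away-from-zero; model_round_df32
   is a hypothetical name:

     #include <math.h>

     static double
     model_round_df32 (double x)          assumes |x| < 2**52
     {
       const double TWO52 = 4503599627370496.0;
       double xa = fabs (x);
       double xa2 = (xa + TWO52) - TWO52;
       double dxa = xa2 - xa;
       if (dxa > 0.5)                     went up past the midpoint
         xa2 -= 1.0;
       else if (dxa <= -0.5)              ties round away from zero
         xa2 += 1.0;
       return copysign (xa2, x);
     }
*/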
35050 /* Expand SSE sequence for computing trunc from OPERAND1, storing into OPERAND0. */
35053 ix86_expand_trunc (rtx operand0, rtx operand1)
35055 /* C code for SSE variant we expand below.
35056 double xa = fabs (x), x2;
35057 if (!isless (xa, TWO52))
35059 x2 = (double)(long)x;
35060 if (HONOR_SIGNED_ZEROS (mode))
35061 return copysign (x2, x);
35064 enum machine_mode mode = GET_MODE (operand0);
35065 rtx xa, xi, TWO52, label, res, mask;
35067 TWO52 = ix86_gen_TWO52 (mode);
35069 /* Temporary for holding the result, initialized to the input
35070 operand to ease control flow. */
35071 res = gen_reg_rtx (mode);
35072 emit_move_insn (res, operand1);
35074 /* xa = abs (operand1) */
35075 xa = ix86_expand_sse_fabs (res, &mask);
35077 /* if (!isless (xa, TWO52)) goto label; */
35078 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
35080 /* x = (double)(long)x */
35081 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
35082 expand_fix (xi, res, 0);
35083 expand_float (res, xi, 0);
35085 if (HONOR_SIGNED_ZEROS (mode))
35086 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
35088 emit_label (label);
35089 LABEL_NUSES (label) = 1;
35091 emit_move_insn (operand0, res);
35094 /* Expand SSE sequence for computing trunc from OPERAND1, storing into OPERAND0. */
35097 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
35099 enum machine_mode mode = GET_MODE (operand0);
35100 rtx xa, mask, TWO52, label, one, res, smask, tmp;
35102 /* C code for SSE variant we expand below.
35103 double xa = fabs (x), x2;
35104 if (!isless (xa, TWO52))
35106 xa2 = xa + TWO52 - TWO52;
35110 x2 = copysign (xa2, x);
35114 TWO52 = ix86_gen_TWO52 (mode);
35116 /* Temporary for holding the result, initialized to the input
35117 operand to ease control flow. */
35118 res = gen_reg_rtx (mode);
35119 emit_move_insn (res, operand1);
35121 /* xa = abs (operand1) */
35122 xa = ix86_expand_sse_fabs (res, &smask);
35124 /* if (!isless (xa, TWO52)) goto label; */
35125 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
35127 /* res = xa + TWO52 - TWO52; */
35128 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
35129 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
35130 emit_move_insn (res, tmp);
35133 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
35135 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
35136 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
35137 emit_insn (gen_rtx_SET (VOIDmode, mask,
35138 gen_rtx_AND (mode, mask, one)));
35139 tmp = expand_simple_binop (mode, MINUS,
35140 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
35141 emit_move_insn (res, tmp);
35143 /* res = copysign (res, operand1) */
35144 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
35146 emit_label (label);
35147 LABEL_NUSES (label) = 1;
35149 emit_move_insn (operand0, res);
35152 /* Expand SSE sequence for computing round from OPERAND1, storing into OPERAND0. */
35155 ix86_expand_round (rtx operand0, rtx operand1)
35157 /* C code for the stuff we're doing below:
35158 double xa = fabs (x);
35159 if (!isless (xa, TWO52))
35161 xa = (double)(long)(xa + nextafter (0.5, 0.0));
35162 return copysign (xa, x);
35164 enum machine_mode mode = GET_MODE (operand0);
35165 rtx res, TWO52, xa, label, xi, half, mask;
35166 const struct real_format *fmt;
35167 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
35169 /* Temporary for holding the result, initialized to the input
35170 operand to ease control flow. */
35171 res = gen_reg_rtx (mode);
35172 emit_move_insn (res, operand1);
35174 TWO52 = ix86_gen_TWO52 (mode);
35175 xa = ix86_expand_sse_fabs (res, &mask);
35176 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
35178 /* load nextafter (0.5, 0.0) */
35179 fmt = REAL_MODE_FORMAT (mode);
35180 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
35181 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
35183 /* xa = xa + 0.5 */
35184 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
35185 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
35187 /* xa = (double)(int64_t)xa */
35188 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
35189 expand_fix (xi, xa, 0);
35190 expand_float (xa, xi, 0);
35192 /* res = copysign (xa, operand1) */
35193 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
35195 emit_label (label);
35196 LABEL_NUSES (label) = 1;
35198 emit_move_insn (operand0, res);
35201 /* Expand SSE sequence for computing round
35202 from OP1 storing into OP0 using sse4 round insn. */
35204 ix86_expand_round_sse4 (rtx op0, rtx op1)
35206 enum machine_mode mode = GET_MODE (op0);
35207 rtx e1, e2, res, half;
35208 const struct real_format *fmt;
35209 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
35210 rtx (*gen_copysign) (rtx, rtx, rtx);
35211 rtx (*gen_round) (rtx, rtx, rtx);
35216 gen_copysign = gen_copysignsf3;
35217 gen_round = gen_sse4_1_roundsf2;
35220 gen_copysign = gen_copysigndf3;
35221 gen_round = gen_sse4_1_rounddf2;
35224 gcc_unreachable ();
35227 /* round (a) = trunc (a + copysign (0.5, a)) */
35229 /* load nextafter (0.5, 0.0) */
35230 fmt = REAL_MODE_FORMAT (mode);
35231 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
35232 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
35233 half = const_double_from_real_value (pred_half, mode);
35235 /* e1 = copysign (0.5, op1) */
35236 e1 = gen_reg_rtx (mode);
35237 emit_insn (gen_copysign (e1, half, op1));
35239 /* e2 = op1 + e1 */
35240 e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
35242 /* res = trunc (e2) */
35243 res = gen_reg_rtx (mode);
35244 emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
35246 emit_move_insn (op0, res);
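/* An editor sketch (not GCC code) of the SSE4.1 sequence above;
   model_round_sse4 is a hypothetical name:

     #include <math.h>

     static double
     model_round_sse4 (double x)
     {
       double e1 = copysign (nextafter (0.5, 0.0), x);
       return trunc (x + e1);             roundsd with ROUND_TRUNC
     }
*/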
35250 /* Table of valid machine attributes. */
35251 static const struct attribute_spec ix86_attribute_table[] =
35253 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
35254 affects_type_identity } */
35255 /* Stdcall attribute says callee is responsible for popping arguments
35256 if they are not variable. */
35257 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
35259 /* Fastcall attribute says callee is responsible for popping arguments
35260 if they are not variable. */
35261 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
35263 /* Thiscall attribute says callee is responsible for popping arguments
35264 if they are not variable. */
35265 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
35267 /* Cdecl attribute says the callee is a normal C declaration */
35268 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
35270 /* Regparm attribute specifies how many integer arguments are to be
35271 passed in registers. */
35272 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
35274 /* Sseregparm attribute says we are using x86_64 calling conventions
35275 for FP arguments. */
35276 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
35278 /* The transactional memory builtins are implicitly regparm or fastcall
35279 depending on the ABI. Override the generic do-nothing attribute that
35280 these builtins were declared with. */
35281 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
35283 /* force_align_arg_pointer says this function realigns the stack at entry. */
35284 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
35285 false, true, true, ix86_handle_cconv_attribute, false },
35286 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
35287 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
35288 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
35289 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
35292 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
35294 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
35296 #ifdef SUBTARGET_ATTRIBUTE_TABLE
35297 SUBTARGET_ATTRIBUTE_TABLE,
35299 /* ms_abi and sysv_abi calling convention function attributes. */
35300 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
35301 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
35302 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
35304 { "callee_pop_aggregate_return", 1, 1, false, true, true,
35305 ix86_handle_callee_pop_aggregate_return, true },
35307 { NULL, 0, 0, false, false, false, NULL, false }
35310 /* Implement targetm.vectorize.builtin_vectorization_cost. */
35312 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
35313 tree vectype ATTRIBUTE_UNUSED,
35314 int misalign ATTRIBUTE_UNUSED)
35316 switch (type_of_cost)
35319 return ix86_cost->scalar_stmt_cost;
35322 return ix86_cost->scalar_load_cost;
35325 return ix86_cost->scalar_store_cost;
35328 return ix86_cost->vec_stmt_cost;
35331 return ix86_cost->vec_align_load_cost;
35334 return ix86_cost->vec_store_cost;
35336 case vec_to_scalar:
35337 return ix86_cost->vec_to_scalar_cost;
35339 case scalar_to_vec:
35340 return ix86_cost->scalar_to_vec_cost;
35342 case unaligned_load:
35343 case unaligned_store:
35344 return ix86_cost->vec_unalign_load_cost;
35346 case cond_branch_taken:
35347 return ix86_cost->cond_taken_branch_cost;
35349 case cond_branch_not_taken:
35350 return ix86_cost->cond_not_taken_branch_cost;
35353 case vec_promote_demote:
35354 return ix86_cost->vec_stmt_cost;
35357 gcc_unreachable ();
35361 /* Construct (set target (vec_select op0 (parallel perm))) and
35362 return true if that's a valid instruction in the active ISA. */
35365 expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
35367 rtx rperm[MAX_VECT_LEN], x;
35370 for (i = 0; i < nelt; ++i)
35371 rperm[i] = GEN_INT (perm[i]);
35373 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
35374 x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
35375 x = gen_rtx_SET (VOIDmode, target, x);
35378 if (recog_memoized (x) < 0)
35386 /* Similar, but generate a vec_concat from op0 and op1 as well. */
35389 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
35390 const unsigned char *perm, unsigned nelt)
35392 enum machine_mode v2mode;
35395 v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
35396 x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
35397 return expand_vselect (target, x, perm, nelt);
35400 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
35401 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
35404 expand_vec_perm_blend (struct expand_vec_perm_d *d)
35406 enum machine_mode vmode = d->vmode;
35407 unsigned i, mask, nelt = d->nelt;
35408 rtx target, op0, op1, x;
35409 rtx rperm[32], vperm;
35411 if (d->op0 == d->op1)
35413 if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
35415 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
35417 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
35422 /* This is a blend, not a permute. Elements must stay in their
35423 respective lanes. */
35424 for (i = 0; i < nelt; ++i)
35426 unsigned e = d->perm[i];
35427 if (!(e == i || e == i + nelt))
35434 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
35435 decision should be extracted elsewhere, so that we only try that
35436 sequence once all budget==3 options have been tried. */
35437 target = d->target;
35450 for (i = 0; i < nelt; ++i)
35451 mask |= (d->perm[i] >= nelt) << i;
35455 for (i = 0; i < 2; ++i)
35456 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
35461 for (i = 0; i < 4; ++i)
35462 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
35467 /* See if bytes move in pairs so we can use pblendw with
35468 an immediate argument, rather than pblendvb with a vector
35470 for (i = 0; i < 16; i += 2)
35471 if (d->perm[i] + 1 != d->perm[i + 1])
35474 for (i = 0; i < nelt; ++i)
35475 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
35478 vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
35479 vperm = force_reg (vmode, vperm);
35481 if (GET_MODE_SIZE (vmode) == 16)
35482 emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
35484 emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
35488 for (i = 0; i < 8; ++i)
35489 mask |= (d->perm[i * 2] >= 16) << i;
35494 target = gen_lowpart (vmode, target);
35495 op0 = gen_lowpart (vmode, op0);
35496 op1 = gen_lowpart (vmode, op1);
35500 /* See if bytes move in pairs. If not, vpblendvb must be used. */
35501 for (i = 0; i < 32; i += 2)
35502 if (d->perm[i] + 1 != d->perm[i + 1])
35504 /* See if bytes move in quadruplets. If yes, vpblendd
35505 with immediate can be used. */
35506 for (i = 0; i < 32; i += 4)
35507 if (d->perm[i] + 2 != d->perm[i + 2])
35511 /* See if bytes move the same in both lanes. If yes,
35512 vpblendw with immediate can be used. */
35513 for (i = 0; i < 16; i += 2)
35514 if (d->perm[i] + 16 != d->perm[i + 16])
35517 /* Use vpblendw. */
35518 for (i = 0; i < 16; ++i)
35519 mask |= (d->perm[i * 2] >= 32) << i;
35524 /* Use vpblendd. */
35525 for (i = 0; i < 8; ++i)
35526 mask |= (d->perm[i * 4] >= 32) << i;
35531 /* See if words move in pairs. If yes, vpblendd can be used. */
35532 for (i = 0; i < 16; i += 2)
35533 if (d->perm[i] + 1 != d->perm[i + 1])
35537 /* See if words move the same in both lanes. If not,
35538 vpblendvb must be used. */
35539 for (i = 0; i < 8; i++)
35540 if (d->perm[i] + 8 != d->perm[i + 8])
35542 /* Use vpblendvb. */
35543 for (i = 0; i < 32; ++i)
35544 rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);
35548 target = gen_lowpart (vmode, target);
35549 op0 = gen_lowpart (vmode, op0);
35550 op1 = gen_lowpart (vmode, op1);
35551 goto finish_pblendvb;
35554 /* Use vpblendw. */
35555 for (i = 0; i < 16; ++i)
35556 mask |= (d->perm[i] >= 16) << i;
35560 /* Use vpblendd. */
35561 for (i = 0; i < 8; ++i)
35562 mask |= (d->perm[i * 2] >= 16) << i;
35567 /* Use vpblendd. */
35568 for (i = 0; i < 4; ++i)
35569 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
35574 gcc_unreachable ();
35577 /* This matches five different patterns with the different modes. */
35578 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
35579 x = gen_rtx_SET (VOIDmode, target, x);
35585 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
35586 in terms of the variable form of vpermilps.
35588 Note that we will have already failed the immediate input vpermilps,
35589 which requires that the high and low part shuffle be identical; the
35590 variable form doesn't require that. */
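/* Illustrative note (a sketch of the variable form's semantics): each
   of the 8 selector dwords picks one of the 4 floats within its own
   128-bit lane, so a selector vector such as { 3, 2, 1, 0, 0, 1, 2, 3 }
   is fine even though its two lanes shuffle differently, which the
   immediate form cannot express. */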
35593 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
35595 rtx rperm[8], vperm;
35598 if (!TARGET_AVX || d->vmode != V8SFmode || d->op0 != d->op1)
35601 /* We can only permute within the 128-bit lane. */
35602 for (i = 0; i < 8; ++i)
35604 unsigned e = d->perm[i];
35605 if (i < 4 ? e >= 4 : e < 4)
35612 for (i = 0; i < 8; ++i)
35614 unsigned e = d->perm[i];
35616 /* Within each 128-bit lane, the elements of op0 are numbered
35617 from 0 and the elements of op1 are numbered from 4. */
35623 rperm[i] = GEN_INT (e);
35626 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
35627 vperm = force_reg (V8SImode, vperm);
35628 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
35633 /* Return true if permutation D can be performed as VMODE permutation instead. */
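/* Worked example: the V16QImode permutation { 2, 3, 0, 1, 6, 7, 4, 5,
   10, 11, 8, 9, 14, 15, 12, 13 } moves bytes in aligned pairs, so it
   is also expressible as the V8HImode permutation { 1, 0, 3, 2, 5, 4,
   7, 6 }; the checks below test exactly this alignment and contiguity
   of each chunk. */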
35637 valid_perm_using_mode_p (enum machine_mode vmode, struct expand_vec_perm_d *d)
35639 unsigned int i, j, chunk;
35641 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
35642 || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
35643 || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
35646 if (GET_MODE_NUNITS (vmode) >= d->nelt)
35649 chunk = d->nelt / GET_MODE_NUNITS (vmode);
35650 for (i = 0; i < d->nelt; i += chunk)
35651 if (d->perm[i] & (chunk - 1))
35654 for (j = 1; j < chunk; ++j)
35655 if (d->perm[i] + j != d->perm[i + j])
35661 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
35662 in terms of pshufb, vpperm, vpermq, vpermd or vperm2i128. */
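/* Worked example of the selector construction further below: for a
   single-operand V8HImode permutation { 1, 0, 3, 2, 5, 4, 7, 6 } with
   eltsz == 2, the byte-level pshufb control vector becomes
     { 2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13 }
   since byte J of element E lives at byte E * eltsz + J. */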
35665 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
35667 unsigned i, nelt, eltsz, mask;
35668 unsigned char perm[32];
35669 enum machine_mode vmode = V16QImode;
35670 rtx rperm[32], vperm, target, op0, op1;
35674 if (d->op0 != d->op1)
35676 if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
35679 && valid_perm_using_mode_p (V2TImode, d))
35684 /* Use vperm2i128 insn. The pattern uses
35685 V4DImode instead of V2TImode. */
35686 target = gen_lowpart (V4DImode, d->target);
35687 op0 = gen_lowpart (V4DImode, d->op0);
35688 op1 = gen_lowpart (V4DImode, d->op1);
35690 = GEN_INT ((d->perm[0] / (nelt / 2))
35691 | ((d->perm[nelt / 2] / (nelt / 2)) * 16));
35692 emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
35700 if (GET_MODE_SIZE (d->vmode) == 16)
35705 else if (GET_MODE_SIZE (d->vmode) == 32)
35710 /* V4DImode should be already handled through
35711 expand_vselect by vpermq instruction. */
35712 gcc_assert (d->vmode != V4DImode);
35715 if (d->vmode == V8SImode
35716 || d->vmode == V16HImode
35717 || d->vmode == V32QImode)
35719 /* First see if vpermq can be used for
35720 V8SImode/V16HImode/V32QImode. */
35721 if (valid_perm_using_mode_p (V4DImode, d))
35723 for (i = 0; i < 4; i++)
35724 perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
35727 return expand_vselect (gen_lowpart (V4DImode, d->target),
35728 gen_lowpart (V4DImode, d->op0),
35732 /* Next see if vpermd can be used. */
35733 if (valid_perm_using_mode_p (V8SImode, d))
35737 if (vmode == V32QImode)
35739 /* vpshufb only works within 128-bit lanes; it is not
35740 possible to shuffle bytes across the lanes. */
35741 for (i = 0; i < nelt; ++i)
35742 if ((d->perm[i] ^ i) & (nelt / 2))
35753 if (vmode == V8SImode)
35754 for (i = 0; i < 8; ++i)
35755 rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
35758 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
35759 if (d->op0 != d->op1)
35760 mask = 2 * nelt - 1;
35761 else if (vmode == V16QImode)
35764 mask = nelt / 2 - 1;
35766 for (i = 0; i < nelt; ++i)
35768 unsigned j, e = d->perm[i] & mask;
35769 for (j = 0; j < eltsz; ++j)
35770 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
35774 vperm = gen_rtx_CONST_VECTOR (vmode,
35775 gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
35776 vperm = force_reg (vmode, vperm);
35778 target = gen_lowpart (vmode, d->target);
35779 op0 = gen_lowpart (vmode, d->op0);
35780 if (d->op0 == d->op1)
35782 if (vmode == V16QImode)
35783 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
35784 else if (vmode == V32QImode)
35785 emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
35787 emit_insn (gen_avx2_permvarv8si (target, vperm, op0));
35791 op1 = gen_lowpart (vmode, d->op1);
35792 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
35798 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
35799 in a single instruction. */
35802 expand_vec_perm_1 (struct expand_vec_perm_d *d)
35804 unsigned i, nelt = d->nelt;
35805 unsigned char perm2[MAX_VECT_LEN];
35807 /* Check plain VEC_SELECT first, because AVX has instructions that could
35808 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
35809 input where SEL+CONCAT may not. */
35810 if (d->op0 == d->op1)
35812 int mask = nelt - 1;
35813 bool identity_perm = true;
35814 bool broadcast_perm = true;
35816 for (i = 0; i < nelt; i++)
35818 perm2[i] = d->perm[i] & mask;
35820 identity_perm = false;
35822 broadcast_perm = false;
35828 emit_move_insn (d->target, d->op0);
35831 else if (broadcast_perm && TARGET_AVX2)
35833 /* Use vpbroadcast{b,w,d}. */
35834 rtx op = d->op0, (*gen) (rtx, rtx) = NULL;
35838 op = gen_lowpart (V16QImode, op);
35839 gen = gen_avx2_pbroadcastv32qi;
35842 op = gen_lowpart (V8HImode, op);
35843 gen = gen_avx2_pbroadcastv16hi;
35846 op = gen_lowpart (V4SImode, op);
35847 gen = gen_avx2_pbroadcastv8si;
35850 gen = gen_avx2_pbroadcastv16qi;
35853 gen = gen_avx2_pbroadcastv8hi;
35855 /* For other modes prefer other shuffles this function creates. */
35861 emit_insn (gen (d->target, op));
35866 if (expand_vselect (d->target, d->op0, perm2, nelt))
35869 /* There are plenty of patterns in sse.md that are written for
35870 SEL+CONCAT and are not replicated for a single op. Perhaps
35871 that should be changed, to avoid the nastiness here. */
35873 /* Recognize interleave style patterns, which means incrementing
35874 every other permutation operand. */
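/* E.g. for V4SImode with d->op0 == d->op1, d->perm = { 0, 0, 1, 1 }
   becomes perm2 = { 0, 4, 1, 5 }, which is punpckldq of op0 with
   itself. */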
35875 for (i = 0; i < nelt; i += 2)
35877 perm2[i] = d->perm[i] & mask;
35878 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
35880 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
35883 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
35886 for (i = 0; i < nelt; i += 4)
35888 perm2[i + 0] = d->perm[i + 0] & mask;
35889 perm2[i + 1] = d->perm[i + 1] & mask;
35890 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
35891 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
35894 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
35899 /* Finally, try the fully general two operand permute. */
35900 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
35903 /* Recognize interleave style patterns with reversed operands. */
35904 if (d->op0 != d->op1)
35906 for (i = 0; i < nelt; ++i)
35908 unsigned e = d->perm[i];
35916 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
35920 /* Try the SSE4.1 blend variable merge instructions. */
35921 if (expand_vec_perm_blend (d))
35924 /* Try one of the AVX vpermil variable permutations. */
35925 if (expand_vec_perm_vpermil (d))
35928 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
35929 vpshufb, vpermd or vpermq variable permutation. */
35930 if (expand_vec_perm_pshufb (d))
35936 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
35937 in terms of a pair of pshuflw + pshufhw instructions. */
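/* Worked example (sketch): d->perm = { 3, 2, 1, 0, 5, 4, 7, 6 } splits
   into pshuflw with { 3, 2, 1, 0, 4, 5, 6, 7 } followed by pshufhw
   with { 0, 1, 2, 3, 5, 4, 7, 6 }, i.e. pshuflw $0x1b then
   pshufhw $0xb1. */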
35940 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
35942 unsigned char perm2[MAX_VECT_LEN];
35946 if (d->vmode != V8HImode || d->op0 != d->op1)
35949 /* The two permutations only operate in 64-bit lanes. */
35950 for (i = 0; i < 4; ++i)
35951 if (d->perm[i] >= 4)
35953 for (i = 4; i < 8; ++i)
35954 if (d->perm[i] < 4)
35960 /* Emit the pshuflw. */
35961 memcpy (perm2, d->perm, 4);
35962 for (i = 4; i < 8; ++i)
35964 ok = expand_vselect (d->target, d->op0, perm2, 8);
35967 /* Emit the pshufhw. */
35968 memcpy (perm2 + 4, d->perm + 4, 4);
35969 for (i = 0; i < 4; ++i)
35971 ok = expand_vselect (d->target, d->target, perm2, 8);
35977 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
35978 the permutation using the SSSE3 palignr instruction. This succeeds
35979 when all of the elements in PERM fit within one vector and we merely
35980 need to shift them down so that a single vector permutation has a
35981 chance to succeed. */
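/* Worked example (sketch): for V16QImode with d->perm = { 2, 3, ..., 17 }
   we get min == 2 and max == 17, so a palignr by two bytes of the
   concatenated op1:op0 suffices; the residual single-operand
   permutation is then the identity, the degenerate case handled
   below. */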
35984 expand_vec_perm_palignr (struct expand_vec_perm_d *d)
35986 unsigned i, nelt = d->nelt;
35991 /* Even with AVX, palignr only operates on 128-bit vectors. */
35992 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
35995 min = nelt, max = 0;
35996 for (i = 0; i < nelt; ++i)
35998 unsigned e = d->perm[i];
36004 if (min == 0 || max - min >= nelt)
36007 /* Given that we have SSSE3, we know we'll be able to implement the
36008 single operand permutation after the palignr with pshufb. */
36012 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
36013 emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode, d->target),
36014 gen_lowpart (TImode, d->op1),
36015 gen_lowpart (TImode, d->op0), shift));
36017 d->op0 = d->op1 = d->target;
36020 for (i = 0; i < nelt; ++i)
36022 unsigned e = d->perm[i] - min;
36028 /* Test for the degenerate case where the alignment by itself
36029 produces the desired permutation. */
36033 ok = expand_vec_perm_1 (d);
36039 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
36041 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
36042 a two vector permutation into a single vector permutation by using
36043 an interleave operation to merge the vectors. */
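/* Worked example (sketch): for V8HImode, d->perm = { 0, 8, 1, 9, 2,
   10, 3, 11 } draws only on the low halves of both inputs, so
   punpcklwd merges everything into one register and the final
   single-operand shuffle (here the identity) finishes the job. */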
36046 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
36048 struct expand_vec_perm_d dremap, dfinal;
36049 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
36050 unsigned HOST_WIDE_INT contents;
36051 unsigned char remap[2 * MAX_VECT_LEN];
36053 bool ok, same_halves = false;
36055 if (GET_MODE_SIZE (d->vmode) == 16)
36057 if (d->op0 == d->op1)
36060 else if (GET_MODE_SIZE (d->vmode) == 32)
36064 /* For 32-byte modes allow even d->op0 == d->op1.
36065 The lack of cross-lane shuffling in some instructions
36066 might prevent a single insn shuffle. */
36068 dfinal.testing_p = true;
36069 /* If expand_vec_perm_interleave3 can expand this into
36070 a 3 insn sequence, give up and let it be expanded as
36071 a 3 insn sequence. While that is one insn longer,
36072 it doesn't need a memory operand, and in the common
36073 case that both the interleave low and high permutations
36074 with the same operands are adjacent, the pair needs
36075 only 4 insns after CSE. */
36076 if (expand_vec_perm_interleave3 (&dfinal))
36082 /* Examine from whence the elements come. */
36084 for (i = 0; i < nelt; ++i)
36085 contents |= ((unsigned HOST_WIDE_INT) 1) << d->perm[i];
36087 memset (remap, 0xff, sizeof (remap));
36090 if (GET_MODE_SIZE (d->vmode) == 16)
36092 unsigned HOST_WIDE_INT h1, h2, h3, h4;
36094 /* Split the two input vectors into 4 halves. */
36095 h1 = (((unsigned HOST_WIDE_INT) 1) << nelt2) - 1;
36100 /* If all elements come from the low halves, use interleave low; similarly
36101 for interleave high. If the elements come from mis-matched halves, we
36102 can use shufps for V4SF/V4SI or do a DImode shuffle. */
36103 if ((contents & (h1 | h3)) == contents)
36106 for (i = 0; i < nelt2; ++i)
36109 remap[i + nelt] = i * 2 + 1;
36110 dremap.perm[i * 2] = i;
36111 dremap.perm[i * 2 + 1] = i + nelt;
36113 if (!TARGET_SSE2 && d->vmode == V4SImode)
36114 dremap.vmode = V4SFmode;
36116 else if ((contents & (h2 | h4)) == contents)
36119 for (i = 0; i < nelt2; ++i)
36121 remap[i + nelt2] = i * 2;
36122 remap[i + nelt + nelt2] = i * 2 + 1;
36123 dremap.perm[i * 2] = i + nelt2;
36124 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
36126 if (!TARGET_SSE2 && d->vmode == V4SImode)
36127 dremap.vmode = V4SFmode;
36129 else if ((contents & (h1 | h4)) == contents)
36132 for (i = 0; i < nelt2; ++i)
36135 remap[i + nelt + nelt2] = i + nelt2;
36136 dremap.perm[i] = i;
36137 dremap.perm[i + nelt2] = i + nelt + nelt2;
36142 dremap.vmode = V2DImode;
36144 dremap.perm[0] = 0;
36145 dremap.perm[1] = 3;
36148 else if ((contents & (h2 | h3)) == contents)
36151 for (i = 0; i < nelt2; ++i)
36153 remap[i + nelt2] = i;
36154 remap[i + nelt] = i + nelt2;
36155 dremap.perm[i] = i + nelt2;
36156 dremap.perm[i + nelt2] = i + nelt;
36161 dremap.vmode = V2DImode;
36163 dremap.perm[0] = 1;
36164 dremap.perm[1] = 2;
36172 unsigned int nelt4 = nelt / 4, nzcnt = 0;
36173 unsigned HOST_WIDE_INT q[8];
36174 unsigned int nonzero_halves[4];
36176 /* Split the two input vectors into 8 quarters. */
36177 q[0] = (((unsigned HOST_WIDE_INT) 1) << nelt4) - 1;
36178 for (i = 1; i < 8; ++i)
36179 q[i] = q[0] << (nelt4 * i);
36180 for (i = 0; i < 4; ++i)
36181 if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
36183 nonzero_halves[nzcnt] = i;
36189 gcc_assert (d->op0 == d->op1);
36190 nonzero_halves[1] = nonzero_halves[0];
36191 same_halves = true;
36193 else if (d->op0 == d->op1)
36195 gcc_assert (nonzero_halves[0] == 0);
36196 gcc_assert (nonzero_halves[1] == 1);
36201 if (d->perm[0] / nelt2 == nonzero_halves[1])
36203 /* Attempt to increase the likelihood that dfinal
36204 shuffle will be intra-lane. */
36205 char tmph = nonzero_halves[0];
36206 nonzero_halves[0] = nonzero_halves[1];
36207 nonzero_halves[1] = tmph;
36210 /* vperm2f128 or vperm2i128. */
36211 for (i = 0; i < nelt2; ++i)
36213 remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
36214 remap[i + nonzero_halves[0] * nelt2] = i;
36215 dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
36216 dremap.perm[i] = i + nonzero_halves[0] * nelt2;
36219 if (d->vmode != V8SFmode
36220 && d->vmode != V4DFmode
36221 && d->vmode != V8SImode)
36223 dremap.vmode = V8SImode;
36225 for (i = 0; i < 4; ++i)
36227 dremap.perm[i] = i + nonzero_halves[0] * 4;
36228 dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
36232 else if (d->op0 == d->op1)
36234 else if (TARGET_AVX2
36235 && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
36238 for (i = 0; i < nelt4; ++i)
36241 remap[i + nelt] = i * 2 + 1;
36242 remap[i + nelt2] = i * 2 + nelt2;
36243 remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
36244 dremap.perm[i * 2] = i;
36245 dremap.perm[i * 2 + 1] = i + nelt;
36246 dremap.perm[i * 2 + nelt2] = i + nelt2;
36247 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
36250 else if (TARGET_AVX2
36251 && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
36254 for (i = 0; i < nelt4; ++i)
36256 remap[i + nelt4] = i * 2;
36257 remap[i + nelt + nelt4] = i * 2 + 1;
36258 remap[i + nelt2 + nelt4] = i * 2 + nelt2;
36259 remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
36260 dremap.perm[i * 2] = i + nelt4;
36261 dremap.perm[i * 2 + 1] = i + nelt + nelt4;
36262 dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
36263 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
36270 /* Use the remapping array set up above to move the elements from their
36271 swizzled locations into their final destinations. */
36273 for (i = 0; i < nelt; ++i)
36275 unsigned e = remap[d->perm[i]];
36276 gcc_assert (e < nelt);
36277 /* If same_halves is true, both halves of the remapped vector are the
36278 same. Avoid cross-lane accesses if possible. */
36279 if (same_halves && i >= nelt2)
36281 gcc_assert (e < nelt2);
36282 dfinal.perm[i] = e + nelt2;
36285 dfinal.perm[i] = e;
36287 dfinal.op0 = gen_reg_rtx (dfinal.vmode);
36288 dfinal.op1 = dfinal.op0;
36289 dremap.target = dfinal.op0;
36291 /* Test if the final remap can be done with a single insn. For V4SFmode or
36292 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
36294 ok = expand_vec_perm_1 (&dfinal);
36295 seq = get_insns ();
36304 if (dremap.vmode != dfinal.vmode)
36306 dremap.target = gen_lowpart (dremap.vmode, dremap.target);
36307 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
36308 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
36311 ok = expand_vec_perm_1 (&dremap);
36318 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
36319 a single vector cross-lane permutation into vpermq followed
36320 by any of the single insn permutations. */
36323 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
36325 struct expand_vec_perm_d dremap, dfinal;
36326 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
36327 unsigned contents[2];
36331 && (d->vmode == V32QImode || d->vmode == V16HImode)
36332 && d->op0 == d->op1))
36337 for (i = 0; i < nelt2; ++i)
36339 contents[0] |= 1u << (d->perm[i] / nelt4);
36340 contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
36343 for (i = 0; i < 2; ++i)
36345 unsigned int cnt = 0;
36346 for (j = 0; j < 4; ++j)
36347 if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
36355 dremap.vmode = V4DImode;
36357 dremap.target = gen_reg_rtx (V4DImode);
36358 dremap.op0 = gen_lowpart (V4DImode, d->op0);
36359 dremap.op1 = dremap.op0;
36360 for (i = 0; i < 2; ++i)
36362 unsigned int cnt = 0;
36363 for (j = 0; j < 4; ++j)
36364 if ((contents[i] & (1u << j)) != 0)
36365 dremap.perm[2 * i + cnt++] = j;
36366 for (; cnt < 2; ++cnt)
36367 dremap.perm[2 * i + cnt] = 0;
36371 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
36372 dfinal.op1 = dfinal.op0;
36373 for (i = 0, j = 0; i < nelt; ++i)
36377 dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
36378 if ((d->perm[i] / nelt4) == dremap.perm[j])
36380 else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
36381 dfinal.perm[i] |= nelt4;
36383 gcc_unreachable ();
36386 ok = expand_vec_perm_1 (&dremap);
36389 ok = expand_vec_perm_1 (&dfinal);
36395 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
36396 a two vector permutation using 2 intra-lane interleave insns
36397 and cross-lane shuffle for 32-byte vectors. */
36400 expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
36403 rtx (*gen) (rtx, rtx, rtx);
36405 if (d->op0 == d->op1)
36407 if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
36409 else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
36415 if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
36417 for (i = 0; i < nelt; i += 2)
36418 if (d->perm[i] != d->perm[0] + i / 2
36419 || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
36429 gen = gen_vec_interleave_highv32qi;
36431 gen = gen_vec_interleave_lowv32qi;
36435 gen = gen_vec_interleave_highv16hi;
36437 gen = gen_vec_interleave_lowv16hi;
36441 gen = gen_vec_interleave_highv8si;
36443 gen = gen_vec_interleave_lowv8si;
36447 gen = gen_vec_interleave_highv4di;
36449 gen = gen_vec_interleave_lowv4di;
36453 gen = gen_vec_interleave_highv8sf;
36455 gen = gen_vec_interleave_lowv8sf;
36459 gen = gen_vec_interleave_highv4df;
36461 gen = gen_vec_interleave_lowv4df;
36464 gcc_unreachable ();
36467 emit_insn (gen (d->target, d->op0, d->op1));
36471 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
36472 permutation with two pshufb insns and an ior. We should have already
36473 failed all two instruction sequences. */
36476 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
36478 rtx rperm[2][16], vperm, l, h, op, m128;
36479 unsigned int i, nelt, eltsz;
36481 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
36483 gcc_assert (d->op0 != d->op1);
36486 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
36488 /* Generate two permutation masks. If the required element is within
36489 the given vector it is shuffled into the proper lane. If the required
36490 element is in the other vector, force a zero into the lane by setting
36491 bit 7 in the permutation mask. */
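/* Example (sketch): if d->perm[0] == 20 for V16QImode (eltsz == 1),
   element 0 comes from byte 4 of op1, so rperm[1][0] is 4 while
   rperm[0][0] is -128, zeroing that byte in the op0 shuffle; the
   final ior merges the two partial results. */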
36492 m128 = GEN_INT (-128);
36493 for (i = 0; i < nelt; ++i)
36495 unsigned j, e = d->perm[i];
36496 unsigned which = (e >= nelt);
36500 for (j = 0; j < eltsz; ++j)
36502 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
36503 rperm[1-which][i*eltsz + j] = m128;
36507 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
36508 vperm = force_reg (V16QImode, vperm);
36510 l = gen_reg_rtx (V16QImode);
36511 op = gen_lowpart (V16QImode, d->op0);
36512 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
36514 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
36515 vperm = force_reg (V16QImode, vperm);
36517 h = gen_reg_rtx (V16QImode);
36518 op = gen_lowpart (V16QImode, d->op1);
36519 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
36521 op = gen_lowpart (V16QImode, d->target);
36522 emit_insn (gen_iorv16qi3 (op, l, h));
36527 /* Implement arbitrary permutation of one V32QImode or V16HImode operand
36528 with two vpshufb insns, vpermq and vpor. We should have already failed
36529 all two or three instruction sequences. */
36532 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
36534 rtx rperm[2][32], vperm, l, h, hp, op, m128;
36535 unsigned int i, nelt, eltsz;
36538 || d->op0 != d->op1
36539 || (d->vmode != V32QImode && d->vmode != V16HImode))
36546 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
36548 /* Generate two permutation masks. If the required element is within
36549 the same lane, it is shuffled in. If the required element is from the
36550 other lane, force a zero by setting bit 7 in the permutation mask.
36551 The other mask has non-negative elements where an element is
36552 requested from the other lane; those elements are also moved to the
36553 other lane, so that the result of vpshufb has its two V2TImode halves swapped. */
36555 m128 = GEN_INT (-128);
36556 for (i = 0; i < nelt; ++i)
36558 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
36559 unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
36561 for (j = 0; j < eltsz; ++j)
36563 rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
36564 rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
36568 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
36569 vperm = force_reg (V32QImode, vperm);
36571 h = gen_reg_rtx (V32QImode);
36572 op = gen_lowpart (V32QImode, d->op0);
36573 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
36575 /* Swap the 128-bit lanes of h into hp. */
36576 hp = gen_reg_rtx (V4DImode);
36577 op = gen_lowpart (V4DImode, h);
36578 emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
36581 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
36582 vperm = force_reg (V32QImode, vperm);
36584 l = gen_reg_rtx (V32QImode);
36585 op = gen_lowpart (V32QImode, d->op0);
36586 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
36588 op = gen_lowpart (V32QImode, d->target);
36589 emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
36594 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
36595 and extract-odd permutations of two V32QImode or V16HImode operands
36596 with two vpshufb insns, vpor and vpermq. We should have already
36597 failed all two or three instruction sequences. */
36600 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
36602 rtx rperm[2][32], vperm, l, h, ior, op, m128;
36603 unsigned int i, nelt, eltsz;
36606 || d->op0 == d->op1
36607 || (d->vmode != V32QImode && d->vmode != V16HImode))
36610 for (i = 0; i < d->nelt; ++i)
36611 if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
36618 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
36620 /* Generate two permutation masks. In the first permutation mask
36621 the first quarter will contain indexes for the first half
36622 of the op0, the second quarter will contain bit 7 set, third quarter
36623 will contain indexes for the second half of the op0 and the
36624 last quarter bit 7 set. In the second permutation mask
36625 the first quarter will contain bit 7 set, the second quarter
36626 indexes for the first half of the op1, the third quarter bit 7 set
36627 and last quarter indexes for the second half of the op1.
36628 I.e. the first mask e.g. for V32QImode extract even will be:
36629 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
36630 (all values masked with 0xf except for -128) and second mask
36631 for extract even will be
36632 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
36633 m128 = GEN_INT (-128);
36634 for (i = 0; i < nelt; ++i)
36636 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
36637 unsigned which = d->perm[i] >= nelt;
36638 unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;
36640 for (j = 0; j < eltsz; ++j)
36642 rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
36643 rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
36647 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
36648 vperm = force_reg (V32QImode, vperm);
36650 l = gen_reg_rtx (V32QImode);
36651 op = gen_lowpart (V32QImode, d->op0);
36652 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
36654 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
36655 vperm = force_reg (V32QImode, vperm);
36657 h = gen_reg_rtx (V32QImode);
36658 op = gen_lowpart (V32QImode, d->op1);
36659 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
36661 ior = gen_reg_rtx (V32QImode);
36662 emit_insn (gen_iorv32qi3 (ior, l, h));
36664 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
36665 op = gen_lowpart (V4DImode, d->target);
36666 ior = gen_lowpart (V4DImode, ior);
36667 emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
36668 const1_rtx, GEN_INT (3)));
36673 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
36674 and extract-odd permutations. */
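/* Worked example (sketch) for the V4DFmode odd extraction below:
   vperm2f128 with 0x20 and 0x31 first builds t1 = { 0 1 4 5 } and
   t2 = { 2 3 6 7 }; unpckhpd then yields the odd elements
   { 1 3 5 7 }, while unpcklpd would yield the even elements
   { 0 2 4 6 }. */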
36677 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
36684 t1 = gen_reg_rtx (V4DFmode);
36685 t2 = gen_reg_rtx (V4DFmode);
36687 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
36688 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
36689 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
36691 /* Now an unpck[lh]pd will produce the result required. */
36693 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
36695 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
36701 int mask = odd ? 0xdd : 0x88;
36703 t1 = gen_reg_rtx (V8SFmode);
36704 t2 = gen_reg_rtx (V8SFmode);
36705 t3 = gen_reg_rtx (V8SFmode);
36707 /* Shuffle within the 128-bit lanes to produce:
36708 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
36709 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
36712 /* Shuffle the lanes around to produce:
36713 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
36714 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
36717 /* Shuffle within the 128-bit lanes to produce:
36718 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
36719 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
36721 /* Shuffle within the 128-bit lanes to produce:
36722 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
36723 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
36725 /* Shuffle the lanes around to produce:
36726 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
36727 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
36736 /* These are always directly implementable by expand_vec_perm_1. */
36737 gcc_unreachable ();
36741 return expand_vec_perm_pshufb2 (d);
36744 /* We need 2*log2(N)-1 operations to achieve odd/even
36745 with interleave. */
36746 t1 = gen_reg_rtx (V8HImode);
36747 t2 = gen_reg_rtx (V8HImode);
36748 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
36749 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
36750 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
36751 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
36753 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
36755 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
36762 return expand_vec_perm_pshufb2 (d);
36765 t1 = gen_reg_rtx (V16QImode);
36766 t2 = gen_reg_rtx (V16QImode);
36767 t3 = gen_reg_rtx (V16QImode);
36768 emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
36769 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
36770 emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
36771 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
36772 emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
36773 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
36775 t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
36777 t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
36784 return expand_vec_perm_vpshufb2_vpermq_even_odd (d);
36789 struct expand_vec_perm_d d_copy = *d;
36790 d_copy.vmode = V4DFmode;
36791 d_copy.target = gen_lowpart (V4DFmode, d->target);
36792 d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
36793 d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
36794 return expand_vec_perm_even_odd_1 (&d_copy, odd);
36797 t1 = gen_reg_rtx (V4DImode);
36798 t2 = gen_reg_rtx (V4DImode);
36800 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
36801 emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
36802 emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));
36804 /* Now a vpunpck[lh]qdq will produce the result required. */
36806 t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
36808 t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
36815 struct expand_vec_perm_d d_copy = *d;
36816 d_copy.vmode = V8SFmode;
36817 d_copy.target = gen_lowpart (V8SFmode, d->target);
36818 d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
36819 d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
36820 return expand_vec_perm_even_odd_1 (&d_copy, odd);
36823 t1 = gen_reg_rtx (V8SImode);
36824 t2 = gen_reg_rtx (V8SImode);
36826 /* Shuffle the lanes around into
36827 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
36828 emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, t1),
36829 gen_lowpart (V4DImode, d->op0),
36830 gen_lowpart (V4DImode, d->op1),
36832 emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, t2),
36833 gen_lowpart (V4DImode, d->op0),
36834 gen_lowpart (V4DImode, d->op1),
36837 /* Swap the 2nd and 3rd position in each lane into
36838 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
36839 emit_insn (gen_avx2_pshufdv3 (t1, t1,
36840 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
36841 emit_insn (gen_avx2_pshufdv3 (t2, t2,
36842 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
36844 /* Now a vpunpck[lh]qdq will produce
36845 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
36847 t3 = gen_avx2_interleave_highv4di (gen_lowpart (V4DImode, d->target),
36848 gen_lowpart (V4DImode, t1),
36849 gen_lowpart (V4DImode, t2));
36851 t3 = gen_avx2_interleave_lowv4di (gen_lowpart (V4DImode, d->target),
36852 gen_lowpart (V4DImode, t1),
36853 gen_lowpart (V4DImode, t2));
36858 gcc_unreachable ();
36864 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
36865 extract-even and extract-odd permutations. */
36868 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
36870 unsigned i, odd, nelt = d->nelt;
36873 if (odd != 0 && odd != 1)
36876 for (i = 1; i < nelt; ++i)
36877 if (d->perm[i] != 2 * i + odd)
36880 return expand_vec_perm_even_odd_1 (d, odd);
36883 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
36884 permutations. We assume that expand_vec_perm_1 has already failed. */
36887 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
36889 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
36890 enum machine_mode vmode = d->vmode;
36891 unsigned char perm2[4];
36899 /* These are special-cased in sse.md so that we can optionally
36900 use the vbroadcast instruction. They expand to two insns
36901 if the input happens to be in a register. */
36902 gcc_unreachable ();
36908 /* These are always implementable using standard shuffle patterns. */
36909 gcc_unreachable ();
36913 /* These can be implemented via interleave. We save one insn by
36914 stopping once we have promoted to V4SImode and then use pshufd. */
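/* Example (sketch): broadcasting byte 5 of a V16QImode vector first
   emits punpcklbw (5 is in the low half), promoting it to element 5
   of V8HImode; then punpckhwd (5 >= 4) promotes it to element 1 of
   V4SImode, and pshufd $0x55 replicates that dword. */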
36918 rtx (*gen) (rtx, rtx, rtx)
36919 = vmode == V16QImode ? gen_vec_interleave_lowv16qi
36920 : gen_vec_interleave_lowv8hi;
36924 gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
36925 : gen_vec_interleave_highv8hi;
36930 dest = gen_reg_rtx (vmode);
36931 emit_insn (gen (dest, op0, op0));
36932 vmode = get_mode_wider_vector (vmode);
36933 op0 = gen_lowpart (vmode, dest);
36935 while (vmode != V4SImode);
36937 memset (perm2, elt, 4);
36938 ok = expand_vselect (gen_lowpart (V4SImode, d->target), op0, perm2, 4);
36946 /* For AVX2 broadcasts of the first element vpbroadcast* or
36947 vpermq should be used by expand_vec_perm_1. */
36948 gcc_assert (!TARGET_AVX2 || d->perm[0]);
36952 gcc_unreachable ();
36956 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
36957 broadcast permutations. */
36960 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
36962 unsigned i, elt, nelt = d->nelt;
36964 if (d->op0 != d->op1)
36968 for (i = 1; i < nelt; ++i)
36969 if (d->perm[i] != elt)
36972 return expand_vec_perm_broadcast_1 (d);
36975 /* Implement arbitrary permutation of two V32QImode or V16HImode operands
36976 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
36977 all the shorter instruction sequences. */
36980 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
36982 rtx rperm[4][32], vperm, l[2], h[2], op, m128;
36983 unsigned int i, nelt, eltsz;
36987 || d->op0 == d->op1
36988 || (d->vmode != V32QImode && d->vmode != V16HImode))
36995 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
36997 /* Generate 4 permutation masks. If the required element is within
36998 the same lane, it is shuffled in. If the required element is from the
36999 other lane, force a zero by setting bit 7 in the permutation mask.
37000 The other mask has non-negative elements where an element is
37001 requested from the other lane; those elements are also moved to the
37002 other lane, so that the result of vpshufb has its two V2TImode halves swapped. */
37004 m128 = GEN_INT (-128);
37005 for (i = 0; i < 32; ++i)
37007 rperm[0][i] = m128;
37008 rperm[1][i] = m128;
37009 rperm[2][i] = m128;
37010 rperm[3][i] = m128;
37016 for (i = 0; i < nelt; ++i)
37018 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
37019 unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
37020 unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);
37022 for (j = 0; j < eltsz; ++j)
37023 rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
37024 used[which] = true;
37027 for (i = 0; i < 2; ++i)
37029 if (!used[2 * i + 1])
37034 vperm = gen_rtx_CONST_VECTOR (V32QImode,
37035 gen_rtvec_v (32, rperm[2 * i + 1]));
37036 vperm = force_reg (V32QImode, vperm);
37037 h[i] = gen_reg_rtx (V32QImode);
37038 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
37039 emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
37042 /* Swap the 128-bit lanes of h[X]. */
37043 for (i = 0; i < 2; ++i)
37045 if (h[i] == NULL_RTX)
37047 op = gen_reg_rtx (V4DImode);
37048 emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
37049 const2_rtx, GEN_INT (3), const0_rtx,
37051 h[i] = gen_lowpart (V32QImode, op);
37054 for (i = 0; i < 2; ++i)
37061 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
37062 vperm = force_reg (V32QImode, vperm);
37063 l[i] = gen_reg_rtx (V32QImode);
37064 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
37065 emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
37068 for (i = 0; i < 2; ++i)
37072 op = gen_reg_rtx (V32QImode);
37073 emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
37080 gcc_assert (l[0] && l[1]);
37081 op = gen_lowpart (V32QImode, d->target);
37082 emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
37086 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
37087 With all of the interface bits taken care of, perform the expansion
37088 in D and return true on success. */
37091 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
37093 /* Try a single instruction expansion. */
37094 if (expand_vec_perm_1 (d))
37097 /* Try sequences of two instructions. */
37099 if (expand_vec_perm_pshuflw_pshufhw (d))
37102 if (expand_vec_perm_palignr (d))
37105 if (expand_vec_perm_interleave2 (d))
37108 if (expand_vec_perm_broadcast (d))
37111 if (expand_vec_perm_vpermq_perm_1 (d))
37114 /* Try sequences of three instructions. */
37116 if (expand_vec_perm_pshufb2 (d))
37119 if (expand_vec_perm_interleave3 (d))
37122 /* Try sequences of four instructions. */
37124 if (expand_vec_perm_vpshufb2_vpermq (d))
37127 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
37130 /* ??? Look for narrow permutations whose element orderings would
37131 allow the promotion to a wider mode. */
37133 /* ??? Look for sequences of interleave or a wider permute that place
37134 the data into the correct lanes for a half-vector shuffle like
37135 pshuf[lh]w or vpermilps. */
37137 /* ??? Look for sequences of interleave that produce the desired results.
37138 The combinatorics of punpck[lh] get pretty ugly... */
37140 if (expand_vec_perm_even_odd (d))
37143 /* Even longer sequences. */
37144 if (expand_vec_perm_vpshufb4_vpermq2 (d))
37151 ix86_expand_vec_perm_const (rtx operands[4])
37153 struct expand_vec_perm_d d;
37154 unsigned char perm[MAX_VECT_LEN];
37155 int i, nelt, which;
37158 d.target = operands[0];
37159 d.op0 = operands[1];
37160 d.op1 = operands[2];
37163 d.vmode = GET_MODE (d.target);
37164 gcc_assert (VECTOR_MODE_P (d.vmode));
37165 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
37166 d.testing_p = false;
37168 gcc_assert (GET_CODE (sel) == CONST_VECTOR);
37169 gcc_assert (XVECLEN (sel, 0) == nelt);
37170 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
37172 for (i = which = 0; i < nelt; ++i)
37174 rtx e = XVECEXP (sel, 0, i);
37175 int ei = INTVAL (e) & (2 * nelt - 1);
37177 which |= (ei < nelt ? 1 : 2);
37188 if (!rtx_equal_p (d.op0, d.op1))
37191 /* The elements of PERM do not suggest that only the first operand
37192 is used, but both operands are identical. Allow easier matching
37193 of the permutation by folding the permutation into the single input vector. */
37195 for (i = 0; i < nelt; ++i)
37196 if (d.perm[i] >= nelt)
37205 for (i = 0; i < nelt; ++i)
37211 if (ix86_expand_vec_perm_const_1 (&d))
37214 /* If the mask says both arguments are needed, but they are the same,
37215 the above tried to expand with d.op0 == d.op1. If that didn't work,
37216 retry with d.op0 != d.op1 as that is what testing has been done with. */
37217 if (which == 3 && d.op0 == d.op1)
37222 memcpy (d.perm, perm, sizeof (perm));
37223 d.op1 = gen_reg_rtx (d.vmode);
37225 ok = ix86_expand_vec_perm_const_1 (&d);
37226 seq = get_insns ();
37230 emit_move_insn (d.op1, d.op0);
37239 /* Implement targetm.vectorize.vec_perm_const_ok. */
37242 ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode,
37243 const unsigned char *sel)
37245 struct expand_vec_perm_d d;
37246 unsigned int i, nelt, which;
37250 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
37251 d.testing_p = true;
37253 /* Given sufficient ISA support we can just return true here
37254 for selected vector modes. */
37255 if (GET_MODE_SIZE (d.vmode) == 16)
37257 /* All implementable with a single vpperm insn. */
37260 /* All implementable with 2 pshufb + 1 ior. */
37263 /* All implementable with shufpd or unpck[lh]pd. */
37268 /* Extract the values from the vector CST into the permutation array in D. */
37270 memcpy (d.perm, sel, nelt);
37271 for (i = which = 0; i < nelt; ++i)
37273 unsigned char e = d.perm[i];
37274 gcc_assert (e < 2 * nelt);
37275 which |= (e < nelt ? 1 : 2);
37278 /* For all elements from second vector, fold the elements to first. */
37280 for (i = 0; i < nelt; ++i)
37283 /* Check whether the mask can be applied to the vector type. */
37284 one_vec = (which != 3);
37286 /* Implementable with shufps or pshufd. */
37287 if (one_vec && (d.vmode == V4SFmode || d.vmode == V4SImode))
37290 /* Otherwise we have to go through the motions and see if we can
37291 figure out how to generate the requested permutation. */
37292 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
37293 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
37295 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
37298 ret = ix86_expand_vec_perm_const_1 (&d);
37305 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
37307 struct expand_vec_perm_d d;
37313 d.vmode = GET_MODE (targ);
37314 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
37315 d.testing_p = false;
37317 for (i = 0; i < nelt; ++i)
37318 d.perm[i] = i * 2 + odd;
37320 /* We'll either be able to implement the permutation directly... */
37321 if (expand_vec_perm_1 (&d))
37324 /* ... or we use the special-case patterns. */
37325 expand_vec_perm_even_odd_1 (&d, odd);
37328 /* Expand an insert into a vector register through pinsr insn.
37329 Return true if successful. */
37332 ix86_expand_pinsr (rtx *operands)
37334 rtx dst = operands[0];
37335 rtx src = operands[3];
37337 unsigned int size = INTVAL (operands[1]);
37338 unsigned int pos = INTVAL (operands[2]);
37340 if (GET_CODE (dst) == SUBREG)
37342 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
37343 dst = SUBREG_REG (dst);
37346 if (GET_CODE (src) == SUBREG)
37347 src = SUBREG_REG (src);
37349 switch (GET_MODE (dst))
37356 enum machine_mode srcmode, dstmode;
37357 rtx (*pinsr)(rtx, rtx, rtx, rtx);
37359 srcmode = mode_for_size (size, MODE_INT, 0);
37364 if (!TARGET_SSE4_1)
37366 dstmode = V16QImode;
37367 pinsr = gen_sse4_1_pinsrb;
37373 dstmode = V8HImode;
37374 pinsr = gen_sse2_pinsrw;
37378 if (!TARGET_SSE4_1)
37380 dstmode = V4SImode;
37381 pinsr = gen_sse4_1_pinsrd;
37385 gcc_assert (TARGET_64BIT);
37386 if (!TARGET_SSE4_1)
37388 dstmode = V2DImode;
37389 pinsr = gen_sse4_1_pinsrq;
37396 dst = gen_lowpart (dstmode, dst);
37397 src = gen_lowpart (srcmode, src);
37401 emit_insn (pinsr (dst, dst, src, GEN_INT (1 << pos)));
37410 /* This function returns the calling-ABI-specific va_list type node.
37411 It returns the va_list type specific to FNDECL. */
37414 ix86_fn_abi_va_list (tree fndecl)
37417 return va_list_type_node;
37418 gcc_assert (fndecl != NULL_TREE);
37420 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
37421 return ms_va_list_type_node;
37423 return sysv_va_list_type_node;
37426 /* Returns the canonical va_list type specified by TYPE. If there
37427 is no valid TYPE provided, it returns NULL_TREE. */
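/* Example (sketch): on x86-64, __builtin_va_list is an array of one
   structure, so a va_list argument passed on to another function
   decays to a pointer; both the array form and the decayed pointer
   must canonicalize to the same va_list type node below. */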
37430 ix86_canonical_va_list_type (tree type)
37434 /* Resolve references and pointers to va_list type. */
37435 if (TREE_CODE (type) == MEM_REF)
37436 type = TREE_TYPE (type);
37437 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
37438 type = TREE_TYPE (type);
37439 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
37440 type = TREE_TYPE (type);
37442 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
37444 wtype = va_list_type_node;
37445 gcc_assert (wtype != NULL_TREE);
37447 if (TREE_CODE (wtype) == ARRAY_TYPE)
37449 /* If va_list is an array type, the argument may have decayed
37450 to a pointer type, e.g. by being passed to another function.
37451 In that case, unwrap both types so that we can compare the
37452 underlying records. */
37453 if (TREE_CODE (htype) == ARRAY_TYPE
37454 || POINTER_TYPE_P (htype))
37456 wtype = TREE_TYPE (wtype);
37457 htype = TREE_TYPE (htype);
37460 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
37461 return va_list_type_node;
37462 wtype = sysv_va_list_type_node;
37463 gcc_assert (wtype != NULL_TREE);
37465 if (TREE_CODE (wtype) == ARRAY_TYPE)
37467 /* If va_list is an array type, the argument may have decayed
37468 to a pointer type, e.g. by being passed to another function.
37469 In that case, unwrap both types so that we can compare the
37470 underlying records. */
37471 if (TREE_CODE (htype) == ARRAY_TYPE
37472 || POINTER_TYPE_P (htype))
37474 wtype = TREE_TYPE (wtype);
37475 htype = TREE_TYPE (htype);
37478 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
37479 return sysv_va_list_type_node;
37480 wtype = ms_va_list_type_node;
37481 gcc_assert (wtype != NULL_TREE);
37483 if (TREE_CODE (wtype) == ARRAY_TYPE)
37485 /* If va_list is an array type, the argument may have decayed
37486 to a pointer type, e.g. by being passed to another function.
37487 In that case, unwrap both types so that we can compare the
37488 underlying records. */
37489 if (TREE_CODE (htype) == ARRAY_TYPE
37490 || POINTER_TYPE_P (htype))
37492 wtype = TREE_TYPE (wtype);
37493 htype = TREE_TYPE (htype);
37496 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
37497 return ms_va_list_type_node;
37500 return std_canonical_va_list_type (type);
37503 /* Iterate through the target-specific builtin types for va_list.
37504 IDX denotes the iterator, *PTREE is set to the result type of
37505 the va_list builtin, and *PNAME to its internal type.
37506 Returns zero if there is no element for this index, otherwise
37507 IDX should be increased upon the next call.
37508 Note, do not iterate a base builtin's name like __builtin_va_list.
37509 Used from c_common_nodes_and_builtins. */
37512 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
37522 *ptree = ms_va_list_type_node;
37523 *pname = "__builtin_ms_va_list";
37527 *ptree = sysv_va_list_type_node;
37528 *pname = "__builtin_sysv_va_list";
37536 #undef TARGET_SCHED_DISPATCH
37537 #define TARGET_SCHED_DISPATCH has_dispatch
37538 #undef TARGET_SCHED_DISPATCH_DO
37539 #define TARGET_SCHED_DISPATCH_DO do_dispatch
37540 #undef TARGET_SCHED_REASSOCIATION_WIDTH
37541 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
37543 /* The size of the dispatch window is the total number of bytes of
37544 object code allowed in a window. */
37545 #define DISPATCH_WINDOW_SIZE 16
37547 /* Number of dispatch windows considered for scheduling. */
37548 #define MAX_DISPATCH_WINDOWS 3
37550 /* Maximum number of instructions in a window. */
37553 /* Maximum number of immediate operands in a window. */
37556 /* Maximum number of immediate bits allowed in a window. */
37557 #define MAX_IMM_SIZE 128
37559 /* Maximum number of 32 bit immediates allowed in a window. */
37560 #define MAX_IMM_32 4
37562 /* Maximum number of 64 bit immediates allowed in a window. */
37563 #define MAX_IMM_64 2
37565 /* Maximum total of loads or prefetches allowed in a window. */
37568 /* Maximum total of stores allowed in a window. */
37569 #define MAX_STORE 1
37575 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
37576 enum dispatch_group {
37591 /* Number of allowable groups in a dispatch window. It is an array
37592 indexed by dispatch_group enum. 100 is used as a big number,
37593 because the number of these kinds of operations does not have any
37594 effect in a dispatch window, but we need them for other reasons in the table. */
37596 static unsigned int num_allowable_groups[disp_last] = {
37597 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
37600 char group_name[disp_last + 1][16] = {
37601 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
37602 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
37603 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
37606 /* Instruction path. */
37609 path_single, /* Single micro op. */
37610 path_double, /* Double micro op. */
37611 path_multi, /* Instructions with more than 2 micro ops. */
37615 /* sched_insn_info defines a window to the instructions scheduled in
37616 the basic block. It contains a pointer to the insn_info table and
37617 the instruction scheduled.
37619 Windows are allocated for each basic block and are linked with each other. */
37621 typedef struct sched_insn_info_s {
37623 enum dispatch_group group;
37624 enum insn_path path;
37629 /* Linked list of dispatch windows. This is a two way list of
37630 dispatch windows of a basic block. It contains information about
37631 the number of uops in the window and the total number of
37632 instructions and of bytes in the object code for this dispatch window. */
37634 typedef struct dispatch_windows_s {
37635 int num_insn; /* Number of insn in the window. */
37636 int num_uops; /* Number of uops in the window. */
37637 int window_size; /* Number of bytes in the window. */
37638 int window_num; /* Window number, 0 or 1. */
37639 int num_imm; /* Number of immediates in an insn. */
37640 int num_imm_32; /* Number of 32 bit immediates in an insn. */
37641 int num_imm_64; /* Number of 64 bit immediates in an insn. */
37642 int imm_size; /* Total immediates in the window. */
37643 int num_loads; /* Total memory loads in the window. */
37644 int num_stores; /* Total memory stores in the window. */
37645 int violation; /* Violation exists in window. */
37646 sched_insn_info *window; /* Pointer to the window. */
37647 struct dispatch_windows_s *next;
37648 struct dispatch_windows_s *prev;
37649 } dispatch_windows;
37651 /* Immediate values used in an insn. */
37652 typedef struct imm_info_s
37659 static dispatch_windows *dispatch_window_list;
37660 static dispatch_windows *dispatch_window_list1;
37662 /* Get dispatch group of insn. */
37664 static enum dispatch_group
37665 get_mem_group (rtx insn)
37667 enum attr_memory memory;
37669 if (INSN_CODE (insn) < 0)
37670 return disp_no_group;
37671 memory = get_attr_memory (insn);
37672 if (memory == MEMORY_STORE)
37675 if (memory == MEMORY_LOAD)
37678 if (memory == MEMORY_BOTH)
37679 return disp_load_store;
37681 return disp_no_group;
37684 /* Return true if insn is a compare instruction. */
37689 enum attr_type type;
37691 type = get_attr_type (insn);
37692 return (type == TYPE_TEST
37693 || type == TYPE_ICMP
37694 || type == TYPE_FCMP
37695 || GET_CODE (PATTERN (insn)) == COMPARE);
37698 /* Return true if a dispatch violation was encountered. */
37701 dispatch_violation (void)
37703 if (dispatch_window_list->next)
37704 return dispatch_window_list->next->violation;
37705 return dispatch_window_list->violation;
37708 /* Return true if insn is a branch instruction. */
37711 is_branch (rtx insn)
37713 return (CALL_P (insn) || JUMP_P (insn));
37716 /* Return true if insn is a prefetch instruction. */
37719 is_prefetch (rtx insn)
37721 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
37724 /* This function initializes a dispatch window and the list container holding a
37725 pointer to the window. */
37728 init_window (int window_num)
37731 dispatch_windows *new_list;
37733 if (window_num == 0)
37734 new_list = dispatch_window_list;
37736 new_list = dispatch_window_list1;
37738 new_list->num_insn = 0;
37739 new_list->num_uops = 0;
37740 new_list->window_size = 0;
37741 new_list->next = NULL;
37742 new_list->prev = NULL;
37743 new_list->window_num = window_num;
37744 new_list->num_imm = 0;
37745 new_list->num_imm_32 = 0;
37746 new_list->num_imm_64 = 0;
37747 new_list->imm_size = 0;
37748 new_list->num_loads = 0;
37749 new_list->num_stores = 0;
37750 new_list->violation = false;
37752 for (i = 0; i < MAX_INSN; i++)
37754 new_list->window[i].insn = NULL;
37755 new_list->window[i].group = disp_no_group;
37756 new_list->window[i].path = no_path;
37757 new_list->window[i].byte_len = 0;
37758 new_list->window[i].imm_bytes = 0;
37763 /* This function allocates and initializes a dispatch window and the
37764 list container holding a pointer to the window. */
37766 static dispatch_windows *
37767 allocate_window (void)
37769 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
37770 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
37775 /* This routine initializes the dispatch scheduling information. It
37776 initiates building dispatch scheduler tables and constructs the
37777 first dispatch window. */
37780 init_dispatch_sched (void)
37782 /* Allocate a dispatch list and a window. */
37783 dispatch_window_list = allocate_window ();
37784 dispatch_window_list1 = allocate_window ();
37789 /* This function returns true if a branch is detected. End of a basic block
37790 does not have to be a branch, but here we assume only branches end a basic block. */
37794 is_end_basic_block (enum dispatch_group group)
37796 return group == disp_branch;
37799 /* This function is called when the end of a window processing is reached. */
37802 process_end_window (void)
37804 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
37805 if (dispatch_window_list->next)
37807 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
37808 gcc_assert (dispatch_window_list->window_size
37809 + dispatch_window_list1->window_size <= 48);
37815 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
37816 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
37817 for 48 bytes of instructions. Note that these windows are not dispatch
37818 windows whose sizes are DISPATCH_WINDOW_SIZE. */
37820 static dispatch_windows *
37821 allocate_next_window (int window_num)
37823 if (window_num == 0)
37825 if (dispatch_window_list->next)
37828 return dispatch_window_list;
37831 dispatch_window_list->next = dispatch_window_list1;
37832 dispatch_window_list1->prev = dispatch_window_list;
37834 return dispatch_window_list1;
37837 /* Increment the number of immediate operands of an instruction. */
37840 find_constant_1 (rtx *in_rtx, imm_info *imm_values)
37845 switch ( GET_CODE (*in_rtx))
37850 (imm_values->imm)++;
37851 if (x86_64_immediate_operand (*in_rtx, SImode))
37852 (imm_values->imm32)++;
37854 (imm_values->imm64)++;
37858 (imm_values->imm)++;
37859 (imm_values->imm64)++;
37863 if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
37865 (imm_values->imm)++;
37866 (imm_values->imm32)++;
37877 /* Compute number of immediate operands of an instruction. */
37880 find_constant (rtx in_rtx, imm_info *imm_values)
37882 for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
37883 (rtx_function) find_constant_1, (void *) imm_values);
37886 /* Return total size of immediate operands of an instruction along with number
37887 of corresponding immediate-operands. It initializes its parameters to zero
37888 before calling FIND_CONSTANT.
37889 INSN is the input instruction. IMM is the total of immediates.
37890 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64 bit immediates. */
37894 get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
37896 imm_info imm_values = {0, 0, 0};
37898 find_constant (insn, &imm_values);
37899 *imm = imm_values.imm;
37900 *imm32 = imm_values.imm32;
37901 *imm64 = imm_values.imm64;
37902 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
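/* Example (sketch): for an insn such as  movq $0x123456789a, %rax
   the walk above counts one immediate that does not fit in 32 bits,
   so this returns 8 with *imm == 1, *imm32 == 0 and *imm64 == 1. */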
37905 /* This function indicates if an operand of an instruction is an immediate. */
37909 has_immediate (rtx insn)
37911 int num_imm_operand;
37912 int num_imm32_operand;
37913 int num_imm64_operand;
37916 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
37917 &num_imm64_operand);
37921 /* Return single or double path for instructions. */
37923 static enum insn_path
37924 get_insn_path (rtx insn)
37926 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
37928 if ((int)path == 0)
37929 return path_single;
37931 if ((int)path == 1)
37932 return path_double;
37937 /* Return insn dispatch group. */
37939 static enum dispatch_group
37940 get_insn_group (rtx insn)
37942 enum dispatch_group group = get_mem_group (insn);
37946 if (is_branch (insn))
37947 return disp_branch;
37952 if (has_immediate (insn))
37955 if (is_prefetch (insn))
37956 return disp_prefetch;
37958 return disp_no_group;
37961 /* Count number of GROUP restricted instructions in a dispatch
37962 window WINDOW_LIST. */
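/* Example of the immediate bookkeeping checked below (a sketch): with
   MAX_IMM_32 == 4 and MAX_IMM_64 == 2, each 64-bit immediate consumes
   two of the four 32-bit slots, so a window already holding one
   64-bit immediate can accept at most two more 32-bit immediates. */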
37965 count_num_restricted (rtx insn, dispatch_windows *window_list)
37967 enum dispatch_group group = get_insn_group (insn);
37969 int num_imm_operand;
37970 int num_imm32_operand;
37971 int num_imm64_operand;
37973 if (group == disp_no_group)
37974 return 0;
37976 if (group == disp_imm)
37978 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
37979 &num_imm64_operand);
37980 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
37981 || num_imm_operand + window_list->num_imm > MAX_IMM
37982 || (num_imm32_operand > 0
37983 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
37984 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
37985 || (num_imm64_operand > 0
37986 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
37987 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
37988 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
37989 && num_imm64_operand > 0
37990 && ((window_list->num_imm_64 > 0
37991 && window_list->num_insn >= 2)
37992 || window_list->num_insn >= 3)))
37993 return BIG;
37998 if ((group == disp_load_store
37999 && (window_list->num_loads >= MAX_LOAD
38000 || window_list->num_stores >= MAX_STORE))
38001 || ((group == disp_load
38002 || group == disp_prefetch)
38003 && window_list->num_loads >= MAX_LOAD)
38004 || (group == disp_store
38005 && window_list->num_stores >= MAX_STORE))
38006 return BIG;
38008 return 1;
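/* Worked example (illustrative, not part of the original source):
   assume the window already holds one 64-bit immediate and INSN
   brings another.  Even if MAX_IMM_64 still has room, each 64-bit
   immediate is also charged as two 32-bit slots, so the test
   window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32 can
   still classify INSN as over budget and make the function return
   BIG.  */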
38011 /* This function returns true if insn satisfies dispatch rules on the
38012 last window scheduled. */
38014 static bool
38015 fits_dispatch_window (rtx insn)
38017 dispatch_windows *window_list = dispatch_window_list;
38018 dispatch_windows *window_list_next = dispatch_window_list->next;
38019 unsigned int num_restrict;
38020 enum dispatch_group group = get_insn_group (insn);
38021 enum insn_path path = get_insn_path (insn);
38022 int sum;
38024 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
38025 instructions should be given the lowest priority in the
38026 scheduling process in Haifa scheduler to make sure they will be
38027 scheduled in the same dispatch window as the reference to them. */
38028 if (group == disp_jcc || group == disp_cmp)
38031 /* Check nonrestricted. */
38032 if (group == disp_no_group || group == disp_branch)
38035 /* Get last dispatch window. */
38036 if (window_list_next)
38037 window_list = window_list_next;
38039 if (window_list->window_num == 1)
38041 sum = window_list->prev->window_size + window_list->window_size;
38043 if (sum == 32
38044 || (min_insn_size (insn) + sum) >= 48)
38045 /* Window 1 is full. Go for next window. */
38046 return true;
38049 num_restrict = count_num_restricted (insn, window_list);
38051 if (num_restrict > num_allowable_groups[group])
38054 /* See if it fits in the first window. */
38055 if (window_list->window_num == 0)
38057 /* The first window should have only single and double path
38058 uops in it. */
38059 if (path == path_double
38060 && (window_list->num_uops + 2) > MAX_INSN)
38061 return false;
38062 else if (path != path_single)
38063 return false;
38065 return true;
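/* Example (illustrative, not part of the original source): assuming
   MAX_INSN is 4, a path_double insn needs two uop slots, so it no
   longer fits in window 0 once three uops are booked (3 + 2 > 4),
   while a path_single insn would still be accepted.  */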
38068 /* Add an instruction INSN with NUM_UOPS micro-operations to the
38069 dispatch window WINDOW_LIST. */
38071 static void
38072 add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
38074 int byte_len = min_insn_size (insn);
38075 int num_insn = window_list->num_insn;
38076 int imm_size;
38077 sched_insn_info *window = window_list->window;
38078 enum dispatch_group group = get_insn_group (insn);
38079 enum insn_path path = get_insn_path (insn);
38080 int num_imm_operand;
38081 int num_imm32_operand;
38082 int num_imm64_operand;
38084 if (!window_list->violation && group != disp_cmp
38085 && !fits_dispatch_window (insn))
38086 window_list->violation = true;
38088 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
38089 &num_imm64_operand);
38091 /* Initialize window with new instruction. */
38092 window[num_insn].insn = insn;
38093 window[num_insn].byte_len = byte_len;
38094 window[num_insn].group = group;
38095 window[num_insn].path = path;
38096 window[num_insn].imm_bytes = imm_size;
38098 window_list->window_size += byte_len;
38099 window_list->num_insn = num_insn + 1;
38100 window_list->num_uops = window_list->num_uops + num_uops;
38101 window_list->imm_size += imm_size;
38102 window_list->num_imm += num_imm_operand;
38103 window_list->num_imm_32 += num_imm32_operand;
38104 window_list->num_imm_64 += num_imm64_operand;
38106 if (group == disp_store)
38107 window_list->num_stores += 1;
38108 else if (group == disp_load
38109 || group == disp_prefetch)
38110 window_list->num_loads += 1;
38111 else if (group == disp_load_store)
38113 window_list->num_stores += 1;
38114 window_list->num_loads += 1;
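/* Accounting example (illustrative, not part of the original
   source): a read-modify-write insn classified as disp_load_store
   increments both num_loads and num_stores, so a single such insn
   can consume the whole MAX_STORE budget of the window while also
   taking one of its MAX_LOAD slots.  */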
38118 /* Adds a scheduled instruction, INSN, to the current dispatch window.
38119 If the total bytes of instructions or the number of instructions in
38120 the window exceed the allowable limits, it allocates a new window. */
38122 static void
38123 add_to_dispatch_window (rtx insn)
38125 int byte_len;
38126 dispatch_windows *window_list;
38127 dispatch_windows *next_list;
38128 dispatch_windows *window0_list;
38129 enum insn_path path;
38130 enum dispatch_group insn_group;
38131 bool insn_fits;
38132 int num_insn;
38133 int num_uops;
38134 int window_num;
38135 int insn_num_uops;
38136 int sum;
38138 if (INSN_CODE (insn) < 0)
38139 return;
38141 byte_len = min_insn_size (insn);
38142 window_list = dispatch_window_list;
38143 next_list = window_list->next;
38144 path = get_insn_path (insn);
38145 insn_group = get_insn_group (insn);
38147 /* Get the last dispatch window. */
38148 if (next_list)
38149 window_list = dispatch_window_list->next;
38151 if (path == path_single)
38152 insn_num_uops = 1;
38153 else if (path == path_double)
38154 insn_num_uops = 2;
38155 else
38156 insn_num_uops = (int) path;
38158 /* If the current window is full, get a new window.
38159 Window number zero is full if MAX_INSN uops are scheduled in it.
38160 Window number one is full if window zero's bytes plus window
38161 one's bytes total 32, or if adding the bytes of the new
38162 instruction to that total makes it 48 or more, or if it already
38163 has MAX_INSN instructions in it. */
38164 num_insn = window_list->num_insn;
38165 num_uops = window_list->num_uops;
38166 window_num = window_list->window_num;
38167 insn_fits = fits_dispatch_window (insn);
38169 if (num_insn >= MAX_INSN
38170 || num_uops + insn_num_uops > MAX_INSN
38171 || !(insn_fits))
38173 window_num = ~window_num & 1;
38174 window_list = allocate_next_window (window_num);
38177 if (window_num == 0)
38179 add_insn_window (insn, window_list, insn_num_uops);
38180 if (window_list->num_insn >= MAX_INSN
38181 && insn_group == disp_branch)
38183 process_end_window ();
38187 else if (window_num == 1)
38189 window0_list = window_list->prev;
38190 sum = window0_list->window_size + window_list->window_size;
38191 if (sum == 32
38192 || (byte_len + sum) >= 48)
38194 process_end_window ();
38195 window_list = dispatch_window_list;
38198 add_insn_window (insn, window_list, insn_num_uops);
38200 else
38201 gcc_unreachable ();
38203 if (is_end_basic_block (insn_group))
38205 /* End of basic block is reached; do end-basic-block processing. */
38206 process_end_window ();
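/* Walk-through (illustrative, not part of the original source):
   with window 0 holding three path_single insns (num_uops == 3), a
   path_double insn needs two more slots.  Assuming MAX_INSN is 4,
   3 + 2 > MAX_INSN, so the code above flips window_num from 0 to 1,
   allocates window 1, and places the insn there.  When window 1 in
   turn fills up (the pair reaches 32 bytes, or would reach 48 with
   the new insn), process_end_window starts a fresh pair.  */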
38211 /* Print the dispatch window, WINDOW_NUM, to FILE. */
38213 DEBUG_FUNCTION static void
38214 debug_dispatch_window_file (FILE *file, int window_num)
38216 dispatch_windows *list;
38217 int i;
38219 if (window_num == 0)
38220 list = dispatch_window_list;
38221 else
38222 list = dispatch_window_list1;
38224 fprintf (file, "Window #%d:\n", list->window_num);
38225 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
38226 list->num_insn, list->num_uops, list->window_size);
38227 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
38228 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
38230 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
38231 list->num_stores);
38232 fprintf (file, " insn info:\n");
38234 for (i = 0; i < MAX_INSN; i++)
38236 if (!list->window[i].insn)
38237 break;
38238 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
38239 i, group_name[list->window[i].group],
38240 i, (void *)list->window[i].insn,
38241 i, list->window[i].path,
38242 i, list->window[i].byte_len,
38243 i, list->window[i].imm_bytes);
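/* Sample output (illustrative; all field values are hypothetical):

     Window #0:
       num_insn = 2, num_uops = 2, window_size = 9
       num_imm = 1, num_imm_32 = 1, num_imm_64 = 0, imm_size = 4
       num_loads = 1, num_stores = 0
       insn info:
         group[0] = ..., insn[0] = ..., path[0] = 1 ...

   The exact layout follows the fprintf calls above; group names come
   from the group_name table.  */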
38247 /* Print to stdout a dispatch window. */
38249 DEBUG_FUNCTION void
38250 debug_dispatch_window (int window_num)
38252 debug_dispatch_window_file (stdout, window_num);
38255 /* Print INSN dispatch information to FILE. */
38257 DEBUG_FUNCTION static void
38258 debug_insn_dispatch_info_file (FILE *file, rtx insn)
38260 int byte_len;
38261 enum insn_path path;
38262 enum dispatch_group group;
38263 int imm_size;
38264 int num_imm_operand;
38265 int num_imm32_operand;
38266 int num_imm64_operand;
38268 if (INSN_CODE (insn) < 0)
38269 return;
38271 byte_len = min_insn_size (insn);
38272 path = get_insn_path (insn);
38273 group = get_insn_group (insn);
38274 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
38275 &num_imm64_operand);
38277 fprintf (file, " insn info:\n");
38278 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
38279 group_name[group], path, byte_len);
38280 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
38281 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
38284 /* Print to STDOUT the status of the ready list with respect to
38285 dispatch windows. */
38287 DEBUG_FUNCTION void
38288 debug_ready_dispatch (void)
38290 int i;
38291 int no_ready = number_in_ready ();
38293 fprintf (stdout, "Number of ready: %d\n", no_ready);
38295 for (i = 0; i < no_ready; i++)
38296 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
38299 /* This routine is the driver of the dispatch scheduler. */
38301 static void
38302 do_dispatch (rtx insn, int mode)
38304 if (mode == DISPATCH_INIT)
38305 init_dispatch_sched ();
38306 else if (mode == ADD_TO_DISPATCH_WINDOW)
38307 add_to_dispatch_window (insn);
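/* Usage sketch (illustrative, not part of the original source): the
   scheduler is expected to call

     do_dispatch (insn, DISPATCH_INIT);

   once to set up the window pair, and then

     do_dispatch (insn, ADD_TO_DISPATCH_WINDOW);

   for each scheduled insn, with has_dispatch below gating both
   calls via the IS_DISPATCH_ON action.  */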
38310 /* Return TRUE if Dispatch Scheduling is supported. */
38312 static bool
38313 has_dispatch (rtx insn, int action)
38315 if ((ix86_tune == PROCESSOR_BDVER1 || ix86_tune == PROCESSOR_BDVER2)
38316 && flag_dispatch_scheduler)
38317 switch (action)
38322 case IS_DISPATCH_ON:
38323 return true;
38326 case IS_CMP:
38327 return is_cmp (insn);
38329 case DISPATCH_VIOLATION:
38330 return dispatch_violation ();
38332 case FITS_DISPATCH_WINDOW:
38333 return fits_dispatch_window (insn);
38339 /* Implementation of reassociation_width target hook used by
38340 reassoc phase to identify parallelism level in reassociated
38341 tree. Statements tree_code is passed in OPC. Arguments type
38342 is passed in MODE.
38344 Currently parallel reassociation is enabled for Atom
38345 processors only and we set reassociation width to be 2
38346 because Atom may issue up to 2 instructions per cycle.
38348 Return value should be fixed if parallel reassociation is
38349 enabled for other processors. */
38351 static int
38352 ix86_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
38353 enum machine_mode mode)
38355 int res = 1;
38357 if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
38358 res = 2;
38359 else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
38360 res = 2;
38362 return res;
38365 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
38366 place emms and femms instructions. */
38368 static enum machine_mode
38369 ix86_preferred_simd_mode (enum machine_mode mode)
38371 if (!TARGET_SSE)
38372 return word_mode;
38374 switch (mode)
38376 case QImode:
38377 return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
38378 case HImode:
38379 return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
38380 case SImode:
38381 return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
38382 case DImode:
38383 return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;
38385 case SFmode:
38386 if (TARGET_AVX && !TARGET_PREFER_AVX128)
38387 return V8SFmode;
38388 else
38389 return V4SFmode;
38391 case DFmode:
38392 if (!TARGET_VECTORIZE_DOUBLE)
38393 return word_mode;
38394 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
38395 return V4DFmode;
38396 else if (TARGET_SSE2)
38397 return V2DFmode;
38398 return word_mode;
38400 default:
38401 return word_mode;
38405 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
38406 vectors. */
38408 static unsigned int
38409 ix86_autovectorize_vector_sizes (void)
38411 return (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
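/* Note (illustrative, not part of the original source): the hook
   returns a bitmask of vector sizes in bytes, so 32 | 16 lets the
   vectorizer try 256-bit vectors and fall back to 128-bit ones,
   while 0 means only the mode from ix86_preferred_simd_mode is
   tried.  */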
38414 /* Initialize the GCC target structure. */
38415 #undef TARGET_RETURN_IN_MEMORY
38416 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
38418 #undef TARGET_LEGITIMIZE_ADDRESS
38419 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
38421 #undef TARGET_ATTRIBUTE_TABLE
38422 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
38423 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
38424 # undef TARGET_MERGE_DECL_ATTRIBUTES
38425 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
38426 #endif
38428 #undef TARGET_COMP_TYPE_ATTRIBUTES
38429 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
38431 #undef TARGET_INIT_BUILTINS
38432 #define TARGET_INIT_BUILTINS ix86_init_builtins
38433 #undef TARGET_BUILTIN_DECL
38434 #define TARGET_BUILTIN_DECL ix86_builtin_decl
38435 #undef TARGET_EXPAND_BUILTIN
38436 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
38438 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
38439 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
38440 ix86_builtin_vectorized_function
38442 #undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
38443 #define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
38445 #undef TARGET_VECTORIZE_BUILTIN_TM_STORE
38446 #define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
38448 #undef TARGET_VECTORIZE_BUILTIN_GATHER
38449 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
38451 #undef TARGET_BUILTIN_RECIPROCAL
38452 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
38454 #undef TARGET_ASM_FUNCTION_EPILOGUE
38455 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
38457 #undef TARGET_ENCODE_SECTION_INFO
38458 #ifndef SUBTARGET_ENCODE_SECTION_INFO
38459 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
38460 #else
38461 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
38462 #endif
38464 #undef TARGET_ASM_OPEN_PAREN
38465 #define TARGET_ASM_OPEN_PAREN ""
38466 #undef TARGET_ASM_CLOSE_PAREN
38467 #define TARGET_ASM_CLOSE_PAREN ""
38469 #undef TARGET_ASM_BYTE_OP
38470 #define TARGET_ASM_BYTE_OP ASM_BYTE
38472 #undef TARGET_ASM_ALIGNED_HI_OP
38473 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
38474 #undef TARGET_ASM_ALIGNED_SI_OP
38475 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
38476 #ifdef ASM_QUAD
38477 #undef TARGET_ASM_ALIGNED_DI_OP
38478 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
38479 #endif
38481 #undef TARGET_PROFILE_BEFORE_PROLOGUE
38482 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
38484 #undef TARGET_ASM_UNALIGNED_HI_OP
38485 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
38486 #undef TARGET_ASM_UNALIGNED_SI_OP
38487 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
38488 #undef TARGET_ASM_UNALIGNED_DI_OP
38489 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
38491 #undef TARGET_PRINT_OPERAND
38492 #define TARGET_PRINT_OPERAND ix86_print_operand
38493 #undef TARGET_PRINT_OPERAND_ADDRESS
38494 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
38495 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
38496 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
38497 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
38498 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
38500 #undef TARGET_SCHED_INIT_GLOBAL
38501 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
38502 #undef TARGET_SCHED_ADJUST_COST
38503 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
38504 #undef TARGET_SCHED_ISSUE_RATE
38505 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
38506 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
38507 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
38508 ia32_multipass_dfa_lookahead
38510 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
38511 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
38513 #ifdef HAVE_AS_TLS
38514 #undef TARGET_HAVE_TLS
38515 #define TARGET_HAVE_TLS true
38516 #endif
38517 #undef TARGET_CANNOT_FORCE_CONST_MEM
38518 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
38519 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
38520 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
38522 #undef TARGET_DELEGITIMIZE_ADDRESS
38523 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
38525 #undef TARGET_MS_BITFIELD_LAYOUT_P
38526 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
38528 #if TARGET_MACHO
38529 #undef TARGET_BINDS_LOCAL_P
38530 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
38531 #endif
38532 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
38533 #undef TARGET_BINDS_LOCAL_P
38534 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
38535 #endif
38537 #undef TARGET_ASM_OUTPUT_MI_THUNK
38538 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
38539 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
38540 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
38542 #undef TARGET_ASM_FILE_START
38543 #define TARGET_ASM_FILE_START x86_file_start
38545 #undef TARGET_OPTION_OVERRIDE
38546 #define TARGET_OPTION_OVERRIDE ix86_option_override
38548 #undef TARGET_REGISTER_MOVE_COST
38549 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
38550 #undef TARGET_MEMORY_MOVE_COST
38551 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
38552 #undef TARGET_RTX_COSTS
38553 #define TARGET_RTX_COSTS ix86_rtx_costs
38554 #undef TARGET_ADDRESS_COST
38555 #define TARGET_ADDRESS_COST ix86_address_cost
38557 #undef TARGET_FIXED_CONDITION_CODE_REGS
38558 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
38559 #undef TARGET_CC_MODES_COMPATIBLE
38560 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
38562 #undef TARGET_MACHINE_DEPENDENT_REORG
38563 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
38565 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
38566 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
38568 #undef TARGET_BUILD_BUILTIN_VA_LIST
38569 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
38571 #undef TARGET_ENUM_VA_LIST_P
38572 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
38574 #undef TARGET_FN_ABI_VA_LIST
38575 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
38577 #undef TARGET_CANONICAL_VA_LIST_TYPE
38578 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
38580 #undef TARGET_EXPAND_BUILTIN_VA_START
38581 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
38583 #undef TARGET_MD_ASM_CLOBBERS
38584 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
38586 #undef TARGET_PROMOTE_PROTOTYPES
38587 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
38588 #undef TARGET_STRUCT_VALUE_RTX
38589 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
38590 #undef TARGET_SETUP_INCOMING_VARARGS
38591 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
38592 #undef TARGET_MUST_PASS_IN_STACK
38593 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
38594 #undef TARGET_FUNCTION_ARG_ADVANCE
38595 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
38596 #undef TARGET_FUNCTION_ARG
38597 #define TARGET_FUNCTION_ARG ix86_function_arg
38598 #undef TARGET_FUNCTION_ARG_BOUNDARY
38599 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
38600 #undef TARGET_PASS_BY_REFERENCE
38601 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
38602 #undef TARGET_INTERNAL_ARG_POINTER
38603 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
38604 #undef TARGET_UPDATE_STACK_BOUNDARY
38605 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
38606 #undef TARGET_GET_DRAP_RTX
38607 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
38608 #undef TARGET_STRICT_ARGUMENT_NAMING
38609 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
38610 #undef TARGET_STATIC_CHAIN
38611 #define TARGET_STATIC_CHAIN ix86_static_chain
38612 #undef TARGET_TRAMPOLINE_INIT
38613 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
38614 #undef TARGET_RETURN_POPS_ARGS
38615 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
38617 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
38618 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
38620 #undef TARGET_SCALAR_MODE_SUPPORTED_P
38621 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
38623 #undef TARGET_VECTOR_MODE_SUPPORTED_P
38624 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
38626 #undef TARGET_C_MODE_FOR_SUFFIX
38627 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
38629 #ifdef HAVE_AS_TLS
38630 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
38631 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
38632 #endif
38634 #ifdef SUBTARGET_INSERT_ATTRIBUTES
38635 #undef TARGET_INSERT_ATTRIBUTES
38636 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
38637 #endif
38639 #undef TARGET_MANGLE_TYPE
38640 #define TARGET_MANGLE_TYPE ix86_mangle_type
38643 #undef TARGET_STACK_PROTECT_FAIL
38644 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
38647 #undef TARGET_FUNCTION_VALUE
38648 #define TARGET_FUNCTION_VALUE ix86_function_value
38650 #undef TARGET_FUNCTION_VALUE_REGNO_P
38651 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
38653 #undef TARGET_PROMOTE_FUNCTION_MODE
38654 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
38656 #undef TARGET_SECONDARY_RELOAD
38657 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
38659 #undef TARGET_CLASS_MAX_NREGS
38660 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
38662 #undef TARGET_PREFERRED_RELOAD_CLASS
38663 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
38664 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
38665 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
38666 #undef TARGET_CLASS_LIKELY_SPILLED_P
38667 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
38669 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
38670 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
38671 ix86_builtin_vectorization_cost
38672 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
38673 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
38674 ix86_vectorize_vec_perm_const_ok
38675 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
38676 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
38677 ix86_preferred_simd_mode
38678 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
38679 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
38680 ix86_autovectorize_vector_sizes
38682 #undef TARGET_SET_CURRENT_FUNCTION
38683 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
38685 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
38686 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
38688 #undef TARGET_OPTION_SAVE
38689 #define TARGET_OPTION_SAVE ix86_function_specific_save
38691 #undef TARGET_OPTION_RESTORE
38692 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
38694 #undef TARGET_OPTION_PRINT
38695 #define TARGET_OPTION_PRINT ix86_function_specific_print
38697 #undef TARGET_CAN_INLINE_P
38698 #define TARGET_CAN_INLINE_P ix86_can_inline_p
38700 #undef TARGET_EXPAND_TO_RTL_HOOK
38701 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
38703 #undef TARGET_LEGITIMATE_ADDRESS_P
38704 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
38706 #undef TARGET_LEGITIMATE_CONSTANT_P
38707 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
38709 #undef TARGET_FRAME_POINTER_REQUIRED
38710 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
38712 #undef TARGET_CAN_ELIMINATE
38713 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
38715 #undef TARGET_EXTRA_LIVE_ON_ENTRY
38716 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
38718 #undef TARGET_ASM_CODE_END
38719 #define TARGET_ASM_CODE_END ix86_code_end
38721 #undef TARGET_CONDITIONAL_REGISTER_USAGE
38722 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
38724 #if TARGET_MACHO
38725 #undef TARGET_INIT_LIBFUNCS
38726 #define TARGET_INIT_LIBFUNCS darwin_rename_builtins
38727 #endif
38729 struct gcc_target targetm = TARGET_INITIALIZER;
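/* Note (illustrative, not part of the original source): each
   #undef/#define pair above overrides one entry of the default
   TARGET_INITIALIZER, so target-independent code reaches the i386
   implementations through the targetm vector, e.g. a call such as
   targetm.rtx_costs (...) ends up in ix86_rtx_costs.  */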
38731 #include "gt-i386.h"