1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
3 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
4 Free Software Foundation, Inc.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option) any later version.
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
31 #include "insn-config.h"
32 #include "conditions.h"
34 #include "insn-codes.h"
35 #include "insn-attr.h"
42 #include "diagnostic-core.h"
44 #include "basic-block.h"
47 #include "target-def.h"
48 #include "common/common-target.h"
49 #include "langhooks.h"
54 #include "tm-constrs.h"
58 #include "sched-int.h"
62 #include "diagnostic.h"
64 enum upper_128bits_state
71 typedef struct block_info_def
73 /* State of the upper 128bits of AVX registers at exit. */
74 enum upper_128bits_state state;
75 /* TRUE if state of the upper 128bits of AVX registers is unchanged
78 /* TRUE if block has been processed. */
80 /* TRUE if block has been scanned. */
82 /* Previous state of the upper 128bits of AVX registers at entry. */
83 enum upper_128bits_state prev;
86 #define BLOCK_INFO(B) ((block_info) (B)->aux)
88 enum call_avx256_state
90 /* Callee returns 256bit AVX register. */
91 callee_return_avx256 = -1,
92 /* Callee returns and passes 256bit AVX register. */
93 callee_return_pass_avx256,
94 /* Callee passes 256bit AVX register. */
96 /* Callee doesn't return nor passe 256bit AVX register, or no
97 256bit AVX register in function return. */
99 /* vzeroupper intrinsic. */
103 /* Check if a 256bit AVX register is referenced in stores. */
106 check_avx256_stores (rtx dest, const_rtx set, void *data)
109 && VALID_AVX256_REG_MODE (GET_MODE (dest)))
110 || (GET_CODE (set) == SET
111 && REG_P (SET_SRC (set))
112 && VALID_AVX256_REG_MODE (GET_MODE (SET_SRC (set)))))
114 enum upper_128bits_state *state
115 = (enum upper_128bits_state *) data;
120 /* Helper function for move_or_delete_vzeroupper_1. Look for vzeroupper
121 in basic block BB. Delete it if upper 128bit AVX registers are
122 unused. If it isn't deleted, move it to just before a jump insn.
124 STATE is state of the upper 128bits of AVX registers at entry. */
127 move_or_delete_vzeroupper_2 (basic_block bb,
128 enum upper_128bits_state state)
131 rtx vzeroupper_insn = NULL_RTX;
136 if (BLOCK_INFO (bb)->unchanged)
139 fprintf (dump_file, " [bb %i] unchanged: upper 128bits: %d\n",
142 BLOCK_INFO (bb)->state = state;
146 if (BLOCK_INFO (bb)->scanned && BLOCK_INFO (bb)->prev == state)
149 fprintf (dump_file, " [bb %i] scanned: upper 128bits: %d\n",
150 bb->index, BLOCK_INFO (bb)->state);
154 BLOCK_INFO (bb)->prev = state;
157 fprintf (dump_file, " [bb %i] entry: upper 128bits: %d\n",
162 /* BB_END changes when it is deleted. */
163 bb_end = BB_END (bb);
165 while (insn != bb_end)
167 insn = NEXT_INSN (insn);
169 if (!NONDEBUG_INSN_P (insn))
172 /* Move vzeroupper before jump/call. */
173 if (JUMP_P (insn) || CALL_P (insn))
175 if (!vzeroupper_insn)
178 if (PREV_INSN (insn) != vzeroupper_insn)
182 fprintf (dump_file, "Move vzeroupper after:\n");
183 print_rtl_single (dump_file, PREV_INSN (insn));
184 fprintf (dump_file, "before:\n");
185 print_rtl_single (dump_file, insn);
187 reorder_insns_nobb (vzeroupper_insn, vzeroupper_insn,
190 vzeroupper_insn = NULL_RTX;
194 pat = PATTERN (insn);
196 /* Check insn for vzeroupper intrinsic. */
197 if (GET_CODE (pat) == UNSPEC_VOLATILE
198 && XINT (pat, 1) == UNSPECV_VZEROUPPER)
202 /* Found vzeroupper intrinsic. */
203 fprintf (dump_file, "Found vzeroupper:\n");
204 print_rtl_single (dump_file, insn);
209 /* Check insn for vzeroall intrinsic. */
210 if (GET_CODE (pat) == PARALLEL
211 && GET_CODE (XVECEXP (pat, 0, 0)) == UNSPEC_VOLATILE
212 && XINT (XVECEXP (pat, 0, 0), 1) == UNSPECV_VZEROALL)
217 /* Delete pending vzeroupper insertion. */
220 delete_insn (vzeroupper_insn);
221 vzeroupper_insn = NULL_RTX;
224 else if (state != used)
226 note_stores (pat, check_avx256_stores, &state);
233 /* Process vzeroupper intrinsic. */
234 avx256 = INTVAL (XVECEXP (pat, 0, 0));
238 /* Since the upper 128bits are cleared, callee must not pass
239 256bit AVX register. We only need to check if callee
240 returns 256bit AVX register. */
241 if (avx256 == callee_return_avx256)
247 /* Remove unnecessary vzeroupper since upper 128bits are
251 fprintf (dump_file, "Delete redundant vzeroupper:\n");
252 print_rtl_single (dump_file, insn);
258 /* Set state to UNUSED if callee doesn't return 256bit AVX
260 if (avx256 != callee_return_pass_avx256)
263 if (avx256 == callee_return_pass_avx256
264 || avx256 == callee_pass_avx256)
266 /* Must remove vzeroupper since callee passes in 256bit
270 fprintf (dump_file, "Delete callee pass vzeroupper:\n");
271 print_rtl_single (dump_file, insn);
277 vzeroupper_insn = insn;
283 BLOCK_INFO (bb)->state = state;
284 BLOCK_INFO (bb)->unchanged = unchanged;
285 BLOCK_INFO (bb)->scanned = true;
288 fprintf (dump_file, " [bb %i] exit: %s: upper 128bits: %d\n",
289 bb->index, unchanged ? "unchanged" : "changed",
293 /* Helper function for move_or_delete_vzeroupper. Process vzeroupper
294 in BLOCK and check its predecessor blocks. Treat UNKNOWN state
295 as USED if UNKNOWN_IS_UNUSED is true. Return TRUE if the exit
299 move_or_delete_vzeroupper_1 (basic_block block, bool unknown_is_unused)
303 enum upper_128bits_state state, old_state, new_state;
307 fprintf (dump_file, " Process [bb %i]: status: %d\n",
308 block->index, BLOCK_INFO (block)->processed);
310 if (BLOCK_INFO (block)->processed)
315 /* Check all predecessor edges of this block. */
316 seen_unknown = false;
317 FOR_EACH_EDGE (e, ei, block->preds)
321 switch (BLOCK_INFO (e->src)->state)
324 if (!unknown_is_unused)
338 old_state = BLOCK_INFO (block)->state;
339 move_or_delete_vzeroupper_2 (block, state);
340 new_state = BLOCK_INFO (block)->state;
342 if (state != unknown || new_state == used)
343 BLOCK_INFO (block)->processed = true;
345 /* Need to rescan if the upper 128bits of AVX registers are changed
347 if (new_state != old_state)
349 if (new_state == used)
350 cfun->machine->rescan_vzeroupper_p = 1;
357 /* Go through the instruction stream looking for vzeroupper. Delete
358 it if upper 128bit AVX registers are unused. If it isn't deleted,
359 move it to just before a jump insn. */
362 move_or_delete_vzeroupper (void)
367 fibheap_t worklist, pending, fibheap_swap;
368 sbitmap visited, in_worklist, in_pending, sbitmap_swap;
373 /* Set up block info for each basic block. */
374 alloc_aux_for_blocks (sizeof (struct block_info_def));
376 /* Process outgoing edges of entry point. */
378 fprintf (dump_file, "Process outgoing edges of entry point\n");
380 FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR->succs)
382 move_or_delete_vzeroupper_2 (e->dest,
383 cfun->machine->caller_pass_avx256_p
385 BLOCK_INFO (e->dest)->processed = true;
388 /* Compute reverse completion order of depth first search of the CFG
389 so that the data-flow runs faster. */
390 rc_order = XNEWVEC (int, n_basic_blocks - NUM_FIXED_BLOCKS);
391 bb_order = XNEWVEC (int, last_basic_block);
392 pre_and_rev_post_order_compute (NULL, rc_order, false);
393 for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
394 bb_order[rc_order[i]] = i;
397 worklist = fibheap_new ();
398 pending = fibheap_new ();
399 visited = sbitmap_alloc (last_basic_block);
400 in_worklist = sbitmap_alloc (last_basic_block);
401 in_pending = sbitmap_alloc (last_basic_block);
402 sbitmap_zero (in_worklist);
404 /* Don't check outgoing edges of entry point. */
405 sbitmap_ones (in_pending);
407 if (BLOCK_INFO (bb)->processed)
408 RESET_BIT (in_pending, bb->index);
411 move_or_delete_vzeroupper_1 (bb, false);
412 fibheap_insert (pending, bb_order[bb->index], bb);
416 fprintf (dump_file, "Check remaining basic blocks\n");
418 while (!fibheap_empty (pending))
420 fibheap_swap = pending;
422 worklist = fibheap_swap;
423 sbitmap_swap = in_pending;
424 in_pending = in_worklist;
425 in_worklist = sbitmap_swap;
427 sbitmap_zero (visited);
429 cfun->machine->rescan_vzeroupper_p = 0;
431 while (!fibheap_empty (worklist))
433 bb = (basic_block) fibheap_extract_min (worklist);
434 RESET_BIT (in_worklist, bb->index);
435 gcc_assert (!TEST_BIT (visited, bb->index));
436 if (!TEST_BIT (visited, bb->index))
440 SET_BIT (visited, bb->index);
442 if (move_or_delete_vzeroupper_1 (bb, false))
443 FOR_EACH_EDGE (e, ei, bb->succs)
445 if (e->dest == EXIT_BLOCK_PTR
446 || BLOCK_INFO (e->dest)->processed)
449 if (TEST_BIT (visited, e->dest->index))
451 if (!TEST_BIT (in_pending, e->dest->index))
453 /* Send E->DEST to next round. */
454 SET_BIT (in_pending, e->dest->index);
455 fibheap_insert (pending,
456 bb_order[e->dest->index],
460 else if (!TEST_BIT (in_worklist, e->dest->index))
462 /* Add E->DEST to current round. */
463 SET_BIT (in_worklist, e->dest->index);
464 fibheap_insert (worklist, bb_order[e->dest->index],
471 if (!cfun->machine->rescan_vzeroupper_p)
476 fibheap_delete (worklist);
477 fibheap_delete (pending);
478 sbitmap_free (visited);
479 sbitmap_free (in_worklist);
480 sbitmap_free (in_pending);
483 fprintf (dump_file, "Process remaining basic blocks\n");
486 move_or_delete_vzeroupper_1 (bb, true);
488 free_aux_for_blocks ();
491 static rtx legitimize_dllimport_symbol (rtx, bool);
493 #ifndef CHECK_STACK_LIMIT
494 #define CHECK_STACK_LIMIT (-1)
497 /* Return index of given mode in mult and division cost tables. */
498 #define MODE_INDEX(mode) \
499 ((mode) == QImode ? 0 \
500 : (mode) == HImode ? 1 \
501 : (mode) == SImode ? 2 \
502 : (mode) == DImode ? 3 \
505 /* Processor costs (relative to an add) */
506 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
507 #define COSTS_N_BYTES(N) ((N) * 2)
509 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
512 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
513 COSTS_N_BYTES (2), /* cost of an add instruction */
514 COSTS_N_BYTES (3), /* cost of a lea instruction */
515 COSTS_N_BYTES (2), /* variable shift costs */
516 COSTS_N_BYTES (3), /* constant shift costs */
517 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
518 COSTS_N_BYTES (3), /* HI */
519 COSTS_N_BYTES (3), /* SI */
520 COSTS_N_BYTES (3), /* DI */
521 COSTS_N_BYTES (5)}, /* other */
522 0, /* cost of multiply per each bit set */
523 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
524 COSTS_N_BYTES (3), /* HI */
525 COSTS_N_BYTES (3), /* SI */
526 COSTS_N_BYTES (3), /* DI */
527 COSTS_N_BYTES (5)}, /* other */
528 COSTS_N_BYTES (3), /* cost of movsx */
529 COSTS_N_BYTES (3), /* cost of movzx */
530 0, /* "large" insn */
532 2, /* cost for loading QImode using movzbl */
533 {2, 2, 2}, /* cost of loading integer registers
534 in QImode, HImode and SImode.
535 Relative to reg-reg move (2). */
536 {2, 2, 2}, /* cost of storing integer registers */
537 2, /* cost of reg,reg fld/fst */
538 {2, 2, 2}, /* cost of loading fp registers
539 in SFmode, DFmode and XFmode */
540 {2, 2, 2}, /* cost of storing fp registers
541 in SFmode, DFmode and XFmode */
542 3, /* cost of moving MMX register */
543 {3, 3}, /* cost of loading MMX registers
544 in SImode and DImode */
545 {3, 3}, /* cost of storing MMX registers
546 in SImode and DImode */
547 3, /* cost of moving SSE register */
548 {3, 3, 3}, /* cost of loading SSE registers
549 in SImode, DImode and TImode */
550 {3, 3, 3}, /* cost of storing SSE registers
551 in SImode, DImode and TImode */
552 3, /* MMX or SSE register to integer */
553 0, /* size of l1 cache */
554 0, /* size of l2 cache */
555 0, /* size of prefetch block */
556 0, /* number of parallel prefetches */
558 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
559 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
560 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
561 COSTS_N_BYTES (2), /* cost of FABS instruction. */
562 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
563 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
564 {{{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
565 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
566 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
567 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}}},
568 {{{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
569 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
570 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
571 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}}},
572 1, /* scalar_stmt_cost. */
573 1, /* scalar load_cost. */
574 1, /* scalar_store_cost. */
575 1, /* vec_stmt_cost. */
576 1, /* vec_to_scalar_cost. */
577 1, /* scalar_to_vec_cost. */
578 1, /* vec_align_load_cost. */
579 1, /* vec_unalign_load_cost. */
580 1, /* vec_store_cost. */
581 1, /* cond_taken_branch_cost. */
582 1, /* cond_not_taken_branch_cost. */
585 /* Processor costs (relative to an add) */
587 struct processor_costs i386_cost = { /* 386 specific costs */
588 COSTS_N_INSNS (1), /* cost of an add instruction */
589 COSTS_N_INSNS (1), /* cost of a lea instruction */
590 COSTS_N_INSNS (3), /* variable shift costs */
591 COSTS_N_INSNS (2), /* constant shift costs */
592 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
593 COSTS_N_INSNS (6), /* HI */
594 COSTS_N_INSNS (6), /* SI */
595 COSTS_N_INSNS (6), /* DI */
596 COSTS_N_INSNS (6)}, /* other */
597 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
598 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
599 COSTS_N_INSNS (23), /* HI */
600 COSTS_N_INSNS (23), /* SI */
601 COSTS_N_INSNS (23), /* DI */
602 COSTS_N_INSNS (23)}, /* other */
603 COSTS_N_INSNS (3), /* cost of movsx */
604 COSTS_N_INSNS (2), /* cost of movzx */
605 15, /* "large" insn */
607 4, /* cost for loading QImode using movzbl */
608 {2, 4, 2}, /* cost of loading integer registers
609 in QImode, HImode and SImode.
610 Relative to reg-reg move (2). */
611 {2, 4, 2}, /* cost of storing integer registers */
612 2, /* cost of reg,reg fld/fst */
613 {8, 8, 8}, /* cost of loading fp registers
614 in SFmode, DFmode and XFmode */
615 {8, 8, 8}, /* cost of storing fp registers
616 in SFmode, DFmode and XFmode */
617 2, /* cost of moving MMX register */
618 {4, 8}, /* cost of loading MMX registers
619 in SImode and DImode */
620 {4, 8}, /* cost of storing MMX registers
621 in SImode and DImode */
622 2, /* cost of moving SSE register */
623 {4, 8, 16}, /* cost of loading SSE registers
624 in SImode, DImode and TImode */
625 {4, 8, 16}, /* cost of storing SSE registers
626 in SImode, DImode and TImode */
627 3, /* MMX or SSE register to integer */
628 0, /* size of l1 cache */
629 0, /* size of l2 cache */
630 0, /* size of prefetch block */
631 0, /* number of parallel prefetches */
633 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
634 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
635 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
636 COSTS_N_INSNS (22), /* cost of FABS instruction. */
637 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
638 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
639 {{{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
640 DUMMY_STRINGOP_ALGS},
641 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
642 DUMMY_STRINGOP_ALGS}},
643 {{{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
644 DUMMY_STRINGOP_ALGS},
645 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
646 DUMMY_STRINGOP_ALGS}},
647 1, /* scalar_stmt_cost. */
648 1, /* scalar load_cost. */
649 1, /* scalar_store_cost. */
650 1, /* vec_stmt_cost. */
651 1, /* vec_to_scalar_cost. */
652 1, /* scalar_to_vec_cost. */
653 1, /* vec_align_load_cost. */
654 2, /* vec_unalign_load_cost. */
655 1, /* vec_store_cost. */
656 3, /* cond_taken_branch_cost. */
657 1, /* cond_not_taken_branch_cost. */
661 struct processor_costs i486_cost = { /* 486 specific costs */
662 COSTS_N_INSNS (1), /* cost of an add instruction */
663 COSTS_N_INSNS (1), /* cost of a lea instruction */
664 COSTS_N_INSNS (3), /* variable shift costs */
665 COSTS_N_INSNS (2), /* constant shift costs */
666 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
667 COSTS_N_INSNS (12), /* HI */
668 COSTS_N_INSNS (12), /* SI */
669 COSTS_N_INSNS (12), /* DI */
670 COSTS_N_INSNS (12)}, /* other */
671 1, /* cost of multiply per each bit set */
672 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
673 COSTS_N_INSNS (40), /* HI */
674 COSTS_N_INSNS (40), /* SI */
675 COSTS_N_INSNS (40), /* DI */
676 COSTS_N_INSNS (40)}, /* other */
677 COSTS_N_INSNS (3), /* cost of movsx */
678 COSTS_N_INSNS (2), /* cost of movzx */
679 15, /* "large" insn */
681 4, /* cost for loading QImode using movzbl */
682 {2, 4, 2}, /* cost of loading integer registers
683 in QImode, HImode and SImode.
684 Relative to reg-reg move (2). */
685 {2, 4, 2}, /* cost of storing integer registers */
686 2, /* cost of reg,reg fld/fst */
687 {8, 8, 8}, /* cost of loading fp registers
688 in SFmode, DFmode and XFmode */
689 {8, 8, 8}, /* cost of storing fp registers
690 in SFmode, DFmode and XFmode */
691 2, /* cost of moving MMX register */
692 {4, 8}, /* cost of loading MMX registers
693 in SImode and DImode */
694 {4, 8}, /* cost of storing MMX registers
695 in SImode and DImode */
696 2, /* cost of moving SSE register */
697 {4, 8, 16}, /* cost of loading SSE registers
698 in SImode, DImode and TImode */
699 {4, 8, 16}, /* cost of storing SSE registers
700 in SImode, DImode and TImode */
701 3, /* MMX or SSE register to integer */
702 4, /* size of l1 cache. 486 has 8kB cache
703 shared for code and data, so 4kB is
704 not really precise. */
705 4, /* size of l2 cache */
706 0, /* size of prefetch block */
707 0, /* number of parallel prefetches */
709 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
710 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
711 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
712 COSTS_N_INSNS (3), /* cost of FABS instruction. */
713 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
714 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
715 {{{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
716 DUMMY_STRINGOP_ALGS},
717 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
718 DUMMY_STRINGOP_ALGS}},
719 {{{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
720 DUMMY_STRINGOP_ALGS},
721 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
722 DUMMY_STRINGOP_ALGS}},
723 1, /* scalar_stmt_cost. */
724 1, /* scalar load_cost. */
725 1, /* scalar_store_cost. */
726 1, /* vec_stmt_cost. */
727 1, /* vec_to_scalar_cost. */
728 1, /* scalar_to_vec_cost. */
729 1, /* vec_align_load_cost. */
730 2, /* vec_unalign_load_cost. */
731 1, /* vec_store_cost. */
732 3, /* cond_taken_branch_cost. */
733 1, /* cond_not_taken_branch_cost. */
737 struct processor_costs pentium_cost = {
738 COSTS_N_INSNS (1), /* cost of an add instruction */
739 COSTS_N_INSNS (1), /* cost of a lea instruction */
740 COSTS_N_INSNS (4), /* variable shift costs */
741 COSTS_N_INSNS (1), /* constant shift costs */
742 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
743 COSTS_N_INSNS (11), /* HI */
744 COSTS_N_INSNS (11), /* SI */
745 COSTS_N_INSNS (11), /* DI */
746 COSTS_N_INSNS (11)}, /* other */
747 0, /* cost of multiply per each bit set */
748 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
749 COSTS_N_INSNS (25), /* HI */
750 COSTS_N_INSNS (25), /* SI */
751 COSTS_N_INSNS (25), /* DI */
752 COSTS_N_INSNS (25)}, /* other */
753 COSTS_N_INSNS (3), /* cost of movsx */
754 COSTS_N_INSNS (2), /* cost of movzx */
755 8, /* "large" insn */
757 6, /* cost for loading QImode using movzbl */
758 {2, 4, 2}, /* cost of loading integer registers
759 in QImode, HImode and SImode.
760 Relative to reg-reg move (2). */
761 {2, 4, 2}, /* cost of storing integer registers */
762 2, /* cost of reg,reg fld/fst */
763 {2, 2, 6}, /* cost of loading fp registers
764 in SFmode, DFmode and XFmode */
765 {4, 4, 6}, /* cost of storing fp registers
766 in SFmode, DFmode and XFmode */
767 8, /* cost of moving MMX register */
768 {8, 8}, /* cost of loading MMX registers
769 in SImode and DImode */
770 {8, 8}, /* cost of storing MMX registers
771 in SImode and DImode */
772 2, /* cost of moving SSE register */
773 {4, 8, 16}, /* cost of loading SSE registers
774 in SImode, DImode and TImode */
775 {4, 8, 16}, /* cost of storing SSE registers
776 in SImode, DImode and TImode */
777 3, /* MMX or SSE register to integer */
778 8, /* size of l1 cache. */
779 8, /* size of l2 cache */
780 0, /* size of prefetch block */
781 0, /* number of parallel prefetches */
783 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
784 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
785 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
786 COSTS_N_INSNS (1), /* cost of FABS instruction. */
787 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
788 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
789 {{{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
790 DUMMY_STRINGOP_ALGS},
791 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
792 DUMMY_STRINGOP_ALGS}},
793 {{{libcall, {{-1, rep_prefix_4_byte}}},
794 DUMMY_STRINGOP_ALGS},
795 {{libcall, {{-1, rep_prefix_4_byte}}},
796 DUMMY_STRINGOP_ALGS}},
797 1, /* scalar_stmt_cost. */
798 1, /* scalar load_cost. */
799 1, /* scalar_store_cost. */
800 1, /* vec_stmt_cost. */
801 1, /* vec_to_scalar_cost. */
802 1, /* scalar_to_vec_cost. */
803 1, /* vec_align_load_cost. */
804 2, /* vec_unalign_load_cost. */
805 1, /* vec_store_cost. */
806 3, /* cond_taken_branch_cost. */
807 1, /* cond_not_taken_branch_cost. */
811 struct processor_costs pentiumpro_cost = {
812 COSTS_N_INSNS (1), /* cost of an add instruction */
813 COSTS_N_INSNS (1), /* cost of a lea instruction */
814 COSTS_N_INSNS (1), /* variable shift costs */
815 COSTS_N_INSNS (1), /* constant shift costs */
816 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
817 COSTS_N_INSNS (4), /* HI */
818 COSTS_N_INSNS (4), /* SI */
819 COSTS_N_INSNS (4), /* DI */
820 COSTS_N_INSNS (4)}, /* other */
821 0, /* cost of multiply per each bit set */
822 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
823 COSTS_N_INSNS (17), /* HI */
824 COSTS_N_INSNS (17), /* SI */
825 COSTS_N_INSNS (17), /* DI */
826 COSTS_N_INSNS (17)}, /* other */
827 COSTS_N_INSNS (1), /* cost of movsx */
828 COSTS_N_INSNS (1), /* cost of movzx */
829 8, /* "large" insn */
831 2, /* cost for loading QImode using movzbl */
832 {4, 4, 4}, /* cost of loading integer registers
833 in QImode, HImode and SImode.
834 Relative to reg-reg move (2). */
835 {2, 2, 2}, /* cost of storing integer registers */
836 2, /* cost of reg,reg fld/fst */
837 {2, 2, 6}, /* cost of loading fp registers
838 in SFmode, DFmode and XFmode */
839 {4, 4, 6}, /* cost of storing fp registers
840 in SFmode, DFmode and XFmode */
841 2, /* cost of moving MMX register */
842 {2, 2}, /* cost of loading MMX registers
843 in SImode and DImode */
844 {2, 2}, /* cost of storing MMX registers
845 in SImode and DImode */
846 2, /* cost of moving SSE register */
847 {2, 2, 8}, /* cost of loading SSE registers
848 in SImode, DImode and TImode */
849 {2, 2, 8}, /* cost of storing SSE registers
850 in SImode, DImode and TImode */
851 3, /* MMX or SSE register to integer */
852 8, /* size of l1 cache. */
853 256, /* size of l2 cache */
854 32, /* size of prefetch block */
855 6, /* number of parallel prefetches */
857 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
858 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
859 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
860 COSTS_N_INSNS (2), /* cost of FABS instruction. */
861 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
862 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
863 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
864 (we ensure the alignment). For small blocks inline loop is still a
865 noticeable win, for bigger blocks either rep movsl or rep movsb is
866 way to go. Rep movsb has apparently more expensive startup time in CPU,
867 but after 4K the difference is down in the noise. */
868 {{{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
869 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
870 DUMMY_STRINGOP_ALGS},
871 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
872 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
873 DUMMY_STRINGOP_ALGS}},
874 {{{rep_prefix_4_byte, {{1024, unrolled_loop},
875 {8192, rep_prefix_4_byte}, {-1, libcall}}},
876 DUMMY_STRINGOP_ALGS},
877 {{rep_prefix_4_byte, {{1024, unrolled_loop},
878 {8192, rep_prefix_4_byte}, {-1, libcall}}},
879 DUMMY_STRINGOP_ALGS}},
880 1, /* scalar_stmt_cost. */
881 1, /* scalar load_cost. */
882 1, /* scalar_store_cost. */
883 1, /* vec_stmt_cost. */
884 1, /* vec_to_scalar_cost. */
885 1, /* scalar_to_vec_cost. */
886 1, /* vec_align_load_cost. */
887 2, /* vec_unalign_load_cost. */
888 1, /* vec_store_cost. */
889 3, /* cond_taken_branch_cost. */
890 1, /* cond_not_taken_branch_cost. */
894 struct processor_costs geode_cost = {
895 COSTS_N_INSNS (1), /* cost of an add instruction */
896 COSTS_N_INSNS (1), /* cost of a lea instruction */
897 COSTS_N_INSNS (2), /* variable shift costs */
898 COSTS_N_INSNS (1), /* constant shift costs */
899 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
900 COSTS_N_INSNS (4), /* HI */
901 COSTS_N_INSNS (7), /* SI */
902 COSTS_N_INSNS (7), /* DI */
903 COSTS_N_INSNS (7)}, /* other */
904 0, /* cost of multiply per each bit set */
905 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
906 COSTS_N_INSNS (23), /* HI */
907 COSTS_N_INSNS (39), /* SI */
908 COSTS_N_INSNS (39), /* DI */
909 COSTS_N_INSNS (39)}, /* other */
910 COSTS_N_INSNS (1), /* cost of movsx */
911 COSTS_N_INSNS (1), /* cost of movzx */
912 8, /* "large" insn */
914 1, /* cost for loading QImode using movzbl */
915 {1, 1, 1}, /* cost of loading integer registers
916 in QImode, HImode and SImode.
917 Relative to reg-reg move (2). */
918 {1, 1, 1}, /* cost of storing integer registers */
919 1, /* cost of reg,reg fld/fst */
920 {1, 1, 1}, /* cost of loading fp registers
921 in SFmode, DFmode and XFmode */
922 {4, 6, 6}, /* cost of storing fp registers
923 in SFmode, DFmode and XFmode */
925 1, /* cost of moving MMX register */
926 {1, 1}, /* cost of loading MMX registers
927 in SImode and DImode */
928 {1, 1}, /* cost of storing MMX registers
929 in SImode and DImode */
930 1, /* cost of moving SSE register */
931 {1, 1, 1}, /* cost of loading SSE registers
932 in SImode, DImode and TImode */
933 {1, 1, 1}, /* cost of storing SSE registers
934 in SImode, DImode and TImode */
935 1, /* MMX or SSE register to integer */
936 64, /* size of l1 cache. */
937 128, /* size of l2 cache. */
938 32, /* size of prefetch block */
939 1, /* number of parallel prefetches */
941 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
942 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
943 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
944 COSTS_N_INSNS (1), /* cost of FABS instruction. */
945 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
946 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
947 {{{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
948 DUMMY_STRINGOP_ALGS},
949 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
950 DUMMY_STRINGOP_ALGS}},
951 {{{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
952 DUMMY_STRINGOP_ALGS},
953 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
954 DUMMY_STRINGOP_ALGS}},
955 1, /* scalar_stmt_cost. */
956 1, /* scalar load_cost. */
957 1, /* scalar_store_cost. */
958 1, /* vec_stmt_cost. */
959 1, /* vec_to_scalar_cost. */
960 1, /* scalar_to_vec_cost. */
961 1, /* vec_align_load_cost. */
962 2, /* vec_unalign_load_cost. */
963 1, /* vec_store_cost. */
964 3, /* cond_taken_branch_cost. */
965 1, /* cond_not_taken_branch_cost. */
969 struct processor_costs k6_cost = {
970 COSTS_N_INSNS (1), /* cost of an add instruction */
971 COSTS_N_INSNS (2), /* cost of a lea instruction */
972 COSTS_N_INSNS (1), /* variable shift costs */
973 COSTS_N_INSNS (1), /* constant shift costs */
974 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
975 COSTS_N_INSNS (3), /* HI */
976 COSTS_N_INSNS (3), /* SI */
977 COSTS_N_INSNS (3), /* DI */
978 COSTS_N_INSNS (3)}, /* other */
979 0, /* cost of multiply per each bit set */
980 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
981 COSTS_N_INSNS (18), /* HI */
982 COSTS_N_INSNS (18), /* SI */
983 COSTS_N_INSNS (18), /* DI */
984 COSTS_N_INSNS (18)}, /* other */
985 COSTS_N_INSNS (2), /* cost of movsx */
986 COSTS_N_INSNS (2), /* cost of movzx */
987 8, /* "large" insn */
989 3, /* cost for loading QImode using movzbl */
990 {4, 5, 4}, /* cost of loading integer registers
991 in QImode, HImode and SImode.
992 Relative to reg-reg move (2). */
993 {2, 3, 2}, /* cost of storing integer registers */
994 4, /* cost of reg,reg fld/fst */
995 {6, 6, 6}, /* cost of loading fp registers
996 in SFmode, DFmode and XFmode */
997 {4, 4, 4}, /* cost of storing fp registers
998 in SFmode, DFmode and XFmode */
999 2, /* cost of moving MMX register */
1000 {2, 2}, /* cost of loading MMX registers
1001 in SImode and DImode */
1002 {2, 2}, /* cost of storing MMX registers
1003 in SImode and DImode */
1004 2, /* cost of moving SSE register */
1005 {2, 2, 8}, /* cost of loading SSE registers
1006 in SImode, DImode and TImode */
1007 {2, 2, 8}, /* cost of storing SSE registers
1008 in SImode, DImode and TImode */
1009 6, /* MMX or SSE register to integer */
1010 32, /* size of l1 cache. */
1011 32, /* size of l2 cache. Some models
1012 have integrated l2 cache, but
1013 optimizing for k6 is not important
1014 enough to worry about that. */
1015 32, /* size of prefetch block */
1016 1, /* number of parallel prefetches */
1017 1, /* Branch cost */
1018 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
1019 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
1020 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
1021 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1022 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1023 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
1024 {{{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
1025 DUMMY_STRINGOP_ALGS},
1026 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
1027 DUMMY_STRINGOP_ALGS}},
1028 {{{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
1029 DUMMY_STRINGOP_ALGS},
1030 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
1031 DUMMY_STRINGOP_ALGS}},
1032 1, /* scalar_stmt_cost. */
1033 1, /* scalar load_cost. */
1034 1, /* scalar_store_cost. */
1035 1, /* vec_stmt_cost. */
1036 1, /* vec_to_scalar_cost. */
1037 1, /* scalar_to_vec_cost. */
1038 1, /* vec_align_load_cost. */
1039 2, /* vec_unalign_load_cost. */
1040 1, /* vec_store_cost. */
1041 3, /* cond_taken_branch_cost. */
1042 1, /* cond_not_taken_branch_cost. */
/* Cost model for the AMD Athlon.  (review) This listing has the original
   file's line numbers fused into each line and has dropped several lines,
   including the leading "static const" qualifier and the closing "};" —
   verify against the complete i386.c.  */
1046 struct processor_costs athlon_cost = {
1047 COSTS_N_INSNS (1), /* cost of an add instruction */
1048 COSTS_N_INSNS (2), /* cost of a lea instruction */
1049 COSTS_N_INSNS (1), /* variable shift costs */
1050 COSTS_N_INSNS (1), /* constant shift costs */
1051 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
1052 COSTS_N_INSNS (5), /* HI */
1053 COSTS_N_INSNS (5), /* SI */
1054 COSTS_N_INSNS (5), /* DI */
1055 COSTS_N_INSNS (5)}, /* other */
1056 0, /* cost of multiply per each bit set */
1057 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1058 COSTS_N_INSNS (26), /* HI */
1059 COSTS_N_INSNS (42), /* SI */
1060 COSTS_N_INSNS (74), /* DI */
1061 COSTS_N_INSNS (74)}, /* other */
1062 COSTS_N_INSNS (1), /* cost of movsx */
1063 COSTS_N_INSNS (1), /* cost of movzx */
1064 8, /* "large" insn */
/* (review) The MOVE_RATIO initializer (original line 1065) is missing
   from this listing.  */
1066 4, /* cost for loading QImode using movzbl */
1067 {3, 4, 3}, /* cost of loading integer registers
1068 in QImode, HImode and SImode.
1069 Relative to reg-reg move (2). */
1070 {3, 4, 3}, /* cost of storing integer registers */
1071 4, /* cost of reg,reg fld/fst */
1072 {4, 4, 12}, /* cost of loading fp registers
1073 in SFmode, DFmode and XFmode */
1074 {6, 6, 8}, /* cost of storing fp registers
1075 in SFmode, DFmode and XFmode */
1076 2, /* cost of moving MMX register */
1077 {4, 4}, /* cost of loading MMX registers
1078 in SImode and DImode */
1079 {4, 4}, /* cost of storing MMX registers
1080 in SImode and DImode */
1081 2, /* cost of moving SSE register */
1082 {4, 4, 6}, /* cost of loading SSE registers
1083 in SImode, DImode and TImode */
1084 {4, 4, 5}, /* cost of storing SSE registers
1085 in SImode, DImode and TImode */
1086 5, /* MMX or SSE register to integer */
1087 64, /* size of l1 cache. */
1088 256, /* size of l2 cache. */
1089 64, /* size of prefetch block */
1090 6, /* number of parallel prefetches */
1091 5, /* Branch cost */
1092 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1093 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1094 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
1095 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1096 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1097 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1098 /* For some reason, Athlon deals better with REP prefix (relative to loops)
1099 compared to K8. Alignment becomes important after 8 bytes for memcpy and
1100 128 bytes for memset. */
1101 {{{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
1102 DUMMY_STRINGOP_ALGS},
1103 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
1104 DUMMY_STRINGOP_ALGS}},
1105 {{{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
1106 DUMMY_STRINGOP_ALGS},
1107 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
1108 DUMMY_STRINGOP_ALGS}},
1109 1, /* scalar_stmt_cost. */
1110 1, /* scalar load_cost. */
1111 1, /* scalar_store_cost. */
1112 1, /* vec_stmt_cost. */
1113 1, /* vec_to_scalar_cost. */
1114 1, /* scalar_to_vec_cost. */
1115 1, /* vec_align_load_cost. */
1116 2, /* vec_unalign_load_cost. */
1117 1, /* vec_store_cost. */
1118 3, /* cond_taken_branch_cost. */
1119 1, /* cond_not_taken_branch_cost. */
/* Cost model for the AMD K8.  (review) This listing has the original
   file's line numbers fused into each line and has dropped several lines,
   including the leading "static const" qualifier and the closing "};" —
   verify against the complete i386.c.  */
1123 struct processor_costs k8_cost = {
1124 COSTS_N_INSNS (1), /* cost of an add instruction */
1125 COSTS_N_INSNS (2), /* cost of a lea instruction */
1126 COSTS_N_INSNS (1), /* variable shift costs */
1127 COSTS_N_INSNS (1), /* constant shift costs */
1128 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1129 COSTS_N_INSNS (4), /* HI */
1130 COSTS_N_INSNS (3), /* SI */
1131 COSTS_N_INSNS (4), /* DI */
1132 COSTS_N_INSNS (5)}, /* other */
1133 0, /* cost of multiply per each bit set */
1134 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1135 COSTS_N_INSNS (26), /* HI */
1136 COSTS_N_INSNS (42), /* SI */
1137 COSTS_N_INSNS (74), /* DI */
1138 COSTS_N_INSNS (74)}, /* other */
1139 COSTS_N_INSNS (1), /* cost of movsx */
1140 COSTS_N_INSNS (1), /* cost of movzx */
1141 8, /* "large" insn */
/* (review) The MOVE_RATIO initializer (original line 1142) is missing
   from this listing.  */
1143 4, /* cost for loading QImode using movzbl */
1144 {3, 4, 3}, /* cost of loading integer registers
1145 in QImode, HImode and SImode.
1146 Relative to reg-reg move (2). */
1147 {3, 4, 3}, /* cost of storing integer registers */
1148 4, /* cost of reg,reg fld/fst */
1149 {4, 4, 12}, /* cost of loading fp registers
1150 in SFmode, DFmode and XFmode */
1151 {6, 6, 8}, /* cost of storing fp registers
1152 in SFmode, DFmode and XFmode */
1153 2, /* cost of moving MMX register */
1154 {3, 3}, /* cost of loading MMX registers
1155 in SImode and DImode */
1156 {4, 4}, /* cost of storing MMX registers
1157 in SImode and DImode */
1158 2, /* cost of moving SSE register */
1159 {4, 3, 6}, /* cost of loading SSE registers
1160 in SImode, DImode and TImode */
1161 {4, 4, 5}, /* cost of storing SSE registers
1162 in SImode, DImode and TImode */
1163 5, /* MMX or SSE register to integer */
1164 64, /* size of l1 cache. */
1165 512, /* size of l2 cache. */
1166 64, /* size of prefetch block */
1167 /* New AMD processors never drop prefetches; if they cannot be performed
1168 immediately, they are queued. We set number of simultaneous prefetches
1169 to a large constant to reflect this (it probably is not a good idea not
1170 to limit number of prefetches at all, as their execution also takes some
   time — comment terminator restored; it was lost in this listing.  */
1172 100, /* number of parallel prefetches */
1173 3, /* Branch cost */
1174 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1175 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1176 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1177 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1178 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1179 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1180 /* K8 has optimized REP instruction for medium sized blocks, but for very
1181 small blocks it is better to use loop. For large blocks, libcall can
1182 do nontemporary accesses and beat inline considerably. */
1183 {{{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
1184 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1185 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
1186 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}},
1187 {{{libcall, {{8, loop}, {24, unrolled_loop},
1188 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1189 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1190 {{libcall, {{8, loop}, {24, unrolled_loop},
1191 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1192 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}},
1193 4, /* scalar_stmt_cost. */
1194 2, /* scalar load_cost. */
1195 2, /* scalar_store_cost. */
1196 5, /* vec_stmt_cost. */
1197 0, /* vec_to_scalar_cost. */
1198 2, /* scalar_to_vec_cost. */
1199 2, /* vec_align_load_cost. */
1200 3, /* vec_unalign_load_cost. */
1201 3, /* vec_store_cost. */
1202 3, /* cond_taken_branch_cost. */
1203 2, /* cond_not_taken_branch_cost. */
/* Cost model for AMD Family 10h (Barcelona).  (review) This listing has
   the original file's line numbers fused into each line and has dropped
   several lines, including the leading "static const" qualifier and the
   closing "};" — verify against the complete i386.c.  */
1206 struct processor_costs amdfam10_cost = {
1207 COSTS_N_INSNS (1), /* cost of an add instruction */
1208 COSTS_N_INSNS (2), /* cost of a lea instruction */
1209 COSTS_N_INSNS (1), /* variable shift costs */
1210 COSTS_N_INSNS (1), /* constant shift costs */
1211 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1212 COSTS_N_INSNS (4), /* HI */
1213 COSTS_N_INSNS (3), /* SI */
1214 COSTS_N_INSNS (4), /* DI */
1215 COSTS_N_INSNS (5)}, /* other */
1216 0, /* cost of multiply per each bit set */
1217 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1218 COSTS_N_INSNS (35), /* HI */
1219 COSTS_N_INSNS (51), /* SI */
1220 COSTS_N_INSNS (83), /* DI */
1221 COSTS_N_INSNS (83)}, /* other */
1222 COSTS_N_INSNS (1), /* cost of movsx */
1223 COSTS_N_INSNS (1), /* cost of movzx */
1224 8, /* "large" insn */
/* (review) The MOVE_RATIO initializer (original line 1225) is missing
   from this listing.  */
1226 4, /* cost for loading QImode using movzbl */
1227 {3, 4, 3}, /* cost of loading integer registers
1228 in QImode, HImode and SImode.
1229 Relative to reg-reg move (2). */
1230 {3, 4, 3}, /* cost of storing integer registers */
1231 4, /* cost of reg,reg fld/fst */
1232 {4, 4, 12}, /* cost of loading fp registers
1233 in SFmode, DFmode and XFmode */
1234 {6, 6, 8}, /* cost of storing fp registers
1235 in SFmode, DFmode and XFmode */
1236 2, /* cost of moving MMX register */
1237 {3, 3}, /* cost of loading MMX registers
1238 in SImode and DImode */
1239 {4, 4}, /* cost of storing MMX registers
1240 in SImode and DImode */
1241 2, /* cost of moving SSE register */
1242 {4, 4, 3}, /* cost of loading SSE registers
1243 in SImode, DImode and TImode */
1244 {4, 4, 5}, /* cost of storing SSE registers
1245 in SImode, DImode and TImode */
1246 3, /* MMX or SSE register to integer */
/* (review) The following latency table lost its comment delimiters in
   this listing; re-wrapped as a comment:
1248 MOVD reg64, xmmreg Double FSTORE 4
1249 MOVD reg32, xmmreg Double FSTORE 4
1251 MOVD reg64, xmmreg Double FADD 3
1253 MOVD reg32, xmmreg Double FADD 3
   */
1255 64, /* size of l1 cache. */
1256 512, /* size of l2 cache. */
1257 64, /* size of prefetch block */
1258 /* New AMD processors never drop prefetches; if they cannot be performed
1259 immediately, they are queued. We set number of simultaneous prefetches
1260 to a large constant to reflect this (it probably is not a good idea not
1261 to limit number of prefetches at all, as their execution also takes some
   time — comment terminator restored; it was lost in this listing.  */
1263 100, /* number of parallel prefetches */
1264 2, /* Branch cost */
1265 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1266 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1267 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1268 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1269 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1270 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1272 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
1273 very small blocks it is better to use loop. For large blocks, libcall can
1274 do nontemporary accesses and beat inline considerably. */
1275 {{{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
1276 {libcall, {{16, loop}, {512, rep_prefix_8_byte}, {-1, libcall}}}},
1277 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
1278 {libcall, {{16, loop}, {512, rep_prefix_8_byte}, {-1, libcall}}}}},
1279 {{{libcall, {{8, loop}, {24, unrolled_loop},
1280 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1281 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1282 {{libcall, {{8, loop}, {24, unrolled_loop},
1283 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1284 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}},
1285 4, /* scalar_stmt_cost. */
1286 2, /* scalar load_cost. */
1287 2, /* scalar_store_cost. */
1288 6, /* vec_stmt_cost. */
1289 0, /* vec_to_scalar_cost. */
1290 2, /* scalar_to_vec_cost. */
1291 2, /* vec_align_load_cost. */
1292 2, /* vec_unalign_load_cost. */
1293 2, /* vec_store_cost. */
1294 2, /* cond_taken_branch_cost. */
1295 1, /* cond_not_taken_branch_cost. */
/* Cost model for AMD Bulldozer version 1.  (review) This listing has the
   original file's line numbers fused into each line and has dropped
   several lines, including the leading "static const" qualifier and the
   closing "};" — verify against the complete i386.c.  */
1298 struct processor_costs bdver1_cost = {
1299 COSTS_N_INSNS (1), /* cost of an add instruction */
1300 COSTS_N_INSNS (1), /* cost of a lea instruction */
1301 COSTS_N_INSNS (1), /* variable shift costs */
1302 COSTS_N_INSNS (1), /* constant shift costs */
1303 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1304 COSTS_N_INSNS (4), /* HI */
1305 COSTS_N_INSNS (4), /* SI */
1306 COSTS_N_INSNS (6), /* DI */
1307 COSTS_N_INSNS (6)}, /* other */
1308 0, /* cost of multiply per each bit set */
1309 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1310 COSTS_N_INSNS (35), /* HI */
1311 COSTS_N_INSNS (51), /* SI */
1312 COSTS_N_INSNS (83), /* DI */
1313 COSTS_N_INSNS (83)}, /* other */
1314 COSTS_N_INSNS (1), /* cost of movsx */
1315 COSTS_N_INSNS (1), /* cost of movzx */
1316 8, /* "large" insn */
/* (review) The MOVE_RATIO initializer (original line 1317) is missing
   from this listing.  */
1318 4, /* cost for loading QImode using movzbl */
1319 {5, 5, 4}, /* cost of loading integer registers
1320 in QImode, HImode and SImode.
1321 Relative to reg-reg move (2). */
1322 {4, 4, 4}, /* cost of storing integer registers */
1323 2, /* cost of reg,reg fld/fst */
1324 {5, 5, 12}, /* cost of loading fp registers
1325 in SFmode, DFmode and XFmode */
1326 {4, 4, 8}, /* cost of storing fp registers
1327 in SFmode, DFmode and XFmode */
1328 2, /* cost of moving MMX register */
1329 {4, 4}, /* cost of loading MMX registers
1330 in SImode and DImode */
1331 {4, 4}, /* cost of storing MMX registers
1332 in SImode and DImode */
1333 2, /* cost of moving SSE register */
1334 {4, 4, 4}, /* cost of loading SSE registers
1335 in SImode, DImode and TImode */
1336 {4, 4, 4}, /* cost of storing SSE registers
1337 in SImode, DImode and TImode */
1338 2, /* MMX or SSE register to integer */
/* (review) The following latency table lost its comment delimiters in
   this listing; re-wrapped as a comment:
1340 MOVD reg64, xmmreg Double FSTORE 4
1341 MOVD reg32, xmmreg Double FSTORE 4
1343 MOVD reg64, xmmreg Double FADD 3
1345 MOVD reg32, xmmreg Double FADD 3
   */
1347 16, /* size of l1 cache. */
1348 2048, /* size of l2 cache. */
1349 64, /* size of prefetch block */
1350 /* New AMD processors never drop prefetches; if they cannot be performed
1351 immediately, they are queued. We set number of simultaneous prefetches
1352 to a large constant to reflect this (it probably is not a good idea not
1353 to limit number of prefetches at all, as their execution also takes some
   time — comment terminator restored; it was lost in this listing.  */
1355 100, /* number of parallel prefetches */
1356 2, /* Branch cost */
1357 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1358 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1359 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1360 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1361 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1362 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1364 /* BDVER1 has optimized REP instruction for medium sized blocks, but for
1365 very small blocks it is better to use loop. For large blocks, libcall
1366 can do nontemporary accesses and beat inline considerably. */
1367 {{{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
1368 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1369 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
1370 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}},
1371 {{{libcall, {{8, loop}, {24, unrolled_loop},
1372 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1373 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1374 {{libcall, {{8, loop}, {24, unrolled_loop},
1375 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1376 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}},
1377 6, /* scalar_stmt_cost. */
1378 4, /* scalar load_cost. */
1379 4, /* scalar_store_cost. */
1380 6, /* vec_stmt_cost. */
1381 0, /* vec_to_scalar_cost. */
1382 2, /* scalar_to_vec_cost. */
1383 4, /* vec_align_load_cost. */
1384 4, /* vec_unalign_load_cost. */
1385 4, /* vec_store_cost. */
1386 2, /* cond_taken_branch_cost. */
1387 1, /* cond_not_taken_branch_cost. */
/* Cost model for AMD Bulldozer version 2 (Piledriver).  (review) This
   listing has the original file's line numbers fused into each line and
   has dropped several lines, including the leading "static const"
   qualifier and the closing "};" — verify against the complete i386.c.  */
1390 struct processor_costs bdver2_cost = {
1391 COSTS_N_INSNS (1), /* cost of an add instruction */
1392 COSTS_N_INSNS (1), /* cost of a lea instruction */
1393 COSTS_N_INSNS (1), /* variable shift costs */
1394 COSTS_N_INSNS (1), /* constant shift costs */
1395 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1396 COSTS_N_INSNS (4), /* HI */
1397 COSTS_N_INSNS (4), /* SI */
1398 COSTS_N_INSNS (6), /* DI */
1399 COSTS_N_INSNS (6)}, /* other */
1400 0, /* cost of multiply per each bit set */
1401 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1402 COSTS_N_INSNS (35), /* HI */
1403 COSTS_N_INSNS (51), /* SI */
1404 COSTS_N_INSNS (83), /* DI */
1405 COSTS_N_INSNS (83)}, /* other */
1406 COSTS_N_INSNS (1), /* cost of movsx */
1407 COSTS_N_INSNS (1), /* cost of movzx */
1408 8, /* "large" insn */
/* (review) The MOVE_RATIO initializer (original line 1409) is missing
   from this listing.  */
1410 4, /* cost for loading QImode using movzbl */
1411 {5, 5, 4}, /* cost of loading integer registers
1412 in QImode, HImode and SImode.
1413 Relative to reg-reg move (2). */
1414 {4, 4, 4}, /* cost of storing integer registers */
1415 2, /* cost of reg,reg fld/fst */
1416 {5, 5, 12}, /* cost of loading fp registers
1417 in SFmode, DFmode and XFmode */
1418 {4, 4, 8}, /* cost of storing fp registers
1419 in SFmode, DFmode and XFmode */
1420 2, /* cost of moving MMX register */
1421 {4, 4}, /* cost of loading MMX registers
1422 in SImode and DImode */
1423 {4, 4}, /* cost of storing MMX registers
1424 in SImode and DImode */
1425 2, /* cost of moving SSE register */
1426 {4, 4, 4}, /* cost of loading SSE registers
1427 in SImode, DImode and TImode */
1428 {4, 4, 4}, /* cost of storing SSE registers
1429 in SImode, DImode and TImode */
1430 2, /* MMX or SSE register to integer */
/* (review) The following latency table lost its comment delimiters in
   this listing; re-wrapped as a comment:
1432 MOVD reg64, xmmreg Double FSTORE 4
1433 MOVD reg32, xmmreg Double FSTORE 4
1435 MOVD reg64, xmmreg Double FADD 3
1437 MOVD reg32, xmmreg Double FADD 3
   */
1439 16, /* size of l1 cache. */
1440 2048, /* size of l2 cache. */
1441 64, /* size of prefetch block */
1442 /* New AMD processors never drop prefetches; if they cannot be performed
1443 immediately, they are queued. We set number of simultaneous prefetches
1444 to a large constant to reflect this (it probably is not a good idea not
1445 to limit number of prefetches at all, as their execution also takes some
   time — comment terminator restored; it was lost in this listing.  */
1447 100, /* number of parallel prefetches */
1448 2, /* Branch cost */
1449 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1450 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1451 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1452 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1453 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1454 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1456 /* BDVER2 has optimized REP instruction for medium sized blocks, but for
1457 very small blocks it is better to use loop. For large blocks, libcall
1458 can do nontemporary accesses and beat inline considerably. */
1459 {{{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
1460 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1461 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
1462 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}},
1463 {{{libcall, {{8, loop}, {24, unrolled_loop},
1464 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1465 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1466 {{libcall, {{8, loop}, {24, unrolled_loop},
1467 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1468 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}},
1469 6, /* scalar_stmt_cost. */
1470 4, /* scalar load_cost. */
1471 4, /* scalar_store_cost. */
1472 6, /* vec_stmt_cost. */
1473 0, /* vec_to_scalar_cost. */
1474 2, /* scalar_to_vec_cost. */
1475 4, /* vec_align_load_cost. */
1476 4, /* vec_unalign_load_cost. */
1477 4, /* vec_store_cost. */
1478 2, /* cond_taken_branch_cost. */
1479 1, /* cond_not_taken_branch_cost. */
/* Cost model for AMD Bobcat (btver1).  (review) This listing has the
   original file's line numbers fused into each line and has dropped
   several lines, including the leading "static const" qualifier and the
   closing "};" — verify against the complete i386.c.  */
1482 struct processor_costs btver1_cost = {
1483 COSTS_N_INSNS (1), /* cost of an add instruction */
1484 COSTS_N_INSNS (2), /* cost of a lea instruction */
1485 COSTS_N_INSNS (1), /* variable shift costs */
1486 COSTS_N_INSNS (1), /* constant shift costs */
1487 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1488 COSTS_N_INSNS (4), /* HI */
1489 COSTS_N_INSNS (3), /* SI */
1490 COSTS_N_INSNS (4), /* DI */
1491 COSTS_N_INSNS (5)}, /* other */
1492 0, /* cost of multiply per each bit set */
1493 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1494 COSTS_N_INSNS (35), /* HI */
1495 COSTS_N_INSNS (51), /* SI */
1496 COSTS_N_INSNS (83), /* DI */
1497 COSTS_N_INSNS (83)}, /* other */
1498 COSTS_N_INSNS (1), /* cost of movsx */
1499 COSTS_N_INSNS (1), /* cost of movzx */
1500 8, /* "large" insn */
/* (review) The MOVE_RATIO initializer (original line 1501) is missing
   from this listing.  */
1502 4, /* cost for loading QImode using movzbl */
1503 {3, 4, 3}, /* cost of loading integer registers
1504 in QImode, HImode and SImode.
1505 Relative to reg-reg move (2). */
1506 {3, 4, 3}, /* cost of storing integer registers */
1507 4, /* cost of reg,reg fld/fst */
1508 {4, 4, 12}, /* cost of loading fp registers
1509 in SFmode, DFmode and XFmode */
1510 {6, 6, 8}, /* cost of storing fp registers
1511 in SFmode, DFmode and XFmode */
1512 2, /* cost of moving MMX register */
1513 {3, 3}, /* cost of loading MMX registers
1514 in SImode and DImode */
1515 {4, 4}, /* cost of storing MMX registers
1516 in SImode and DImode */
1517 2, /* cost of moving SSE register */
1518 {4, 4, 3}, /* cost of loading SSE registers
1519 in SImode, DImode and TImode */
1520 {4, 4, 5}, /* cost of storing SSE registers
1521 in SImode, DImode and TImode */
1522 3, /* MMX or SSE register to integer */
/* (review) The following latency table lost its comment delimiters in
   this listing; re-wrapped as a comment:
1524 MOVD reg64, xmmreg Double FSTORE 4
1525 MOVD reg32, xmmreg Double FSTORE 4
1527 MOVD reg64, xmmreg Double FADD 3
1529 MOVD reg32, xmmreg Double FADD 3
   */
1531 32, /* size of l1 cache. */
1532 512, /* size of l2 cache. */
1533 64, /* size of prefetch block */
1534 100, /* number of parallel prefetches */
1535 2, /* Branch cost */
1536 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1537 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1538 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1539 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1540 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1541 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1543 /* BTVER1 has optimized REP instruction for medium sized blocks, but for
1544 very small blocks it is better to use loop. For large blocks, libcall can
1545 do nontemporary accesses and beat inline considerably. */
1546 {{{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
1547 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1548 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
1549 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}},
1550 {{{libcall, {{8, loop}, {24, unrolled_loop},
1551 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1552 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1553 {{libcall, {{8, loop}, {24, unrolled_loop},
1554 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1555 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}},
1556 4, /* scalar_stmt_cost. */
1557 2, /* scalar load_cost. */
1558 2, /* scalar_store_cost. */
1559 6, /* vec_stmt_cost. */
1560 0, /* vec_to_scalar_cost. */
1561 2, /* scalar_to_vec_cost. */
1562 2, /* vec_align_load_cost. */
1563 2, /* vec_unalign_load_cost. */
1564 2, /* vec_store_cost. */
1565 2, /* cond_taken_branch_cost. */
1566 1, /* cond_not_taken_branch_cost. */
/* Cost model for the Intel Pentium 4.  (review) This listing has the
   original file's line numbers fused into each line and has dropped
   several lines, including the leading "static const" qualifier and the
   closing "};" — verify against the complete i386.c.  */
1570 struct processor_costs pentium4_cost = {
1571 COSTS_N_INSNS (1), /* cost of an add instruction */
1572 COSTS_N_INSNS (3), /* cost of a lea instruction */
1573 COSTS_N_INSNS (4), /* variable shift costs */
1574 COSTS_N_INSNS (4), /* constant shift costs */
1575 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1576 COSTS_N_INSNS (15), /* HI */
1577 COSTS_N_INSNS (15), /* SI */
1578 COSTS_N_INSNS (15), /* DI */
1579 COSTS_N_INSNS (15)}, /* other */
1580 0, /* cost of multiply per each bit set */
1581 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1582 COSTS_N_INSNS (56), /* HI */
1583 COSTS_N_INSNS (56), /* SI */
1584 COSTS_N_INSNS (56), /* DI */
1585 COSTS_N_INSNS (56)}, /* other */
1586 COSTS_N_INSNS (1), /* cost of movsx */
1587 COSTS_N_INSNS (1), /* cost of movzx */
1588 16, /* "large" insn */
/* (review) The MOVE_RATIO initializer (original line 1589) is missing
   from this listing.  */
1590 2, /* cost for loading QImode using movzbl */
1591 {4, 5, 4}, /* cost of loading integer registers
1592 in QImode, HImode and SImode.
1593 Relative to reg-reg move (2). */
1594 {2, 3, 2}, /* cost of storing integer registers */
1595 2, /* cost of reg,reg fld/fst */
1596 {2, 2, 6}, /* cost of loading fp registers
1597 in SFmode, DFmode and XFmode */
1598 {4, 4, 6}, /* cost of storing fp registers
1599 in SFmode, DFmode and XFmode */
1600 2, /* cost of moving MMX register */
1601 {2, 2}, /* cost of loading MMX registers
1602 in SImode and DImode */
1603 {2, 2}, /* cost of storing MMX registers
1604 in SImode and DImode */
1605 12, /* cost of moving SSE register */
1606 {12, 12, 12}, /* cost of loading SSE registers
1607 in SImode, DImode and TImode */
1608 {2, 2, 8}, /* cost of storing SSE registers
1609 in SImode, DImode and TImode */
1610 10, /* MMX or SSE register to integer */
1611 8, /* size of l1 cache. */
1612 256, /* size of l2 cache. */
1613 64, /* size of prefetch block */
1614 6, /* number of parallel prefetches */
1615 2, /* Branch cost */
1616 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1617 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1618 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1619 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1620 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1621 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1623 {{{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
1624 DUMMY_STRINGOP_ALGS},
1625 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
1626 DUMMY_STRINGOP_ALGS}},
1628 {{{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
/* (review) An initializer line (original line 1629, likely the
   "{-1, libcall}}}," terminator) is missing from this listing.  */
1630 DUMMY_STRINGOP_ALGS},
1631 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
/* (review) An initializer line (original line 1632, likely the
   "{-1, libcall}}}," terminator) is missing from this listing.  */
1633 DUMMY_STRINGOP_ALGS}},
1634 1, /* scalar_stmt_cost. */
1635 1, /* scalar load_cost. */
1636 1, /* scalar_store_cost. */
1637 1, /* vec_stmt_cost. */
1638 1, /* vec_to_scalar_cost. */
1639 1, /* scalar_to_vec_cost. */
1640 1, /* vec_align_load_cost. */
1641 2, /* vec_unalign_load_cost. */
1642 1, /* vec_store_cost. */
1643 3, /* cond_taken_branch_cost. */
1644 1, /* cond_not_taken_branch_cost. */
/* Cost model for the Intel Nocona (64-bit Pentium 4 / Prescott).
   (review) This listing has the original file's line numbers fused into
   each line and has dropped several lines, including the leading
   "static const" qualifier and the closing "};" — verify against the
   complete i386.c.  */
1648 struct processor_costs nocona_cost = {
1649 COSTS_N_INSNS (1), /* cost of an add instruction */
1650 COSTS_N_INSNS (1), /* cost of a lea instruction */
1651 COSTS_N_INSNS (1), /* variable shift costs */
1652 COSTS_N_INSNS (1), /* constant shift costs */
1653 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1654 COSTS_N_INSNS (10), /* HI */
1655 COSTS_N_INSNS (10), /* SI */
1656 COSTS_N_INSNS (10), /* DI */
1657 COSTS_N_INSNS (10)}, /* other */
1658 0, /* cost of multiply per each bit set */
1659 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1660 COSTS_N_INSNS (66), /* HI */
1661 COSTS_N_INSNS (66), /* SI */
1662 COSTS_N_INSNS (66), /* DI */
1663 COSTS_N_INSNS (66)}, /* other */
1664 COSTS_N_INSNS (1), /* cost of movsx */
1665 COSTS_N_INSNS (1), /* cost of movzx */
1666 16, /* "large" insn */
1667 17, /* MOVE_RATIO */
1668 4, /* cost for loading QImode using movzbl */
1669 {4, 4, 4}, /* cost of loading integer registers
1670 in QImode, HImode and SImode.
1671 Relative to reg-reg move (2). */
1672 {4, 4, 4}, /* cost of storing integer registers */
1673 3, /* cost of reg,reg fld/fst */
1674 {12, 12, 12}, /* cost of loading fp registers
1675 in SFmode, DFmode and XFmode */
1676 {4, 4, 4}, /* cost of storing fp registers
1677 in SFmode, DFmode and XFmode */
1678 6, /* cost of moving MMX register */
1679 {12, 12}, /* cost of loading MMX registers
1680 in SImode and DImode */
1681 {12, 12}, /* cost of storing MMX registers
1682 in SImode and DImode */
1683 6, /* cost of moving SSE register */
1684 {12, 12, 12}, /* cost of loading SSE registers
1685 in SImode, DImode and TImode */
1686 {12, 12, 12}, /* cost of storing SSE registers
1687 in SImode, DImode and TImode */
1688 8, /* MMX or SSE register to integer */
1689 8, /* size of l1 cache. */
1690 1024, /* size of l2 cache. */
1691 128, /* size of prefetch block */
1692 8, /* number of parallel prefetches */
1693 1, /* Branch cost */
1694 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1695 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1696 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1697 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1698 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1699 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1701 {{{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
1702 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
1703 {100000, unrolled_loop}, {-1, libcall}}}},
1704 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
1705 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
1706 {100000, unrolled_loop}, {-1, libcall}}}}},
1708 {{{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
/* (review) An initializer line (original line 1709, likely the
   "{-1, libcall}}}," terminator) is missing from this listing.  */
1710 {libcall, {{24, loop}, {64, unrolled_loop},
1711 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1712 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
/* (review) An initializer line (original line 1713, likely the
   "{-1, libcall}}}," terminator) is missing from this listing.  */
1714 {libcall, {{24, loop}, {64, unrolled_loop},
1715 {8192, rep_prefix_8_byte}, {-1, libcall}}}}},
1716 1, /* scalar_stmt_cost. */
1717 1, /* scalar load_cost. */
1718 1, /* scalar_store_cost. */
1719 1, /* vec_stmt_cost. */
1720 1, /* vec_to_scalar_cost. */
1721 1, /* scalar_to_vec_cost. */
1722 1, /* vec_align_load_cost. */
1723 2, /* vec_unalign_load_cost. */
1724 1, /* vec_store_cost. */
1725 3, /* cond_taken_branch_cost. */
1726 1, /* cond_not_taken_branch_cost. */
/* Cost model for the Intel Atom.  (review) This listing has the original
   file's line numbers fused into each line and has dropped several lines,
   including the leading "static const" qualifier and the closing "};" —
   verify against the complete i386.c.  */
1730 struct processor_costs atom_cost = {
1731 COSTS_N_INSNS (1), /* cost of an add instruction */
1732 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1733 COSTS_N_INSNS (1), /* variable shift costs */
1734 COSTS_N_INSNS (1), /* constant shift costs */
1735 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1736 COSTS_N_INSNS (4), /* HI */
1737 COSTS_N_INSNS (3), /* SI */
1738 COSTS_N_INSNS (4), /* DI */
1739 COSTS_N_INSNS (2)}, /* other */
1740 0, /* cost of multiply per each bit set */
1741 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1742 COSTS_N_INSNS (26), /* HI */
1743 COSTS_N_INSNS (42), /* SI */
1744 COSTS_N_INSNS (74), /* DI */
1745 COSTS_N_INSNS (74)}, /* other */
1746 COSTS_N_INSNS (1), /* cost of movsx */
1747 COSTS_N_INSNS (1), /* cost of movzx */
1748 8, /* "large" insn */
1749 17, /* MOVE_RATIO */
1750 4, /* cost for loading QImode using movzbl */
1751 {4, 4, 4}, /* cost of loading integer registers
1752 in QImode, HImode and SImode.
1753 Relative to reg-reg move (2). */
1754 {4, 4, 4}, /* cost of storing integer registers */
1755 4, /* cost of reg,reg fld/fst */
1756 {12, 12, 12}, /* cost of loading fp registers
1757 in SFmode, DFmode and XFmode */
1758 {6, 6, 8}, /* cost of storing fp registers
1759 in SFmode, DFmode and XFmode */
1760 2, /* cost of moving MMX register */
1761 {8, 8}, /* cost of loading MMX registers
1762 in SImode and DImode */
1763 {8, 8}, /* cost of storing MMX registers
1764 in SImode and DImode */
1765 2, /* cost of moving SSE register */
1766 {8, 8, 8}, /* cost of loading SSE registers
1767 in SImode, DImode and TImode */
1768 {8, 8, 8}, /* cost of storing SSE registers
1769 in SImode, DImode and TImode */
1770 5, /* MMX or SSE register to integer */
1771 32, /* size of l1 cache. */
1772 256, /* size of l2 cache. */
1773 64, /* size of prefetch block */
1774 6, /* number of parallel prefetches */
1775 3, /* Branch cost */
1776 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1777 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1778 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1779 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1780 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1781 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1783 /* stringop_algs for memcpy.
1784 SSE loops works best on Atom, but fall back into non-SSE unrolled loop variant
   — comment terminator restored; it was lost in this listing.  */
1786 {{{libcall, {{4096, sse_loop}, {4096, unrolled_loop}, {-1, libcall}}}, /* Known alignment. */
1787 {libcall, {{4096, sse_loop}, {4096, unrolled_loop}, {-1, libcall}}}},
1788 {{libcall, {{-1, libcall}}}, /* Unknown alignment. */
1789 {libcall, {{2048, sse_loop}, {2048, unrolled_loop},
/* (review) Initializer lines (original lines 1790-1791, likely the
   "{-1, libcall}}}}," terminator) are missing from this listing.  */
1792 /* stringop_algs for memset. */
1793 {{{libcall, {{4096, sse_loop}, {4096, unrolled_loop}, {-1, libcall}}}, /* Known alignment. */
1794 {libcall, {{4096, sse_loop}, {4096, unrolled_loop}, {-1, libcall}}}},
1795 {{libcall, {{1024, sse_loop}, {1024, unrolled_loop}, /* Unknown alignment. */
/* (review) An initializer line (original line 1796, likely the
   "{-1, libcall}}}," terminator) is missing from this listing.  */
1797 {libcall, {{2048, sse_loop}, {2048, unrolled_loop},
/* (review) An initializer line (original line 1798, likely the
   "{-1, libcall}}}}," terminator) is missing from this listing.  */
1799 1, /* scalar_stmt_cost. */
1800 1, /* scalar load_cost. */
1801 1, /* scalar_store_cost. */
1802 1, /* vec_stmt_cost. */
1803 1, /* vec_to_scalar_cost. */
1804 1, /* scalar_to_vec_cost. */
1805 1, /* vec_align_load_cost. */
1806 2, /* vec_unalign_load_cost. */
1807 1, /* vec_store_cost. */
1808 3, /* cond_taken_branch_cost. */
1809 1, /* cond_not_taken_branch_cost. */
1812 /* Core should produce code tuned for core variants. */
1814 struct processor_costs core_cost = {
1815 COSTS_N_INSNS (1), /* cost of an add instruction */
1816 /* On all chips taken into consideration lea is 2 cycles and more. With
1817 this cost however our current implementation of synth_mult results in
1818 use of unnecessary temporary registers causing regression on several
1819 SPECfp benchmarks. */
1820 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1821 COSTS_N_INSNS (1), /* variable shift costs */
1822 COSTS_N_INSNS (1), /* constant shift costs */
1823 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1824 COSTS_N_INSNS (4), /* HI */
1825 COSTS_N_INSNS (3), /* SI */
1826 COSTS_N_INSNS (4), /* DI */
1827 COSTS_N_INSNS (2)}, /* other */
1828 0, /* cost of multiply per each bit set */
1829 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1830 COSTS_N_INSNS (26), /* HI */
1831 COSTS_N_INSNS (42), /* SI */
1832 COSTS_N_INSNS (74), /* DI */
1833 COSTS_N_INSNS (74)}, /* other */
1834 COSTS_N_INSNS (1), /* cost of movsx */
1835 COSTS_N_INSNS (1), /* cost of movzx */
1836 8, /* "large" insn */
1837 17, /* MOVE_RATIO */
1838 4, /* cost for loading QImode using movzbl */
1839 {4, 4, 4}, /* cost of loading integer registers
1840 in QImode, HImode and SImode.
1841 Relative to reg-reg move (2). */
1842 {4, 4, 4}, /* cost of storing integer registers */
1843 4, /* cost of reg,reg fld/fst */
1844 {12, 12, 12}, /* cost of loading fp registers
1845 in SFmode, DFmode and XFmode */
1846 {6, 6, 8}, /* cost of storing fp registers
1847 in SFmode, DFmode and XFmode */
1848 2, /* cost of moving MMX register */
1849 {8, 8}, /* cost of loading MMX registers
1850 in SImode and DImode */
1851 {8, 8}, /* cost of storing MMX registers
1852 in SImode and DImode */
1853 2, /* cost of moving SSE register */
1854 {8, 8, 8}, /* cost of loading SSE registers
1855 in SImode, DImode and TImode */
1856 {8, 8, 8}, /* cost of storing SSE registers
1857 in SImode, DImode and TImode */
1858 5, /* MMX or SSE register to integer */
1859 32, /* size of l1 cache. */
1860 512, /* size of l2 cache. */
1861 64, /* size of prefetch block */
1862 6, /* number of parallel prefetches */
/* Benchmarks show large regressions on K8 sixtrack benchmark when this
   value is increased to perhaps more appropriate value of 5. */
1865 3, /* Branch cost */
1866 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1867 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1868 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1869 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1870 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1871 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1873 /* stringop_algs for memcpy. */
1874 {{{libcall, {{16, loop}, {24, unrolled_loop}, {1024, rep_prefix_4_byte}, {-1, libcall}}}, /* Known alignment. */
1875 {libcall, {{16, loop}, {24, unrolled_loop}, {1024, rep_prefix_8_byte}, {-1, libcall}}}},
1876 {{libcall, {{16, loop}, {24, unrolled_loop}, {1024, rep_prefix_4_byte}, {-1, libcall}}}, /* Unknown alignment. */
1877 {libcall, {{16, loop}, {24, unrolled_loop}, {1024, rep_prefix_8_byte}, {-1, libcall}}}}},
1879 /* stringop_algs for memset. */
1880 {{{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}}, /* Known alignment. */
1881 {libcall, {{256, rep_prefix_8_byte}, {-1, libcall}}}},
1882 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}}, /* Unknown alignment. */
1883 {libcall, {{256, rep_prefix_8_byte}, {-1, libcall}}}}},
1884 1, /* scalar_stmt_cost. */
1885 1, /* scalar load_cost. */
1886 1, /* scalar_store_cost. */
1887 1, /* vec_stmt_cost. */
1888 1, /* vec_to_scalar_cost. */
1889 1, /* scalar_to_vec_cost. */
1890 1, /* vec_align_load_cost. */
1891 2, /* vec_unalign_load_cost. */
1892 1, /* vec_store_cost. */
1893 3, /* cond_taken_branch_cost. */
1894 1, /* cond_not_taken_branch_cost. */
/* Generic64 should produce code tuned for Nocona, Core, K8, Amdfam10
   and Bulldozer.  */
1899 struct processor_costs generic64_cost = {
1900 COSTS_N_INSNS (1), /* cost of an add instruction */
1901 /* On all chips taken into consideration lea is 2 cycles and more. With
1902 this cost however our current implementation of synth_mult results in
1903 use of unnecessary temporary registers causing regression on several
1904 SPECfp benchmarks. */
1905 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1906 COSTS_N_INSNS (1), /* variable shift costs */
1907 COSTS_N_INSNS (1), /* constant shift costs */
1908 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1909 COSTS_N_INSNS (4), /* HI */
1910 COSTS_N_INSNS (3), /* SI */
1911 COSTS_N_INSNS (4), /* DI */
1912 COSTS_N_INSNS (2)}, /* other */
1913 0, /* cost of multiply per each bit set */
1914 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1915 COSTS_N_INSNS (26), /* HI */
1916 COSTS_N_INSNS (42), /* SI */
1917 COSTS_N_INSNS (74), /* DI */
1918 COSTS_N_INSNS (74)}, /* other */
1919 COSTS_N_INSNS (1), /* cost of movsx */
1920 COSTS_N_INSNS (1), /* cost of movzx */
1921 8, /* "large" insn */
1922 17, /* MOVE_RATIO */
1923 4, /* cost for loading QImode using movzbl */
1924 {4, 4, 4}, /* cost of loading integer registers
1925 in QImode, HImode and SImode.
1926 Relative to reg-reg move (2). */
1927 {4, 4, 4}, /* cost of storing integer registers */
1928 4, /* cost of reg,reg fld/fst */
1929 {12, 12, 12}, /* cost of loading fp registers
1930 in SFmode, DFmode and XFmode */
1931 {6, 6, 8}, /* cost of storing fp registers
1932 in SFmode, DFmode and XFmode */
1933 2, /* cost of moving MMX register */
1934 {8, 8}, /* cost of loading MMX registers
1935 in SImode and DImode */
1936 {8, 8}, /* cost of storing MMX registers
1937 in SImode and DImode */
1938 2, /* cost of moving SSE register */
1939 {8, 8, 8}, /* cost of loading SSE registers
1940 in SImode, DImode and TImode */
1941 {8, 8, 8}, /* cost of storing SSE registers
1942 in SImode, DImode and TImode */
1943 5, /* MMX or SSE register to integer */
1944 32, /* size of l1 cache. */
1945 512, /* size of l2 cache. */
1946 64, /* size of prefetch block */
1947 6, /* number of parallel prefetches */
/* Benchmarks show large regressions on K8 sixtrack benchmark when this
   value is increased to perhaps more appropriate value of 5. */
1950 3, /* Branch cost */
1951 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1952 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1953 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1954 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1955 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1956 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1958 {{DUMMY_STRINGOP_ALGS,
1959 {libcall, {{16, rep_prefix_4_byte}, {128, rep_prefix_8_byte}, {4096, rep_prefix_1_byte}, {-1, libcall}}}},
1960 {DUMMY_STRINGOP_ALGS,
1961 {libcall, {{128, rep_prefix_4_byte}, {4096, rep_prefix_1_byte}, {-1, libcall}}}}},
1963 {{DUMMY_STRINGOP_ALGS,
1964 {libcall, {{16, rep_prefix_4_byte}, {512, unrolled_loop}, {4096, rep_prefix_1_byte}, {-1, libcall}}}},
1965 {DUMMY_STRINGOP_ALGS,
1966 {libcall, {{16, rep_prefix_4_byte}, {512, unrolled_loop}, {4096, rep_prefix_1_byte}, {-1, libcall}}}}},
1967 1, /* scalar_stmt_cost. */
1968 1, /* scalar load_cost. */
1969 1, /* scalar_store_cost. */
1970 1, /* vec_stmt_cost. */
1971 1, /* vec_to_scalar_cost. */
1972 1, /* scalar_to_vec_cost. */
1973 1, /* vec_align_load_cost. */
1974 2, /* vec_unalign_load_cost. */
1975 1, /* vec_store_cost. */
1976 3, /* cond_taken_branch_cost. */
1977 1, /* cond_not_taken_branch_cost. */
/* Generic32 should produce code tuned for PPro, Pentium4, Nocona, Core,
   Athlon, K8, Amdfam10 and Bulldozer.  */
1983 struct processor_costs generic32_cost = {
1984 COSTS_N_INSNS (1), /* cost of an add instruction */
1985 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1986 COSTS_N_INSNS (1), /* variable shift costs */
1987 COSTS_N_INSNS (1), /* constant shift costs */
1988 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1989 COSTS_N_INSNS (4), /* HI */
1990 COSTS_N_INSNS (3), /* SI */
1991 COSTS_N_INSNS (4), /* DI */
1992 COSTS_N_INSNS (2)}, /* other */
1993 0, /* cost of multiply per each bit set */
1994 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1995 COSTS_N_INSNS (26), /* HI */
1996 COSTS_N_INSNS (42), /* SI */
1997 COSTS_N_INSNS (74), /* DI */
1998 COSTS_N_INSNS (74)}, /* other */
1999 COSTS_N_INSNS (1), /* cost of movsx */
2000 COSTS_N_INSNS (1), /* cost of movzx */
2001 8, /* "large" insn */
2002 17, /* MOVE_RATIO */
2003 4, /* cost for loading QImode using movzbl */
2004 {4, 4, 4}, /* cost of loading integer registers
2005 in QImode, HImode and SImode.
2006 Relative to reg-reg move (2). */
2007 {4, 4, 4}, /* cost of storing integer registers */
2008 4, /* cost of reg,reg fld/fst */
2009 {12, 12, 12}, /* cost of loading fp registers
2010 in SFmode, DFmode and XFmode */
2011 {6, 6, 8}, /* cost of storing fp registers
2012 in SFmode, DFmode and XFmode */
2013 2, /* cost of moving MMX register */
2014 {8, 8}, /* cost of loading MMX registers
2015 in SImode and DImode */
2016 {8, 8}, /* cost of storing MMX registers
2017 in SImode and DImode */
2018 2, /* cost of moving SSE register */
2019 {8, 8, 8}, /* cost of loading SSE registers
2020 in SImode, DImode and TImode */
2021 {8, 8, 8}, /* cost of storing SSE registers
2022 in SImode, DImode and TImode */
2023 5, /* MMX or SSE register to integer */
2024 32, /* size of l1 cache. */
2025 256, /* size of l2 cache. */
2026 64, /* size of prefetch block */
2027 6, /* number of parallel prefetches */
2028 3, /* Branch cost */
2029 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
2030 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
2031 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
2032 COSTS_N_INSNS (8), /* cost of FABS instruction. */
2033 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
2034 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
2035 /* stringop_algs for memcpy. */
2036 {{{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
2037 DUMMY_STRINGOP_ALGS},
2038 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
2039 DUMMY_STRINGOP_ALGS}},
2040 /* stringop_algs for memset. */
2041 {{{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
2042 DUMMY_STRINGOP_ALGS},
2043 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
2044 DUMMY_STRINGOP_ALGS}},
2045 1, /* scalar_stmt_cost. */
2046 1, /* scalar load_cost. */
2047 1, /* scalar_store_cost. */
2048 1, /* vec_stmt_cost. */
2049 1, /* vec_to_scalar_cost. */
2050 1, /* scalar_to_vec_cost. */
2051 1, /* vec_align_load_cost. */
2052 2, /* vec_unalign_load_cost. */
2053 1, /* vec_store_cost. */
2054 3, /* cond_taken_branch_cost. */
2055 1, /* cond_not_taken_branch_cost. */
2058 const struct processor_costs *ix86_cost = &pentium_cost;
2060 /* Processor feature/optimization bitmasks. */
2061 #define m_386 (1<<PROCESSOR_I386)
2062 #define m_486 (1<<PROCESSOR_I486)
2063 #define m_PENT (1<<PROCESSOR_PENTIUM)
2064 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
2065 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
2066 #define m_NOCONA (1<<PROCESSOR_NOCONA)
2067 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
2068 #define m_CORE2_32 (1<<PROCESSOR_CORE2_32)
2069 #define m_CORE2_64 (1<<PROCESSOR_CORE2_64)
2070 #define m_COREI7_32 (1<<PROCESSOR_COREI7_32)
2071 #define m_COREI7_64 (1<<PROCESSOR_COREI7_64)
2072 #define m_COREI7 (m_COREI7_32 | m_COREI7_64)
2073 #define m_CORE2I7_32 (m_CORE2_32 | m_COREI7_32)
2074 #define m_CORE2I7_64 (m_CORE2_64 | m_COREI7_64)
2075 #define m_CORE2I7 (m_CORE2I7_32 | m_CORE2I7_64)
2076 #define m_ATOM (1<<PROCESSOR_ATOM)
2078 #define m_GEODE (1<<PROCESSOR_GEODE)
2079 #define m_K6 (1<<PROCESSOR_K6)
2080 #define m_K6_GEODE (m_K6 | m_GEODE)
2081 #define m_K8 (1<<PROCESSOR_K8)
2082 #define m_ATHLON (1<<PROCESSOR_ATHLON)
2083 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
2084 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
2085 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
2086 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
2087 #define m_BDVER (m_BDVER1 | m_BDVER2)
2088 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
2089 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1)
2091 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
2092 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
2094 /* Generic instruction choice should be common subset of supported CPUs
2095 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
2096 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
/* Feature tests against the various tunings, indexed by X86_TUNE_*.
   Nonzero when the feature is enabled for the selected tuning; derived
   from initial_ix86_tune_features and the processor mask.  */
unsigned char ix86_tune_features[X86_TUNE_LAST];
2101 /* Feature tests against the various tunings used to create ix86_tune_features
2102 based on the processor mask. */
2103 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
2104 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
2105 negatively, so enabling for Generic64 seems like good code size
2106 tradeoff. We can't enable it for 32bit generic because it does not
2107 work well with PPro base chips. */
2108 m_386 | m_CORE2I7_64 | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC64,
2110 /* X86_TUNE_PUSH_MEMORY */
2111 m_386 | m_P4_NOCONA | m_CORE2I7 | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
2113 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
2116 /* X86_TUNE_UNROLL_STRLEN */
2117 m_486 | m_PENT | m_PPRO | m_ATOM | m_CORE2I7 | m_K6 | m_AMD_MULTIPLE | m_GENERIC,
2119 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
2120 on simulation result. But after P4 was made, no performance benefit
2121 was observed with branch hints. It also increases the code size.
2122 As a result, icc never generates branch hints. */
2125 /* X86_TUNE_DOUBLE_WITH_ADD */
2128 /* X86_TUNE_USE_SAHF */
2129 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC,
2131 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
2132 partial dependencies. */
2133 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GEODE | m_AMD_MULTIPLE | m_GENERIC,
2135 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
2136 register stalls on Generic32 compilation setting as well. However
2137 in current implementation the partial register stalls are not eliminated
2138 very well - they can be introduced via subregs synthesized by combine
2139 and can happen in caller/callee saving sequences. Because this option
2140 pays back little on PPro based chips and is in conflict with partial reg
2141 dependencies used by Athlon/P4 based chips, it is better to leave it off
2142 for generic32 for now. */
2145 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
2146 m_CORE2I7 | m_GENERIC,
2148 /* X86_TUNE_USE_HIMODE_FIOP */
2149 m_386 | m_486 | m_K6_GEODE,
2151 /* X86_TUNE_USE_SIMODE_FIOP */
2152 ~(m_PENT | m_PPRO | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC),
2154 /* X86_TUNE_USE_MOV0 */
2157 /* X86_TUNE_USE_CLTD */
2158 ~(m_PENT | m_CORE2I7 | m_ATOM | m_K6 | m_GENERIC),
2160 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
2163 /* X86_TUNE_SPLIT_LONG_MOVES */
2166 /* X86_TUNE_READ_MODIFY_WRITE */
2169 /* X86_TUNE_READ_MODIFY */
2172 /* X86_TUNE_PROMOTE_QIMODE */
2173 m_386 | m_486 | m_PENT | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
2175 /* X86_TUNE_FAST_PREFIX */
2176 ~(m_386 | m_486 | m_PENT),
2178 /* X86_TUNE_SINGLE_STRINGOP */
2179 m_386 | m_P4_NOCONA,
2181 /* X86_TUNE_QIMODE_MATH */
2184 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
2185 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
2186 might be considered for Generic32 if our scheme for avoiding partial
2187 stalls was more effective. */
2190 /* X86_TUNE_PROMOTE_QI_REGS */
2193 /* X86_TUNE_PROMOTE_HI_REGS */
2196 /* X86_TUNE_SINGLE_POP: Enable if single pop insn is preferred
2197 over esp addition. */
2198 m_386 | m_486 | m_PENT | m_PPRO,
2200 /* X86_TUNE_DOUBLE_POP: Enable if double pop insn is preferred
2201 over esp addition. */
2204 /* X86_TUNE_SINGLE_PUSH: Enable if single push insn is preferred
2205 over esp subtraction. */
2206 m_386 | m_486 | m_PENT | m_K6_GEODE,
2208 /* X86_TUNE_DOUBLE_PUSH. Enable if double push insn is preferred
2209 over esp subtraction. */
2210 m_PENT | m_K6_GEODE,
2212 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
2213 for DFmode copies */
2214 ~(m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GEODE | m_AMD_MULTIPLE | m_ATOM | m_GENERIC),
2216 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
2217 m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
2219 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
2220 conflict here in between PPro/Pentium4 based chips that thread 128bit
2221 SSE registers as single units versus K8 based chips that divide SSE
2222 registers to two 64bit halves. This knob promotes all store destinations
2223 to be 128bit to allow register renaming on 128bit SSE units, but usually
2224 results in one extra microop on 64bit SSE units. Experimental results
2225 shows that disabling this option on P4 brings over 20% SPECfp regression,
2226 while enabling it on K8 brings roughly 2.4% regression that can be partly
2227 masked by careful scheduling of moves. */
2228 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMDFAM10 | m_BDVER | m_GENERIC,
2230 /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
2231 m_COREI7 | m_AMDFAM10 | m_BDVER | m_BTVER1,
2233 /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
2236 /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
2239 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
2240 are resolved on SSE register parts instead of whole registers, so we may
2241 maintain just lower part of scalar values in proper format leaving the
2242 upper part undefined. */
2245 /* X86_TUNE_SSE_TYPELESS_STORES */
2248 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
2249 m_PPRO | m_P4_NOCONA,
2251 /* X86_TUNE_MEMORY_MISMATCH_STALL */
2252 m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
2254 /* X86_TUNE_PROLOGUE_USING_MOVE */
2255 m_PPRO | m_CORE2I7 | m_ATOM | m_ATHLON_K8 | m_GENERIC,
2257 /* X86_TUNE_EPILOGUE_USING_MOVE */
2258 m_PPRO | m_CORE2I7 | m_ATOM | m_ATHLON_K8 | m_GENERIC,
2260 /* X86_TUNE_SHIFT1 */
2263 /* X86_TUNE_USE_FFREEP */
2266 /* X86_TUNE_INTER_UNIT_MOVES */
2267 ~(m_AMD_MULTIPLE | m_GENERIC),
2269 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
2270 ~(m_AMDFAM10 | m_BDVER ),
2272 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
2273 than 4 branch instructions in the 16 byte window. */
2274 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
2276 /* X86_TUNE_SCHEDULE */
2277 m_PENT | m_PPRO | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
2279 /* X86_TUNE_USE_BT */
2280 m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
2282 /* X86_TUNE_USE_INCDEC */
2283 ~(m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GENERIC),
2285 /* X86_TUNE_PAD_RETURNS */
2286 m_CORE2I7 | m_AMD_MULTIPLE | m_GENERIC,
/* X86_TUNE_PAD_SHORT_FUNCTION: Pad short function.  */
2291 /* X86_TUNE_EXT_80387_CONSTANTS */
2292 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_ATHLON_K8 | m_GENERIC,
2294 /* X86_TUNE_SHORTEN_X87_SSE */
2297 /* X86_TUNE_AVOID_VECTOR_DECODE */
2298 m_CORE2I7_64 | m_K8 | m_GENERIC64,
2300 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
2301 and SImode multiply, but 386 and 486 do HImode multiply faster. */
2304 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
2305 vector path on AMD machines. */
2306 m_CORE2I7_64 | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC64,
/* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
   machines.  */
2310 m_CORE2I7_64 | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC64,
2312 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
2316 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
2317 but one byte longer. */
2320 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
2321 operand that cannot be represented using a modRM byte. The XOR
2322 replacement is long decoded, so this split helps here as well. */
/* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
   from FP to FP.  */
2327 m_CORE2I7 | m_AMDFAM10 | m_GENERIC,
2329 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
2330 from integer to FP. */
2333 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
2334 with a subsequent conditional jump instruction into a single
2335 compare-and-branch uop. */
2338 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
2339 will impact LEA instruction selection. */
/* X86_TUNE_VECTORIZE_DOUBLE: Enable double precision vector
   instructions.  */
/* X86_TUNE_SOFTWARE_PREFETCHING_BENEFICIAL: Enable software prefetching
   at -O3.  For the moment, the prefetching seems badly tuned for Intel
   chips.  */
2349 m_K6_GEODE | m_AMD_MULTIPLE,
2351 /* X86_TUNE_AVX128_OPTIMAL: Enable 128-bit AVX instruction generation for
2352 the auto-vectorizer. */
2355 /* X86_TUNE_REASSOC_INT_TO_PARALLEL: Try to produce parallel computations
2356 during reassociation of integer computation. */
2359 /* X86_TUNE_REASSOC_FP_TO_PARALLEL: Try to produce parallel computations
2360 during reassociation of fp computation. */
/* Feature tests against the various architecture variations, indexed by
   X86_ARCH_*; derived from initial_ix86_arch_features and the processor
   mask.  */
unsigned char ix86_arch_features[X86_ARCH_LAST];
2367 /* Feature tests against the various architecture variations, used to create
2368 ix86_arch_features based on the processor mask. */
2369 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2370 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
2371 ~(m_386 | m_486 | m_PENT | m_K6),
2373 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2376 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2379 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2382 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
/* Processor mask for which accumulating outgoing arguments is the
   preferred default.  NOTE(review): semantics inferred from the name;
   confirm at the option-defaulting use site.  */
static const unsigned int x86_accumulate_outgoing_args
  = m_PPRO | m_P4_NOCONA | m_ATOM | m_CORE2I7 | m_AMD_MULTIPLE | m_GENERIC;
/* Processor mask for which the 80387 "fancy" math instructions are
   always usable.  NOTE(review): inferred from the name; confirm where
   this mask is tested.  */
static const unsigned int x86_arch_always_fancy_math_387
  = m_PENT | m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC;
/* Processor mask on which unaligned 256-bit AVX loads are split
   (presumably into two 128-bit halves — confirm at use site).  */
static const unsigned int x86_avx256_split_unaligned_load
  = m_COREI7 | m_GENERIC;
/* Processor mask on which unaligned 256-bit AVX stores are split
   (presumably into two 128-bit halves — confirm at use site).  */
static const unsigned int x86_avx256_split_unaligned_store
  = m_COREI7 | m_BDVER | m_GENERIC;
/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
2401 #define FAST_PROLOGUE_INSN_COUNT 20
/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.
   The *_REGISTER_NAMES initializer macros are supplied by the target
   headers (not visible in this file).  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2408 /* Array of the smallest class containing reg number REGNO, indexed by
2409 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2411 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2413 /* ax, dx, cx, bx */
2414 AREG, DREG, CREG, BREG,
2415 /* si, di, bp, sp */
2416 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2418 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2419 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2422 /* flags, fpsr, fpcr, frame */
2423 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2425 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2428 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2431 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2432 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2433 /* SSE REX registers */
2434 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2438 /* The "default" register map used in 32bit mode. */
2440 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2442 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2443 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2444 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2445 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2446 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2447 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2448 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2451 /* The "default" register map used in 64bit mode. */
2453 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2455 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2456 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2457 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2458 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2459 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2460 8,9,10,11,12,13,14,15, /* extended integer registers */
2461 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2464 /* Define the register numbers to be used in Dwarf debugging information.
2465 The SVR4 reference port C compiler uses the following register numbers
2466 in its Dwarf output code:
2467 0 for %eax (gcc regno = 0)
2468 1 for %ecx (gcc regno = 2)
2469 2 for %edx (gcc regno = 1)
2470 3 for %ebx (gcc regno = 3)
2471 4 for %esp (gcc regno = 7)
2472 5 for %ebp (gcc regno = 6)
2473 6 for %esi (gcc regno = 4)
2474 7 for %edi (gcc regno = 5)
2475 The following three DWARF register numbers are never generated by
2476 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2477 believes these numbers have these meanings.
2478 8 for %eip (no gcc equivalent)
2479 9 for %eflags (gcc regno = 17)
2480 10 for %trapno (no gcc equivalent)
2481 It is not at all clear how we should number the FP stack registers
2482 for the x86 architecture. If the version of SDB on x86/svr4 were
2483 a bit less brain dead with respect to floating-point then we would
2484 have a precedent to follow with respect to DWARF register numbers
2485 for x86 FP registers, but the SDB on x86/svr4 is so completely
2486 broken with respect to FP registers that it is hardly worth thinking
2487 of it as something to strive for compatibility with.
2488 The version of x86/svr4 SDB I have at the moment does (partially)
2489 seem to believe that DWARF register number 11 is associated with
2490 the x86 register %st(0), but that's about all. Higher DWARF
2491 register numbers don't seem to be associated with anything in
2492 particular, and even for DWARF regno 11, SDB only seems to under-
2493 stand that it should say that a variable lives in %st(0) (when
2494 asked via an `=' command) if we said it was in DWARF regno 11,
2495 but SDB still prints garbage when asked for the value of the
2496 variable in question (via a `/' command).
2497 (Also note that the labels SDB prints for various FP stack regs
2498 when doing an `x' command are all wrong.)
2499 Note that these problems generally don't affect the native SVR4
2500 C compiler because it doesn't allow the use of -O with -g and
2501 because when it is *not* optimizing, it allocates a memory
2502 location for each floating-point variable, and the memory
2503 location is what gets described in the DWARF AT_location
2504 attribute for the variable in question.
2505 Regardless of the severe mental illness of the x86/svr4 SDB, we
2506 do something sensible here and we use the following DWARF
2507 register numbers. Note that these are all stack-top-relative
2509 11 for %st(0) (gcc regno = 8)
2510 12 for %st(1) (gcc regno = 9)
2511 13 for %st(2) (gcc regno = 10)
2512 14 for %st(3) (gcc regno = 11)
2513 15 for %st(4) (gcc regno = 12)
2514 16 for %st(5) (gcc regno = 13)
2515 17 for %st(6) (gcc regno = 14)
2516 18 for %st(7) (gcc regno = 15)
2518 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2520 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2521 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2522 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2523 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2524 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2525 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2526 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2529 /* Define parameter passing and return registers. */
2531 static int const x86_64_int_parameter_registers[6] =
2533 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2536 static int const x86_64_ms_abi_int_parameter_registers[4] =
2538 CX_REG, DX_REG, R8_REG, R9_REG
2541 static int const x86_64_int_return_registers[4] =
2543 AX_REG, DX_REG, DI_REG, SI_REG
2546 /* Define the structure for the machine field in struct function. */
2548 struct GTY(()) stack_local_entry {
2549 unsigned short mode;
2552 struct stack_local_entry *next;
2555 /* Structure describing stack frame layout.
2556 Stack grows downward:
2562 saved static chain if ix86_static_chain_on_stack
2564 saved frame pointer if frame_pointer_needed
2565 <- HARD_FRAME_POINTER
2571 <- sse_regs_save_offset
2574 [va_arg registers] |
2578 [padding2] | = to_allocate
2587 int outgoing_arguments_size;
2588 HOST_WIDE_INT frame;
2590 /* The offsets relative to ARG_POINTER. */
2591 HOST_WIDE_INT frame_pointer_offset;
2592 HOST_WIDE_INT hard_frame_pointer_offset;
2593 HOST_WIDE_INT stack_pointer_offset;
2594 HOST_WIDE_INT hfp_save_offset;
2595 HOST_WIDE_INT reg_save_offset;
2596 HOST_WIDE_INT sse_reg_save_offset;
2598 /* When save_regs_using_mov is set, emit prologue using
2599 move instead of push instructions. */
2600 bool save_regs_using_mov;
/* Which cpu are we scheduling for (value of the "cpu" insn attribute).  */
enum attr_cpu ix86_schedule;
/* Which cpu are we optimizing for — presumably set from -mtune
   (see ix86_tune_defaulted below).  */
enum processor_type ix86_tune;
/* Which instruction set architecture to use — presumably set from
   -march (see ix86_arch_specified below).  */
enum processor_type ix86_arch;
/* true if sse prefetch instruction is not NOOP.  */
int x86_prefetch_sse;
/* -mstackrealign option: name of the attribute used to force alignment
   of the argument pointer.  */
static const char ix86_force_align_arg_pointer_string[]
  = "force_align_arg_pointer";
2619 static rtx (*ix86_gen_leave) (void);
2620 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2621 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2622 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2623 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2624 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2625 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2626 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2627 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2628 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2630 /* Preferred alignment for stack boundary in bits. */
2631 unsigned int ix86_preferred_stack_boundary;
/* Alignment for incoming stack boundary in bits specified at the
   command line.  */
2635 static unsigned int ix86_user_incoming_stack_boundary;
2637 /* Default alignment for incoming stack boundary in bits. */
2638 static unsigned int ix86_default_incoming_stack_boundary;
2640 /* Alignment for incoming stack boundary in bits. */
2641 unsigned int ix86_incoming_stack_boundary;
/* Calling abi specific va_list type nodes, GC-rooted via GTY.  */
static GTY(()) tree sysv_va_list_type_node;  /* SYSV ABI va_list.  */
static GTY(()) tree ms_va_list_type_node;    /* Microsoft ABI va_list.  */
/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
char internal_label_prefix[16];
/* Number of characters in internal_label_prefix.  */
int internal_label_prefix_len;
2651 /* Fence to use after loop using movnt. */
2654 /* Register class used for passing given 64bit part of the argument.
2655 These represent classes as documented by the PS ABI, with the exception
2656 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
2657 use SF or DFmode move instead of DImode to avoid reformatting penalties.
2659 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2660 whenever possible (upper half does contain padding). */
2661 enum x86_64_reg_class
2664 X86_64_INTEGER_CLASS,
2665 X86_64_INTEGERSI_CLASS,
2672 X86_64_COMPLEX_X87_CLASS,
2676 #define MAX_CLASSES 4
/* Table of constants used by fldpi, fldln2, etc....  */
static REAL_VALUE_TYPE ext_80387_constants_table [5];
/* Nonzero once ext_80387_constants_table has been initialized.  */
static bool ext_80387_constants_init = 0;
2683 static struct machine_function * ix86_init_machine_status (void);
2684 static rtx ix86_function_value (const_tree, const_tree, bool);
2685 static bool ix86_function_value_regno_p (const unsigned int);
2686 static unsigned int ix86_function_arg_boundary (enum machine_mode,
2688 static rtx ix86_static_chain (const_tree, bool);
2689 static int ix86_function_regparm (const_tree, const_tree);
2690 static void ix86_compute_frame_layout (struct ix86_frame *);
2691 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
2693 static void ix86_add_new_builtins (HOST_WIDE_INT);
2694 static tree ix86_canonical_va_list_type (tree);
2695 static void predict_jump (int);
2696 static unsigned int split_stack_prologue_scratch_regno (void);
2697 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2699 enum ix86_function_specific_strings
2701 IX86_FUNCTION_SPECIFIC_ARCH,
2702 IX86_FUNCTION_SPECIFIC_TUNE,
2703 IX86_FUNCTION_SPECIFIC_MAX
2706 static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
2707 const char *, enum fpmath_unit, bool);
2708 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
2709 static void ix86_function_specific_save (struct cl_target_option *);
2710 static void ix86_function_specific_restore (struct cl_target_option *);
2711 static void ix86_function_specific_print (FILE *, int,
2712 struct cl_target_option *);
2713 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2714 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2715 struct gcc_options *);
2716 static bool ix86_can_inline_p (tree, tree);
2717 static void ix86_set_current_function (tree);
2718 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2720 static enum calling_abi ix86_function_abi (const_tree);
2721 static rtx promote_duplicated_reg (enum machine_mode, rtx);
2722 static rtx promote_duplicated_reg_to_size (rtx, int, int, int);
2725 #ifndef SUBTARGET32_DEFAULT_CPU
2726 #define SUBTARGET32_DEFAULT_CPU "i386"
2729 /* The svr4 ABI for the i386 says that records and unions are returned
2731 #ifndef DEFAULT_PCC_STRUCT_RETURN
2732 #define DEFAULT_PCC_STRUCT_RETURN 1
2735 /* Whether -mtune= or -march= were specified */
2736 static int ix86_tune_defaulted;
2737 static int ix86_arch_specified;
2739 /* Vectorization library interface and handlers. */
2740 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2742 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2743 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2745 /* Processor target table, indexed by processor number */
2748 const struct processor_costs *cost; /* Processor costs */
2749 const int align_loop; /* Default alignments. */
2750 const int align_loop_max_skip; /* Max bytes to skip when aligning loops. */
2751 const int align_jump; /* Alignment for jump targets. */
2752 const int align_jump_max_skip; /* Max bytes to skip when aligning jumps. */
2753 const int align_func; /* Alignment for function entry points. */
/* One entry per PROCESSOR_* value, in enum order: {costs, align_loop,
   align_loop_max_skip, align_jump, align_jump_max_skip, align_func}.
   A 0 alignment means "use the compiler default".  */
2756 static const struct ptt processor_target_table[PROCESSOR_max] =
2758 {&i386_cost, 4, 3, 4, 3, 4},
2759 {&i486_cost, 16, 15, 16, 15, 16},
2760 {&pentium_cost, 16, 7, 16, 7, 16},
2761 {&pentiumpro_cost, 16, 15, 16, 10, 16},
2762 {&geode_cost, 0, 0, 0, 0, 0},
2763 {&k6_cost, 32, 7, 32, 7, 32},
2764 {&athlon_cost, 16, 7, 16, 7, 16},
2765 {&pentium4_cost, 0, 0, 0, 0, 0},
2766 {&k8_cost, 16, 7, 16, 7, 16},
2767 {&nocona_cost, 0, 0, 0, 0, 0},
2768 /* Core 2 32-bit. */
2769 {&core_cost, 16, 10, 16, 10, 16},
2770 /* Core 2 64-bit. */
2771 {&core_cost, 16, 10, 16, 10, 16},
2772 /* Core i7 32-bit. */
2773 {&core_cost, 16, 10, 16, 10, 16},
2774 /* Core i7 64-bit. */
2775 {&core_cost, 16, 10, 16, 10, 16},
2776 {&generic32_cost, 16, 7, 16, 7, 16},
2777 {&generic64_cost, 16, 10, 16, 10, 16},
2778 {&amdfam10_cost, 32, 24, 32, 7, 32},
2779 {&bdver1_cost, 32, 24, 32, 7, 32},
2780 {&bdver2_cost, 32, 24, 32, 7, 32},
2781 {&btver1_cost, 32, 24, 32, 7, 32},
2782 {&atom_cost, 16, 15, 16, 7, 16}
2785 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
2815 /* Return true if a red-zone is in use.  The red zone is only available
   when -mred-zone is in effect and we are not targeting the 64-bit
   Microsoft ABI (which does not define one).  */
2818 ix86_using_red_zone (void)
2820 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2823 /* Return a string that documents the current -m options.  The caller is
2824 responsible for freeing the string (it is allocated with xmalloc).
   ISA/FLAGS are the option masks still enabled; ARCH/TUNE/FPMATH name the
   -march=/-mtune=/-mfpmath= settings to report.  */
2827 ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
2828 const char *tune, enum fpmath_unit fpmath,
2831 struct ix86_target_opts
2833 const char *option; /* option string */
2834 HOST_WIDE_INT mask; /* isa mask options */
2837 /* This table is ordered so that options like -msse4.2 that imply
2838 preceding options are matched first, producing the shortest output.  */
2839 static struct ix86_target_opts isa_opts[] =
2841 { "-m64", OPTION_MASK_ISA_64BIT },
2842 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2843 { "-mfma", OPTION_MASK_ISA_FMA },
2844 { "-mxop", OPTION_MASK_ISA_XOP },
2845 { "-mlwp", OPTION_MASK_ISA_LWP },
2846 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2847 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2848 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2849 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2850 { "-msse3", OPTION_MASK_ISA_SSE3 },
2851 { "-msse2", OPTION_MASK_ISA_SSE2 },
2852 { "-msse", OPTION_MASK_ISA_SSE },
2853 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2854 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2855 { "-mmmx", OPTION_MASK_ISA_MMX },
2856 { "-mabm", OPTION_MASK_ISA_ABM },
2857 { "-mbmi", OPTION_MASK_ISA_BMI },
2858 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
2859 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
2860 { "-mtbm", OPTION_MASK_ISA_TBM },
2861 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2862 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2863 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2864 { "-maes", OPTION_MASK_ISA_AES },
2865 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2866 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2867 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2868 { "-mf16c", OPTION_MASK_ISA_F16C },
/* Non-ISA target flags, reported after the ISA options.  */
2872 static struct ix86_target_opts flag_opts[] =
2874 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2875 { "-m80387", MASK_80387 },
2876 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2877 { "-malign-double", MASK_ALIGN_DOUBLE },
2878 { "-mcld", MASK_CLD },
2879 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2880 { "-mieee-fp", MASK_IEEE_FP },
2881 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2882 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2883 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2884 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2885 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2886 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2887 { "-mno-red-zone", MASK_NO_RED_ZONE },
2888 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2889 { "-mrecip", MASK_RECIP },
2890 { "-mrtd", MASK_RTD },
2891 { "-msseregparm", MASK_SSEREGPARM },
2892 { "-mstack-arg-probe", MASK_STACK_PROBE },
2893 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2894 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2895 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2896 { "-mvzeroupper", MASK_VZEROUPPER },
2897 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
2898 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
2899 { "-mprefer-avx128", MASK_PREFER_AVX128},
/* opts collects {prefix, argument} pairs; the +6 leaves room for -march=,
   -mtune=, -mfpmath= and the "other isa"/"other flags" catch-alls.  */
2902 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2905 char target_other[40];
2914 memset (opts, '\0', sizeof (opts));
2916 /* Add -march= option. */
2919 opts[num][0] = "-march=";
2920 opts[num++][1] = arch;
2923 /* Add -mtune= option. */
2926 opts[num][0] = "-mtune=";
2927 opts[num++][1] = tune;
2930 /* Pick out the options in isa options. */
2931 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2933 if ((isa & isa_opts[i].mask) != 0)
2935 opts[num++][0] = isa_opts[i].option;
2936 isa &= ~ isa_opts[i].mask;
/* Any ISA bits not covered by the table are reported in raw hex.  */
2940 if (isa && add_nl_p)
2942 opts[num++][0] = isa_other;
2943 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
2947 /* Add flag options. */
2948 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2950 if ((flags & flag_opts[i].mask) != 0)
2952 opts[num++][0] = flag_opts[i].option;
2953 flags &= ~ flag_opts[i].mask;
/* Likewise report any leftover target flags in raw hex.  */
2957 if (flags && add_nl_p)
2959 opts[num++][0] = target_other;
2960 sprintf (target_other, "(other flags: %#x)", flags);
2963 /* Add -fpmath= option. */
2966 opts[num][0] = "-mfpmath=";
2967 switch ((int) fpmath)
2970 opts[num++][1] = "387";
2974 opts[num++][1] = "sse";
2977 case FPMATH_387 | FPMATH_SSE:
2978 opts[num++][1] = "sse+387";
2990 gcc_assert (num < ARRAY_SIZE (opts));
2992 /* Size the string. */
2994 sep_len = (add_nl_p) ? 3 : 1;
2995 for (i = 0; i < num; i++)
2998 for (j = 0; j < 2; j++)
3000 len += strlen (opts[i][j]);
3003 /* Build the string. */
3004 ret = ptr = (char *) xmalloc (len);
3007 for (i = 0; i < num; i++)
3011 for (j = 0; j < 2; j++)
3012 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
/* When newlines are allowed, wrap lines that would exceed 70 columns.  */
3019 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
3027 for (j = 0; j < 2; j++)
3030 memcpy (ptr, opts[i][j], len2[j]);
3032 line_len += len2[j];
3037 gcc_assert (ret + len >= ptr);
3042 /* Return true if profiling code should be emitted before the
3043 prologue, i.e. when -mfentry is in effect; otherwise return false.
3044 Note: on x86 the "hotfix" style of before-prologue profiling is
   otherwise not supported.  */
3046 ix86_profile_before_prologue (void)
3048 return flag_fentry != 0;
3051 /* Function that is callable from the debugger to print the current
   target options (via ix86_target_string) to stderr.  Prints
   "<no options>" when the option string is empty.  */
3054 ix86_debug_options (void)
3056 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
3057 ix86_arch_string, ix86_tune_string,
3062 fprintf (stderr, "%s\n\n", opts);
3066 fputs ("<no options>\n\n", stderr);
3071 /* Override various settings based on options. If MAIN_ARGS_P, the
3072 options are from the command line, otherwise they are from
3076 ix86_option_override_internal (bool main_args_p)
3079 unsigned int ix86_arch_mask, ix86_tune_mask;
3080 const bool ix86_tune_specified = (ix86_tune_string != NULL);
3085 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
3086 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
3087 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
3088 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
3089 #define PTA_AES (HOST_WIDE_INT_1 << 4)
3090 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
3091 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
3092 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
3093 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
3094 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
3095 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
3096 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
3097 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
3098 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
3099 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
3100 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
3101 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
3102 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
3103 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
3104 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
3105 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
3106 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
3107 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
3108 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
3109 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
3110 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
3111 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
3112 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
3113 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
3114 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
3115 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
3116 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
3117 /* if this reaches 64, need to widen struct pta flags below */
3121 const char *const name; /* processor name or nickname. */
3122 const enum processor_type processor;
3123 const enum attr_cpu schedule;
3124 const unsigned HOST_WIDE_INT flags;
3126 const processor_alias_table[] =
3128 {"i386", PROCESSOR_I386, CPU_NONE, 0},
3129 {"i486", PROCESSOR_I486, CPU_NONE, 0},
3130 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3131 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3132 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
3133 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
3134 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
3135 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
3136 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
3137 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3138 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3139 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
3140 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3142 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3144 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3145 PTA_MMX | PTA_SSE | PTA_SSE2},
3146 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
3147 PTA_MMX |PTA_SSE | PTA_SSE2},
3148 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
3149 PTA_MMX | PTA_SSE | PTA_SSE2},
3150 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
3151 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
3152 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
3153 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3154 | PTA_CX16 | PTA_NO_SAHF},
3155 {"core2", PROCESSOR_CORE2_64, CPU_CORE2,
3156 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3157 | PTA_SSSE3 | PTA_CX16},
3158 {"corei7", PROCESSOR_COREI7_64, CPU_COREI7,
3159 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3160 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_CX16},
3161 {"corei7-avx", PROCESSOR_COREI7_64, CPU_COREI7,
3162 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3163 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX
3164 | PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL},
3165 {"core-avx-i", PROCESSOR_COREI7_64, CPU_COREI7,
3166 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3167 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX
3168 | PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL | PTA_FSGSBASE
3169 | PTA_RDRND | PTA_F16C},
3170 {"core-avx2", PROCESSOR_COREI7_64, CPU_COREI7,
3171 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3172 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX | PTA_AVX2
3173 | PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL | PTA_FSGSBASE
3174 | PTA_RDRND | PTA_F16C | PTA_BMI | PTA_BMI2 | PTA_LZCNT
3175 | PTA_FMA | PTA_MOVBE},
3176 {"atom", PROCESSOR_ATOM, CPU_ATOM,
3177 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3178 | PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
3179 {"geode", PROCESSOR_GEODE, CPU_GEODE,
3180 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A |PTA_PREFETCH_SSE},
3181 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3182 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
3183 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
3184 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3185 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3186 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3187 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3188 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3189 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
3190 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3191 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
3192 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3193 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
3194 {"x86-64", PROCESSOR_K8, CPU_K8,
3195 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
3196 {"k8", PROCESSOR_K8, CPU_K8,
3197 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3198 | PTA_SSE2 | PTA_NO_SAHF},
3199 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3200 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3201 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3202 {"opteron", PROCESSOR_K8, CPU_K8,
3203 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3204 | PTA_SSE2 | PTA_NO_SAHF},
3205 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3206 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3207 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3208 {"athlon64", PROCESSOR_K8, CPU_K8,
3209 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3210 | PTA_SSE2 | PTA_NO_SAHF},
3211 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3212 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3213 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3214 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3215 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3216 | PTA_SSE2 | PTA_NO_SAHF},
3217 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3218 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3219 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
3220 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3221 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3222 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
3223 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3224 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3225 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3226 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3227 | PTA_XOP | PTA_LWP},
3228 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3229 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3230 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3231 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3232 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3234 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC64,
3235 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3236 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16},
3237 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
3238 0 /* flags are only used for -march switch. */ },
3239 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
3240 PTA_64BIT /* flags are only used for -march switch. */ },
3243 /* -mrecip options. */
3246 const char *string; /* option name */
3247 unsigned int mask; /* mask bits to set */
3249 const recip_options[] =
3251 { "all", RECIP_MASK_ALL },
3252 { "none", RECIP_MASK_NONE },
3253 { "div", RECIP_MASK_DIV },
3254 { "sqrt", RECIP_MASK_SQRT },
3255 { "vec-div", RECIP_MASK_VEC_DIV },
3256 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
3259 int const pta_size = ARRAY_SIZE (processor_alias_table);
3261 /* Set up prefix/suffix so the error messages refer to either the command
3262 line argument, or the attribute(target). */
3271 prefix = "option(\"";
3276 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3277 SUBTARGET_OVERRIDE_OPTIONS;
3280 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3281 SUBSUBTARGET_OVERRIDE_OPTIONS;
3285 ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3287 /* -fPIC is the default for x86_64. */
3288 if (TARGET_MACHO && TARGET_64BIT)
3291 /* Need to check -mtune=generic first. */
3292 if (ix86_tune_string)
3294 if (!strcmp (ix86_tune_string, "generic")
3295 || !strcmp (ix86_tune_string, "i686")
3296 /* As special support for cross compilers we read -mtune=native
3297 as -mtune=generic. With native compilers we won't see the
3298 -mtune=native, as it was changed by the driver. */
3299 || !strcmp (ix86_tune_string, "native"))
3302 ix86_tune_string = "generic64";
3304 ix86_tune_string = "generic32";
3306 /* If this call is for setting the option attribute, allow the
3307 generic32/generic64 that was previously set. */
3308 else if (!main_args_p
3309 && (!strcmp (ix86_tune_string, "generic32")
3310 || !strcmp (ix86_tune_string, "generic64")))
3312 else if (!strncmp (ix86_tune_string, "generic", 7))
3313 error ("bad value (%s) for %stune=%s %s",
3314 ix86_tune_string, prefix, suffix, sw);
3315 else if (!strcmp (ix86_tune_string, "x86-64"))
3316 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3317 "%stune=k8%s or %stune=generic%s instead as appropriate",
3318 prefix, suffix, prefix, suffix, prefix, suffix);
3322 if (ix86_arch_string)
3323 ix86_tune_string = ix86_arch_string;
3324 if (!ix86_tune_string)
3326 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
3327 ix86_tune_defaulted = 1;
3330 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
3331 need to use a sensible tune option. */
3332 if (!strcmp (ix86_tune_string, "generic")
3333 || !strcmp (ix86_tune_string, "x86-64")
3334 || !strcmp (ix86_tune_string, "i686"))
3337 ix86_tune_string = "generic64";
3339 ix86_tune_string = "generic32";
3343 if (ix86_stringop_alg == rep_prefix_8_byte && !TARGET_64BIT)
3345 /* rep; movq isn't available in 32-bit code. */
3346 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3347 ix86_stringop_alg = no_stringop;
3350 if (!ix86_arch_string)
3351 ix86_arch_string = TARGET_64BIT ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3353 ix86_arch_specified = 1;
3355 if (!global_options_set.x_ix86_abi)
3356 ix86_abi = DEFAULT_ABI;
3358 if (global_options_set.x_ix86_cmodel)
3360 switch (ix86_cmodel)
3365 ix86_cmodel = CM_SMALL_PIC;
3367 error ("code model %qs not supported in the %s bit mode",
3374 ix86_cmodel = CM_MEDIUM_PIC;
3376 error ("code model %qs not supported in the %s bit mode",
3378 else if (TARGET_X32)
3379 error ("code model %qs not supported in x32 mode",
3386 ix86_cmodel = CM_LARGE_PIC;
3388 error ("code model %qs not supported in the %s bit mode",
3390 else if (TARGET_X32)
3391 error ("code model %qs not supported in x32 mode",
3397 error ("code model %s does not support PIC mode", "32");
3399 error ("code model %qs not supported in the %s bit mode",
3406 error ("code model %s does not support PIC mode", "kernel");
3407 ix86_cmodel = CM_32;
3410 error ("code model %qs not supported in the %s bit mode",
3420 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3421 use of rip-relative addressing. This eliminates fixups that
3422 would otherwise be needed if this object is to be placed in a
3423 DLL, and is essentially just as efficient as direct addressing. */
3424 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
3425 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
3426 else if (TARGET_64BIT)
3427 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
3429 ix86_cmodel = CM_32;
3431 if (TARGET_MACHO && ix86_asm_dialect == ASM_INTEL)
3433 error ("-masm=intel not supported in this configuration");
3434 ix86_asm_dialect = ASM_ATT;
3436 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3437 sorry ("%i-bit mode not compiled in",
3438 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3440 for (i = 0; i < pta_size; i++)
3441 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
3443 ix86_schedule = processor_alias_table[i].schedule;
3444 ix86_arch = processor_alias_table[i].processor;
3445 /* Default cpu tuning to the architecture. */
3446 ix86_tune = ix86_arch;
3448 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3449 error ("CPU you selected does not support x86-64 "
3452 if (processor_alias_table[i].flags & PTA_MMX
3453 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3454 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3455 if (processor_alias_table[i].flags & PTA_3DNOW
3456 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3457 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3458 if (processor_alias_table[i].flags & PTA_3DNOW_A
3459 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3460 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3461 if (processor_alias_table[i].flags & PTA_SSE
3462 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3463 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3464 if (processor_alias_table[i].flags & PTA_SSE2
3465 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3466 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3467 if (processor_alias_table[i].flags & PTA_SSE3
3468 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3469 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3470 if (processor_alias_table[i].flags & PTA_SSSE3
3471 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3472 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3473 if (processor_alias_table[i].flags & PTA_SSE4_1
3474 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3475 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3476 if (processor_alias_table[i].flags & PTA_SSE4_2
3477 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3478 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3479 if (processor_alias_table[i].flags & PTA_AVX
3480 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3481 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3482 if (processor_alias_table[i].flags & PTA_AVX2
3483 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
3484 ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
3485 if (processor_alias_table[i].flags & PTA_FMA
3486 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3487 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3488 if (processor_alias_table[i].flags & PTA_SSE4A
3489 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3490 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3491 if (processor_alias_table[i].flags & PTA_FMA4
3492 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3493 ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3494 if (processor_alias_table[i].flags & PTA_XOP
3495 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3496 ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3497 if (processor_alias_table[i].flags & PTA_LWP
3498 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3499 ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3500 if (processor_alias_table[i].flags & PTA_ABM
3501 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3502 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3503 if (processor_alias_table[i].flags & PTA_BMI
3504 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3505 ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3506 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
3507 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
3508 ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
3509 if (processor_alias_table[i].flags & PTA_TBM
3510 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3511 ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3512 if (processor_alias_table[i].flags & PTA_BMI2
3513 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
3514 ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
3515 if (processor_alias_table[i].flags & PTA_CX16
3516 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3517 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3518 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3519 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3520 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3521 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
3522 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3523 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3524 if (processor_alias_table[i].flags & PTA_MOVBE
3525 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3526 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3527 if (processor_alias_table[i].flags & PTA_AES
3528 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3529 ix86_isa_flags |= OPTION_MASK_ISA_AES;
3530 if (processor_alias_table[i].flags & PTA_PCLMUL
3531 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3532 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3533 if (processor_alias_table[i].flags & PTA_FSGSBASE
3534 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3535 ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3536 if (processor_alias_table[i].flags & PTA_RDRND
3537 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3538 ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3539 if (processor_alias_table[i].flags & PTA_F16C
3540 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3541 ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3542 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3543 x86_prefetch_sse = true;
3548 if (!strcmp (ix86_arch_string, "generic"))
3549 error ("generic CPU can be used only for %stune=%s %s",
3550 prefix, suffix, sw);
3551 else if (!strncmp (ix86_arch_string, "generic", 7) || i == pta_size)
3552 error ("bad value (%s) for %sarch=%s %s",
3553 ix86_arch_string, prefix, suffix, sw);
3555 ix86_arch_mask = 1u << ix86_arch;
3556 for (i = 0; i < X86_ARCH_LAST; ++i)
3557 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3559 for (i = 0; i < pta_size; i++)
3560 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
3562 ix86_schedule = processor_alias_table[i].schedule;
3563 ix86_tune = processor_alias_table[i].processor;
3566 if (!(processor_alias_table[i].flags & PTA_64BIT))
3568 if (ix86_tune_defaulted)
3570 ix86_tune_string = "x86-64";
3571 for (i = 0; i < pta_size; i++)
3572 if (! strcmp (ix86_tune_string,
3573 processor_alias_table[i].name))
3575 ix86_schedule = processor_alias_table[i].schedule;
3576 ix86_tune = processor_alias_table[i].processor;
3579 error ("CPU you selected does not support x86-64 "
3585 /* Adjust tuning when compiling for 32-bit ABI. */
3588 case PROCESSOR_GENERIC64:
3589 ix86_tune = PROCESSOR_GENERIC32;
3590 ix86_schedule = CPU_PENTIUMPRO;
3593 case PROCESSOR_CORE2_64:
3594 ix86_tune = PROCESSOR_CORE2_32;
3597 case PROCESSOR_COREI7_64:
3598 ix86_tune = PROCESSOR_COREI7_32;
3605 /* Intel CPUs have always interpreted SSE prefetch instructions as
3606 NOPs; so, we can enable SSE prefetch instructions even when
3607 -mtune (rather than -march) points us to a processor that has them.
3608 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3609 higher processors. */
3611 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3612 x86_prefetch_sse = true;
3616 if (ix86_tune_specified && i == pta_size)
3617 error ("bad value (%s) for %stune=%s %s",
3618 ix86_tune_string, prefix, suffix, sw);
3620 ix86_tune_mask = 1u << ix86_tune;
3621 for (i = 0; i < X86_TUNE_LAST; ++i)
3622 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3624 #ifndef USE_IX86_FRAME_POINTER
3625 #define USE_IX86_FRAME_POINTER 0
3628 #ifndef USE_X86_64_FRAME_POINTER
3629 #define USE_X86_64_FRAME_POINTER 0
3632 /* Set the default values for switches whose default depends on TARGET_64BIT
3633 in case they weren't overwritten by command line options. */
3636 if (optimize > 1 && !global_options_set.x_flag_zee)
3638 if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
3639 flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3640 if (flag_asynchronous_unwind_tables == 2)
3641 flag_unwind_tables = flag_asynchronous_unwind_tables = 1;
3642 if (flag_pcc_struct_return == 2)
3643 flag_pcc_struct_return = 0;
3647 if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
3648 flag_omit_frame_pointer = !(USE_IX86_FRAME_POINTER || optimize_size);
3649 if (flag_asynchronous_unwind_tables == 2)
3650 flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3651 if (flag_pcc_struct_return == 2)
3652 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3656 ix86_cost = &ix86_size_cost;
3658 ix86_cost = processor_target_table[ix86_tune].cost;
3660 /* Arrange to set up i386_stack_locals for all functions. */
3661 init_machine_status = ix86_init_machine_status;
3663 /* Validate -mregparm= value. */
3664 if (global_options_set.x_ix86_regparm)
3667 warning (0, "-mregparm is ignored in 64-bit mode");
3668 if (ix86_regparm > REGPARM_MAX)
3670 error ("-mregparm=%d is not between 0 and %d",
3671 ix86_regparm, REGPARM_MAX);
3676 ix86_regparm = REGPARM_MAX;
3678 /* Default align_* from the processor table. */
3679 if (align_loops == 0)
3681 align_loops = processor_target_table[ix86_tune].align_loop;
3682 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3684 if (align_jumps == 0)
3686 align_jumps = processor_target_table[ix86_tune].align_jump;
3687 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3689 if (align_functions == 0)
3691 align_functions = processor_target_table[ix86_tune].align_func;
3694 /* Provide default for -mbranch-cost= value. */
3695 if (!global_options_set.x_ix86_branch_cost)
3696 ix86_branch_cost = ix86_cost->branch_cost;
3700 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3702 /* Enable by default the SSE and MMX builtins. Do allow the user to
3703 explicitly disable any of these. In particular, disabling SSE and
3704 MMX for kernel code is extremely useful. */
3705 if (!ix86_arch_specified)
3707 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3708 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3711 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3715 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3717 if (!ix86_arch_specified)
3719 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3721 /* The i386 ABI does not specify a red zone.  It still makes sense to use one
3722 when the programmer takes care to keep the stack from being clobbered. */
3723 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3724 target_flags |= MASK_NO_RED_ZONE;
3727 /* Keep nonleaf frame pointers. */
3728 if (flag_omit_frame_pointer)
3729 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3730 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3731 flag_omit_frame_pointer = 1;
3733 /* If we're doing fast math, we don't care about comparison order
3734 wrt NaNs. This lets us use a shorter comparison sequence. */
3735 if (flag_finite_math_only)
3736 target_flags &= ~MASK_IEEE_FP;
3738 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3739 since the insns won't need emulation. */
3740 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3741 target_flags &= ~MASK_NO_FANCY_MATH_387;
3743 /* Likewise, if the target doesn't have a 387, or we've specified
3744 software floating point, don't use 387 inline intrinsics. */
3746 target_flags |= MASK_NO_FANCY_MATH_387;
3748 /* Turn on MMX builtins for -msse. */
3751 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3752 x86_prefetch_sse = true;
3755 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3756 if (TARGET_SSE4_2 || TARGET_ABM)
3757 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3759 /* Turn on lzcnt instruction for -mabm. */
3761 ix86_isa_flags |= OPTION_MASK_ISA_LZCNT & ~ix86_isa_flags_explicit;
3763 /* Validate -mpreferred-stack-boundary= value or default it to
3764 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3765 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3766 if (global_options_set.x_ix86_preferred_stack_boundary_arg)
3768 int min = (TARGET_64BIT ? 4 : 2);
3769 int max = (TARGET_SEH ? 4 : 12);
3771 if (ix86_preferred_stack_boundary_arg < min
3772 || ix86_preferred_stack_boundary_arg > max)
3775 error ("-mpreferred-stack-boundary is not supported "
3778 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
3779 ix86_preferred_stack_boundary_arg, min, max);
3782 ix86_preferred_stack_boundary
3783 = (1 << ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
3786 /* Set the default value for -mstackrealign. */
3787 if (ix86_force_align_arg_pointer == -1)
3788 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3790 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3792 /* Validate -mincoming-stack-boundary= value or default it to
3793 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3794 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3795 if (global_options_set.x_ix86_incoming_stack_boundary_arg)
3797 if (ix86_incoming_stack_boundary_arg < (TARGET_64BIT ? 4 : 2)
3798 || ix86_incoming_stack_boundary_arg > 12)
3799 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3800 ix86_incoming_stack_boundary_arg, TARGET_64BIT ? 4 : 2);
3803 ix86_user_incoming_stack_boundary
3804 = (1 << ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
3805 ix86_incoming_stack_boundary
3806 = ix86_user_incoming_stack_boundary;
3810 /* Accept -msseregparm only if at least SSE support is enabled. */
3811 if (TARGET_SSEREGPARM
3813 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3815 if (global_options_set.x_ix86_fpmath)
3817 if (ix86_fpmath & FPMATH_SSE)
3821 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3822 ix86_fpmath = FPMATH_387;
3824 else if ((ix86_fpmath & FPMATH_387) && !TARGET_80387)
3826 warning (0, "387 instruction set disabled, using SSE arithmetics");
3827 ix86_fpmath = FPMATH_SSE;
3832 ix86_fpmath = TARGET_FPMATH_DEFAULT;
3834 /* If the i387 is disabled, then do not return values in it. */
3836 target_flags &= ~MASK_FLOAT_RETURNS;
3838 /* Use external vectorized library in vectorizing intrinsics. */
3839 if (global_options_set.x_ix86_veclibabi_type)
3840 switch (ix86_veclibabi_type)
3842 case ix86_veclibabi_type_svml:
3843 ix86_veclib_handler = ix86_veclibabi_svml;
3846 case ix86_veclibabi_type_acml:
3847 ix86_veclib_handler = ix86_veclibabi_acml;
3854 if ((!USE_IX86_FRAME_POINTER
3855 || (x86_accumulate_outgoing_args & ix86_tune_mask))
3856 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3858 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3860 /* ??? Unwind info is not correct around the CFG unless either a frame
3861 pointer is present or M_A_O_A is set. Fixing this requires rewriting
3862 unwind info generation to be aware of the CFG and propagating states
3864 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
3865 || flag_exceptions || flag_non_call_exceptions)
3866 && flag_omit_frame_pointer
3867 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3869 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3870 warning (0, "unwind tables currently require either a frame pointer "
3871 "or %saccumulate-outgoing-args%s for correctness",
3873 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3876 /* If stack probes are required, the space used for large function
3877 arguments on the stack must also be probed, so enable
3878 -maccumulate-outgoing-args so this happens in the prologue. */
3879 if (TARGET_STACK_PROBE
3880 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3882 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3883 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3884 "for correctness", prefix, suffix);
3885 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3888 /* For sane SSE instruction set generation we need fcomi instruction.
3889 It is safe to enable all CMOVE instructions. Also, RDRAND intrinsic
3890 expands to a sequence that includes conditional move. */
3891 if (TARGET_SSE || TARGET_RDRND)
3894 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3897 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
3898 p = strchr (internal_label_prefix, 'X');
3899 internal_label_prefix_len = p - internal_label_prefix;
3903 /* When scheduling description is not available, disable scheduler pass
3904 so it won't slow down the compilation and make x87 code slower. */
3905 if (!TARGET_SCHEDULE)
3906 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
3908 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3909 ix86_cost->simultaneous_prefetches,
3910 global_options.x_param_values,
3911 global_options_set.x_param_values);
3912 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, ix86_cost->prefetch_block,
3913 global_options.x_param_values,
3914 global_options_set.x_param_values);
3915 maybe_set_param_value (PARAM_L1_CACHE_SIZE, ix86_cost->l1_cache_size,
3916 global_options.x_param_values,
3917 global_options_set.x_param_values);
3918 maybe_set_param_value (PARAM_L2_CACHE_SIZE, ix86_cost->l2_cache_size,
3919 global_options.x_param_values,
3920 global_options_set.x_param_values);
3922 /* Enable sw prefetching at -O3 for CPUS that prefetching is helpful. */
3923 if (flag_prefetch_loop_arrays < 0
3926 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
3927 flag_prefetch_loop_arrays = 1;
3929 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3930 can be optimized to ap = __builtin_next_arg (0). */
3931 if (!TARGET_64BIT && !flag_split_stack)
3932 targetm.expand_builtin_va_start = NULL;
3936 ix86_gen_leave = gen_leave_rex64;
3937 ix86_gen_add3 = gen_adddi3;
3938 ix86_gen_sub3 = gen_subdi3;
3939 ix86_gen_sub3_carry = gen_subdi3_carry;
3940 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
3941 ix86_gen_monitor = gen_sse3_monitor64;
3942 ix86_gen_andsp = gen_anddi3;
3943 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
3944 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
3945 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
3949 ix86_gen_leave = gen_leave;
3950 ix86_gen_add3 = gen_addsi3;
3951 ix86_gen_sub3 = gen_subsi3;
3952 ix86_gen_sub3_carry = gen_subsi3_carry;
3953 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
3954 ix86_gen_monitor = gen_sse3_monitor;
3955 ix86_gen_andsp = gen_andsi3;
3956 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
3957 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
3958 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
3962 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
3964 target_flags |= MASK_CLD & ~target_flags_explicit;
3967 if (!TARGET_64BIT && flag_pic)
3969 if (flag_fentry > 0)
3970 sorry ("-mfentry isn%'t supported for 32-bit in combination "
3974 else if (TARGET_SEH)
3976 if (flag_fentry == 0)
3977 sorry ("-mno-fentry isn%'t compatible with SEH");
3980 else if (flag_fentry < 0)
3982 #if defined(PROFILE_BEFORE_PROLOGUE)
3991 /* When not optimize for size, enable vzeroupper optimization for
3992 TARGET_AVX with -fexpensive-optimizations and split 32-byte
3993 AVX unaligned load/store. */
3996 if (flag_expensive_optimizations
3997 && !(target_flags_explicit & MASK_VZEROUPPER))
3998 target_flags |= MASK_VZEROUPPER;
3999 if ((x86_avx256_split_unaligned_load & ix86_tune_mask)
4000 && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
4001 target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
4002 if ((x86_avx256_split_unaligned_store & ix86_tune_mask)
4003 && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_STORE))
4004 target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
4005 /* Enable 128-bit AVX instruction generation for the auto-vectorizer. */
4006 if (TARGET_AVX128_OPTIMAL && !(target_flags_explicit & MASK_PREFER_AVX128))
4007 target_flags |= MASK_PREFER_AVX128;
4012 /* Disable vzeroupper pass if TARGET_AVX is disabled. */
4013 target_flags &= ~MASK_VZEROUPPER;
4016 if (ix86_recip_name)
4018 char *p = ASTRDUP (ix86_recip_name);
4020 unsigned int mask, i;
4023 while ((q = strtok (p, ",")) != NULL)
4034 if (!strcmp (q, "default"))
4035 mask = RECIP_MASK_ALL;
4038 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4039 if (!strcmp (q, recip_options[i].string))
4041 mask = recip_options[i].mask;
4045 if (i == ARRAY_SIZE (recip_options))
4047 error ("unknown option for -mrecip=%s", q);
4049 mask = RECIP_MASK_NONE;
4053 recip_mask_explicit |= mask;
4055 recip_mask &= ~mask;
4062 recip_mask |= RECIP_MASK_ALL & ~recip_mask_explicit;
4063 else if (target_flags_explicit & MASK_RECIP)
4064 recip_mask &= ~(RECIP_MASK_ALL & ~recip_mask_explicit);
4066 /* Save the initial options in case the user does function specific
4069 target_option_default_node = target_option_current_node
4070 = build_target_option_node ();
4073 /* Return TRUE if VAL is passed in register with 256bit AVX modes. */
/* NOTE(review): this excerpt is missing several original lines of this
   function (the return statements and closing braces); comments below
   annotate only the visible logic.  */
4076 function_pass_avx256_p (const_rtx val)
/* A bare hard register in a 256-bit AVX vector mode is an AVX-reg
   argument.  */
4081 if (REG_P (val) && VALID_AVX256_REG_MODE (GET_MODE (val)))
/* A PARALLEL describes a value split across multiple locations; scan
   every element for a register piece in OImode or a 256-bit AVX mode.  */
4084 if (GET_CODE (val) == PARALLEL)
4089 for (i = XVECLEN (val, 0) - 1; i >= 0; i--)
4091 r = XVECEXP (val, 0, i);
4092 if (GET_CODE (r) == EXPR_LIST
4094 && REG_P (XEXP (r, 0))
4095 && (GET_MODE (XEXP (r, 0)) == OImode
4096 || VALID_AVX256_REG_MODE (GET_MODE (XEXP (r, 0)))))
4104 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4107 ix86_option_override (void)
/* All real option processing is in ix86_option_override_internal;
   TRUE marks this as the main (command-line driven) invocation, as
   opposed to the attribute-driven call with FALSE elsewhere in this
   file.  */
4109 ix86_option_override_internal (true);
4112 /* Update register usage after having seen the compiler flags. */
4115 ix86_conditional_register_usage (void)
/* Values > 1 in fixed_regs/call_used_regs encode a conditional setting:
   presumably 2 means "32-bit only" and 3 means "64-bit only" — decode
   them against TARGET_64BIT here.  TODO confirm against the
   REG_ALLOC_ORDER / FIXED_REGISTERS tables in i386.h.  */
4120 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4122 if (fixed_regs[i] > 1)
4123 fixed_regs[i] = (fixed_regs[i] == (TARGET_64BIT ? 3 : 2));
4124 if (call_used_regs[i] > 1)
4125 call_used_regs[i] = (call_used_regs[i] == (TARGET_64BIT ? 3 : 2));
4128 /* The PIC register, if it exists, is fixed. */
4129 j = PIC_OFFSET_TABLE_REGNUM;
4130 if (j != INVALID_REGNUM)
4131 fixed_regs[j] = call_used_regs[j] = 1;
4133 /* The 64-bit MS_ABI changes the set of call-used registers. */
/* Under the Microsoft ABI, RSI/RDI and XMM6-XMM15 are callee-saved,
   so mark them as not call-used.  */
4134 if (TARGET_64BIT_MS_ABI)
4136 call_used_regs[SI_REG] = 0;
4137 call_used_regs[DI_REG] = 0;
4138 call_used_regs[XMM6_REG] = 0;
4139 call_used_regs[XMM7_REG] = 0;
4140 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4141 call_used_regs[i] = 0;
4144 /* The default setting of CLOBBERED_REGS is for 32-bit; add in the
4145 other call-clobbered regs for 64-bit. */
/* Rebuild CLOBBERED_REGS from scratch as the call-used subset of
   GENERAL_REGS.  */
4148 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
4150 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4151 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
4152 && call_used_regs[i])
4153 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
4156 /* If MMX is disabled, squash the registers. */
/* "Squashing" = mark fixed + call-used and erase the register name so
   the register allocator and assembler output never touch it.  */
4158 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4159 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
4160 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4162 /* If SSE is disabled, squash the registers. */
4164 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4165 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
4166 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4168 /* If the FPU is disabled, squash the registers. */
4169 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
4170 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4171 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
4172 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4174 /* If 32-bit, squash the 64-bit registers. */
/* NOTE(review): the loop bodies for these two loops are missing from
   this excerpt; presumably they squash R8-R15 and XMM8-XMM15 the same
   way as above — verify against the full source.  */
4177 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
4179 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4185 /* Save the current options */
/* Implementation of TARGET_OPTION_SAVE: copy the current global target
   option state into PTR so it can later be restored by
   ix86_function_specific_restore.  */
4188 ix86_function_specific_save (struct cl_target_option *ptr)
4190 ptr->arch = ix86_arch;
4191 ptr->schedule = ix86_schedule;
4192 ptr->tune = ix86_tune;
4193 ptr->branch_cost = ix86_branch_cost;
4194 ptr->tune_defaulted = ix86_tune_defaulted;
4195 ptr->arch_specified = ix86_arch_specified;
4196 ptr->x_ix86_isa_flags_explicit = ix86_isa_flags_explicit;
4197 ptr->ix86_target_flags_explicit = target_flags_explicit;
4198 ptr->x_recip_mask_explicit = recip_mask_explicit;
4200 /* The fields are char but the variables are not; make sure the
4201 values fit in the fields. */
/* If any of these asserts fires, the narrow struct fields silently
   truncated the value during the assignments above.  */
4202 gcc_assert (ptr->arch == ix86_arch);
4203 gcc_assert (ptr->schedule == ix86_schedule);
4204 gcc_assert (ptr->tune == ix86_tune);
4205 gcc_assert (ptr->branch_cost == ix86_branch_cost);
4208 /* Restore the current options */
/* Implementation of TARGET_OPTION_RESTORE: the inverse of
   ix86_function_specific_save.  Also recomputes the derived per-arch
   and per-tune feature arrays, but only when the arch/tune actually
   changed, to keep repeated restores cheap.  */
4211 ix86_function_specific_restore (struct cl_target_option *ptr)
4213 enum processor_type old_tune = ix86_tune;
4214 enum processor_type old_arch = ix86_arch;
4215 unsigned int ix86_arch_mask, ix86_tune_mask;
4218 ix86_arch = (enum processor_type) ptr->arch;
4219 ix86_schedule = (enum attr_cpu) ptr->schedule;
4220 ix86_tune = (enum processor_type) ptr->tune;
4221 ix86_branch_cost = ptr->branch_cost;
4222 ix86_tune_defaulted = ptr->tune_defaulted;
4223 ix86_arch_specified = ptr->arch_specified;
4224 ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
4225 target_flags_explicit = ptr->ix86_target_flags_explicit;
4226 recip_mask_explicit = ptr->x_recip_mask_explicit;
4228 /* Recreate the arch feature tests if the arch changed */
4229 if (old_arch != ix86_arch)
/* Each feature is stored as a bitmask over processor types; a feature
   is enabled when the bit for the current arch is set.  */
4231 ix86_arch_mask = 1u << ix86_arch;
4232 for (i = 0; i < X86_ARCH_LAST; ++i)
4233 ix86_arch_features[i]
4234 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4237 /* Recreate the tune optimization tests */
4238 if (old_tune != ix86_tune)
4240 ix86_tune_mask = 1u << ix86_tune;
4241 for (i = 0; i < X86_TUNE_LAST; ++i)
4242 ix86_tune_features[i]
4243 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
4247 /* Print the current options */
/* Implementation of TARGET_OPTION_PRINT: dump the target options in PTR
   to FILE at the given INDENT, for -fdump / debugging purposes.  */
4250 ix86_function_specific_print (FILE *file, int indent,
4251 struct cl_target_option *ptr)
/* ix86_target_string builds a heap-allocated, human-readable option
   string; it is freed at the bottom of this function.  */
4254 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
4255 NULL, NULL, ptr->x_ix86_fpmath, false);
/* Print the arch/tune both numerically and, when in range, by name.  */
4257 fprintf (file, "%*sarch = %d (%s)\n",
4260 ((ptr->arch < TARGET_CPU_DEFAULT_max)
4261 ? cpu_names[ptr->arch]
4264 fprintf (file, "%*stune = %d (%s)\n",
4267 ((ptr->tune < TARGET_CPU_DEFAULT_max)
4268 ? cpu_names[ptr->tune]
4271 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4275 fprintf (file, "%*s%s\n", indent, "", target_string);
4276 free (target_string);
4281 /* Inner function to process the attribute((target(...))), take an argument and
4282 set the current options from the argument. If we have a list, recursively go
/* Returns true on success, false after diagnosing an unknown or
   duplicate option.  P_STRINGS collects xstrdup'ed values of the string
   options (arch=/tune=) indexed by IX86_FUNCTION_SPECIFIC_*; the caller
   owns and frees them.  ENUM_OPTS_SET records which enumerated options
   (fpmath=) were explicitly set.  */
4286 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
4287 struct gcc_options *enum_opts_set)
/* Table-entry builders: string, its length, handler kind, option enum,
   and (for yes/no options) the target_flags mask.  */
4292 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4293 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4294 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4295 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4296 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4312 enum ix86_opt_type type;
/* ISA options: each maps the attribute string to the corresponding -m
   command-line option.  */
4317 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4318 IX86_ATTR_ISA ("abm", OPT_mabm),
4319 IX86_ATTR_ISA ("bmi", OPT_mbmi),
4320 IX86_ATTR_ISA ("bmi2", OPT_mbmi2),
4321 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
4322 IX86_ATTR_ISA ("tbm", OPT_mtbm),
4323 IX86_ATTR_ISA ("aes", OPT_maes),
4324 IX86_ATTR_ISA ("avx", OPT_mavx),
4325 IX86_ATTR_ISA ("avx2", OPT_mavx2),
4326 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4327 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4328 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4329 IX86_ATTR_ISA ("sse", OPT_msse),
4330 IX86_ATTR_ISA ("sse2", OPT_msse2),
4331 IX86_ATTR_ISA ("sse3", OPT_msse3),
4332 IX86_ATTR_ISA ("sse4", OPT_msse4),
4333 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4334 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4335 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4336 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4337 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4338 IX86_ATTR_ISA ("fma", OPT_mfma),
4339 IX86_ATTR_ISA ("xop", OPT_mxop),
4340 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4341 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4342 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4343 IX86_ATTR_ISA ("f16c", OPT_mf16c),
/* Enumerated option (takes a value after '=').  */
4346 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
4348 /* string options */
4349 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4350 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
/* Flag options that directly set/clear bits in target_flags.  */
4353 IX86_ATTR_YES ("cld",
4357 IX86_ATTR_NO ("fancy-math-387",
4358 OPT_mfancy_math_387,
4359 MASK_NO_FANCY_MATH_387),
4361 IX86_ATTR_YES ("ieee-fp",
4365 IX86_ATTR_YES ("inline-all-stringops",
4366 OPT_minline_all_stringops,
4367 MASK_INLINE_ALL_STRINGOPS),
4369 IX86_ATTR_YES ("inline-stringops-dynamically",
4370 OPT_minline_stringops_dynamically,
4371 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4373 IX86_ATTR_NO ("align-stringops",
4374 OPT_mno_align_stringops,
4375 MASK_NO_ALIGN_STRINGOPS),
4377 IX86_ATTR_YES ("recip",
4383 /* If this is a list, recurse to get the options. */
4384 if (TREE_CODE (args) == TREE_LIST)
4388 for (; args; args = TREE_CHAIN (args))
4389 if (TREE_VALUE (args)
4390 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
4391 p_strings, enum_opts_set))
4397 else if (TREE_CODE (args) != STRING_CST)
4400 /* Handle multiple arguments separated by commas. */
4401 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4403 while (next_optstr && *next_optstr != '\0')
4405 char *p = next_optstr;
4407 char *comma = strchr (next_optstr, ',');
4408 const char *opt_string;
4409 size_t len, opt_len;
4414 enum ix86_opt_type type = ix86_opt_unknown;
/* Split off the current comma-separated token; next_optstr advances
   past the comma for the next iteration.  */
4420 len = comma - next_optstr;
4421 next_optstr = comma + 1;
4429 /* Recognize no-xxx. */
4430 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4439 /* Find the option. */
/* Linear search of the attrs table; first-character check is a cheap
   pre-filter before the full memcmp.  */
4442 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4444 type = attrs[i].type;
4445 opt_len = attrs[i].len;
4446 if (ch == attrs[i].string[0]
4447 && ((type != ix86_opt_str && type != ix86_opt_enum)
4450 && memcmp (p, attrs[i].string, opt_len) == 0)
4453 mask = attrs[i].mask;
4454 opt_string = attrs[i].string;
4459 /* Process the option. */
4462 error ("attribute(target(\"%s\")) is unknown", orig_p);
/* ISA options are routed through the regular option machinery so that
   implied ISA bits are handled identically to the command line.  */
4466 else if (type == ix86_opt_isa)
4468 struct cl_decoded_option decoded;
4470 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
4471 ix86_handle_option (&global_options, &global_options_set,
4472 &decoded, input_location);
4475 else if (type == ix86_opt_yes || type == ix86_opt_no)
/* For _NO entries the mask has inverted polarity, so flip the sense.  */
4477 if (type == ix86_opt_no)
4478 opt_set_p = !opt_set_p;
4481 target_flags |= mask;
4483 target_flags &= ~mask;
4486 else if (type == ix86_opt_str)
4490 error ("option(\"%s\") was already specified", opt_string);
/* Ownership of the duplicated value passes to the caller via
   p_strings.  */
4494 p_strings[opt] = xstrdup (p + opt_len);
4497 else if (type == ix86_opt_enum)
4502 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
4504 set_option (&global_options, enum_opts_set, opt, value,
4505 p + opt_len, DK_UNSPECIFIED, input_location,
4509 error ("attribute(target(\"%s\")) is unknown", orig_p);
4521 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
/* Parses ARGS (the payload of attribute((target(...)))), applies the
   options on top of the defaults, reruns option override, and captures
   the result as a target-option tree node.  All global option state
   modified here is restored before returning.  */
4524 ix86_valid_target_attribute_tree (tree args)
/* Remember current settings so they can be put back at the end.  */
4526 const char *orig_arch_string = ix86_arch_string;
4527 const char *orig_tune_string = ix86_tune_string;
4528 enum fpmath_unit orig_fpmath_set = global_options_set.x_ix86_fpmath;
4529 int orig_tune_defaulted = ix86_tune_defaulted;
4530 int orig_arch_specified = ix86_arch_specified;
4531 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
4534 struct cl_target_option *def
4535 = TREE_TARGET_OPTION (target_option_default_node);
4536 struct gcc_options enum_opts_set;
4538 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
4540 /* Process each of the options on the chain. */
4541 if (! ix86_valid_target_attribute_inner_p (args, option_strings,
4545 /* If the changed options are different from the default, rerun
4546 ix86_option_override_internal, and then save the options away.
4547 The string options are are attribute options, and will be undone
4548 when we copy the save structure. */
4549 if (ix86_isa_flags != def->x_ix86_isa_flags
4550 || target_flags != def->x_target_flags
4551 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4552 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4553 || enum_opts_set.x_ix86_fpmath)
4555 /* If we are using the default tune= or arch=, undo the string assigned,
4556 and use the default. */
4557 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4558 ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4559 else if (!orig_arch_specified)
4560 ix86_arch_string = NULL;
4562 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4563 ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4564 else if (orig_tune_defaulted)
4565 ix86_tune_string = NULL;
4567 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4568 if (enum_opts_set.x_ix86_fpmath)
4569 global_options_set.x_ix86_fpmath = (enum fpmath_unit) 1;
4570 else if (!TARGET_64BIT && TARGET_SSE)
4572 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
4573 global_options_set.x_ix86_fpmath = (enum fpmath_unit) 1;
4576 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
/* FALSE: this is the attribute-driven (non-main) invocation.  */
4577 ix86_option_override_internal (false);
4579 /* Add any builtin functions with the new isa if any. */
4580 ix86_add_new_builtins (ix86_isa_flags);
4582 /* Save the current options unless we are validating options for
4584 t = build_target_option_node ();
/* Restore the globals we modified above.  */
4586 ix86_arch_string = orig_arch_string;
4587 ix86_tune_string = orig_tune_string;
4588 global_options_set.x_ix86_fpmath = orig_fpmath_set;
4590 /* Free up memory allocated to hold the strings */
4591 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4592 free (option_strings[i]);
4598 /* Hook to validate attribute((target("string"))). */
/* Implementation of TARGET_OPTION_VALID_ATTRIBUTE_P for FNDECL.
   Builds the per-function target/optimization nodes and attaches them
   to FNDECL, then restores the previous global option state so parsing
   the attribute has no lasting side effects.  */
4601 ix86_valid_target_attribute_p (tree fndecl,
4602 tree ARG_UNUSED (name),
4604 int ARG_UNUSED (flags))
4606 struct cl_target_option cur_target;
4608 tree old_optimize = build_optimization_node ();
4609 tree new_target, new_optimize;
4610 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4612 /* If the function changed the optimization levels as well as setting target
4613 options, start with the optimizations specified. */
4614 if (func_optimize && func_optimize != old_optimize)
4615 cl_optimization_restore (&global_options,
4616 TREE_OPTIMIZATION (func_optimize));
4618 /* The target attributes may also change some optimization flags, so update
4619 the optimization options if necessary. */
/* Snapshot current target options so they can be restored below.  */
4620 cl_target_option_save (&cur_target, &global_options);
4621 new_target = ix86_valid_target_attribute_tree (args);
4622 new_optimize = build_optimization_node ();
4629 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4631 if (old_optimize != new_optimize)
4632 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
/* Undo the temporary option changes made while parsing.  */
4635 cl_target_option_restore (&global_options, &cur_target);
4637 if (old_optimize != new_optimize)
4638 cl_optimization_restore (&global_options,
4639 TREE_OPTIMIZATION (old_optimize));
4645 /* Hook to determine if one function can safely inline another. */
/* Implementation of TARGET_CAN_INLINE_P.  NOTE(review): the final
   return statements of this function fall outside this excerpt.  */
4648 ix86_can_inline_p (tree caller, tree callee)
4651 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
4652 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
4654 /* If callee has no option attributes, then it is ok to inline. */
4658 /* If caller has no option attributes, but callee does then it is not ok to
4660 else if (!caller_tree)
4665 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
4666 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
4668 /* Callee's isa options should a subset of the caller's, i.e. a SSE4 function
4669 can inline a SSE2 function but a SSE2 function can't inline a SSE4
/* Subset test: ANDing the callee's ISA flags with the caller's must
   leave the callee's flags unchanged.  */
4671 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
4672 != callee_opts->x_ix86_isa_flags)
4675 /* See if we have the same non-isa options. */
4676 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
4679 /* See if arch, tune, etc. are the same. */
4680 else if (caller_opts->arch != callee_opts->arch)
4683 else if (caller_opts->tune != callee_opts->tune)
4686 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
4689 else if (caller_opts->branch_cost != callee_opts->branch_cost)
4700 /* Remember the last target of ix86_set_current_function. */
4701 static GTY(()) tree ix86_previous_fndecl;
4703 /* Establish appropriate back-end context for processing the function
4704 FNDECL. The argument might be NULL to indicate processing at top
4705 level, outside of any function scope. */
/* Implementation of TARGET_SET_CURRENT_FUNCTION.  */
4707 ix86_set_current_function (tree fndecl)
4709 /* Only change the context if the function changes. This hook is called
4710 several times in the course of compiling a function, and we don't want to
4711 slow things down too much or call target_reinit when it isn't safe. */
4712 if (fndecl && fndecl != ix86_previous_fndecl)
4714 tree old_tree = (ix86_previous_fndecl
4715 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
4718 tree new_tree = (fndecl
4719 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
4722 ix86_previous_fndecl = fndecl;
/* Same target-option node as before: nothing to switch.  */
4723 if (old_tree == new_tree)
/* Switch the global options to the new function's saved target
   options.  */
4728 cl_target_option_restore (&global_options,
4729 TREE_TARGET_OPTION (new_tree));
/* New function has no attribute: fall back to the current default
   target options.  */
4735 struct cl_target_option *def
4736 = TREE_TARGET_OPTION (target_option_current_node);
4738 cl_target_option_restore (&global_options, def);
4745 /* Return true if this goes in large data/bss. */
/* Only the medium code models split data into near and far (.ldata/
   .lbss) sections; everything else always uses the normal sections.  */
4748 ix86_in_large_data_p (tree exp)
4750 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
4753 /* Functions are never large data. */
4754 if (TREE_CODE (exp) == FUNCTION_DECL)
/* An explicit section attribute decides: only .ldata/.lbss count as
   large.  */
4757 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
4759 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
4760 if (strcmp (section, ".ldata") == 0
4761 || strcmp (section, ".lbss") == 0)
/* Otherwise use the -mlarge-data-threshold size test.  */
4767 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
4769 /* If this is an incomplete type with size 0, then we can't put it
4770 in data because it might be too big when completed. */
4771 if (!size || size > ix86_section_threshold)
4778 /* Switch to the appropriate section for output of DECL.
4779 DECL is either a `VAR_DECL' node or a constant of some sort.
4780 RELOC indicates whether forming the initial value of DECL requires
4781 link-time relocations. */
4783 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
4787 x86_64_elf_select_section (tree decl, int reloc,
4788 unsigned HOST_WIDE_INT align)
/* For medium-model large data, map each section category onto its
   ".l"-prefixed large-section counterpart.  */
4790 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4791 && ix86_in_large_data_p (decl))
4793 const char *sname = NULL;
4794 unsigned int flags = SECTION_WRITE;
4795 switch (categorize_decl_for_section (decl, reloc))
4800 case SECCAT_DATA_REL:
4801 sname = ".ldata.rel";
4803 case SECCAT_DATA_REL_LOCAL:
4804 sname = ".ldata.rel.local";
4806 case SECCAT_DATA_REL_RO:
4807 sname = ".ldata.rel.ro";
4809 case SECCAT_DATA_REL_RO_LOCAL:
4810 sname = ".ldata.rel.ro.local";
4814 flags |= SECTION_BSS;
4817 case SECCAT_RODATA_MERGE_STR:
4818 case SECCAT_RODATA_MERGE_STR_INIT:
4819 case SECCAT_RODATA_MERGE_CONST:
4823 case SECCAT_SRODATA:
4830 /* We don't split these for medium model. Place them into
4831 default sections and hope for best. */
4836 /* We might get called with string constants, but get_named_section
4837 doesn't like them as they are not DECLs. Also, we need to set
4838 flags in that case. */
4840 return get_section (sname, flags, NULL);
4841 return get_named_section (decl, sname, reloc);
/* Not large data: defer to the generic ELF section selection.  */
4844 return default_elf_select_section (decl, reloc, align);
4847 /* Build up a unique section name, expressed as a
4848 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
4849 RELOC indicates whether the initial value of EXP requires
4850 link-time relocations. */
4852 static void ATTRIBUTE_UNUSED
4853 x86_64_elf_unique_section (tree decl, int reloc)
/* Large-data decls get ".l"-prefixed per-decl sections; the short
   prefix variants are used for one-only (vague linkage) symbols.  */
4855 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4856 && ix86_in_large_data_p (decl))
4858 const char *prefix = NULL;
4859 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
4860 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
4862 switch (categorize_decl_for_section (decl, reloc))
4865 case SECCAT_DATA_REL:
4866 case SECCAT_DATA_REL_LOCAL:
4867 case SECCAT_DATA_REL_RO:
4868 case SECCAT_DATA_REL_RO_LOCAL:
4869 prefix = one_only ? ".ld" : ".ldata";
4872 prefix = one_only ? ".lb" : ".lbss";
4875 case SECCAT_RODATA_MERGE_STR:
4876 case SECCAT_RODATA_MERGE_STR_INIT:
4877 case SECCAT_RODATA_MERGE_CONST:
4878 prefix = one_only ? ".lr" : ".lrodata";
4880 case SECCAT_SRODATA:
4887 /* We don't split these for medium model. Place them into
4888 default sections and hope for best. */
4893 const char *name, *linkonce;
/* Compose "<.gnu.linkonce><prefix>.<mangled-name>" and record it as
   the decl's section.  */
4896 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
4897 name = targetm.strip_name_encoding (name);
4899 /* If we're using one_only, then there needs to be a .gnu.linkonce
4900 prefix to the section name. */
4901 linkonce = one_only ? ".gnu.linkonce" : "";
4903 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
4905 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
/* Non-large data: fall back to the generic unique-section logic.  */
4909 default_unique_section (decl, reloc);
4912 #ifdef COMMON_ASM_OP
4913 /* This says how to output assembler code to declare an
4914 uninitialized external linkage data object.
4916 For medium model x86-64 we need to use .largecomm opcode for
/* Emits either ".largecomm" (medium model, above -mlarge-data-threshold)
   or the regular COMMON_ASM_OP, followed by "name,size,alignment".  */
4919 x86_elf_aligned_common (FILE *file,
4920 const char *name, unsigned HOST_WIDE_INT size,
4923 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4924 && size > (unsigned int)ix86_section_threshold)
4925 fputs (".largecomm\t", file);
4927 fputs (COMMON_ASM_OP, file);
4928 assemble_name (file, name);
/* ALIGN is in bits; the assembler directive wants bytes.  */
4929 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
4930 size, align / BITS_PER_UNIT);
4934 /* Utility function for targets to use in implementing
4935 ASM_OUTPUT_ALIGNED_BSS. */
/* Places large-model BSS objects into ".lbss" and everything else into
   the regular bss section, then emits alignment, the object label, and
   the space reservation.  */
4938 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
4939 const char *name, unsigned HOST_WIDE_INT size,
4942 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4943 && size > (unsigned int)ix86_section_threshold)
4944 switch_to_section (get_named_section (decl, ".lbss", 0));
4946 switch_to_section (bss_section);
4947 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
4948 #ifdef ASM_DECLARE_OBJECT_NAME
4949 last_assemble_variable_decl = decl;
4950 ASM_DECLARE_OBJECT_NAME (file, name, decl);
4952 /* Standard thing is just output label for the object. */
4953 ASM_OUTPUT_LABEL (file, name);
4954 #endif /* ASM_DECLARE_OBJECT_NAME */
/* Reserve at least one byte so the label refers to distinct storage.  */
4955 ASM_OUTPUT_SKIP (file, size ? size : 1);
4958 /* Decide whether we must probe the stack before any space allocation
4959 on this target. It's essentially TARGET_STACK_PROBE except when
4960 -fstack-check causes the stack to be already probed differently. */
4963 ix86_target_stack_probe (void)
4965 /* Do not probe the stack twice if static stack checking is enabled. */
4966 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
4969 return TARGET_STACK_PROBE;
4972 /* Decide whether we can make a sibling call to a function. DECL is the
4973 declaration of the function being targeted by the call and EXP is the
4974 CALL_EXPR representing the call. */
/* NOTE(review): elided listing -- the return type, local declarations
   for A and B, several "return false" statements and closing braces are
   missing between the visible lines.  The visible checks: PIC/PLT
   constraint, stack-alignment constraint, return-value location
   compatibility (80387 stack regs, vzeroupper), MS->SYSV clobber-set
   mismatch, and regparm exhaustion for indirect calls.  */
4977 ix86_function_ok_for_sibcall (tree decl, tree exp)
4979 tree type, decl_or_type;
4982 /* If we are generating position-independent code, we cannot sibcall
4983 optimize any indirect call, or a direct call to a global function,
4984 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
4988 && (!decl || !targetm.binds_local_p (decl)))
4991 /* If we need to align the outgoing stack, then sibcalling would
4992 unalign the stack, which may break the called function. */
4993 if (ix86_minimum_incoming_stack_boundary (true)
4994 < PREFERRED_STACK_BOUNDARY)
4999 decl_or_type = decl;
5000 type = TREE_TYPE (decl);
/* No DECL: recover the function type from the CALL_EXPR's callee.  */
5004 /* We're looking at the CALL_EXPR, we need the type of the function. */
5005 type = CALL_EXPR_FN (exp); /* pointer expression */
5006 type = TREE_TYPE (type); /* pointer type */
5007 type = TREE_TYPE (type); /* function type */
5008 decl_or_type = type;
5011 /* Check that the return value locations are the same. Like
5012 if we are returning floats on the 80387 register stack, we cannot
5013 make a sibcall from a function that doesn't return a float to a
5014 function that does or, conversely, from a function that does return
5015 a float to a function that doesn't; the necessary stack adjustment
5016 would not be executed. This is also the place we notice
5017 differences in the return value ABI. Note that it is ok for one
5018 of the functions to have void return type as long as the return
5019 value of the other is passed in a register. */
5020 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
5021 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5023 if (STACK_REG_P (a) || STACK_REG_P (b))
5025 if (!rtx_equal_p (a, b))
5028 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5030 /* Disable sibcall if we need to generate vzeroupper after
/* Callee returns 256-bit AVX but caller doesn't: a vzeroupper would be
   needed after the call, which a sibcall cannot provide.  */
5032 if (TARGET_VZEROUPPER
5033 && cfun->machine->callee_return_avx256_p
5034 && !cfun->machine->caller_return_avx256_p)
5037 else if (!rtx_equal_p (a, b))
5042 /* The SYSV ABI has more call-clobbered registers;
5043 disallow sibcalls from MS to SYSV. */
5044 if (cfun->machine->call_abi == MS_ABI
5045 && ix86_function_type_abi (type) == SYSV_ABI)
5050 /* If this call is indirect, we'll need to be able to use a
5051 call-clobbered register for the address of the target function.
5052 Make sure that all such registers are not used for passing
5053 parameters. Note that DLLIMPORT functions are indirect. */
5055 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
5057 if (ix86_function_regparm (type, NULL) >= 3)
5059 /* ??? Need to count the actual number of registers to be used,
5060 not the possible number of registers. Fix later. */
5066 /* Otherwise okay. That also includes certain types of indirect calls. */
5070 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
5071 and "sseregparm" calling convention attributes;
5072 arguments as in struct attribute_spec.handler. */
/* NOTE(review): elided listing -- return type, ARGS/NO_ADD_ATTRS
   parameters, NULL_TREE returns, braces and the 32-bit guard between
   lines 5119-5126 are missing.  The visible structure: reject
   non-function nodes, then validate each convention attribute against
   the mutually exclusive set already present on the type.  */
5075 ix86_handle_cconv_attribute (tree *node, tree name,
5077 int flags ATTRIBUTE_UNUSED,
5080 if (TREE_CODE (*node) != FUNCTION_TYPE
5081 && TREE_CODE (*node) != METHOD_TYPE
5082 && TREE_CODE (*node) != FIELD_DECL
5083 && TREE_CODE (*node) != TYPE_DECL)
5085 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5087 *no_add_attrs = true;
5091 /* Can combine regparm with all attributes but fastcall, and thiscall. */
5092 if (is_attribute_p ("regparm", name))
5096 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5098 error ("fastcall and regparm attributes are not compatible");
5101 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
/* NOTE(review): "regparam" in this diagnostic is a typo for "regparm"
   in the original source; left untouched since diagnostics are
   user-visible behavior.  */
5103 error ("regparam and thiscall attributes are not compatible");
/* regparm takes one argument: an integer constant <= REGPARM_MAX.  */
5106 cst = TREE_VALUE (args);
5107 if (TREE_CODE (cst) != INTEGER_CST)
5109 warning (OPT_Wattributes,
5110 "%qE attribute requires an integer constant argument",
5112 *no_add_attrs = true;
5114 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
5116 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
5118 *no_add_attrs = true;
/* Presumably a TARGET_64BIT branch (elided): the 32-bit conventions
   below are ignored for 64-bit targets, except under the MS ABI.  */
5126 /* Do not warn when emulating the MS ABI. */
5127 if ((TREE_CODE (*node) != FUNCTION_TYPE
5128 && TREE_CODE (*node) != METHOD_TYPE)
5129 || ix86_function_type_abi (*node) != MS_ABI)
5130 warning (OPT_Wattributes, "%qE attribute ignored",
5132 *no_add_attrs = true;
5136 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
5137 if (is_attribute_p ("fastcall", name))
5139 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5141 error ("fastcall and cdecl attributes are not compatible");
5143 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5145 error ("fastcall and stdcall attributes are not compatible");
5147 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
5149 error ("fastcall and regparm attributes are not compatible");
5151 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5153 error ("fastcall and thiscall attributes are not compatible");
5157 /* Can combine stdcall with fastcall (redundant), regparm and
5159 else if (is_attribute_p ("stdcall", name))
5161 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5163 error ("stdcall and cdecl attributes are not compatible");
5165 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5167 error ("stdcall and fastcall attributes are not compatible");
5169 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5171 error ("stdcall and thiscall attributes are not compatible");
5175 /* Can combine cdecl with regparm and sseregparm. */
5176 else if (is_attribute_p ("cdecl", name))
5178 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5180 error ("stdcall and cdecl attributes are not compatible");
5182 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5184 error ("fastcall and cdecl attributes are not compatible");
5186 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5188 error ("cdecl and thiscall attributes are not compatible");
5191 else if (is_attribute_p ("thiscall", name))
/* thiscall is only meaningful for C++ non-static member functions;
   pedantically warn otherwise.  */
5193 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
5194 warning (OPT_Wattributes, "%qE attribute is used for none class-method",
5196 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5198 error ("stdcall and thiscall attributes are not compatible");
5200 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5202 error ("fastcall and thiscall attributes are not compatible");
5204 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5206 error ("cdecl and thiscall attributes are not compatible");
5210 /* Can combine sseregparm with all attributes. */
5215 /* The transactional memory builtins are implicitly regparm or fastcall
5216 depending on the ABI. Override the generic do-nothing attribute that
5217 these builtins were declared with, and replace it with one of the two
5218 attributes that we expect elsewhere. */
/* NOTE(review): elided listing -- return type, NODE handling for the
   64-bit early-out (line 5231 comment's body) and the final return are
   missing.  Visible logic: drop the placeholder attribute, then on
   32-bit attach either fastcall (Windows-like, detected via
   CHECK_STACK_LIMIT > 0) or regparm(2).  */
5221 ix86_handle_tm_regparm_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
5222 tree args ATTRIBUTE_UNUSED,
5223 int flags ATTRIBUTE_UNUSED,
5228 /* In no case do we want to add the placeholder attribute. */
5229 *no_add_attrs = true;
5231 /* The 64-bit ABI is unchanged for transactional memory. */
5235 /* ??? Is there a better way to validate 32-bit windows? We have
5236 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
5237 if (CHECK_STACK_LIMIT > 0)
5238 alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
/* Non-Windows 32-bit: build regparm(2) as attribute list.  */
5241 alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
5242 alt = tree_cons (get_identifier ("regparm"), alt, NULL);
5244 decl_attributes (node, alt, flags);
5249 /* This function determines from TYPE the calling-convention. */
/* Returns a bitmask of IX86_CALLCVT_* flags.  NOTE(review): elided
   listing -- the 64-bit early return's guard (before line 5259) and
   several braces/locals (attrs, is_stdarg declarations) are missing.  */
5252 ix86_get_callcvt (const_tree type)
5254 unsigned int ret = 0;
/* Presumably the TARGET_64BIT case: everything is plain cdecl.  */
5259 return IX86_CALLCVT_CDECL;
5261 attrs = TYPE_ATTRIBUTES (type);
5262 if (attrs != NULL_TREE)
/* The base conventions are mutually exclusive (else-if chain).  */
5264 if (lookup_attribute ("cdecl", attrs))
5265 ret |= IX86_CALLCVT_CDECL;
5266 else if (lookup_attribute ("stdcall", attrs))
5267 ret |= IX86_CALLCVT_STDCALL;
5268 else if (lookup_attribute ("fastcall", attrs))
5269 ret |= IX86_CALLCVT_FASTCALL;
5270 else if (lookup_attribute ("thiscall", attrs))
5271 ret |= IX86_CALLCVT_THISCALL;
5273 /* Regparam isn't allowed for thiscall and fastcall. */
5274 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
5276 if (lookup_attribute ("regparm", attrs))
5277 ret |= IX86_CALLCVT_REGPARM;
5278 if (lookup_attribute ("sseregparm", attrs))
5279 ret |= IX86_CALLCVT_SSEREGPARM;
/* An explicit base convention was found; return it plus modifiers.  */
5282 if (IX86_BASE_CALLCVT(ret) != 0)
/* No explicit attribute: -mrtd makes non-stdarg functions stdcall.  */
5286 is_stdarg = stdarg_p (type);
5287 if (TARGET_RTD && !is_stdarg)
5288 return IX86_CALLCVT_STDCALL | ret;
/* Default is cdecl, except MS-ABI methods default to thiscall.  */
5292 || TREE_CODE (type) != METHOD_TYPE
5293 || ix86_function_type_abi (type) != MS_ABI)
5294 return IX86_CALLCVT_CDECL | ret;
5296 return IX86_CALLCVT_THISCALL;
5299 /* Return 0 if the attributes for two types are incompatible, 1 if they
5300 are compatible, and 2 if they are nearly compatible (which causes a
5301 warning to be generated). */
/* NOTE(review): elided listing -- the non-function early return, the
   ccvt1 != ccvt2 comparison (likely between 5313 and 5316) and the
   final success return are missing.  */
5304 ix86_comp_type_attributes (const_tree type1, const_tree type2)
5306 unsigned int ccvt1, ccvt2;
5308 if (TREE_CODE (type1) != FUNCTION_TYPE
5309 && TREE_CODE (type1) != METHOD_TYPE)
5312 ccvt1 = ix86_get_callcvt (type1);
5313 ccvt2 = ix86_get_callcvt (type2);
/* Regparm counts must agree for the types to be compatible.  */
5316 if (ix86_function_regparm (type1, NULL)
5317 != ix86_function_regparm (type2, NULL))
5323 /* Return the regparm value for a function with the indicated TYPE and DECL.
5324 DECL may be NULL when calling function indirectly
5325 or considering a libcall. */
/* NOTE(review): elided listing -- return type, local declarations
   (regparm, ccvt, attr), the TARGET_64BIT guard before line 5335, the
   fastcall/thiscall constant returns (after 5349/5351) and the final
   "return regparm;" are missing.  */
5328 ix86_function_regparm (const_tree type, const_tree decl)
/* 64-bit: register count is fixed by the ABI in effect.  */
5335 return (ix86_function_type_abi (type) == SYSV_ABI
5336 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5337 ccvt = ix86_get_callcvt (type);
5338 regparm = ix86_regparm;
5340 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
5342 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5345 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5349 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5351 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5354 /* Use register calling convention for local functions when possible. */
5356 && TREE_CODE (decl) == FUNCTION_DECL
5358 && !(profile_flag && !flag_fentry))
5360 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5361 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
5362 if (i && i->local && i->can_change_signature)
5364 int local_regparm, globals = 0, regno;
5366 /* Make sure no regparm register is taken by a
5367 fixed register variable. */
5368 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
5369 if (fixed_regs[local_regparm])
5372 /* We don't want to use regparm(3) for nested functions as
5373 these use a static chain pointer in the third argument. */
5374 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
5377 /* In 32-bit mode save a register for the split stack. */
5378 if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
5381 /* Each fixed register usage increases register pressure,
5382 so less registers should be used for argument passing.
5383 This functionality can be overriden by an explicit
5385 for (regno = 0; regno <= DI_REG; regno++)
5386 if (fixed_regs[regno])
/* Reduce the local regparm count by the number of fixed registers,
   never going below zero.  */
5390 = globals < local_regparm ? local_regparm - globals : 0;
5392 if (local_regparm > regparm)
5393 regparm = local_regparm;
5400 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5401 DFmode (2) arguments in SSE registers for a function with the
5402 indicated TYPE and DECL. DECL may be NULL when calling function
5403 indirectly or considering a libcall. Otherwise return 0. */
/* NOTE(review): elided listing -- return type, the SSE/SSE2 capability
   test between 5413 and 5420, WARN gating of the errors, and the
   returns (2, 1, 0) on the success/fallthrough paths are missing.  */
5406 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
/* 32-bit-only code path; 64-bit passing is handled elsewhere.  */
5408 gcc_assert (!TARGET_64BIT);
5410 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5411 by the sseregparm attribute. */
5412 if (TARGET_SSEREGPARM
5413 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
/* Diagnose sseregparm without the required ISA; with a DECL we can
   name the function, otherwise only the type.  */
5420 error ("calling %qD with attribute sseregparm without "
5421 "SSE/SSE2 enabled", decl);
5423 error ("calling %qT with attribute sseregparm without "
5424 "SSE/SSE2 enabled", type);
5432 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5433 (and DFmode for SSE2) arguments in SSE registers. */
5434 if (decl && TARGET_SSE_MATH && optimize
5435 && !(profile_flag && !flag_fentry))
5437 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5438 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
5439 if (i && i->local && i->can_change_signature)
5440 return TARGET_SSE2 ? 2 : 1;
5446 /* Return true if EAX is live at the start of the function. Used by
5447 ix86_expand_prologue to determine if we need special help before
5448 calling allocate_stack_worker. */
5451 ix86_eax_live_at_start_p (void)
5453 /* Cheat. Don't bother working forward from ix86_function_regparm
5454 to the function type to whether an actual argument is located in
5455 eax. Instead just look at cfg info, which is still close enough
5456 to correct at this point. This gives false positives for broken
5457 functions that might use uninitialized data that happens to be
5458 allocated in eax, but who cares? */
/* Register 0 is EAX on this target.  */
5459 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
/* Decide whether the hidden aggregate-return pointer should be left on
   the stack by the callee.  NOTE(review): elided listing -- return
   type, the TARGET_64BIT/attribute guards and part of the MS-ABI
   comment are missing.  */
5463 ix86_keep_aggregate_return_pointer (tree fntype)
/* An explicit callee_pop_aggregate_return(N) attribute decides:
   N == 0 means keep (do not pop) the pointer.  */
5469 attr = lookup_attribute ("callee_pop_aggregate_return",
5470 TYPE_ATTRIBUTES (fntype));
5472 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
5474 /* For 32-bit MS-ABI the default is to keep aggregate
5476 if (ix86_function_type_abi (fntype) == MS_ABI)
5479 return KEEP_AGGREGATE_RETURN_POINTER != 0;
5482 /* Value is the number of bytes of arguments automatically
5483 popped when returning from a subroutine call.
5484 FUNDECL is the declaration node of the function (as a tree),
5485 FUNTYPE is the data type of the function (as a tree),
5486 or for a library call it is an identifier node for the subroutine name.
5487 SIZE is the number of bytes of arguments passed on the stack.
5489 On the 80386, the RTD insn may be used to pop them if the number
5490 of args is fixed, but if the number is variable then the caller
5491 must pop them all. RTD can't be used for library calls now
5492 because the library is compiled with the Unix compiler.
5493 Use of RTD is a selectable option, since it is incompatible with
5494 standard Unix calling sequences. If the option is not selected,
5495 the caller must always pop the args.
5497 The attribute stdcall is equivalent to RTD on a per module basis. */
/* NOTE(review): elided listing -- return type, "return 0" statements
   (64-bit case, callee-pop case returning SIZE, and final fallthrough)
   are missing between the visible lines.  */
5500 ix86_return_pops_args (tree fundecl, tree funtype, int size)
5504 /* None of the 64-bit ABIs pop arguments. */
5508 ccvt = ix86_get_callcvt (funtype);
/* Callee-pop conventions pop all fixed stack args, but only for
   non-varargs functions.  */
5510 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
5511 | IX86_CALLCVT_THISCALL)) != 0
5512 && ! stdarg_p (funtype))
5515 /* Lose any fake structure return argument if it is passed on the stack. */
5516 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
5517 && !ix86_keep_aggregate_return_pointer (funtype))
5519 int nregs = ix86_function_regparm (funtype, fundecl);
/* The hidden return pointer occupies one word of stack when no
   register args are used.  */
5521 return GET_MODE_SIZE (Pmode);
5527 /* Argument support functions. */
5529 /* Return true when register may be used to pass function parameters. */
/* NOTE(review): elided listing -- return type, local i, the 32-/64-bit
   branch structure, several returns (true/false) and closing braces are
   missing.  The visible checks cover: 32-bit integer/MMX/SSE regparm
   registers, 64-bit SYSV/MS integer parameter registers (via lookup
   tables), SSE registers, and RAX as the SYSV varargs hidden arg.  */
5531 ix86_function_arg_regno_p (int regno)
5534 const int *parm_regs;
5539 return (regno < REGPARM_MAX
5540 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
5542 return (regno < REGPARM_MAX
5543 || (TARGET_MMX && MMX_REGNO_P (regno)
5544 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
5545 || (TARGET_SSE && SSE_REGNO_P (regno)
5546 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
5551 if (SSE_REGNO_P (regno) && TARGET_SSE)
5556 if (TARGET_SSE && SSE_REGNO_P (regno)
5557 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
5561 /* TODO: The function should depend on current function ABI but
5562 builtins.c would need updating then. Therefore we use the
5565 /* RAX is used as hidden argument to va_arg functions. */
5566 if (ix86_abi == SYSV_ABI && regno == AX_REG)
5569 if (ix86_abi == MS_ABI)
5570 parm_regs = x86_64_ms_abi_int_parameter_registers;
5572 parm_regs = x86_64_int_parameter_registers;
/* Scan the ABI's integer parameter register table for REGNO.  */
5573 for (i = 0; i < (ix86_abi == MS_ABI
5574 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
5575 if (regno == parm_regs[i])
5580 /* Return if we do not know how to pass TYPE solely in registers. */
5583 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
/* Defer to the generic size/padding rule first; the elided line here
   (between 5585 and 5588) is presumably "return true;".  */
5585 if (must_pass_in_stack_var_size_or_pad (mode, type))
5588 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5589 The layout_type routine is crafty and tries to trick us into passing
5590 currently unsupported vector types on the stack by using TImode. */
5591 return (!TARGET_64BIT && mode == TImode
5592 && type && TREE_CODE (type) != VECTOR_TYPE);
5595 /* It returns the size, in bytes, of the area reserved for arguments passed
5596 in registers for the function represented by fndecl dependent to the used
/* NOTE(review): elided listing -- return type, the "else" feeding the
   type-ABI query, the 64-byte MS shadow-space return value and the
   default return 0 are missing.  */
5599 ix86_reg_parm_stack_space (const_tree fndecl)
5601 enum calling_abi call_abi = SYSV_ABI;
/* FNDECL may be a FUNCTION_DECL or already a function type.  */
5602 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
5603 call_abi = ix86_function_abi (fndecl);
5605 call_abi = ix86_function_type_abi (fndecl);
/* 64-bit MS ABI reserves home space for register parameters.  */
5606 if (TARGET_64BIT && call_abi == MS_ABI)
5611 /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
/* NOTE(review): elided listing -- return type, the abi assignments in
   the two attribute branches, and the default "return ix86_abi;" are
   missing.  Visible logic: ms_abi/sysv_abi attributes override the
   global default in either direction.  */
5614 ix86_function_type_abi (const_tree fntype)
5616 if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
5618 enum calling_abi abi = ix86_abi;
5619 if (abi == SYSV_ABI)
5621 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
5624 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
/* Return whether FN carries the ms_hook_prologue attribute; reject it
   on nested functions.  NOTE(review): return type, "return true;" and
   "return false;" are elided from this listing.  */
5632 ix86_function_ms_hook_prologue (const_tree fn)
5634 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
/* A non-NULL context means FN is nested; hot-patching needs a fixed
   prologue at a top-level symbol.  */
5636 if (decl_function_context (fn) != NULL_TREE)
5637 error_at (DECL_SOURCE_LOCATION (fn),
5638 "ms_hook_prologue is not compatible with nested function");
/* Return the calling ABI (SYSV_ABI or MS_ABI) for function decl
   FNDECL, by inspecting its type.  NOTE(review): the NULL-FNDECL
   fallback (lines 5647-5649) is elided from this listing.  */
5645 static enum calling_abi
5646 ix86_function_abi (const_tree fndecl)
5650 return ix86_function_type_abi (TREE_TYPE (fndecl));
5653 /* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
/* NOTE(review): return type and the non-cfun fallback (lines
   5657-5659) are elided from this listing.  */
5656 ix86_cfun_abi (void)
5660 return cfun->machine->call_abi;
5663 /* Write the extra assembler code needed to declare a function properly. */
/* NOTE(review): elided listing -- return type, the DECL parameter line,
   the is_ms_hook guards around the filler/marker emission and closing
   braces are missing.  For ms_hook_prologue functions this pads with
   0xCC filler before the label and emits a fixed hot-patchable byte
   sequence after it (8-byte no-op lea on 64-bit, movl.s/push/movl.s
   on 32-bit).  */
5666 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
5669 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
/* Hot-patch pad: 32 bytes of 0xCC on 64-bit, 16 on 32-bit.  */
5673 int i, filler_count = (TARGET_64BIT ? 32 : 16);
5674 unsigned int filler_cc = 0xcccccccc;
5676 for (i = 0; i < filler_count; i += 4)
5677 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
5680 #ifdef SUBTARGET_ASM_UNWIND_INIT
5681 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
5684 ASM_OUTPUT_LABEL (asm_out_file, fname);
5686 /* Output magic byte marker, if hot-patch attribute is set. */
5691 /* leaq [%rsp + 0], %rsp */
5692 asm_fprintf (asm_out_file, ASM_BYTE
5693 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n")
5697 /* movl.s %edi, %edi
5699 movl.s %esp, %ebp */
5700 asm_fprintf (asm_out_file, ASM_BYTE
5701 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
5707 extern void init_regs (void);
5709 /* Implementation of call abi switching target hook. Specific to FNDECL
5710 the specific call register sets are set. See also
5711 ix86_conditional_register_usage for more details. */
/* NOTE(review): return type and the "else" between the two assignments
   (line 5717) are elided from this listing.  */
5713 ix86_call_abi_override (const_tree fndecl)
5715 if (fndecl == NULL_TREE)
5716 cfun->machine->call_abi = ix86_abi;
5718 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
5721 /* 64-bit MS and SYSV ABI have different set of call used registers. Avoid
5722 expensive re-initialization of init_regs each time we switch function context
5723 since this is needed only during RTL expansion. */
/* NOTE(review): return type, the enclosing "if (TARGET_64BIT &&" part
   of the condition, and the init_regs () call body are elided.  The
   visible test uses SI_REG's call_used_regs entry as a proxy for which
   ABI the register tables are currently initialized for.  */
5725 ix86_maybe_switch_abi (void)
5728 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
5732 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5733 for a call to a function whose data type is FNTYPE.
5734 For a library call, FNTYPE is 0. */
/* NOTE(review): elided listing -- return type, the FNDECL/CALLER
   parameters, numerous braces, "else" keywords and TARGET_64BIT guards
   are missing between the visible lines.  Overall flow: clear CUM,
   reset the per-callee AVX256 tracking, derive the call ABI and return
   type from FNDECL or FNTYPE, record 256-bit AVX use in the return
   value, then fill in the integer/SSE/MMX register budgets and the
   32-bit fastcall/thiscall/regparm adjustments.  */
5737 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
5738 tree fntype, /* tree ptr for function decl */
5739 rtx libname, /* SYMBOL_REF of library name or 0 */
5743 struct cgraph_local_info *i;
5746 memset (cum, 0, sizeof (*cum));
5748 /* Initialize for the current callee. */
5751 cfun->machine->callee_pass_avx256_p = false;
5752 cfun->machine->callee_return_avx256_p = false;
/* With a real FNDECL we can consult cgraph for local-call info.  */
5757 i = cgraph_local_info (fndecl);
5758 cum->call_abi = ix86_function_abi (fndecl);
5759 fnret_type = TREE_TYPE (TREE_TYPE (fndecl));
5764 cum->call_abi = ix86_function_type_abi (fntype);
5766 fnret_type = TREE_TYPE (fntype);
/* Track whether the return value forces 256-bit AVX state, for later
   vzeroupper placement decisions.  */
5771 if (TARGET_VZEROUPPER && fnret_type)
5773 rtx fnret_value = ix86_function_value (fnret_type, fntype,
5775 if (function_pass_avx256_p (fnret_value))
5777 /* The return value of this function uses 256bit AVX modes. */
5779 cfun->machine->callee_return_avx256_p = true;
5781 cfun->machine->caller_return_avx256_p = true;
5785 cum->caller = caller;
5787 /* Set up the number of registers to use for passing arguments. */
5789 if (TARGET_64BIT && cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
5790 sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
5791 "or subtarget optimization implying it");
5792 cum->nregs = ix86_regparm;
5795 cum->nregs = (cum->call_abi == SYSV_ABI
5796 ? X86_64_REGPARM_MAX
5797 : X86_64_MS_REGPARM_MAX);
5801 cum->sse_nregs = SSE_REGPARM_MAX;
5804 cum->sse_nregs = (cum->call_abi == SYSV_ABI
5805 ? X86_64_SSE_REGPARM_MAX
5806 : X86_64_MS_SSE_REGPARM_MAX);
5810 cum->mmx_nregs = MMX_REGPARM_MAX;
5811 cum->warn_avx = true;
5812 cum->warn_sse = true;
5813 cum->warn_mmx = true;
5815 /* Because type might mismatch in between caller and callee, we need to
5816 use actual type of function for local calls.
5817 FIXME: cgraph_analyze can be told to actually record if function uses
5818 va_start so for local functions maybe_vaarg can be made aggressive
5820 FIXME: once typesytem is fixed, we won't need this code anymore. */
5821 if (i && i->local && i->can_change_signature)
5822 fntype = TREE_TYPE (fndecl);
5823 cum->maybe_vaarg = (fntype
5824 ? (!prototype_p (fntype) || stdarg_p (fntype))
5829 /* If there are variable arguments, then we won't pass anything
5830 in registers in 32-bit mode. */
5831 if (stdarg_p (fntype))
5842 /* Use ecx and edx registers if function has fastcall attribute,
5843 else look for regparm information. */
5846 unsigned int ccvt = ix86_get_callcvt (fntype);
5847 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5850 cum->fastcall = 1; /* Same first register as in fastcall. */
5852 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5858 cum->nregs = ix86_function_regparm (fntype, fndecl);
5861 /* Set up the number of SSE registers used for passing SFmode
5862 and DFmode arguments. Warn for mismatching ABI. */
5863 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
5867 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
5868 But in the case of vector types, it is some vector mode.
5870 When we have only some of our vector isa extensions enabled, then there
5871 are some modes for which vector_mode_supported_p is false. For these
5872 modes, the generic vector support in gcc will choose some non-vector mode
5873 in order to implement the type. By computing the natural mode, we'll
5874 select the proper ABI location for the operand and not depend on whatever
5875 the middle-end decides to do with these vector types.
5877 The midde-end can't deal with the vector types > 16 bytes. In this
5878 case, we return the original mode and warn ABI change if CUM isn't
5881 static enum machine_mode
5882 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum)
5884 enum machine_mode mode = TYPE_MODE (type);
/* NOTE(review): elided listing -- several braces, the "return mode;"
   on the found-mode path and the fallthrough return are missing.  */
5886 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
5888 HOST_WIDE_INT size = int_size_in_bytes (type);
5889 if ((size == 8 || size == 16 || size == 32)
5890 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
5891 && TYPE_VECTOR_SUBPARTS (type) > 1)
5893 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
/* Start the mode walk from the first float or integer vector mode.  */
5895 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
5896 mode = MIN_MODE_VECTOR_FLOAT;
5898 mode = MIN_MODE_VECTOR_INT;
5900 /* Get the mode which has this inner mode and number of units. */
5901 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
5902 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
5903 && GET_MODE_INNER (mode) == innermode)
/* 32-byte vectors without AVX: warn once about the ABI change and
   fall back to the type's own mode.  */
5905 if (size == 32 && !TARGET_AVX)
5907 static bool warnedavx;
5914 warning (0, "AVX vector argument without AVX "
5915 "enabled changes the ABI");
5917 return TYPE_MODE (type);
5930 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
5931 this may not agree with the mode that the type system has chosen for the
5932 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
5933 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
/* NOTE(review): elided listing -- "static rtx" return type, the local
   tmp declaration, the "else" and the final "return tmp;" are
   missing.  */
5936 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
5941 if (orig_mode != BLKmode)
5942 tmp = gen_rtx_REG (orig_mode, regno);
/* BLKmode: wrap the natural-mode REG in a one-entry PARALLEL at
   offset 0.  */
5945 tmp = gen_rtx_REG (mode, regno);
5946 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
5947 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
5953 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
5954 of this code is to classify each 8bytes of incoming argument by the register
5955 class and assign registers accordingly. */
5957 /* Return the union class of CLASS1 and CLASS2.
5958 See the x86-64 PS ABI for details. */
/* Implements the psABI's class-merging rules in order; the elided
   lines between visible ones are the corresponding return statements
   and rule-text fragments.  */
5960 static enum x86_64_reg_class
5961 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
5963 /* Rule #1: If both classes are equal, this is the resulting class. */
5964 if (class1 == class2)
5967 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
5969 if (class1 == X86_64_NO_CLASS)
5971 if (class2 == X86_64_NO_CLASS)
5974 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
5975 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
5976 return X86_64_MEMORY_CLASS;
5978 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
/* INTEGERSI + SSESF stays INTEGERSI (both halves fit in 32 bits).  */
5979 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
5980 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
5981 return X86_64_INTEGERSI_CLASS;
5982 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
5983 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
5984 return X86_64_INTEGER_CLASS;
5986 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
5988 if (class1 == X86_64_X87_CLASS
5989 || class1 == X86_64_X87UP_CLASS
5990 || class1 == X86_64_COMPLEX_X87_CLASS
5991 || class2 == X86_64_X87_CLASS
5992 || class2 == X86_64_X87UP_CLASS
5993 || class2 == X86_64_COMPLEX_X87_CLASS)
5994 return X86_64_MEMORY_CLASS;
5996 /* Rule #6: Otherwise class SSE is used. */
5997 return X86_64_SSE_CLASS;
6000 /* Classify the argument of type TYPE and mode MODE.
6001 CLASSES will be filled by the register class used to pass each word
6002 of the operand. The number of words is returned. In case the parameter
6003 should be passed in memory, 0 is returned. As a special case for zero
6004 sized containers, classes[0] will be NO_CLASS and 1 is returned.
6006 BIT_OFFSET is used internally for handling records and specifies offset
6007 of the offset in bits modulo 256 to avoid overflow cases.
6009 See the x86-64 PS ABI for details.
6013 classify_argument (enum machine_mode mode, const_tree type,
6014 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
6016 HOST_WIDE_INT bytes =
6017 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6018 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6020 /* Variable sized entities are always passed/returned in memory. */
6024 if (mode != VOIDmode
6025 && targetm.calls.must_pass_in_stack (mode, type))
6028 if (type && AGGREGATE_TYPE_P (type))
6032 enum x86_64_reg_class subclasses[MAX_CLASSES];
6034 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
6038 for (i = 0; i < words; i++)
6039 classes[i] = X86_64_NO_CLASS;
6041 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
6042 signalize memory class, so handle it as special case. */
6045 classes[0] = X86_64_NO_CLASS;
6049 /* Classify each field of record and merge classes. */
6050 switch (TREE_CODE (type))
6053 /* And now merge the fields of structure. */
6054 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6056 if (TREE_CODE (field) == FIELD_DECL)
6060 if (TREE_TYPE (field) == error_mark_node)
6063 /* Bitfields are always classified as integer. Handle them
6064 early, since later code would consider them to be
6065 misaligned integers. */
6066 if (DECL_BIT_FIELD (field))
6068 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
6069 i < ((int_bit_position (field) + (bit_offset % 64))
6070 + tree_low_cst (DECL_SIZE (field), 0)
6073 merge_classes (X86_64_INTEGER_CLASS,
6080 type = TREE_TYPE (field);
6082 /* Flexible array member is ignored. */
6083 if (TYPE_MODE (type) == BLKmode
6084 && TREE_CODE (type) == ARRAY_TYPE
6085 && TYPE_SIZE (type) == NULL_TREE
6086 && TYPE_DOMAIN (type) != NULL_TREE
6087 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
6092 if (!warned && warn_psabi)
6095 inform (input_location,
6096 "the ABI of passing struct with"
6097 " a flexible array member has"
6098 " changed in GCC 4.4");
6102 num = classify_argument (TYPE_MODE (type), type,
6104 (int_bit_position (field)
6105 + bit_offset) % 256);
6108 pos = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
6109 for (i = 0; i < num && (i + pos) < words; i++)
6111 merge_classes (subclasses[i], classes[i + pos]);
6118 /* Arrays are handled as small records. */
6121 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
6122 TREE_TYPE (type), subclasses, bit_offset);
6126 /* The partial classes are now full classes. */
6127 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
6128 subclasses[0] = X86_64_SSE_CLASS;
6129 if (subclasses[0] == X86_64_INTEGERSI_CLASS
6130 && !((bit_offset % 64) == 0 && bytes == 4))
6131 subclasses[0] = X86_64_INTEGER_CLASS;
6133 for (i = 0; i < words; i++)
6134 classes[i] = subclasses[i % num];
6139 case QUAL_UNION_TYPE:
6140 /* Unions are similar to RECORD_TYPE but offset is always 0.
6142 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6144 if (TREE_CODE (field) == FIELD_DECL)
6148 if (TREE_TYPE (field) == error_mark_node)
6151 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
6152 TREE_TYPE (field), subclasses,
6156 for (i = 0; i < num; i++)
6157 classes[i] = merge_classes (subclasses[i], classes[i]);
6168 /* When size > 16 bytes, if the first one isn't
6169 X86_64_SSE_CLASS or any other ones aren't
6170 X86_64_SSEUP_CLASS, everything should be passed in
6172 if (classes[0] != X86_64_SSE_CLASS)
6175 for (i = 1; i < words; i++)
6176 if (classes[i] != X86_64_SSEUP_CLASS)
6180 /* Final merger cleanup. */
6181 for (i = 0; i < words; i++)
6183 /* If one class is MEMORY, everything should be passed in
6185 if (classes[i] == X86_64_MEMORY_CLASS)
6188 /* The X86_64_SSEUP_CLASS should be always preceded by
6189 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6190 if (classes[i] == X86_64_SSEUP_CLASS
6191 && classes[i - 1] != X86_64_SSE_CLASS
6192 && classes[i - 1] != X86_64_SSEUP_CLASS)
6194 /* The first one should never be X86_64_SSEUP_CLASS. */
6195 gcc_assert (i != 0);
6196 classes[i] = X86_64_SSE_CLASS;
6199 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6200 everything should be passed in memory. */
6201 if (classes[i] == X86_64_X87UP_CLASS
6202 && (classes[i - 1] != X86_64_X87_CLASS))
6206 /* The first one should never be X86_64_X87UP_CLASS. */
6207 gcc_assert (i != 0);
6208 if (!warned && warn_psabi)
6211 inform (input_location,
6212 "the ABI of passing union with long double"
6213 " has changed in GCC 4.4");
6221 /* Compute alignment needed. We align all types to natural boundaries with
6222 exception of XFmode that is aligned to 64bits. */
6223 if (mode != VOIDmode && mode != BLKmode)
6225 int mode_alignment = GET_MODE_BITSIZE (mode);
6228 mode_alignment = 128;
6229 else if (mode == XCmode)
6230 mode_alignment = 256;
6231 if (COMPLEX_MODE_P (mode))
6232 mode_alignment /= 2;
6233 /* Misaligned fields are always returned in memory. */
6234 if (bit_offset % mode_alignment)
6238 /* for V1xx modes, just use the base mode */
6239 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
6240 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
6241 mode = GET_MODE_INNER (mode);
6243 /* Classification of atomic types. */
6248 classes[0] = X86_64_SSE_CLASS;
6251 classes[0] = X86_64_SSE_CLASS;
6252 classes[1] = X86_64_SSEUP_CLASS;
6262 int size = (bit_offset % 64)+ (int) GET_MODE_BITSIZE (mode);
6266 classes[0] = X86_64_INTEGERSI_CLASS;
6269 else if (size <= 64)
6271 classes[0] = X86_64_INTEGER_CLASS;
6274 else if (size <= 64+32)
6276 classes[0] = X86_64_INTEGER_CLASS;
6277 classes[1] = X86_64_INTEGERSI_CLASS;
6280 else if (size <= 64+64)
6282 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6290 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6294 /* OImode shouldn't be used directly. */
6299 if (!(bit_offset % 64))
6300 classes[0] = X86_64_SSESF_CLASS;
6302 classes[0] = X86_64_SSE_CLASS;
6305 classes[0] = X86_64_SSEDF_CLASS;
6308 classes[0] = X86_64_X87_CLASS;
6309 classes[1] = X86_64_X87UP_CLASS;
6312 classes[0] = X86_64_SSE_CLASS;
6313 classes[1] = X86_64_SSEUP_CLASS;
6316 classes[0] = X86_64_SSE_CLASS;
6317 if (!(bit_offset % 64))
6323 if (!warned && warn_psabi)
6326 inform (input_location,
6327 "the ABI of passing structure with complex float"
6328 " member has changed in GCC 4.4");
6330 classes[1] = X86_64_SSESF_CLASS;
6334 classes[0] = X86_64_SSEDF_CLASS;
6335 classes[1] = X86_64_SSEDF_CLASS;
6338 classes[0] = X86_64_COMPLEX_X87_CLASS;
6341 /* This modes is larger than 16 bytes. */
6349 classes[0] = X86_64_SSE_CLASS;
6350 classes[1] = X86_64_SSEUP_CLASS;
6351 classes[2] = X86_64_SSEUP_CLASS;
6352 classes[3] = X86_64_SSEUP_CLASS;
6360 classes[0] = X86_64_SSE_CLASS;
6361 classes[1] = X86_64_SSEUP_CLASS;
6369 classes[0] = X86_64_SSE_CLASS;
6375 gcc_assert (VECTOR_MODE_P (mode));
6380 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
6382 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
6383 classes[0] = X86_64_INTEGERSI_CLASS;
6385 classes[0] = X86_64_INTEGER_CLASS;
6386 classes[1] = X86_64_INTEGER_CLASS;
6387 return 1 + (bytes > 8);
6391 /* Examine the argument and return set number of register required in each
6392 class. Return 0 iff parameter should be passed in memory. */
6394 examine_argument (enum machine_mode mode, const_tree type, int in_return,
6395 int *int_nregs, int *sse_nregs)
6397 enum x86_64_reg_class regclass[MAX_CLASSES];
/* Number of eightbyte classes the value occupies; 0 means "pass in
   memory" (classify_argument failed).  */
6398 int n = classify_argument (mode, type, regclass, 0);
/* Walk the classes, tallying integer vs SSE register demand into
   *int_nregs / *sse_nregs.  */
6404 for (n--; n >= 0; n--)
6405 switch (regclass[n])
6407 case X86_64_INTEGER_CLASS:
6408 case X86_64_INTEGERSI_CLASS:
6411 case X86_64_SSE_CLASS:
6412 case X86_64_SSESF_CLASS:
6413 case X86_64_SSEDF_CLASS:
/* NO_CLASS and SSEUP consume no extra register of their own.  */
6416 case X86_64_NO_CLASS:
6417 case X86_64_SSEUP_CLASS:
/* x87 classes are only usable for return values; as an argument the
   value must go to memory (hence the 0 for !in_return below).  */
6419 case X86_64_X87_CLASS:
6420 case X86_64_X87UP_CLASS:
6424 case X86_64_COMPLEX_X87_CLASS:
6425 return in_return ? 2 : 0;
6426 case X86_64_MEMORY_CLASS:
6432 /* Construct container for the argument used by GCC interface. See
6433 FUNCTION_ARG for the detailed description. */
6436 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
6437 const_tree type, int in_return, int nintregs, int nsseregs,
6438 const int *intreg, int sse_regno)
6440 /* The following variables hold the static issued_error state. */
6441 static bool issued_sse_arg_error;
6442 static bool issued_sse_ret_error;
6443 static bool issued_x87_ret_error;
6445 enum machine_mode tmpmode;
6447 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6448 enum x86_64_reg_class regclass[MAX_CLASSES];
6452 int needed_sseregs, needed_intregs;
6453 rtx exp[MAX_CLASSES];
6456 n = classify_argument (mode, type, regclass, 0);
6459 if (!examine_argument (mode, type, in_return, &needed_intregs,
/* Not enough free registers of the required kinds: pass in memory.  */
6462 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
6465 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
6466 some less clueful developer tries to use floating-point anyway. */
6467 if (needed_sseregs && !TARGET_SSE)
6471 if (!issued_sse_ret_error)
6473 error ("SSE register return with SSE disabled")
6474 issued_sse_ret_error = true;
6477 else if (!issued_sse_arg_error)
6479 error ("SSE register argument with SSE disabled")
6480 issued_sse_arg_error = true;
6485 /* Likewise, error if the ABI requires us to return values in the
6486 x87 registers and the user specified -mno-80387. */
6487 if (!TARGET_80387 && in_return)
6488 for (i = 0; i < n; i++)
6489 if (regclass[i] == X86_64_X87_CLASS
6490 || regclass[i] == X86_64_X87UP_CLASS
6491 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
6493 if (!issued_x87_ret_error)
6495 error ("x87 register return with x87 disabled")
6496 issued_x87_ret_error = true;
6501 /* First construct simple cases. Avoid SCmode, since we want to use
6502 single register to pass this type. */
6503 if (n == 1 && mode != SCmode)
6504 switch (regclass[0])
6506 case X86_64_INTEGER_CLASS:
6507 case X86_64_INTEGERSI_CLASS:
6508 return gen_rtx_REG (mode, intreg[0]);
6509 case X86_64_SSE_CLASS:
6510 case X86_64_SSESF_CLASS:
6511 case X86_64_SSEDF_CLASS:
6512 if (mode != BLKmode)
6513 return gen_reg_or_parallel (mode, orig_mode,
6514 SSE_REGNO (sse_regno));
6516 case X86_64_X87_CLASS:
6517 case X86_64_COMPLEX_X87_CLASS:
6518 return gen_rtx_REG (mode, FIRST_STACK_REG);
6519 case X86_64_NO_CLASS:
6520 /* Zero sized array, struct or class. */
/* Whole value fits in one SSE register (16-byte case).  */
6525 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
6526 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
6527 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
/* 32-byte value entirely in one AVX register (SSE + 3x SSEUP).  */
6529 && regclass[0] == X86_64_SSE_CLASS
6530 && regclass[1] == X86_64_SSEUP_CLASS
6531 && regclass[2] == X86_64_SSEUP_CLASS
6532 && regclass[3] == X86_64_SSEUP_CLASS
6534 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
/* long double: X87 + X87UP pair lives in a single stack register.  */
6537 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS
6538 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
6539 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
6540 && regclass[1] == X86_64_INTEGER_CLASS
6541 && (mode == CDImode || mode == TImode || mode == TFmode)
6542 && intreg[0] + 1 == intreg[1])
6543 return gen_rtx_REG (mode, intreg[0]);
6545 /* Otherwise figure out the entries of the PARALLEL. */
6546 for (i = 0; i < n; i++)
6550 switch (regclass[i])
6552 case X86_64_NO_CLASS:
6554 case X86_64_INTEGER_CLASS:
6555 case X86_64_INTEGERSI_CLASS:
6556 /* Merge TImodes on aligned occasions here too. */
6557 if (i * 8 + 8 > bytes)
6558 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
6559 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
6563 /* We've requested 24 bytes we don't have mode for. Use DImode. */
6564 if (tmpmode == BLKmode)
6566 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6567 gen_rtx_REG (tmpmode, *intreg),
6571 case X86_64_SSESF_CLASS:
6572 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6573 gen_rtx_REG (SFmode,
6574 SSE_REGNO (sse_regno)),
6578 case X86_64_SSEDF_CLASS:
6579 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6580 gen_rtx_REG (DFmode,
6581 SSE_REGNO (sse_regno)),
6585 case X86_64_SSE_CLASS:
/* SSE followed by SSEUP eightbytes: the pieces form one wide
   vector value in a single SSE/AVX register.  */
6593 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
6603 && regclass[1] == X86_64_SSEUP_CLASS
6604 && regclass[2] == X86_64_SSEUP_CLASS
6605 && regclass[3] == X86_64_SSEUP_CLASS);
6612 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6613 gen_rtx_REG (tmpmode,
6614 SSE_REGNO (sse_regno)),
6623 /* Empty aligned struct, union or class. */
/* Wrap the collected EXPR_LIST pieces into the final PARALLEL.  */
6627 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
6628 for (i = 0; i < nexps; i++)
6629 XVECEXP (ret, 0, i) = exp [i];
6633 /* Update the data in CUM to advance over an argument of mode MODE
6634 and data type TYPE. (TYPE is null for libcalls where that information
6635 may not be available.) */
6638 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6639 const_tree type, HOST_WIDE_INT bytes,
6640 HOST_WIDE_INT words)
/* Integer-register bookkeeping: each word consumes one register.  */
6656 cum->words += words;
6657 cum->nregs -= words;
6658 cum->regno += words;
/* Ran out of integer registers: remaining args go on the stack.  */
6660 if (cum->nregs <= 0)
6668 /* OImode shouldn't be used directly. */
6672 if (cum->float_in_sse < 2)
6675 if (cum->float_in_sse < 1)
/* SSE vector bookkeeping: one SSE register per (non-aggregate) arg.  */
6692 if (!type || !AGGREGATE_TYPE_P (type))
6694 cum->sse_words += words;
6695 cum->sse_nregs -= 1;
6696 cum->sse_regno += 1;
6697 if (cum->sse_nregs <= 0)
/* MMX vector bookkeeping, analogous to the SSE case above.  */
6711 if (!type || !AGGREGATE_TYPE_P (type))
6713 cum->mmx_words += words;
6714 cum->mmx_nregs -= 1;
6715 cum->mmx_regno += 1;
6716 if (cum->mmx_nregs <= 0)
/* Advance CUM over one argument under the x86-64 SysV ABI: consume the
   integer/SSE registers reported by examine_argument, or account the
   argument on the stack when it does not fit in registers.  */
6727 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6728 const_tree type, HOST_WIDE_INT words, bool named)
6730 int int_nregs, sse_nregs;
6732 /* Unnamed 256bit vector mode parameters are passed on stack. */
6733 if (!named && VALID_AVX256_REG_MODE (mode))
6736 if (examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
6737 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
6739 cum->nregs -= int_nregs;
6740 cum->sse_nregs -= sse_nregs;
6741 cum->regno += int_nregs;
6742 cum->sse_regno += sse_nregs;
/* Stack fallback: round the running word count up to the argument's
   alignment boundary, then account the argument's size.  */
6746 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
6747 cum->words = (cum->words + align - 1) & ~(align - 1);
6748 cum->words += words;
/* Advance CUM over one argument under the Microsoft x64 ABI.  Each
   argument slot is one word; anything not sized 1/2/4/8 bytes would
   have been passed indirectly (by reference) before reaching here.  */
6753 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
6754 HOST_WIDE_INT words)
6756 /* Otherwise, this should be passed indirect. */
6757 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
6759 cum->words += words;
6767 /* Update the data in CUM to advance over an argument of mode MODE and
6768 data type TYPE. (TYPE is null for libcalls where that information
6769 may not be available.) */
6772 ix86_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
6773 const_tree type, bool named)
6775 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6776 HOST_WIDE_INT bytes, words;
/* BLKmode carries no size; take it from the tree type instead.  */
6778 if (mode == BLKmode)
6779 bytes = int_size_in_bytes (type);
6781 bytes = GET_MODE_SIZE (mode);
6782 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6785 mode = type_natural_mode (type, NULL);
/* Dispatch on ABI: MS x64, SysV x86-64, or 32-bit.  */
6787 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6788 function_arg_advance_ms_64 (cum, bytes, words);
6789 else if (TARGET_64BIT)
6790 function_arg_advance_64 (cum, mode, type, words, named);
6792 function_arg_advance_32 (cum, mode, type, bytes, words);
6795 /* Define where to put the arguments to a function.
6796 Value is zero to push the argument on the stack,
6797 or a hard register in which to store the argument.
6799 MODE is the argument's machine mode.
6800 TYPE is the data type of the argument (as a tree).
6801 This is null for libcalls where that information may
6803 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6804 the preceding args and about the function being called.
6805 NAMED is nonzero if this argument is a named parameter
6806 (otherwise it is an extra parameter matching an ellipsis). */
6809 function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6810 enum machine_mode orig_mode, const_tree type,
6811 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
/* One-shot warning flags; each diagnostic is emitted at most once
   per compilation.  */
6813 static bool warnedsse, warnedmmx;
6815 /* Avoid the AL settings for the Unix64 ABI. */
6816 if (mode == VOIDmode)
6832 if (words <= cum->nregs)
6834 int regno = cum->regno;
6836 /* Fastcall allocates the first two DWORD (SImode) or
6837 smaller arguments to ECX and EDX if it isn't an
6843 || (type && AGGREGATE_TYPE_P (type)))
6846 /* ECX not EAX is the first allocated register. */
6847 if (regno == AX_REG)
6850 return gen_rtx_REG (mode, regno);
6855 if (cum->float_in_sse < 2)
6858 if (cum->float_in_sse < 1)
6862 /* In 32bit, we pass TImode in xmm registers. */
/* SSE-sized vectors: pass in an XMM register when not an aggregate;
   warn once if SSE is disabled but the ABI would change.  */
6869 if (!type || !AGGREGATE_TYPE_P (type))
6871 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
6874 warning (0, "SSE vector argument without SSE enabled "
6878 return gen_reg_or_parallel (mode, orig_mode,
6879 cum->sse_regno + FIRST_SSE_REG);
6884 /* OImode shouldn't be used directly. */
6893 if (!type || !AGGREGATE_TYPE_P (type))
6896 return gen_reg_or_parallel (mode, orig_mode,
6897 cum->sse_regno + FIRST_SSE_REG);
/* MMX-sized vectors: pass in an MMX register, with a one-shot
   warning when MMX is disabled.  */
6907 if (!type || !AGGREGATE_TYPE_P (type))
6909 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
6912 warning (0, "MMX vector argument without MMX enabled "
6916 return gen_reg_or_parallel (mode, orig_mode,
6917 cum->mmx_regno + FIRST_MMX_REG);
/* Return the register (or PARALLEL) in which to pass an argument under
   the x86-64 SysV ABI, or NULL/zero for stack passing.  */
6926 function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6927 enum machine_mode orig_mode, const_tree type, bool named)
6929 /* Handle a hidden AL argument containing number of registers
6930 for varargs x86-64 functions. */
6931 if (mode == VOIDmode)
6932 return GEN_INT (cum->maybe_vaarg
6933 ? (cum->sse_nregs < 0
6934 ? X86_64_SSE_REGPARM_MAX
6949 /* Unnamed 256bit vector mode parameters are passed on stack. */
/* Delegate the real work of building the register container.  */
6955 return construct_container (mode, orig_mode, type, 0, cum->nregs,
6957 &x86_64_int_parameter_registers [cum->regno],
/* Return the register in which to pass an argument under the Microsoft
   x64 ABI, or zero for stack passing.  */
6962 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6963 enum machine_mode orig_mode, bool named,
6964 HOST_WIDE_INT bytes)
6968 /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call.
6969 We use value of -2 to specify that current function call is MSABI. */
6970 if (mode == VOIDmode)
6971 return GEN_INT (-2);
6973 /* If we've run out of registers, it goes on the stack. */
6974 if (cum->nregs == 0)
6977 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
6979 /* Only floating point modes are passed in anything but integer regs. */
6980 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
6983 regno = cum->regno + FIRST_SSE_REG;
6988 /* Unnamed floating parameters are passed in both the
6989 SSE and integer registers. */
6990 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
6991 t2 = gen_rtx_REG (mode, regno);
6992 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
6993 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
6994 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
6997 /* Handle aggregated types passed in register. */
6998 if (orig_mode == BLKmode)
/* Small aggregates (<= 8 bytes) travel in a plain integer mode.  */
7000 if (bytes > 0 && bytes <= 8)
7001 mode = (bytes > 4 ? DImode : SImode);
7002 if (mode == BLKmode)
7006 return gen_reg_or_parallel (mode, orig_mode, regno);
7009 /* Return where to put the arguments to a function.
7010 Return zero to push the argument on the stack, or a hard register in which to store the argument.
7012 MODE is the argument's machine mode. TYPE is the data type of the
7013 argument. It is null for libcalls where that information may not be
7014 available. CUM gives information about the preceding args and about
7015 the function being called. NAMED is nonzero if this argument is a
7016 named parameter (otherwise it is an extra parameter matching an
7020 ix86_function_arg (cumulative_args_t cum_v, enum machine_mode omode,
7021 const_tree type, bool named)
7023 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7024 enum machine_mode mode = omode;
7025 HOST_WIDE_INT bytes, words;
7028 if (mode == BLKmode)
7029 bytes = int_size_in_bytes (type);
7031 bytes = GET_MODE_SIZE (mode);
7032 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7034 /* To simplify the code below, represent vector types with a vector mode
7035 even if MMX/SSE are not active. */
7036 if (type && TREE_CODE (type) == VECTOR_TYPE)
7037 mode = type_natural_mode (type, cum);
/* Dispatch on ABI: MS x64, SysV x86-64, or 32-bit.  */
7039 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7040 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
7041 else if (TARGET_64BIT)
7042 arg = function_arg_64 (cum, mode, omode, type, named);
7044 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
/* Record 256-bit AVX argument use so vzeroupper insertion knows
   whether this function passes/receives AVX256 values.  */
7046 if (TARGET_VZEROUPPER && function_pass_avx256_p (arg))
7048 /* This argument uses 256bit AVX modes. */
7050 cfun->machine->callee_pass_avx256_p = true;
7052 cfun->machine->caller_pass_avx256_p = true;
7058 /* A C expression that indicates when an argument must be passed by
7059 reference. If nonzero for an argument, a copy of that argument is
7060 made in memory and a pointer to the argument is passed instead of
7061 the argument itself. The pointer is passed in whatever way is
7062 appropriate for passing a pointer to that type. */
7065 ix86_pass_by_reference (cumulative_args_t cum_v ATTRIBUTE_UNUSED,
7066 enum machine_mode mode ATTRIBUTE_UNUSED,
7067 const_tree type, bool named ATTRIBUTE_UNUSED)
7069 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7071 /* See Windows x64 Software Convention. */
7072 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7074 int msize = (int) GET_MODE_SIZE (mode);
7077 /* Arrays are passed by reference. */
7078 if (TREE_CODE (type) == ARRAY_TYPE)
7081 if (AGGREGATE_TYPE_P (type))
7083 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
7084 are passed by reference. */
7085 msize = int_size_in_bytes (type);
7089 /* __m128 is passed by reference. */
7091 case 1: case 2: case 4: case 8:
/* SysV x86-64: variable-sized types (size -1) go by reference.  */
7097 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
7103 /* Return true when TYPE should be 128bit aligned for 32bit argument
7104 passing ABI. XXX: This function is obsolete and is only used for
7105 checking psABI compatibility with previous versions of GCC. */
7108 ix86_compat_aligned_value_p (const_tree type)
7110 enum machine_mode mode = TYPE_MODE (type);
7111 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
7115 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
/* Anything less aligned than 128 bits cannot require 128-bit
   argument alignment.  */
7117 if (TYPE_ALIGN (type) < 128)
7120 if (AGGREGATE_TYPE_P (type))
7122 /* Walk the aggregates recursively. */
7123 switch (TREE_CODE (type))
7127 case QUAL_UNION_TYPE:
7131 /* Walk all the structure fields. */
7132 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
/* A single 128-bit-aligned field makes the whole aggregate so.  */
7134 if (TREE_CODE (field) == FIELD_DECL
7135 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
7142 /* Just for use if some languages passes arrays by value. */
7143 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
7154 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
7155 XXX: This function is obsolete and is only used for checking psABI
7156 compatibility with previous versions of GCC. */
7159 ix86_compat_function_arg_boundary (enum machine_mode mode,
7160 const_tree type, unsigned int align)
7162 /* In 32bit, only _Decimal128 and __float128 are aligned to their
7163 natural boundaries. */
7164 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
7166 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
7167 make an exception for SSE modes since these require 128bit
7170 The handling here differs from field_alignment. ICC aligns MMX
7171 arguments to 4 byte boundaries, while structure fields are aligned
7172 to 8 byte boundaries. */
7175 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
7176 align = PARM_BOUNDARY;
7180 if (!ix86_compat_aligned_value_p (type))
7181 align = PARM_BOUNDARY;
/* Never exceed the target's maximum supported alignment.  */
7184 if (align > BIGGEST_ALIGNMENT)
7185 align = BIGGEST_ALIGNMENT;
7189 /* Return true when TYPE should be 128bit aligned for 32bit argument
7193 ix86_contains_aligned_value_p (const_tree type)
7195 enum machine_mode mode = TYPE_MODE (type);
/* XFmode/XCmode (long double) never force 128-bit alignment here.  */
7197 if (mode == XFmode || mode == XCmode)
7200 if (TYPE_ALIGN (type) < 128)
7203 if (AGGREGATE_TYPE_P (type))
7205 /* Walk the aggregates recursively. */
7206 switch (TREE_CODE (type))
7210 case QUAL_UNION_TYPE:
7214 /* Walk all the structure fields. */
7215 for (field = TYPE_FIELDS (type);
7217 field = DECL_CHAIN (field))
7219 if (TREE_CODE (field) == FIELD_DECL
7220 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
7227 /* Just for use if some languages passes arrays by value. */
7228 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
/* Scalar (non-aggregate) case: decided by the type's own alignment.  */
7237 return TYPE_ALIGN (type) >= 128;
7242 /* Gives the alignment boundary, in bits, of an argument with the
7243 specified mode and type. */
7246 ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
7251 /* Since the main variant type is used for call, we convert it to
7252 the main variant type. */
7253 type = TYPE_MAIN_VARIANT (type);
7254 align = TYPE_ALIGN (type);
/* No type available (libcall): fall back to the mode alignment.  */
7257 align = GET_MODE_ALIGNMENT (mode);
7258 if (align < PARM_BOUNDARY)
7259 align = PARM_BOUNDARY;
/* Remember the pre-adjustment value for the psABI-change warning.  */
7263 unsigned int saved_align = align;
7267 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
7270 if (mode == XFmode || mode == XCmode)
7271 align = PARM_BOUNDARY;
7273 else if (!ix86_contains_aligned_value_p (type))
7274 align = PARM_BOUNDARY;
7277 align = PARM_BOUNDARY;
/* Warn once if the GCC 4.6 boundary differs from the old one.  */
7282 && align != ix86_compat_function_arg_boundary (mode, type,
7286 inform (input_location,
7287 "The ABI for passing parameters with %d-byte"
7288 " alignment has changed in GCC 4.6",
7289 align / BITS_PER_UNIT);
7296 /* Return true if N is a possible register number of function value. */
7299 ix86_function_value_regno_p (const unsigned int regno)
7306 case FIRST_FLOAT_REG:
7307 /* TODO: The function should depend on current function ABI but
7308 builtins.c would need updating then. Therefore we use the
/* MS x64 never returns values in x87 registers.  */
7310 if (TARGET_64BIT && ix86_abi == MS_ABI)
7312 return TARGET_FLOAT_RETURNS_IN_80387;
7318 if (TARGET_MACHO || TARGET_64BIT)
7326 /* Define how to find the value returned by a function.
7327 VALTYPE is the data type of the value (as a tree).
7328 If the precise function being called is known, FUNC is its FUNCTION_DECL;
7329 otherwise, FUNC is 0. */
7332 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
7333 const_tree fntype, const_tree fn)
7337 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
7338 we normally prevent this case when mmx is not available. However
7339 some ABIs may require the result to be returned like DImode. */
7340 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7341 regno = FIRST_MMX_REG;
7343 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
7344 we prevent this case when sse is not available. However some ABIs
7345 may require the result to be returned like integer TImode. */
7346 else if (mode == TImode
7347 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7348 regno = FIRST_SSE_REG;
7350 /* 32-byte vector modes in %ymm0. */
7351 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
7352 regno = FIRST_SSE_REG;
7354 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
7355 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
7356 regno = FIRST_FLOAT_REG;
7358 /* Most things go in %eax. */
7361 /* Override FP return register with %xmm0 for local functions when
7362 SSE math is enabled or for functions with sseregparm attribute. */
7363 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
7365 int sse_level = ix86_function_sseregparm (fntype, fn, false);
7366 if ((sse_level >= 1 && mode == SFmode)
7367 || (sse_level == 2 && mode == DFmode))
7368 regno = FIRST_SSE_REG;
7371 /* OImode shouldn't be used directly. */
7372 gcc_assert (mode != OImode);
/* Build the REG in the caller-visible (original) mode.  */
7374 return gen_rtx_REG (orig_mode, regno);
/* Return an rtx describing where a value of MODE/VALTYPE is returned
   under the x86-64 SysV ABI.  */
7378 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
7383 /* Handle libcalls, which don't provide a type node. */
7384 if (valtype == NULL)
7398 regno = FIRST_SSE_REG;
7402 regno = FIRST_FLOAT_REG;
7410 return gen_rtx_REG (mode, regno);
7412 else if (POINTER_TYPE_P (valtype))
7414 /* Pointers are always returned in Pmode. */
/* General case: let construct_container classify the return value
   across %rax/%rdx and the SSE return registers.  */
7418 ret = construct_container (mode, orig_mode, valtype, 1,
7419 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
7420 x86_64_int_return_registers, 0);
7422 /* For zero sized structures, construct_container returns NULL, but we
7423 need to keep rest of compiler happy by returning meaningful value. */
7425 ret = gen_rtx_REG (orig_mode, AX_REG);
/* Return an rtx describing where a value of MODE is returned under the
   Microsoft x64 ABI: %rax by default, %xmm0 for scalar float/double and
   16-byte non-complex integer/vector values.  */
7431 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
7433 unsigned int regno = AX_REG;
7437 switch (GET_MODE_SIZE (mode))
7440 if((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7441 && !COMPLEX_MODE_P (mode))
7442 regno = FIRST_SSE_REG;
7446 if (mode == SFmode || mode == DFmode)
7447 regno = FIRST_SSE_REG;
7453 return gen_rtx_REG (orig_mode, regno);
/* Common worker for ix86_function_value and ix86_libcall_value:
   pick the ABI-specific helper for the callee's calling convention.
   FNTYPE_OR_DECL may be a FUNCTION_DECL, a function type, or NULL.  */
7457 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
7458 enum machine_mode orig_mode, enum machine_mode mode)
7460 const_tree fn, fntype;
7463 if (fntype_or_decl && DECL_P (fntype_or_decl))
7464 fn = fntype_or_decl;
7465 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
7467 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
7468 return function_value_ms_64 (orig_mode, mode);
7469 else if (TARGET_64BIT)
7470 return function_value_64 (orig_mode, mode, valtype);
7472 return function_value_32 (orig_mode, mode, fntype, fn);
/* Worker for TARGET_FUNCTION_VALUE: where a value of type VALTYPE is
   returned.  Resolves the type's natural mode before delegating.  */
7476 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
7477 bool outgoing ATTRIBUTE_UNUSED)
7479 enum machine_mode mode, orig_mode;
7481 orig_mode = TYPE_MODE (valtype);
7482 mode = type_natural_mode (valtype, NULL);
7483 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
7486 /* Pointer function arguments and return values are promoted to Pmode. */
7488 static enum machine_mode
7489 ix86_promote_function_mode (const_tree type, enum machine_mode mode,
7490 int *punsignedp, const_tree fntype,
7493 if (type != NULL_TREE && POINTER_TYPE_P (type))
/* Pointers extend unsigned when widened to Pmode.  */
7495 *punsignedp = POINTERS_EXTEND_UNSIGNED;
/* Non-pointers: use the generic promotion rules.  */
7498 return default_promote_function_mode (type, mode, punsignedp, fntype,
/* Worker for TARGET_LIBCALL_VALUE: libcalls carry no type node, so
   classify the return location purely from MODE.  */
7503 ix86_libcall_value (enum machine_mode mode)
7505 return ix86_function_value_1 (NULL, NULL, mode, mode);
7508 /* Return true iff type is returned in memory. */
7510 static bool ATTRIBUTE_UNUSED
7511 return_in_memory_32 (const_tree type, enum machine_mode mode)
7515 if (mode == BLKmode)
7518 size = int_size_in_bytes (type);
/* Some targets return small aggregates (<= 8 bytes) in registers.  */
7520 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
7523 if (VECTOR_MODE_P (mode) || mode == TImode)
7525 /* User-created vectors small enough to fit in EAX. */
7529 /* MMX/3dNow values are returned in MM0,
7530 except when it doesn't exits or the ABI prescribes otherwise. */
7532 return !TARGET_MMX || TARGET_VECT8_RETURNS;
7534 /* SSE values are returned in XMM0, except when it doesn't exist. */
7538 /* AVX values are returned in YMM0, except when it doesn't exist. */
7549 /* OImode shouldn't be used directly. */
7550 gcc_assert (mode != OImode);
/* SysV x86-64: a value is returned in memory exactly when
   examine_argument cannot assign it to registers.  */
7555 static bool ATTRIBUTE_UNUSED
7556 return_in_memory_64 (const_tree type, enum machine_mode mode)
7558 int needed_intregs, needed_sseregs;
7559 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
/* Microsoft x64: values of size 1, 2, 4 or 8 bytes (and __m128-style
   16-byte non-complex values) are returned in registers; everything
   else goes to memory.  */
7562 static bool ATTRIBUTE_UNUSED
7563 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
7565 HOST_WIDE_INT size = int_size_in_bytes (type);
7567 /* __m128 is returned in xmm0. */
7568 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7569 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
7572 /* Otherwise, the size must be exactly in [1248]. */
7573 return size != 1 && size != 2 && size != 4 && size != 8;
/* Worker for TARGET_RETURN_IN_MEMORY: dispatch to the ABI-specific
   helper, unless a subtarget supplies its own rule.  */
7577 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7579 #ifdef SUBTARGET_RETURN_IN_MEMORY
7580 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
7582 const enum machine_mode mode = type_natural_mode (type, NULL);
7586 if (ix86_function_type_abi (fntype) == MS_ABI)
7587 return return_in_memory_ms_64 (type, mode);
7589 return return_in_memory_64 (type, mode);
7592 return return_in_memory_32 (type, mode);
7596 /* When returning SSE vector types, we have a choice of either
7597 (1) being abi incompatible with a -march switch, or
7598 (2) generating an error.
7599 Given no good solution, I think the safest thing is one warning.
7600 The user won't be able to use -Werror, but....
7602 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
7603 called in response to actually generating a caller or callee that
7604 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
7605 via aggregate_value_p for general type probing from tree-ssa. */
7608 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
/* One-shot flags so each ABI warning fires at most once.  */
7610 static bool warnedsse, warnedmmx;
7612 if (!TARGET_64BIT && type)
7614 /* Look at the return type of the function, not the function type. */
7615 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
7617 if (!TARGET_SSE && !warnedsse)
7620 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7623 warning (0, "SSE vector return without SSE enabled "
7628 if (!TARGET_MMX && !warnedmmx)
7630 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7633 warning (0, "MMX vector return without MMX enabled "
7643 /* Create the va_list data type. */
7645 /* Returns the calling convention specific va_list date type.
7646 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
7649 ix86_build_builtin_va_list_abi (enum calling_abi abi)
7651 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
7653 /* For i386 we use plain pointer to argument area. */
7654 if (!TARGET_64BIT || abi == MS_ABI)
7655 return build_pointer_type (char_type_node);
/* SysV x86-64 va_list is a one-element array of a four-field record:
   gp_offset, fp_offset, overflow_arg_area, reg_save_area.  */
7657 record = lang_hooks.types.make_type (RECORD_TYPE);
7658 type_decl = build_decl (BUILTINS_LOCATION,
7659 TYPE_DECL, get_identifier ("__va_list_tag"), record);
7661 f_gpr = build_decl (BUILTINS_LOCATION,
7662 FIELD_DECL, get_identifier ("gp_offset"),
7663 unsigned_type_node);
7664 f_fpr = build_decl (BUILTINS_LOCATION,
7665 FIELD_DECL, get_identifier ("fp_offset"),
7666 unsigned_type_node);
7667 f_ovf = build_decl (BUILTINS_LOCATION,
7668 FIELD_DECL, get_identifier ("overflow_arg_area"),
7670 f_sav = build_decl (BUILTINS_LOCATION,
7671 FIELD_DECL, get_identifier ("reg_save_area"),
/* Expose the counter fields so the stdarg pass can track them.  */
7674 va_list_gpr_counter_field = f_gpr;
7675 va_list_fpr_counter_field = f_fpr;
7677 DECL_FIELD_CONTEXT (f_gpr) = record;
7678 DECL_FIELD_CONTEXT (f_fpr) = record;
7679 DECL_FIELD_CONTEXT (f_ovf) = record;
7680 DECL_FIELD_CONTEXT (f_sav) = record;
7682 TYPE_STUB_DECL (record) = type_decl;
7683 TYPE_NAME (record) = type_decl;
7684 TYPE_FIELDS (record) = f_gpr;
7685 DECL_CHAIN (f_gpr) = f_fpr;
7686 DECL_CHAIN (f_fpr) = f_ovf;
7687 DECL_CHAIN (f_ovf) = f_sav;
7689 layout_type (record);
7691 /* The correct type is an array type of one element. */
7692 return build_array_type (record, build_index_type (size_zero_node));
7695 /* Setup the builtin va_list data type and for 64-bit the additional
7696 calling convention specific va_list data types. */
7699 ix86_build_builtin_va_list (void)
7701 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
7703 /* Initialize abi specific va_list builtin types. */
/* Build the SysV-flavoured va_list, copying as a variant so it stays
   distinct from the default type when they coincide.  */
7707 if (ix86_abi == MS_ABI)
7709 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
7710 if (TREE_CODE (t) != RECORD_TYPE)
7711 t = build_variant_type_copy (t);
7712 sysv_va_list_type_node = t;
7717 if (TREE_CODE (t) != RECORD_TYPE)
7718 t = build_variant_type_copy (t);
7719 sysv_va_list_type_node = t;
/* Likewise for the MS-flavoured va_list.  */
7721 if (ix86_abi != MS_ABI)
7723 t = ix86_build_builtin_va_list_abi (MS_ABI);
7724 if (TREE_CODE (t) != RECORD_TYPE)
7725 t = build_variant_type_copy (t);
7726 ms_va_list_type_node = t;
7731 if (TREE_CODE (t) != RECORD_TYPE)
7732 t = build_variant_type_copy (t);
7733 ms_va_list_type_node = t;
7740 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
7743 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
7749 /* GPR size of varargs save area. */
7750 if (cfun->va_list_gpr_size)
7751 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
7753 ix86_varargs_gpr_size = 0;
7755 /* FPR size of varargs save area. We don't need it if we don't pass
7756 anything in SSE registers. */
7757 if (TARGET_SSE && cfun->va_list_fpr_size)
7758 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
7760 ix86_varargs_fpr_size = 0;
/* Nothing to save: no register save area needed at all.  */
7762 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
7765 save_area = frame_pointer_rtx;
7766 set = get_varargs_alias_set ();
/* Spill the remaining integer parameter registers into the save
   area, word by word.  */
7768 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
7769 if (max > X86_64_REGPARM_MAX)
7770 max = X86_64_REGPARM_MAX;
7772 for (i = cum->regno; i < max; i++)
7774 mem = gen_rtx_MEM (Pmode,
7775 plus_constant (save_area, i * UNITS_PER_WORD));
7776 MEM_NOTRAP_P (mem) = 1;
7777 set_mem_alias_set (mem, set);
7778 emit_move_insn (mem, gen_rtx_REG (Pmode,
7779 x86_64_int_parameter_registers[i]));
7782 if (ix86_varargs_fpr_size)
7784 enum machine_mode smode;
7787 /* Now emit code to save SSE registers. The AX parameter contains number
7788 of SSE parameter registers used to call this function, though all we
7789 actually check here is the zero/non-zero status. */
7791 label = gen_label_rtx ();
7792 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
7793 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
7796 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
7797 we used movdqa (i.e. TImode) instead? Perhaps even better would
7798 be if we could determine the real mode of the data, via a hook
7799 into pass_stdarg. Ignore all that for now. */
/* The SSE stores need stack alignment at least as big as SMODE's.  */
7801 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
7802 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
7804 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
7805 if (max > X86_64_SSE_REGPARM_MAX)
7806 max = X86_64_SSE_REGPARM_MAX;
/* Spill the remaining SSE parameter registers, 16 bytes apiece,
   just past the GPR portion of the save area.  */
7808 for (i = cum->sse_regno; i < max; ++i)
7810 mem = plus_constant (save_area, i * 16 + ix86_varargs_gpr_size);
7811 mem = gen_rtx_MEM (smode, mem);
7812 MEM_NOTRAP_P (mem) = 1;
7813 set_mem_alias_set (mem, set);
7814 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
7816 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
/* Worker function for TARGET_SETUP_INCOMING_VARARGS for the Win64 ABI:
   spill every unconsumed integer parameter register to its home slot in
   the caller-allocated shadow/argument area (Win64 va_arg reads args
   straight off the stack, so no separate register save area is used).  */
7824 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
7826 alias_set_type set = get_varargs_alias_set ();
7829 /* Reset to zero, as there might be a sysv vaarg used
7831 ix86_varargs_gpr_size = 0;
7832 ix86_varargs_fpr_size = 0;
7834 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
/* Each register's home slot is at i * UNITS_PER_WORD above the
   incoming-arguments pointer.  */
7838 mem = gen_rtx_MEM (Pmode,
7839 plus_constant (virtual_incoming_args_rtx,
7840 i * UNITS_PER_WORD));
7841 MEM_NOTRAP_P (mem) = 1;
7842 set_mem_alias_set (mem, set);
7844 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
7845 emit_move_insn (mem, reg);
/* TARGET_SETUP_INCOMING_VARARGS hook: advance a copy of CUM past the
   last named argument (for stdarg functions), then dispatch to the
   ABI-specific worker.  NOTE(review): the early-return for !TARGET_64BIT
   and the `fntype` declaration were lost in extraction.  */
7850 ix86_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
7851 tree type, int *pretend_size ATTRIBUTE_UNUSED,
7854 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
/* Work on a copy so the caller's CUM is left untouched.  */
7855 CUMULATIVE_ARGS next_cum;
7858 /* This argument doesn't appear to be used anymore. Which is good,
7859 because the old code here didn't suppress rtl generation. */
7860 gcc_assert (!no_rtl);
7865 fntype = TREE_TYPE (current_function_decl);
7867 /* For varargs, we do not want to skip the dummy va_dcl argument.
7868 For stdargs, we do want to skip the last named argument. */
7870 if (stdarg_p (fntype))
7871 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
7874 if (cum->call_abi == MS_ABI)
7875 setup_incoming_varargs_ms_64 (&next_cum);
7877 setup_incoming_varargs_64 (&next_cum);
7880 /* Checks if TYPE is of kind va_list char *. */
/* Returns true when TYPE is a plain-pointer style va_list (always the
   case for 32-bit, and for the MS flavor on 64-bit).  Used to decide
   whether the generic std_* va handling suffices.
   NOTE(review): the return type, the early `return true;` for 32-bit
   and the `tree canonic` declaration were lost in extraction.  */
7883 is_va_list_char_pointer (tree type)
7887 /* For 32-bit it is always true. */
7890 canonic = ix86_canonical_va_list_type (type);
7891 return (canonic == ms_va_list_type_node
7892 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
7895 /* Implement va_start. */
/* TARGET_EXPAND_BUILTIN_VA_START: initialize the four-field SysV x86-64
   va_list (gp_offset, fp_offset, overflow_arg_area, reg_save_area) from
   the counts recorded in crtl->args, with special handling for
   -fsplit-stack where the overflow area must be addressed through a
   pseudo set up by the split-stack prologue.  */
7898 ix86_va_start (tree valist, rtx nextarg)
7900 HOST_WIDE_INT words, n_gpr, n_fpr;
7901 tree f_gpr, f_fpr, f_ovf, f_sav;
7902 tree gpr, fpr, ovf, sav, t;
7906 if (flag_split_stack
7907 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7909 unsigned int scratch_regno;
7911 /* When we are splitting the stack, we can't refer to the stack
7912 arguments using internal_arg_pointer, because they may be on
7913 the old stack. The split stack prologue will arrange to
7914 leave a pointer to the old stack arguments in a scratch
7915 register, which we here copy to a pseudo-register. The split
7916 stack prologue can't set the pseudo-register directly because
7917 it (the prologue) runs before any registers have been saved. */
7919 scratch_regno = split_stack_prologue_scratch_regno ();
7920 if (scratch_regno != INVALID_REGNUM)
/* NOTE(review): the `rtx reg, seq;` declaration and the
   start_sequence/end_sequence pair around this copy were lost in
   extraction.  */
7924 reg = gen_reg_rtx (Pmode);
7925 cfun->machine->split_stack_varargs_pointer = reg;
7928 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
/* Place the copy at the very start of the function, right after the
   prologue has stored the scratch register.  */
7932 push_topmost_sequence ();
7933 emit_insn_after (seq, entry_of_function ());
7934 pop_topmost_sequence ();
7938 /* Only 64bit target needs something special. */
7939 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7941 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7942 std_expand_builtin_va_start (valist, nextarg);
/* Split-stack pointer va_list: store (varargs pointer + arg offset).  */
7947 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
7948 next = expand_binop (ptr_mode, add_optab,
7949 cfun->machine->split_stack_varargs_pointer,
7950 crtl->args.arg_offset_rtx,
7951 NULL_RTX, 0, OPTAB_LIB_WIDEN);
7952 convert_move (va_r, next, 0);
/* Fetch the four fields of the SysV va_list record in order.  */
7957 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7958 f_fpr = DECL_CHAIN (f_gpr);
7959 f_ovf = DECL_CHAIN (f_fpr);
7960 f_sav = DECL_CHAIN (f_ovf);
7962 valist = build_simple_mem_ref (valist);
7963 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
7964 /* The following should be folded into the MEM_REF offset. */
7965 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
7967 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
7969 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
7971 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
7974 /* Count number of gp and fp argument registers used. */
7975 words = crtl->args.info.words;
7976 n_gpr = crtl->args.info.regno;
7977 n_fpr = crtl->args.info.sse_regno;
7979 if (cfun->va_list_gpr_size)
/* gp_offset = bytes of the save area already consumed by named args.  */
7981 type = TREE_TYPE (gpr);
7982 t = build2 (MODIFY_EXPR, type,
7983 gpr, build_int_cst (type, n_gpr * 8));
7984 TREE_SIDE_EFFECTS (t) = 1;
7985 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7988 if (TARGET_SSE && cfun->va_list_fpr_size)
/* fp_offset starts past all the GPR slots (8 * REGPARM_MAX bytes).  */
7990 type = TREE_TYPE (fpr);
7991 t = build2 (MODIFY_EXPR, type, fpr,
7992 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
7993 TREE_SIDE_EFFECTS (t) = 1;
7994 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7997 /* Find the overflow area. */
7998 type = TREE_TYPE (ovf);
7999 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8000 ovf_rtx = crtl->args.internal_arg_pointer;
8002 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
8003 t = make_tree (type, ovf_rtx);
/* NOTE(review): the `if (words != 0)` guard before this advance was
   lost in extraction.  */
8005 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
8006 t = build2 (MODIFY_EXPR, type, ovf, t);
8007 TREE_SIDE_EFFECTS (t) = 1;
8008 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8010 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
8012 /* Find the register save area.
8013 Prologue of the function save it right above stack frame. */
8014 type = TREE_TYPE (sav);
8015 t = make_tree (type, frame_pointer_rtx);
/* Only FPRs saved: bias backwards so fp_offset indexing still works.  */
8016 if (!ix86_varargs_gpr_size)
8017 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
8018 t = build2 (MODIFY_EXPR, type, sav, t);
8019 TREE_SIDE_EFFECTS (t) = 1;
8020 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8024 /* Implement va_arg. */
/* TARGET_GIMPLIFY_VA_ARG_EXPR for SysV x86-64: emit GIMPLE that either
   pulls the argument out of the register save area (when enough GPR/SSE
   slots remain, using gp_offset/fp_offset) or from the overflow area on
   the stack, with a runtime branch between the two paths.  Multi-part
   aggregates that straddle register classes are assembled piecewise into
   a temporary.  */
8027 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
8030 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
8031 tree f_gpr, f_fpr, f_ovf, f_sav;
8032 tree gpr, fpr, ovf, sav, t;
8034 tree lab_false, lab_over = NULL_TREE;
8039 enum machine_mode nat_mode;
8040 unsigned int arg_boundary;
8042 /* Only 64bit target needs something special. */
8043 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8044 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
/* Fetch the four va_list record fields in declaration order.  */
8046 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8047 f_fpr = DECL_CHAIN (f_gpr);
8048 f_ovf = DECL_CHAIN (f_fpr);
8049 f_sav = DECL_CHAIN (f_ovf);
8051 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
8052 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
8053 valist = build_va_arg_indirect_ref (valist);
8054 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
8055 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
8056 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
/* Pass-by-reference arguments are fetched as a pointer, then
   dereferenced at the very end.  */
8058 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
8060 type = build_pointer_type (type);
8061 size = int_size_in_bytes (type);
8062 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
8064 nat_mode = type_natural_mode (type, NULL);
8073 /* Unnamed 256bit vector mode parameters are passed on stack. */
8074 if (!TARGET_64BIT_MS_ABI)
/* NOTE(review): the switch over nat_mode (V8SF/V8SI/V32QI/V16HI/V4DF/
   V4DI -> container = NULL) was lost in extraction.  */
8081 container = construct_container (nat_mode, TYPE_MODE (type),
8082 type, 0, X86_64_REGPARM_MAX,
8083 X86_64_SSE_REGPARM_MAX, intreg,
8088 /* Pull the value out of the saved registers. */
8090 addr = create_tmp_var (ptr_type_node, "addr");
8094 int needed_intregs, needed_sseregs;
8096 tree int_addr, sse_addr;
8098 lab_false = create_artificial_label (UNKNOWN_LOCATION);
8099 lab_over = create_artificial_label (UNKNOWN_LOCATION);
8101 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
/* Over-aligned values can't be read straight from the save area.  */
8103 need_temp = (!REG_P (container)
8104 && ((needed_intregs && TYPE_ALIGN (type) > 64)
8105 || TYPE_ALIGN (type) > 128));
8107 /* In case we are passing structure, verify that it is consecutive block
8108 on the register save area. If not we need to do moves. */
8109 if (!need_temp && !REG_P (container))
8111 /* Verify that all registers are strictly consecutive */
8112 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
8116 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8118 rtx slot = XVECEXP (container, 0, i);
8119 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
8120 || INTVAL (XEXP (slot, 1)) != i * 16)
8128 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8130 rtx slot = XVECEXP (container, 0, i);
8131 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
8132 || INTVAL (XEXP (slot, 1)) != i * 8)
8144 int_addr = create_tmp_var (ptr_type_node, "int_addr");
8145 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
8148 /* First ensure that we fit completely in registers. */
/* Branch to lab_false (stack path) if gp_offset is already past the
   last slot that could hold all needed GPR words.  */
8151 t = build_int_cst (TREE_TYPE (gpr),
8152 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
8153 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
8154 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8155 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8156 gimplify_and_add (t, pre_p);
/* Same check for the SSE portion, offset past all GPR slots.  */
8160 t = build_int_cst (TREE_TYPE (fpr),
8161 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
8162 + X86_64_REGPARM_MAX * 8);
8163 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
8164 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8165 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8166 gimplify_and_add (t, pre_p);
8169 /* Compute index to start of area used for integer regs. */
8172 /* int_addr = gpr + sav; */
8173 t = fold_build_pointer_plus (sav, gpr);
8174 gimplify_assign (int_addr, t, pre_p);
8178 /* sse_addr = fpr + sav; */
8179 t = fold_build_pointer_plus (sav, fpr);
8180 gimplify_assign (sse_addr, t, pre_p);
/* need_temp path: copy each register-sized piece into a stack temp.  */
8184 int i, prev_size = 0;
8185 tree temp = create_tmp_var (type, "va_arg_tmp");
8188 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
8189 gimplify_assign (addr, t, pre_p);
8191 for (i = 0; i < XVECLEN (container, 0); i++)
8193 rtx slot = XVECEXP (container, 0, i);
8194 rtx reg = XEXP (slot, 0);
8195 enum machine_mode mode = GET_MODE (reg);
8201 tree dest_addr, dest;
8202 int cur_size = GET_MODE_SIZE (mode);
8204 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
8205 prev_size = INTVAL (XEXP (slot, 1));
/* Last piece may be shorter than a full register.  */
8206 if (prev_size + cur_size > size)
8208 cur_size = size - prev_size;
8209 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
8210 if (mode == BLKmode)
8213 piece_type = lang_hooks.types.type_for_mode (mode, 1);
8214 if (mode == GET_MODE (reg))
8215 addr_type = build_pointer_type (piece_type);
8217 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8219 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8222 if (SSE_REGNO_P (REGNO (reg)))
8224 src_addr = sse_addr;
8225 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
8229 src_addr = int_addr;
8230 src_offset = REGNO (reg) * 8;
8232 src_addr = fold_convert (addr_type, src_addr);
8233 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
8235 dest_addr = fold_convert (daddr_type, addr);
8236 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
8237 if (cur_size == GET_MODE_SIZE (mode))
/* Full piece: a plain assignment suffices.  */
8239 src = build_va_arg_indirect_ref (src_addr);
8240 dest = build_va_arg_indirect_ref (dest_addr);
8242 gimplify_assign (dest, src, pre_p);
/* Partial piece: fall back to memcpy.  */
8247 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
8248 3, dest_addr, src_addr,
8249 size_int (cur_size));
8250 gimplify_and_add (copy, pre_p);
8252 prev_size += cur_size;
/* Advance gp_offset / fp_offset past the consumed slots.  */
8258 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
8259 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
8260 gimplify_assign (gpr, t, pre_p);
8265 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
8266 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
8267 gimplify_assign (fpr, t, pre_p);
8270 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
8272 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
8275 /* ... otherwise out of the overflow area. */
8277 /* When we align parameter on stack for caller, if the parameter
8278 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
8279 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We will match callee
8280 here with caller. */
8281 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
8282 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
8283 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
8285 /* Care for on-stack alignment if needed. */
8286 if (arg_boundary <= 64 || size == 0)
/* Round the overflow pointer up to the argument boundary.  */
8290 HOST_WIDE_INT align = arg_boundary / 8;
8291 t = fold_build_pointer_plus_hwi (ovf, align - 1);
8292 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
8293 build_int_cst (TREE_TYPE (t), -align));
8296 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
8297 gimplify_assign (addr, t, pre_p);
/* Bump overflow_arg_area past this argument (word multiples).  */
8299 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
8300 gimplify_assign (unshare_expr (ovf), t, pre_p);
8303 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
8305 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
8306 addr = fold_convert (ptrtype, addr);
8309 addr = build_va_arg_indirect_ref (addr);
8310 return build_va_arg_indirect_ref (addr);
8313 /* Return true if OPNUM's MEM should be matched
8314 in movabs* patterns. */
8317 ix86_check_movabs (rtx insn, int opnum)
8321 set = PATTERN (insn);
8322 if (GET_CODE (set) == PARALLEL)
8323 set = XVECEXP (set, 0, 0);
8324 gcc_assert (GET_CODE (set) == SET);
8325 mem = XEXP (set, opnum);
8326 while (GET_CODE (mem) == SUBREG)
8327 mem = SUBREG_REG (mem);
8328 gcc_assert (MEM_P (mem));
8329 return volatile_ok || !MEM_VOLATILE_P (mem);
8332 /* Initialize the table of extra 80387 mathematical constants. */
8335 init_ext_80387_constants (void)
8337 static const char * cst[5] =
8339 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
8340 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
8341 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
8342 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
8343 "3.1415926535897932385128089594061862044", /* 4: fldpi */
8347 for (i = 0; i < 5; i++)
8349 real_from_string (&ext_80387_constants_table[i], cst[i]);
8350 /* Ensure each constant is rounded to XFmode precision. */
8351 real_convert (&ext_80387_constants_table[i],
8352 XFmode, &ext_80387_constants_table[i]);
8355 ext_80387_constants_init = 1;
8358 /* Return non-zero if the constant is something that
8359 can be loaded with a special instruction. */
8362 standard_80387_constant_p (rtx x)
8364 enum machine_mode mode = GET_MODE (x);
8368 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
8371 if (x == CONST0_RTX (mode))
8373 if (x == CONST1_RTX (mode))
8376 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8378 /* For XFmode constants, try to find a special 80387 instruction when
8379 optimizing for size or on those CPUs that benefit from them. */
8381 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
8385 if (! ext_80387_constants_init)
8386 init_ext_80387_constants ();
8388 for (i = 0; i < 5; i++)
8389 if (real_identical (&r, &ext_80387_constants_table[i]))
8393 /* Load of the constant -0.0 or -1.0 will be split as
8394 fldz;fchs or fld1;fchs sequence. */
8395 if (real_isnegzero (&r))
8397 if (real_identical (&r, &dconstm1))
8403 /* Return the opcode of the special instruction to be used to load
8407 standard_80387_constant_opcode (rtx x)
8409 switch (standard_80387_constant_p (x))
8433 /* Return the CONST_DOUBLE representing the 80387 constant that is
8434 loaded by the specified special instruction. The argument IDX
8435 matches the return value from standard_80387_constant_p. */
8438 standard_80387_constant_rtx (int idx)
8442 if (! ext_80387_constants_init)
8443 init_ext_80387_constants ();
8459 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
8463 /* Return 1 if X is all 0s and 2 if x is all 1s
8464 in supported SSE/AVX vector mode. */
8467 standard_sse_constant_p (rtx x)
8469 enum machine_mode mode = GET_MODE (x);
8471 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
8473 if (vector_all_ones_operand (x, mode))
8495 /* Return the opcode of the special instruction to be used to load
/* Chooses between pxor/xorps/xorpd (all-zeros) or pcmpeqd (all-ones)
   based on standard_sse_constant_p and the insn's attr mode; AVX forms
   use the three-operand vex encoding.
   NOTE(review): the case labels of both switches (MODE_TI/MODE_V2DF/
   MODE_V4SF, MODE_OI/MODE_V4DF/MODE_V8SF, and the outer case 1/case 2
   with a TARGET_AVX test) plus the trailing gcc_unreachable () were
   lost in extraction -- verify against upstream i386.c.  */
8499 standard_sse_constant_opcode (rtx insn, rtx x)
8501 switch (standard_sse_constant_p (x))
8504 switch (get_attr_mode (insn))
8507 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8508 return "%vpxor\t%0, %d0";
8510 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8511 return "%vxorpd\t%0, %d0";
8513 return "%vxorps\t%0, %d0";
/* 256-bit modes: always use the vex three-operand xor forms.  */
8516 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8517 return "vpxor\t%x0, %x0, %x0";
8519 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8520 return "vxorpd\t%x0, %x0, %x0";
8522 return "vxorps\t%x0, %x0, %x0";
/* All-ones constant: pcmpeqd reg,reg sets every bit.  */
8530 return "vpcmpeqd\t%0, %0, %0";
8532 return "pcmpeqd\t%0, %0";
8540 /* Returns true if OP contains a symbol reference */
8543 symbolic_reference_mentioned_p (rtx op)
8548 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
8551 fmt = GET_RTX_FORMAT (GET_CODE (op));
8552 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
8558 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
8559 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
8563 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
8570 /* Return true if it is appropriate to emit `ret' instructions in the
8571 body of a function. Do this only if the epilogue is simple, needing a
8572 couple of insns. Prior to reloading, we can't tell how many registers
8573 must be saved, so return false then. Return false if there is no frame
8574 marker to de-allocate. */
8577 ix86_can_use_return_insn_p (void)
8579 struct ix86_frame frame;
8581 if (! reload_completed || frame_pointer_needed)
8584 /* Don't allow more than 32k pop, since that's all we can do
8585 with one instruction. */
8586 if (crtl->args.pops_args && crtl->args.size >= 32768)
8589 ix86_compute_frame_layout (&frame);
8590 return (frame.stack_pointer_offset == UNITS_PER_WORD
8591 && (frame.nregs + frame.nsseregs) == 0);
8594 /* Value should be nonzero if functions must have frame pointers.
8595 Zero means the frame pointer need not be set up (and parms may
8596 be accessed via the stack pointer) in functions that seem suitable. */
8599 ix86_frame_pointer_required (void)
8601 /* If we accessed previous frames, then the generated code expects
8602 to be able to access the saved ebp value in our frame. */
8603 if (cfun->machine->accesses_prev_frame)
8606 /* Several x86 os'es need a frame pointer for other reasons,
8607 usually pertaining to setjmp. */
8608 if (SUBTARGET_FRAME_POINTER_REQUIRED)
8611 /* For older 32-bit runtimes setjmp requires valid frame-pointer. */
8612 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
8615 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
8616 turns off the frame pointer by default. Turn it back on now if
8617 we've not got a leaf function. */
8618 if (TARGET_OMIT_LEAF_FRAME_POINTER
8619 && (!current_function_is_leaf
8620 || ix86_current_function_calls_tls_descriptor))
8623 if (crtl->profile && !flag_fentry)
8629 /* Record that the current function accesses previous call frames. */
8632 ix86_setup_frame_addresses (void)
8634 cfun->machine->accesses_prev_frame = 1;
8637 #ifndef USE_HIDDEN_LINKONCE
8638 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
8639 # define USE_HIDDEN_LINKONCE 1
8641 # define USE_HIDDEN_LINKONCE 0
8645 static int pic_labels_used;
8647 /* Fills in the label name that should be used for a pc thunk for
8648 the given register. */
8651 get_pc_thunk_name (char name[32], unsigned int regno)
8653 gcc_assert (!TARGET_64BIT);
8655 if (USE_HIDDEN_LINKONCE)
8656 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
8658 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
8662 /* This function generates code for -fpic that loads %ebx with
8663 the return address of the caller and then returns. */
/* TARGET_ASM_CODE_END hook: emit the __x86.get_pc_thunk.REG helper
   functions for every register recorded in pic_labels_used (each thunk
   loads its register with the caller's return address via a stack load
   and returns), then the split-stack file trailer if needed.
   NOTE(review): various lines (local declarations, rtx xops[2], the
   TARGET_MACHO conditional heads, `continue`, inner loop bounds, braces)
   were lost in extraction.  */
8666 ix86_code_end (void)
8671 for (regno = AX_REG; regno <= SP_REG; regno++)
8676 if (!(pic_labels_used & (1 << regno)))
8679 get_pc_thunk_name (name, regno);
/* Build a minimal FUNCTION_DECL so the thunk gets proper sections,
   visibility and unwind info.  */
8681 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
8682 get_identifier (name),
8683 build_function_type_list (void_type_node, NULL_TREE));
8684 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
8685 NULL_TREE, void_type_node);
8686 TREE_PUBLIC (decl) = 1;
8687 TREE_STATIC (decl) = 1;
/* Darwin: weak definition in the coalesced text section.  */
8692 switch_to_section (darwin_sections[text_coal_section]);
8693 fputs ("\t.weak_definition\t", asm_out_file);
8694 assemble_name (asm_out_file, name);
8695 fputs ("\n\t.private_extern\t", asm_out_file);
8696 assemble_name (asm_out_file, name);
8697 putc ('\n', asm_out_file);
8698 ASM_OUTPUT_LABEL (asm_out_file, name);
8699 DECL_WEAK (decl) = 1;
8703 if (USE_HIDDEN_LINKONCE)
/* ELF: one shared hidden comdat copy per register name.  */
8705 DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
8707 targetm.asm_out.unique_section (decl, 0);
8708 switch_to_section (get_named_section (decl, NULL, 0));
8710 targetm.asm_out.globalize_label (asm_out_file, name);
8711 fputs ("\t.hidden\t", asm_out_file);
8712 assemble_name (asm_out_file, name);
8713 putc ('\n', asm_out_file);
8714 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
8718 switch_to_section (text_section);
8719 ASM_OUTPUT_LABEL (asm_out_file, name);
8722 DECL_INITIAL (decl) = make_node (BLOCK);
8723 current_function_decl = decl;
8724 init_function_start (decl);
8725 first_function_block_is_cold = false;
8726 /* Make sure unwind info is emitted for the thunk if needed. */
8727 final_start_function (emit_barrier (), asm_out_file, 1);
8729 /* Pad stack IP move with 4 instructions (two NOPs count
8730 as one instruction). */
8731 if (TARGET_PAD_SHORT_FUNCTION)
8736 fputs ("\tnop\n", asm_out_file);
/* The thunk body: mov (%esp), %REG; ret.  */
8739 xops[0] = gen_rtx_REG (Pmode, regno);
8740 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
8741 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
8742 fputs ("\tret\n", asm_out_file);
8743 final_end_function ();
8744 init_insn_lengths ();
8745 free_after_compilation (cfun);
8747 current_function_decl = NULL;
8750 if (flag_split_stack)
8751 file_end_indicate_split_stack ();
8754 /* Emit code for the SET_GOT patterns. */
/* Emits the assembly that loads the GOT/PIC base into DEST: either the
   VxWorks RTP GOTT lookup, a call to the per-register get_pc_thunk,
   or (when thunks are not used) an inline call/label sequence, followed
   by the add of _GLOBAL_OFFSET_TABLE_.  Returns the (empty) template
   string expected of an output function.
   NOTE(review): several lines (rtx xops[3]; xops[0]/[1] setup, the
   !TARGET_DEEP_BRANCH_PREDICTION branches, `return "";` statements and
   braces) were lost in extraction.  */
8757 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
8763 if (TARGET_VXWORKS_RTP && flag_pic)
8765 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
8766 xops[2] = gen_rtx_MEM (Pmode,
8767 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
8768 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
8770 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
8771 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
8772 an unadorned address. */
8773 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
8774 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
8775 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
8779 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
/* No thunk: emit a call to the next instruction's label and pop the
   return address into DEST.  */
8783 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
8785 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
8788 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8789 is what will be referenced by the Mach-O PIC subsystem. */
8791 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8794 targetm.asm_out.internal_label (asm_out_file, "L",
8795 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
/* Thunk path: record usage so ix86_code_end emits the thunk body.  */
8800 get_pc_thunk_name (name, REGNO (dest));
8801 pic_labels_used |= 1 << REGNO (dest);
8803 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
8804 xops[2] = gen_rtx_MEM (QImode, xops[2]);
8805 output_asm_insn ("call\t%X2", xops);
8806 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8807 is what will be referenced by the Mach-O PIC subsystem. */
8810 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8812 targetm.asm_out.internal_label (asm_out_file, "L",
8813 CODE_LABEL_NUMBER (label));
/* Finally add _GLOBAL_OFFSET_TABLE_ to the loaded PC.  */
8818 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
8823 /* Generate an "push" pattern for input ARG. */
/* Builds (set (mem (pre_dec sp)) arg) and keeps the frame-state
   bookkeeping (CFA offset when the CFA is SP, and sp_offset) in sync
   with the push.  NOTE(review): the signature line, the word-mode
   register adjustment, and the gen_rtx_MEM wrapper line were lost in
   extraction.  */
8828 struct machine_function *m = cfun->machine;
8830 if (m->fs.cfa_reg == stack_pointer_rtx)
8831 m->fs.cfa_offset += UNITS_PER_WORD;
8832 m->fs.sp_offset += UNITS_PER_WORD;
8834 return gen_rtx_SET (VOIDmode,
8836 gen_rtx_PRE_DEC (Pmode,
8837 stack_pointer_rtx)),
8841 /* Generate an "pop" pattern for input ARG. */
/* Builds (set arg (mem (post_inc sp))).  Unlike gen_push, no frame
   state is updated here.  NOTE(review): the signature line and the
   gen_rtx_MEM wrapper line were lost in extraction.  */
8846 return gen_rtx_SET (VOIDmode,
8849 gen_rtx_POST_INC (Pmode,
8850 stack_pointer_rtx)));
8853 /* Return >= 0 if there is an unused call-clobbered register available
8854 for the entire function. */
/* In leaf functions that make no calls at all, one of the
   call-clobbered registers (ax/cx/dx, i.e. 0..2) may be free for the
   whole function and can serve as an alternate PIC register, avoiding a
   save of the real PIC register.  Returns INVALID_REGNUM otherwise.
   NOTE(review): the !crtl->profile / !calls_eh_return conditions and
   the drap initialization branch were lost in extraction.  */
8857 ix86_select_alt_pic_regnum (void)
8859 if (current_function_is_leaf
8861 && !ix86_current_function_calls_tls_descriptor)
8864 /* Can't use the same register for both PIC and DRAP. */
8866 drap = REGNO (crtl->drap_reg);
/* Prefer the highest-numbered of ax/cx/dx that is never live.  */
8869 for (i = 2; i >= 0; --i)
8870 if (i != drap && !df_regs_ever_live_p (i))
8874 return INVALID_REGNUM;
8877 /* Return TRUE if we need to save REGNO. */
/* Decides whether REGNO must be saved in the prologue: the PIC register
   when it is genuinely needed (and no alternate is available), the EH
   return data registers when MAYBE_EH_RETURN, the DRAP register, and
   otherwise any live, call-saved, non-fixed register (excluding the
   hard frame pointer when it is in use).
   NOTE(review): the EH loop header `for (i = 0; ; i++)`, its
   `return true;` on match, and surrounding braces were lost in
   extraction.  */
8880 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
8882 if (pic_offset_table_rtx
8883 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
8884 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
8886 || crtl->calls_eh_return
8887 || crtl->uses_const_pool))
8888 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
8890 if (crtl->calls_eh_return && maybe_eh_return)
/* EH_RETURN_DATA_REGNO enumerates until INVALID_REGNUM.  */
8895 unsigned test = EH_RETURN_DATA_REGNO (i);
8896 if (test == INVALID_REGNUM)
8903 if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
8906 return (df_regs_ever_live_p (regno)
8907 && !call_used_regs[regno]
8908 && !fixed_regs[regno]
8909 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
8912 /* Return number of saved general prupose registers. */
8915 ix86_nsaved_regs (void)
8920 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8921 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8926 /* Return number of saved SSE registrers. */
8929 ix86_nsaved_sseregs (void)
8934 if (!TARGET_64BIT_MS_ABI)
8936 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8937 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8942 /* Given FROM and TO register numbers, say whether this elimination is
8943 allowed. If stack alignment is needed, we can only replace argument
8944 pointer with hard frame pointer, or replace frame pointer with stack
8945 pointer. Otherwise, frame pointer elimination is automatically
8946 handled and all other eliminations are valid. */
8949 ix86_can_eliminate (const int from, const int to)
8951 if (stack_realign_fp)
8952 return ((from == ARG_POINTER_REGNUM
8953 && to == HARD_FRAME_POINTER_REGNUM)
8954 || (from == FRAME_POINTER_REGNUM
8955 && to == STACK_POINTER_REGNUM));
8957 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
8960 /* Return the offset between two registers, one to be eliminated, and the other
8961 its replacement, at the start of a routine. */
8964 ix86_initial_elimination_offset (int from, int to)
8966 struct ix86_frame frame;
8967 ix86_compute_frame_layout (&frame);
8969 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8970 return frame.hard_frame_pointer_offset;
8971 else if (from == FRAME_POINTER_REGNUM
8972 && to == HARD_FRAME_POINTER_REGNUM)
8973 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
8976 gcc_assert (to == STACK_POINTER_REGNUM);
8978 if (from == ARG_POINTER_REGNUM)
8979 return frame.stack_pointer_offset;
8981 gcc_assert (from == FRAME_POINTER_REGNUM);
8982 return frame.stack_pointer_offset - frame.frame_pointer_offset;
8986 /* In a dynamically-aligned function, we can't know the offset from
8987 stack pointer to frame pointer, so we must ensure that setjmp
8988 eliminates fp against the hard fp (%ebp) rather than trying to
8989 index from %esp up to the top of the frame across a gap that is
8990 of unknown (at compile-time) size. */
8992 ix86_builtin_setjmp_frame_value (void)
8994 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
8997 /* When using -fsplit-stack, the allocation routines set a field in
8998 the TCB to the bottom of the stack plus this much space, measured
9001 #define SPLIT_STACK_AVAILABLE 256
9003 /* Fill structure ix86_frame about frame of currently computed function. */
9006 ix86_compute_frame_layout (struct ix86_frame *frame)
9008 unsigned int stack_alignment_needed;
9009 HOST_WIDE_INT offset;
9010 unsigned int preferred_alignment;
9011 HOST_WIDE_INT size = get_frame_size ();
9012 HOST_WIDE_INT to_allocate;
9014 frame->nregs = ix86_nsaved_regs ();
9015 frame->nsseregs = ix86_nsaved_sseregs ();
9017 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
9018 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
9020 /* 64-bit MS ABI seem to require stack alignment to be always 16 except for
9021 function prologues and leaf. */
9022 if ((TARGET_64BIT_MS_ABI && preferred_alignment < 16)
9023 && (!current_function_is_leaf || cfun->calls_alloca != 0
9024 || ix86_current_function_calls_tls_descriptor))
9026 preferred_alignment = 16;
9027 stack_alignment_needed = 16;
9028 crtl->preferred_stack_boundary = 128;
9029 crtl->stack_alignment_needed = 128;
9032 gcc_assert (!size || stack_alignment_needed);
9033 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
9034 gcc_assert (preferred_alignment <= stack_alignment_needed);
9036 /* For SEH we have to limit the amount of code movement into the prologue.
9037 At present we do this via a BLOCKAGE, at which point there's very little
9038 scheduling that can be done, which means that there's very little point
9039 in doing anything except PUSHs. */
9041 cfun->machine->use_fast_prologue_epilogue = false;
9043 /* During reload iteration the amount of registers saved can change.
9044 Recompute the value as needed. Do not recompute when amount of registers
9045 didn't change as reload does multiple calls to the function and does not
9046 expect the decision to change within single iteration. */
9047 else if (!optimize_function_for_size_p (cfun)
9048 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
9050 int count = frame->nregs;
9051 struct cgraph_node *node = cgraph_get_node (current_function_decl);
9053 cfun->machine->use_fast_prologue_epilogue_nregs = count;
9055 /* The fast prologue uses move instead of push to save registers. This
9056 is significantly longer, but also executes faster as modern hardware
9057 can execute the moves in parallel, but can't do that for push/pop.
9059 Be careful about choosing what prologue to emit: When function takes
9060 many instructions to execute we may use slow version as well as in
9061 case function is known to be outside hot spot (this is known with
9062 feedback only). Weight the size of function by number of registers
9063 to save as it is cheap to use one or two push instructions but very
9064 slow to use many of them. */
9066 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
9067 if (node->frequency < NODE_FREQUENCY_NORMAL
9068 || (flag_branch_probabilities
9069 && node->frequency < NODE_FREQUENCY_HOT))
9070 cfun->machine->use_fast_prologue_epilogue = false;
9072 cfun->machine->use_fast_prologue_epilogue
9073 = !expensive_function_p (count);
9076 frame->save_regs_using_mov
9077 = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
9078 /* If static stack checking is enabled and done with probes,
9079 the registers need to be saved before allocating the frame. */
9080 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
9082 /* Skip return address. */
9083 offset = UNITS_PER_WORD;
9085 /* Skip pushed static chain. */
9086 if (ix86_static_chain_on_stack)
9087 offset += UNITS_PER_WORD;
9089 /* Skip saved base pointer. */
9090 if (frame_pointer_needed)
9091 offset += UNITS_PER_WORD;
9092 frame->hfp_save_offset = offset;
9094 /* The traditional frame pointer location is at the top of the frame. */
9095 frame->hard_frame_pointer_offset = offset;
9097 /* Register save area */
9098 offset += frame->nregs * UNITS_PER_WORD;
9099 frame->reg_save_offset = offset;
9101 /* Align and set SSE register save area. */
9102 if (frame->nsseregs)
9104 /* The only ABI that has saved SSE registers (Win64) also has a
9105 16-byte aligned default stack, and thus we don't need to be
9106 within the re-aligned local stack frame to save them. */
9107 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
9108 offset = (offset + 16 - 1) & -16;
9109 offset += frame->nsseregs * 16;
9111 frame->sse_reg_save_offset = offset;
9113 /* The re-aligned stack starts here. Values before this point are not
9114 directly comparable with values below this point. In order to make
9115 sure that no value happens to be the same before and after, force
9116 the alignment computation below to add a non-zero value. */
9117 if (stack_realign_fp)
9118 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
9121 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
9122 offset += frame->va_arg_size;
9124 /* Align start of frame for local function. */
9125 if (stack_realign_fp
9126 || offset != frame->sse_reg_save_offset
9128 || !current_function_is_leaf
9129 || cfun->calls_alloca
9130 || ix86_current_function_calls_tls_descriptor)
9131 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
9133 /* Frame pointer points here. */
9134 frame->frame_pointer_offset = offset;
9138 /* Add outgoing arguments area. Can be skipped if we eliminated
9139 all the function calls as dead code.
9140 Skipping is however impossible when function calls alloca. Alloca
9141 expander assumes that last crtl->outgoing_args_size
9142 of stack frame are unused. */
9143 if (ACCUMULATE_OUTGOING_ARGS
9144 && (!current_function_is_leaf || cfun->calls_alloca
9145 || ix86_current_function_calls_tls_descriptor))
9147 offset += crtl->outgoing_args_size;
9148 frame->outgoing_arguments_size = crtl->outgoing_args_size;
9151 frame->outgoing_arguments_size = 0;
9153 /* Align stack boundary. Only needed if we're calling another function
9155 if (!current_function_is_leaf || cfun->calls_alloca
9156 || ix86_current_function_calls_tls_descriptor)
9157 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
9159 /* We've reached end of stack frame. */
9160 frame->stack_pointer_offset = offset;
9162 /* Size prologue needs to allocate. */
9163 to_allocate = offset - frame->sse_reg_save_offset;
9165 if ((!to_allocate && frame->nregs <= 1)
9166 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
9167 frame->save_regs_using_mov = false;
9169 if (ix86_using_red_zone ()
9170 && current_function_sp_is_unchanging
9171 && current_function_is_leaf
9172 && !ix86_current_function_calls_tls_descriptor)
9174 frame->red_zone_size = to_allocate;
9175 if (frame->save_regs_using_mov)
9176 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
9177 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
9178 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
9181 frame->red_zone_size = 0;
9182 frame->stack_pointer_offset -= frame->red_zone_size;
9184 /* The SEH frame pointer location is near the bottom of the frame.
9185 This is enforced by the fact that the difference between the
9186 stack pointer and the frame pointer is limited to 240 bytes in
9187 the unwind data structure. */
9192 /* If we can leave the frame pointer where it is, do so. */
9193 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
9194 if (diff > 240 || (diff & 15) != 0)
9196 /* Ideally we'd determine what portion of the local stack frame
9197 (within the constraint of the lowest 240) is most heavily used.
9198 But without that complication, simply bias the frame pointer
9199 by 128 bytes so as to maximize the amount of the local stack
9200 frame that is addressable with 8-bit offsets. */
9201 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
9206 /* This is semi-inlined memory_address_length, but simplified
9207 since we know that we're always dealing with reg+offset, and
9208 to avoid having to create and discard all that rtl. */
/* Return the extra address-encoding length (displacement/SIB bytes)
   required for a reg+offset memory operand using hard register REGNO
   with displacement OFFSET; used to pick the cheapest base register.
   NOTE(review): the return type, braces and some offset cases are
   elided in this excerpt -- confirm exact control flow in full source.  */
9211 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
9217 /* EBP and R13 cannot be encoded without an offset. */
9218 len = (regno == BP_REG || regno == R13_REG);
/* 8-bit signed displacements encode in a single byte.  */
9220 else if (IN_RANGE (offset, -128, 127))
9223 /* ESP and R12 must be encoded with a SIB byte. */
9224 if (regno == SP_REG || regno == R12_REG)
9230 /* Return an RTX that points to CFA_OFFSET within the stack frame.
9231 The valid base registers are taken from CFUN->MACHINE->FS. */
/* Return an address RTX (base register plus constant) that points to
   CFA_OFFSET within the stack frame, choosing among the frame pointer,
   the DRAP register and the stack pointer, whichever CFUN->MACHINE->FS
   marks as currently valid.  */
9234 choose_baseaddr (HOST_WIDE_INT cfa_offset)
9236 const struct machine_function *m = cfun->machine;
9237 rtx base_reg = NULL;
9238 HOST_WIDE_INT base_offset = 0;
9240 if (m->use_fast_prologue_epilogue)
9242 /* Choose the base register most likely to allow the most scheduling
9243 opportunities. Generally FP is valid throughout the function,
9244 while DRAP must be reloaded within the epilogue. But choose either
9245 over the SP due to increased encoding size. */
9249 base_reg = hard_frame_pointer_rtx;
9250 base_offset = m->fs.fp_offset - cfa_offset;
9252 else if (m->fs.drap_valid)
9254 base_reg = crtl->drap_reg;
/* DRAP holds the CFA itself, so the offset is just -CFA_OFFSET.  */
9255 base_offset = 0 - cfa_offset;
9257 else if (m->fs.sp_valid)
9259 base_reg = stack_pointer_rtx;
9260 base_offset = m->fs.sp_offset - cfa_offset;
9265 HOST_WIDE_INT toffset;
9268 /* Choose the base register with the smallest address encoding.
9269 With a tie, choose FP > DRAP > SP. */
/* Start from SP as the baseline candidate ...  */
9272 base_reg = stack_pointer_rtx;
9273 base_offset = m->fs.sp_offset - cfa_offset;
9274 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
/* ... then see whether DRAP or FP encode more compactly.  */
9276 if (m->fs.drap_valid)
9278 toffset = 0 - cfa_offset;
9279 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
9282 base_reg = crtl->drap_reg;
9283 base_offset = toffset;
9289 toffset = m->fs.fp_offset - cfa_offset;
9290 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
9293 base_reg = hard_frame_pointer_rtx;
9294 base_offset = toffset;
/* At least one of FP/DRAP/SP must have been valid here.  */
9299 gcc_assert (base_reg != NULL);
9301 return plus_constant (base_reg, base_offset);
9304 /* Emit code to save registers in the prologue. */
/* Emit PUSH insns in the prologue for every call-saved integer
   register (SSE registers are saved elsewhere), walking register
   numbers downward.  Each push is marked frame-related so the
   unwinder sees the save.  */
9307 ix86_emit_save_regs (void)
9312 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
9313 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9315 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
9316 RTX_FRAME_RELATED_P (insn) = 1;
9320 /* Emit a single register save at CFA - CFA_OFFSET. */
/* Emit a single register save of REGNO (in MODE) at CFA - CFA_OFFSET
   using a MOV to a frame slot, and attach whichever REG_CFA_* note the
   unwinder needs depending on whether the store address is relative to
   the current CFA register or to a re-aligned local frame.  */
9323 ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
9324 HOST_WIDE_INT cfa_offset)
9326 struct machine_function *m = cfun->machine;
9327 rtx reg = gen_rtx_REG (mode, regno);
9328 rtx mem, addr, base, insn;
9330 addr = choose_baseaddr (cfa_offset);
9331 mem = gen_frame_mem (mode, addr);
9333 /* For SSE saves, we need to indicate the 128-bit alignment. */
9334 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
9336 insn = emit_move_insn (mem, reg);
9337 RTX_FRAME_RELATED_P (insn) = 1;
/* Strip a PLUS so we can inspect the base register of the address.  */
9340 if (GET_CODE (base) == PLUS)
9341 base = XEXP (base, 0);
9342 gcc_checking_assert (REG_P (base));
9344 /* When saving registers into a re-aligned local stack frame, avoid
9345 any tricky guessing by dwarf2out. */
9346 if (m->fs.realigned)
9348 gcc_checking_assert (stack_realign_drap);
9350 if (regno == REGNO (crtl->drap_reg))
9352 /* A bit of a hack. We force the DRAP register to be saved in
9353 the re-aligned stack frame, which provides us with a copy
9354 of the CFA that will last past the prologue. Install it. */
9355 gcc_checking_assert (cfun->machine->fs.fp_valid);
9356 addr = plus_constant (hard_frame_pointer_rtx,
9357 cfun->machine->fs.fp_offset - cfa_offset);
9358 mem = gen_rtx_MEM (mode, addr);
9359 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
9363 /* The frame pointer is a stable reference within the
9364 aligned frame. Use it. */
9365 gcc_checking_assert (cfun->machine->fs.fp_valid);
9366 addr = plus_constant (hard_frame_pointer_rtx,
9367 cfun->machine->fs.fp_offset - cfa_offset);
9368 mem = gen_rtx_MEM (mode, addr);
9369 add_reg_note (insn, REG_CFA_EXPRESSION,
9370 gen_rtx_SET (VOIDmode, mem, reg))
9374 /* The memory may not be relative to the current CFA register,
9375 which means that we may need to generate a new pattern for
9376 use by the unwind info. */
9377 else if (base != m->fs.cfa_reg)
9379 addr = plus_constant (m->fs.cfa_reg, m->fs.cfa_offset - cfa_offset);
9380 mem = gen_rtx_MEM (mode, addr);
9381 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
9385 /* Emit code to save registers using MOV insns.
9386 First register is stored at CFA - CFA_OFFSET. */
/* Save all call-saved integer registers with MOV insns; the first
   register is stored at CFA - CFA_OFFSET and each subsequent one a
   word lower.  */
9388 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
9392 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9393 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9395 ix86_emit_save_reg_using_mov (Pmode, regno, cfa_offset);
9396 cfa_offset -= UNITS_PER_WORD;
9400 /* Emit code to save SSE registers using MOV insns.
9401 First register is stored at CFA - CFA_OFFSET. */
/* Save all call-saved SSE registers with MOV insns (V4SFmode stores);
   the first register is stored at CFA - CFA_OFFSET.  */
9403 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
9407 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9408 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9410 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
/* Chain of pending REG_CFA_RESTORE notes, flushed onto the next stack
   manipulation insn by ix86_add_queued_cfa_restore_notes.  GTY-marked
   so the garbage collector keeps the chain alive.  */
9415 static GTY(()) rtx queued_cfa_restores;
9417 /* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
9418 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
9419 Don't add the note if the previously saved value will be left untouched
9420 within stack red-zone till return, as unwinders can find the same value
9421 in the register and on the stack. */
/* Attach a REG_CFA_RESTORE note for REG to INSN, or queue it on
   queued_cfa_restores when INSN is not given (elided branch).  The
   note is suppressed when the saved slot stays untouched inside the
   red zone until return, since the unwinder can find the value either
   way -- see the comment above.  */
9424 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
9426 if (!crtl->shrink_wrapped
9427 && cfa_offset <= cfun->machine->fs.red_zone_offset)
9432 add_reg_note (insn, REG_CFA_RESTORE, reg);
9433 RTX_FRAME_RELATED_P (insn) = 1;
/* No insn yet: remember the restore for the next stack insn.  */
9437 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
9440 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
/* Splice any queued REG_CFA_RESTORE notes onto INSN's note list and
   clear the queue; no-op when nothing is queued.  */
9443 ix86_add_queued_cfa_restore_notes (rtx insn)
9446 if (!queued_cfa_restores)
/* Find the tail of the queued chain ...  */
9448 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
/* ... and prepend the whole chain to INSN's existing notes.  */
9450 XEXP (last, 1) = REG_NOTES (insn);
9451 REG_NOTES (insn) = queued_cfa_restores;
9452 queued_cfa_restores = NULL_RTX;
9453 RTX_FRAME_RELATED_P (insn) = 1;
9456 /* Expand prologue or epilogue stack adjustment.
9457 The pattern exist to put a dependency on all ebp-based memory accesses.
9458 STYLE should be negative if instructions should be marked as frame related,
9459 zero if %r11 register is live and cannot be freely used and positive
/* Emit DEST = SRC + OFFSET as a prologue/epilogue stack adjustment.
   STYLE: negative => mark insns frame-related; zero => %r11 is live and
   must not be clobbered; positive => %r11 may be used as a scratch for
   offsets that do not fit a 64-bit immediate.  SET_CFA indicates the
   adjustment moves the CFA and needs a REG_CFA_ADJUST_CFA note.
   Also updates the frame-state tracking in cfun->machine->fs.  */
9463 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
9464 int style, bool set_cfa)
9466 struct machine_function *m = cfun->machine;
9468 bool add_frame_related_expr = false;
9471 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
9472 else if (x86_64_immediate_operand (offset, DImode))
9473 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
/* Offset does not fit a sign-extended 32-bit immediate: load it
   into a temporary register first.  */
9477 /* r11 is used by indirect sibcall return as well, set before the
9478 epilogue and used after the epilogue. */
9480 tmp = gen_rtx_REG (DImode, R11_REG);
/* r11 unavailable (STYLE == 0): fall back to the hard frame
   pointer, which must not be an operand of the addition.  */
9483 gcc_assert (src != hard_frame_pointer_rtx
9484 && dest != hard_frame_pointer_rtx);
9485 tmp = hard_frame_pointer_rtx;
9487 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
9489 add_frame_related_expr = true;
9491 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
9494 insn = emit_insn (insn);
9496 ix86_add_queued_cfa_restore_notes (insn);
/* Record the CFA move for the unwinder.  */
9502 gcc_assert (m->fs.cfa_reg == src);
9503 m->fs.cfa_offset += INTVAL (offset);
9504 m->fs.cfa_reg = dest;
9506 r = gen_rtx_PLUS (Pmode, src, offset);
9507 r = gen_rtx_SET (VOIDmode, dest, r);
9508 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
9509 RTX_FRAME_RELATED_P (insn) = 1;
9513 RTX_FRAME_RELATED_P (insn) = 1;
9514 if (add_frame_related_expr)
/* Describe the two-insn (load + add) sequence as one expression
   so dwarf2out sees a simple SP adjustment.  */
9516 rtx r = gen_rtx_PLUS (Pmode, src, offset);
9517 r = gen_rtx_SET (VOIDmode, dest, r);
9518 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
/* Keep m->fs.sp_offset/sp_valid in sync when SP was written.  */
9522 if (dest == stack_pointer_rtx)
9524 HOST_WIDE_INT ooffset = m->fs.sp_offset;
9525 bool valid = m->fs.sp_valid;
9527 if (src == hard_frame_pointer_rtx)
9529 valid = m->fs.fp_valid;
9530 ooffset = m->fs.fp_offset;
9532 else if (src == crtl->drap_reg)
9534 valid = m->fs.drap_valid;
9539 /* Else there are two possibilities: SP itself, which we set
9540 up as the default above. Or EH_RETURN_STACKADJ_RTX, which is
9541 taken care of this by hand along the eh_return path. */
9542 gcc_checking_assert (src == stack_pointer_rtx
9543 || offset == const0_rtx);
9546 m->fs.sp_offset = ooffset - INTVAL (offset);
9547 m->fs.sp_valid = valid;
9551 /* Find an available register to be used as dynamic realign argument
9552 pointer register. Such a register will be written in prologue and
9553 used in begin of body, so it must not be
9554 1. parameter passing register.
9556 We reuse static-chain register if it is available. Otherwise, we
9557 use DI for i386 and R13 for x86-64. We chose R13 since it has
9560 Return: the regno of chosen register. */
/* Pick the hard register to hold the dynamic realign argument pointer
   (DRAP) and return its regno.  Preference: R13 on x86-64 / DI on i386
   when the function has a static chain or emits tail calls (caller-saved
   registers are then unsafe); otherwise reuse the static chain register
   (CX) when parameter passing allows it.  See the comment above.  */
9563 find_drap_reg (void)
9565 tree decl = cfun->decl;
9569 /* Use R13 for nested functions or functions that need a static
9570 chain. Since a function with tail calls may use any caller-saved
9571 registers in the epilogue, DRAP must not use a caller-saved
9572 register in such case. */
9573 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
9580 /* Use DI for nested functions or functions that need a static
9581 chain. Since a function with tail calls may use any caller-saved
9582 registers in the epilogue, DRAP must not use a caller-saved
9583 register in such case. */
9584 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
9587 /* Reuse static chain register if it isn't used for parameter
9589 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
9591 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
/* fastcall/thiscall pass arguments in CX, so it is unavailable.  */
9592 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
9599 /* Return minimum incoming stack alignment. */
/* Return the minimum incoming stack alignment (in bits) for the current
   function, combining the command-line setting, the force_align_arg_pointer
   attribute, the parameter stack boundary, and the special case of main.
   SIBCALL is presumably consulted in an elided branch -- confirm in the
   full source.  */
9602 ix86_minimum_incoming_stack_boundary (bool sibcall)
9604 unsigned int incoming_stack_boundary;
9606 /* Prefer the one specified at command line. */
9607 if (ix86_user_incoming_stack_boundary)
9608 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
9609 /* In 32bit, use MIN_STACK_BOUNDARY for incoming stack boundary
9610 if -mstackrealign is used, it isn't used for sibcall check and
9611 estimated stack alignment is 128bit. */
9614 && ix86_force_align_arg_pointer
9615 && crtl->stack_alignment_estimated == 128)
9616 incoming_stack_boundary = MIN_STACK_BOUNDARY;
9618 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
9620 /* Incoming stack alignment can be changed on individual functions
9621 via force_align_arg_pointer attribute. We use the smallest
9622 incoming stack boundary. */
9623 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
9624 && lookup_attribute (ix86_force_align_arg_pointer_string,
9625 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
9626 incoming_stack_boundary = MIN_STACK_BOUNDARY;
9628 /* The incoming stack frame has to be aligned at least at
9629 parm_stack_boundary. */
9630 if (incoming_stack_boundary < crtl->parm_stack_boundary)
9631 incoming_stack_boundary = crtl->parm_stack_boundary;
9633 /* Stack at entrance of main is aligned by runtime. We use the
9634 smallest incoming stack boundary. */
9635 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
9636 && DECL_NAME (current_function_decl)
9637 && MAIN_NAME_P (DECL_NAME (current_function_decl))
9638 && DECL_FILE_SCOPE_P (current_function_decl))
9639 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
9641 return incoming_stack_boundary;
9644 /* Update incoming stack boundary and estimated stack alignment. */
/* Recompute ix86_incoming_stack_boundary and bump the estimated stack
   alignment to 128 bits for x86-64 varargs register saves.  */
9647 ix86_update_stack_boundary (void)
9649 ix86_incoming_stack_boundary
9650 = ix86_minimum_incoming_stack_boundary (false);
9652 /* x86_64 vararg needs 16byte stack alignment for register save
/* (condition partially elided here -- presumably TARGET_64BIT &&
   cfun->stdarg; verify against full source)  */
9656 && crtl->stack_alignment_estimated < 128)
9657 crtl->stack_alignment_estimated = 128;
9660 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
9661 needed or an rtx for DRAP otherwise. */
/* TARGET_GET_DRAP_RTX hook: return NULL when no DRAP is needed,
   otherwise a pseudo (vDRAP) holding a copy of the DRAP register,
   initialized right after function entry.  */
9664 ix86_get_drap_rtx (void)
9666 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
9667 crtl->need_drap = true;
9669 if (stack_realign_drap)
9671 /* Assign DRAP to vDRAP and returns vDRAP */
9672 unsigned int regno = find_drap_reg ();
9677 arg_ptr = gen_rtx_REG (Pmode, regno);
9678 crtl->drap_reg = arg_ptr;
/* Copy DRAP into a pseudo so the body can use it freely.  */
9681 drap_vreg = copy_to_reg (arg_ptr);
9685 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
/* Tell dwarf2out where the virtual DRAP lives.  */
9688 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
9689 RTX_FRAME_RELATED_P (insn) = 1;
9697 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
/* TARGET_INTERNAL_ARG_POINTER hook: incoming arguments are addressed
   off the virtual incoming-args pointer.  */
9700 ix86_internal_arg_pointer (void)
9702 return virtual_incoming_args_rtx;
/* Descriptor for a short-lived scratch register handed out by
   get_scratch_register_on_entry (holds at least an rtx `reg`; the
   remaining fields are elided in this excerpt -- see full source).  */
9705 struct scratch_reg {
9710 /* Return a short-lived scratch register for use on function entry.
9711 In 32-bit mode, it is valid only after the registers are saved
9712 in the prologue. This register must be released by means of
9713 release_scratch_register_on_entry once it is dead. */
/* Fill *SR with a scratch register usable on function entry: R11 in
   64-bit mode; in 32-bit mode a register not used for parameter
   passing, the static chain, or DRAP.  If no dead register exists,
   one is pushed (and must later be popped by
   release_scratch_register_on_entry).  */
9716 get_scratch_register_on_entry (struct scratch_reg *sr)
9724 /* We always use R11 in 64-bit mode. */
9729 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
9731 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
9732 bool static_chain_p = DECL_STATIC_CHAIN (decl);
9733 int regparm = ix86_function_regparm (fntype, decl);
9735 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
9737 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
9738 for the static chain register. */
9739 if ((regparm < 1 || (fastcall_p && !static_chain_p))
9740 && drap_regno != AX_REG)
9742 else if (regparm < 2 && drap_regno != DX_REG)
9744 /* ecx is the static chain register. */
9745 else if (regparm < 3 && !fastcall_p && !static_chain_p
9746 && drap_regno != CX_REG)
/* No argument register free: prefer a register that is being saved
   anyway, so clobbering it in the prologue is harmless.  */
9748 else if (ix86_save_reg (BX_REG, true))
9750 /* esi is the static chain register. */
9751 else if (!(regparm == 3 && static_chain_p)
9752 && ix86_save_reg (SI_REG, true))
9754 else if (ix86_save_reg (DI_REG, true))
/* Last resort: take AX (or DX when DRAP occupies AX) and push it.  */
9758 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
9763 sr->reg = gen_rtx_REG (Pmode, regno);
9766 rtx insn = emit_insn (gen_push (sr->reg));
9767 RTX_FRAME_RELATED_P (insn) = 1;
9771 /* Release a scratch register obtained from the preceding function. */
/* Undo get_scratch_register_on_entry: pop the scratch register if it
   was pushed, describing the SP adjustment explicitly for dwarf2out
   since POP is not otherwise understood as frame-related.  */
9774 release_scratch_register_on_entry (struct scratch_reg *sr)
9778 rtx x, insn = emit_insn (gen_pop (sr->reg));
9780 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
9781 RTX_FRAME_RELATED_P (insn) = 1;
9782 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
9783 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
9784 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
9788 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
9790 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
/* Decrement the stack pointer by SIZE bytes while probing each
   PROBE_INTERVAL-sized page, either with an unrolled sequence (small
   SIZE) or a run-time loop using a scratch register.  Emits the
   REG_FRAME_RELATED_EXPR unwind description and a blockage at the end
   so nothing is scheduled into the probe sequence.  */
9793 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
9795 /* We skip the probe for the first interval + a small dope of 4 words and
9796 probe that many bytes past the specified size to maintain a protection
9797 area at the bottom of the stack. */
9798 const int dope = 4 * UNITS_PER_WORD;
9799 rtx size_rtx = GEN_INT (size), last;
9801 /* See if we have a constant small number of probes to generate. If so,
9802 that's the easy case. The run-time loop is made up of 11 insns in the
9803 generic case while the compile-time loop is made up of 3+2*(n-1) insns
9804 for n # of intervals. */
9805 if (size <= 5 * PROBE_INTERVAL)
9807 HOST_WIDE_INT i, adjust;
9808 bool first_probe = true;
9810 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
9811 values of N from 1 until it exceeds SIZE. If only one probe is
9812 needed, this will not generate any code. Then adjust and probe
9813 to PROBE_INTERVAL + SIZE. */
9814 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
/* First iteration also covers the skipped interval + dope.  */
9818 adjust = 2 * PROBE_INTERVAL + dope;
9819 first_probe = false;
9822 adjust = PROBE_INTERVAL;
9824 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9825 plus_constant (stack_pointer_rtx, -adjust)));
9826 emit_stack_probe (stack_pointer_rtx);
/* Final partial-interval adjustment and probe.  */
9830 adjust = size + PROBE_INTERVAL + dope;
9832 adjust = size + PROBE_INTERVAL - i;
9834 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9835 plus_constant (stack_pointer_rtx, -adjust)));
9836 emit_stack_probe (stack_pointer_rtx);
9838 /* Adjust back to account for the additional first interval. */
9839 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9840 plus_constant (stack_pointer_rtx,
9841 PROBE_INTERVAL + dope)));
9844 /* Otherwise, do the same as above, but in a loop. Note that we must be
9845 extra careful with variables wrapping around because we might be at
9846 the very top (or the very bottom) of the address space and we have
9847 to be able to handle this case properly; in particular, we use an
9848 equality test for the loop condition. */
9851 HOST_WIDE_INT rounded_size;
9852 struct scratch_reg sr;
9854 get_scratch_register_on_entry (&sr);
9857 /* Step 1: round SIZE to the previous multiple of the interval. */
9859 rounded_size = size & -PROBE_INTERVAL;
9862 /* Step 2: compute initial and final value of the loop counter. */
9864 /* SP = SP_0 + PROBE_INTERVAL. */
9865 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9866 plus_constant (stack_pointer_rtx,
9867 - (PROBE_INTERVAL + dope))));
9869 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
9870 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
9871 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
9872 gen_rtx_PLUS (Pmode, sr.reg,
9873 stack_pointer_rtx)));
/* Step 3: the run-time loop, expanded as a single insn pattern:
9878 while (SP != LAST_ADDR)
9880 SP = SP + PROBE_INTERVAL
   (probe at SP)
9884 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
9885 values of N from 1 until it is equal to ROUNDED_SIZE. */
9887 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
9890 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
9891 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
9893 if (size != rounded_size)
9895 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9896 plus_constant (stack_pointer_rtx,
9897 rounded_size - size)));
9898 emit_stack_probe (stack_pointer_rtx);
9901 /* Adjust back to account for the additional first interval. */
9902 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9903 plus_constant (stack_pointer_rtx,
9904 PROBE_INTERVAL + dope)));
9906 release_scratch_register_on_entry (&sr);
/* Probing is only used when SP is not the CFA register (the frame
   pointer or DRAP carries the CFA).  */
9909 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
9911 /* Even if the stack pointer isn't the CFA register, we need to correctly
9912 describe the adjustments made to it, in particular differentiate the
9913 frame-related ones from the frame-unrelated ones. */
9916 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
9917 XVECEXP (expr, 0, 0)
9918 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9919 plus_constant (stack_pointer_rtx, -size));
9920 XVECEXP (expr, 0, 1)
9921 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9922 plus_constant (stack_pointer_rtx,
9923 PROBE_INTERVAL + dope + size));
9924 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
9925 RTX_FRAME_RELATED_P (last) = 1;
9927 cfun->machine->fs.sp_offset += size;
9930 /* Make sure nothing is scheduled before we are done. */
9931 emit_insn (gen_blockage ());
9934 /* Adjust the stack pointer up to REG while probing it. */
/* Output the assembly for the run-time probe loop emitted by
   ix86_adjust_stack_and_probe: SUB SP by PROBE_INTERVAL and OR a zero
   byte at (SP) until SP reaches the last address held in REG.  */
9937 output_adjust_stack_and_probe (rtx reg)
9939 static int labelno = 0;
9940 char loop_lab[32], end_lab[32];
/* Unique loop/end labels per expansion.  */
9943 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
9944 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
9946 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
9948 /* Jump to END_LAB if SP == LAST_ADDR. */
9949 xops[0] = stack_pointer_rtx;
9951 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
9952 fputs ("\tje\t", asm_out_file);
9953 assemble_name_raw (asm_out_file, end_lab);
9954 fputc ('\n', asm_out_file);
9956 /* SP = SP + PROBE_INTERVAL. */
9957 xops[1] = GEN_INT (PROBE_INTERVAL);
9958 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
/* Probe: OR $0 into the word at (SP) -- touches the page without
   changing its contents.  */
9961 xops[1] = const0_rtx;
9962 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
9964 fprintf (asm_out_file, "\tjmp\t");
9965 assemble_name_raw (asm_out_file, loop_lab);
9966 fputc ('\n', asm_out_file);
9968 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
9973 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
9974 inclusive. These are offsets from the current stack pointer. */
/* Probe the stack range [FIRST, FIRST + SIZE] (offsets below the
   current stack pointer) without moving SP: unrolled probes for small
   SIZE, otherwise a run-time loop over a scratch register.  */
9977 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
9979 /* See if we have a constant small number of probes to generate. If so,
9980 that's the easy case. The run-time loop is made up of 7 insns in the
9981 generic case while the compile-time loop is made up of n insns for n #
9983 if (size <= 7 * PROBE_INTERVAL)
9987 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
9988 it exceeds SIZE. If only one probe is needed, this will not
9989 generate any code. Then probe at FIRST + SIZE. */
9990 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
9991 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + i)));
9993 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + size)));
9996 /* Otherwise, do the same as above, but in a loop. Note that we must be
9997 extra careful with variables wrapping around because we might be at
9998 the very top (or the very bottom) of the address space and we have
9999 to be able to handle this case properly; in particular, we use an
10000 equality test for the loop condition. */
10003 HOST_WIDE_INT rounded_size, last;
10004 struct scratch_reg sr;
10006 get_scratch_register_on_entry (&sr);
10009 /* Step 1: round SIZE to the previous multiple of the interval. */
10011 rounded_size = size & -PROBE_INTERVAL;
10014 /* Step 2: compute initial and final value of the loop counter. */
10016 /* TEST_OFFSET = FIRST. */
10017 emit_move_insn (sr.reg, GEN_INT (-first));
10019 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
10020 last = first + rounded_size;
10023 /* Step 3: the loop
10025 while (TEST_ADDR != LAST_ADDR)
10027 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
   (probe at TEST_ADDR)
10031 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
10032 until it is equal to ROUNDED_SIZE. */
10034 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
10037 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
10038 that SIZE is equal to ROUNDED_SIZE. */
10040 if (size != rounded_size)
10041 emit_stack_probe (plus_constant (gen_rtx_PLUS (Pmode,
10044 rounded_size - size));
10046 release_scratch_register_on_entry (&sr);
10049 /* Make sure nothing is scheduled before we are done. */
10050 emit_insn (gen_blockage ());
10053 /* Probe a range of stack addresses from REG to END, inclusive. These are
10054 offsets from the current stack pointer. */
/* Output the assembly for the run-time probe loop emitted by
   ix86_emit_probe_stack_range: step the test offset in REG by
   PROBE_INTERVAL and OR a zero into [SP + REG] until REG reaches END.  */
10057 output_probe_stack_range (rtx reg, rtx end)
10059 static int labelno = 0;
10060 char loop_lab[32], end_lab[32];
/* Unique loop/end labels per expansion.  */
10063 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10064 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10066 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10068 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
10071 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10072 fputs ("\tje\t", asm_out_file);
10073 assemble_name_raw (asm_out_file, end_lab);
10074 fputc ('\n', asm_out_file);
10076 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
10077 xops[1] = GEN_INT (PROBE_INTERVAL);
10078 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10080 /* Probe at TEST_ADDR. */
10081 xops[0] = stack_pointer_rtx;
10083 xops[2] = const0_rtx;
10084 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
10086 fprintf (asm_out_file, "\tjmp\t");
10087 assemble_name_raw (asm_out_file, loop_lab);
10088 fputc ('\n', asm_out_file);
10090 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10095 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
10096 to be generated in correct form. */
/* Finalize crtl->stack_realign_needed after reload, and -- when stack
   realignment turned out to be unnecessary for a leaf function with no
   frame -- drop frame_pointer_needed and shrink the recorded alignment
   back to the incoming boundary, rescanning the df info.  */
10098 ix86_finalize_stack_realign_flags (void)
10100 /* Check if stack realign is really needed after reload, and
10101 stores result in cfun */
10102 unsigned int incoming_stack_boundary
10103 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
10104 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
10105 unsigned int stack_realign = (incoming_stack_boundary
10106 < (current_function_is_leaf
10107 ? crtl->max_used_stack_slot_alignment
10108 : crtl->stack_alignment_needed));
10110 if (crtl->stack_realign_finalized)
10112 /* After stack_realign_needed is finalized, we can no longer
10114 gcc_assert (crtl->stack_realign_needed == stack_realign);
10118 /* If the only reason for frame_pointer_needed is that we conservatively
10119 assumed stack realignment might be needed, but in the end nothing that
10120 needed the stack alignment had been spilled, clear frame_pointer_needed
10121 and say we don't need stack realignment. */
10123 && !crtl->need_drap
10124 && frame_pointer_needed
10125 && current_function_is_leaf
10126 && flag_omit_frame_pointer
10127 && current_function_sp_is_unchanging
10128 && !ix86_current_function_calls_tls_descriptor
10129 && !crtl->accesses_prior_frames
10130 && !cfun->calls_alloca
10131 && !crtl->calls_eh_return
10132 && !(flag_stack_check && STACK_CHECK_MOVING_SP)
10133 && !ix86_frame_pointer_required ()
10134 && get_frame_size () == 0
10135 && ix86_nsaved_sseregs () == 0
10136 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
10138 HARD_REG_SET set_up_by_prologue, prologue_used;
10141 CLEAR_HARD_REG_SET (prologue_used);
10142 CLEAR_HARD_REG_SET (set_up_by_prologue);
10143 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
10144 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
10145 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
10146 HARD_FRAME_POINTER_REGNUM);
/* Scan every insn; if any still requires a stack frame, keep the
   conservative realignment decision.  */
10150 FOR_BB_INSNS (bb, insn)
10151 if (NONDEBUG_INSN_P (insn)
10152 && requires_stack_frame_p (insn, prologue_used,
10153 set_up_by_prologue))
10155 crtl->stack_realign_needed = stack_realign;
10156 crtl->stack_realign_finalized = true;
/* Nothing needs the frame: drop FP and realignment, and shrink the
   recorded alignment requirements to the incoming boundary.  */
10161 frame_pointer_needed = false;
10162 stack_realign = false;
10163 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
10164 crtl->stack_alignment_needed = incoming_stack_boundary;
10165 crtl->stack_alignment_estimated = incoming_stack_boundary;
10166 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
10167 crtl->preferred_stack_boundary = incoming_stack_boundary;
/* The frame layout changed: rebuild dataflow information.  */
10168 df_finish_pass (true);
10169 df_scan_alloc (NULL);
10171 df_compute_regs_ever_live (true);
10175 crtl->stack_realign_needed = stack_realign;
10176 crtl->stack_realign_finalized = true;
10179 /* Expand the prologue into a bunch of separate insns. */
/* Emit the RTL prologue for the current function.  Visible phases in
   this excerpt: the 32-bit ms_hook hookable prologue, the on-stack
   static-chain push, DRAP-based stack realignment, frame-pointer
   setup, integer register saves (push or mov), fp-based realignment,
   stack allocation with optional probing, SSE register saves, and PIC
   register initialization.  CFA/frame tracking state is maintained in
   cfun->machine->fs throughout so the matching epilogue and the
   dwarf2 unwinder agree on the frame layout.
   NOTE(review): this listing is elided -- several interior lines
   (braces, else-arms, some conditions) are not shown.  */
10182 ix86_expand_prologue (void)
10184 struct machine_function *m = cfun->machine;
10187 struct ix86_frame frame;
10188 HOST_WIDE_INT allocate;
10189 bool int_registers_saved;
10191 ix86_finalize_stack_realign_flags ();
10193 /* DRAP should not coexist with stack_realign_fp */
10194 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
10196 memset (&m->fs, 0, sizeof (m->fs));
10198 /* Initialize CFA state for before the prologue. */
10199 m->fs.cfa_reg = stack_pointer_rtx;
10200 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
10202 /* Track SP offset to the CFA. We continue tracking this after we've
10203 swapped the CFA register away from SP. In the case of re-alignment
10204 this is fudged; we're interested to offsets within the local frame. */
10205 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10206 m->fs.sp_valid = true;
10208 ix86_compute_frame_layout (&frame);
10210 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
10212 /* We should have already generated an error for any use of
10213 ms_hook on a nested function. */
10214 gcc_checking_assert (!ix86_static_chain_on_stack);
10216 /* Check if profiling is active and we shall use profiling before
10217 prologue variant. If so sorry. */
10218 if (crtl->profile && flag_fentry != 0)
10219 sorry ("ms_hook_prologue attribute isn%'t compatible "
10220 "with -mfentry for 32-bit")<...>;
10222 /* In ix86_asm_output_function_label we emitted:
10223 8b ff movl.s %edi,%edi
10225 8b ec movl.s %esp,%ebp
10227 This matches the hookable function prologue in Win32 API
10228 functions in Microsoft Windows XP Service Pack 2 and newer.
10229 Wine uses this to enable Windows apps to hook the Win32 API
10230 functions provided by Wine.
10232 What that means is that we've already set up the frame pointer. */
10234 if (frame_pointer_needed
10235 && !(crtl->drap_reg && crtl->stack_realign_needed))
10239 /* We've decided to use the frame pointer already set up.
10240 Describe this to the unwinder by pretending that both
10241 push and mov insns happen right here.
10243 Putting the unwind info here at the end of the ms_hook
10244 is done so that we can make absolutely certain we get
10245 the required byte sequence at the start of the function,
10246 rather than relying on an assembler that can produce
10247 the exact encoding required.
10249 However it does mean (in the unpatched case) that we have
10250 a 1 insn window where the asynchronous unwind info is
10251 incorrect. However, if we placed the unwind info at
10252 its correct location we would have incorrect unwind info
10253 in the patched case. Which is probably all moot since
10254 I don't expect Wine generates dwarf2 unwind info for the
10255 system libraries that use this feature. */
10257 insn = emit_insn (gen_blockage ());
10259 push = gen_push (hard_frame_pointer_rtx);
10260 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
10261 stack_pointer_rtx);
10262 RTX_FRAME_RELATED_P (push) = 1;
10263 RTX_FRAME_RELATED_P (mov) = 1;
10265 RTX_FRAME_RELATED_P (insn) = 1;
10266 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10267 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
10269 /* Note that gen_push incremented m->fs.cfa_offset, even
10270 though we didn't emit the push insn here. */
10271 m->fs.cfa_reg = hard_frame_pointer_rtx;
10272 m->fs.fp_offset = m->fs.cfa_offset;
10273 m->fs.fp_valid = true;
10277 /* The frame pointer is not needed so pop %ebp again.
10278 This leaves us with a pristine state. */
10279 emit_insn (gen_pop (hard_frame_pointer_rtx));
10283 /* The first insn of a function that accepts its static chain on the
10284 stack is to push the register that would be filled in by a direct
10285 call. This insn will be skipped by the trampoline. */
10286 else if (ix86_static_chain_on_stack)
10288 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
10289 emit_insn (gen_blockage ());
10291 /* We don't want to interpret this push insn as a register save,
10292 only as a stack adjustment. The real copy of the register as
10293 a save will be done later, if needed. */
10294 t = plus_constant (stack_pointer_rtx, -UNITS_PER_WORD);
10295 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
10296 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
10297 RTX_FRAME_RELATED_P (insn) = 1;
10300 /* Emit prologue code to adjust stack alignment and setup DRAP, in case
10301 of DRAP is needed and stack realignment is really needed after reload */
10302 if (stack_realign_drap)
10304 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10306 /* Only need to push parameter pointer reg if it is caller saved. */
10307 if (!call_used_regs[REGNO (crtl->drap_reg)])
10309 /* Push arg pointer reg */
10310 insn = emit_insn (gen_push (crtl->drap_reg));
10311 RTX_FRAME_RELATED_P (insn) = 1;
10314 /* Grab the argument pointer. */
10315 t = plus_constant (stack_pointer_rtx, m->fs.sp_offset);
10316 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10317 RTX_FRAME_RELATED_P (insn) = 1;
10318 m->fs.cfa_reg = crtl->drap_reg;
10319 m->fs.cfa_offset = 0;
10321 /* Align the stack. */
10322 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10324 GEN_INT (-align_bytes)));
10325 RTX_FRAME_RELATED_P (insn) = 1;
10327 /* Replicate the return address on the stack so that return
10328 address can be reached via (argp - 1) slot. This is needed
10329 to implement macro RETURN_ADDR_RTX and intrinsic function
10330 expand_builtin_return_addr etc. */
10331 t = plus_constant (crtl->drap_reg, -UNITS_PER_WORD);
10332 t = gen_frame_mem (Pmode, t);
10333 insn = emit_insn (gen_push (t));
10334 RTX_FRAME_RELATED_P (insn) = 1;
10336 /* For the purposes of frame and register save area addressing,
10337 we've started over with a new frame. */
10338 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10339 m->fs.realigned = true;
10342 if (frame_pointer_needed && !m->fs.fp_valid)
10344 /* Note: AT&T enter does NOT have reversed args. Enter is probably
10345 slower on all targets. Also sdb doesn't like it. */
10346 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
10347 RTX_FRAME_RELATED_P (insn) = 1;
10349 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
10351 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
10352 RTX_FRAME_RELATED_P (insn) = 1;
10354 if (m->fs.cfa_reg == stack_pointer_rtx)
10355 m->fs.cfa_reg = hard_frame_pointer_rtx;
10356 m->fs.fp_offset = m->fs.sp_offset;
10357 m->fs.fp_valid = true;
10361 int_registers_saved = (frame.nregs == 0);
10363 if (!int_registers_saved)
10365 /* If saving registers via PUSH, do so now. */
10366 if (!frame.save_regs_using_mov)
10368 ix86_emit_save_regs ();
10369 int_registers_saved = true;
10370 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
10373 /* When using red zone we may start register saving before allocating
10374 the stack frame saving one cycle of the prologue. However, avoid
10375 doing this if we have to probe the stack; at least on x86_64 the
10376 stack probe can turn into a call that clobbers a red zone location. */
10377 else if (ix86_using_red_zone ()
10378 && (! TARGET_STACK_PROBE
10379 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
10381 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10382 int_registers_saved = true;
10386 if (stack_realign_fp)
10388 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10389 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
10391 /* The computation of the size of the re-aligned stack frame means
10392 that we must allocate the size of the register save area before
10393 performing the actual alignment. Otherwise we cannot guarantee
10394 that there's enough storage above the realignment point. */
10395 if (m->fs.sp_offset != frame.sse_reg_save_offset)
10396 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10397 GEN_INT (m->fs.sp_offset
10398 - frame.sse_reg_save_offset),
10401 /* Align the stack. */
10402 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10404 GEN_INT (-align_bytes)));
10406 /* For the purposes of register save area addressing, the stack
10407 pointer is no longer valid. As for the value of sp_offset,
10408 see ix86_compute_frame_layout, which we need to match in order
10409 to pass verification of stack_pointer_offset at the end. */
10410 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
10411 m->fs.sp_valid = false;
/* Bytes still to subtract from SP to reach the final
   frame.stack_pointer_offset.  */
10414 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
10416 if (flag_stack_usage_info)
10418 /* We start to count from ARG_POINTER. */
10419 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
10421 /* If it was realigned, take into account the fake frame. */
10422 if (stack_realign_drap)
10424 if (ix86_static_chain_on_stack)
10425 stack_size += UNITS_PER_WORD;
10427 if (!call_used_regs[REGNO (crtl->drap_reg)])
10428 stack_size += UNITS_PER_WORD;
10430 /* This over-estimates by 1 minimal-stack-alignment-unit but
10431 mitigates that by counting in the new return address slot. */
10432 current_function_dynamic_stack_size
10433 += crtl->stack_alignment_needed / BITS_PER_UNIT;
10436 current_function_static_stack_size = stack_size;
10439 /* The stack has already been decremented by the instruction calling us
10440 so probe if the size is non-negative to preserve the protection area. */
10441 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
10443 /* We expect the registers to be saved when probes are used. */
10444 gcc_assert (int_registers_saved);
10446 if (STACK_CHECK_MOVING_SP)
10448 ix86_adjust_stack_and_probe (allocate);
10453 HOST_WIDE_INT size = allocate;
10455 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
10456 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
10458 if (TARGET_STACK_PROBE)
10459 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
10461 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
10467 else if (!ix86_target_stack_probe ()
10468 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
10470 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10471 GEN_INT (-allocate), -1,
10472 m->fs.cfa_reg == stack_pointer_rtx);
/* Large allocation with probing required: call the allocate-stack
   worker with the size in EAX, preserving EAX (and R10, used for the
   64-bit static chain) around the call if they are live.
   NOTE(review): the else-header and some lines of this arm are elided
   in this excerpt.  */
10476 rtx eax = gen_rtx_REG (Pmode, AX_REG);
10478 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
10480 bool eax_live = false;
10481 bool r10_live = false;
10484 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
10485 if (!TARGET_64BIT_MS_ABI)
10486 eax_live = ix86_eax_live_at_start_p ();
10490 emit_insn (gen_push (eax));
10491 allocate -= UNITS_PER_WORD;
10495 r10 = gen_rtx_REG (Pmode, R10_REG);
10496 emit_insn (gen_push (r10));
10497 allocate -= UNITS_PER_WORD;
10500 emit_move_insn (eax, GEN_INT (allocate));
10501 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
10503 /* Use the fact that AX still contains ALLOCATE. */
10504 adjust_stack_insn = (TARGET_64BIT
10505 ? gen_pro_epilogue_adjust_stack_di_sub
10506 : gen_pro_epilogue_adjust_stack_si_sub);
10508 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
10509 stack_pointer_rtx, eax));
10511 /* Note that SEH directives need to continue tracking the stack
10512 pointer even after the frame pointer has been set up. */
10513 if (m->fs.cfa_reg == stack_pointer_rtx || TARGET_SEH)
10515 if (m->fs.cfa_reg == stack_pointer_rtx)
10516 m->fs.cfa_offset += allocate;
10518 RTX_FRAME_RELATED_P (insn) = 1;
10519 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10520 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10521 plus_constant (stack_pointer_rtx,
10524 m->fs.sp_offset += allocate;
/* Reload the scratch registers we spilled above, from their slots
   just above the newly allocated area.  */
10526 if (r10_live && eax_live)
10528 t = choose_baseaddr (m->fs.sp_offset - allocate);
10529 emit_move_insn (r10, gen_frame_mem (Pmode, t));
10530 t = choose_baseaddr (m->fs.sp_offset - allocate - UNITS_PER_WORD);
10531 emit_move_insn (eax, gen_frame_mem (Pmode, t));
10533 else if (eax_live || r10_live)
10535 t = choose_baseaddr (m->fs.sp_offset - allocate);
10536 emit_move_insn ((eax_live ? eax : r10), gen_frame_mem (Pmode, t));
10539 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
10541 /* If we havn't already set up the frame pointer, do so now. */
10542 if (frame_pointer_needed && !m->fs.fp_valid)
10544 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
10545 GEN_INT (frame.stack_pointer_offset
10546 - frame.hard_frame_pointer_offset));
10547 insn = emit_insn (insn);
10548 RTX_FRAME_RELATED_P (insn) = 1;
10549 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
10551 if (m->fs.cfa_reg == stack_pointer_rtx)
10552 m->fs.cfa_reg = hard_frame_pointer_rtx;
10553 m->fs.fp_offset = frame.hard_frame_pointer_offset;
10554 m->fs.fp_valid = true;
10557 if (!int_registers_saved)
10558 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10559 if (frame.nsseregs)
10560 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
/* Set up the PIC register if this function needs it (GOT access,
   profiling, TLS, ...).  NOTE(review): part of the condition is
   elided in this excerpt.  */
10562 pic_reg_used = false;
10563 if (pic_offset_table_rtx
10564 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
10567 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
10569 if (alt_pic_reg_used != INVALID_REGNUM)
10570 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
10572 pic_reg_used = true;
10579 if (ix86_cmodel == CM_LARGE_PIC)
10581 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
10582 rtx label = gen_label_rtx ();
10583 emit_label (label);
10584 LABEL_PRESERVE_P (label) = 1;
10585 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
10586 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
10587 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
10588 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
10589 pic_offset_table_rtx, tmp_reg));
10592 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
10596 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
10597 RTX_FRAME_RELATED_P (insn) = 1;
10598 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
10602 /* In the pic_reg_used case, make sure that the got load isn't deleted
10603 when mcount needs it. Blockage to avoid call movement across mcount
10604 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
10606 if (crtl->profile && !flag_fentry && pic_reg_used)
10607 emit_insn (gen_prologue_use (pic_offset_table_rtx));
10609 if (crtl->drap_reg && !crtl->stack_realign_needed)
10611 /* vDRAP is setup but after reload it turns out stack realign
10612 isn't necessary, here we will emit prologue to setup DRAP
10613 without stack realign adjustment */
10614 t = choose_baseaddr (0);
10615 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10618 /* Prevent instructions from being scheduled into register save push
10619 sequence when access to the redzone area is done through frame pointer.
10620 The offset between the frame pointer and the stack pointer is calculated
10621 relative to the value of the stack pointer at the end of the function
10622 prologue, and moving instructions that access redzone area via frame
10623 pointer inside push sequence violates this assumption. */
10624 if (frame_pointer_needed && frame.red_zone_size)
10625 emit_insn (gen_memory_blockage ());
10627 /* Emit cld instruction if stringops are used in the function. */
10628 if (TARGET_CLD && ix86_current_function_needs_cld)
10629 emit_insn (gen_cld ());
10631 /* SEH requires that the prologue end within 256 bytes of the start of
10632 the function. Prevent instruction schedules that would extend that.
10633 Further, prevent alloca modifications to the stack pointer from being
10634 combined with prologue modifications. */
10636 emit_insn (gen_prologue_use (stack_pointer_rtx))<...>;
10639 /* Emit code to restore REG using a POP insn. */
/* Pop REG from the stack, attach/queue the matching CFA-restore
   unwind notes, and update the frame-state tracking in
   cfun->machine->fs: sp_offset always moves up one word; popping the
   DRAP register makes the DRAP the CFA again; popping the hard frame
   pointer invalidates fp and may switch the CFA back to SP.  */
10642 ix86_emit_restore_reg_using_pop (rtx reg)
10644 struct machine_function *m = cfun->machine;
10645 rtx insn = emit_insn (gen_pop (reg));
10647 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
10648 m->fs.sp_offset -= UNITS_PER_WORD;
10650 if (m->fs.cfa_reg == crtl->drap_reg
10651 && REGNO (reg) == REGNO (crtl->drap_reg))
10653 /* Previously we'd represented the CFA as an expression
10654 like *(%ebp - 8). We've just popped that value from
10655 the stack, which means we need to reset the CFA to
10656 the drap register. This will remain until we restore
10657 the stack pointer. */
10658 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
10659 RTX_FRAME_RELATED_P (insn) = 1;
10661 /* This means that the DRAP register is valid for addressing too. */
10662 m->fs.drap_valid = true;
/* NOTE(review): an else-header appears to be elided here in this
   excerpt; the following applies when the CFA is still SP.  */
10666 if (m->fs.cfa_reg == stack_pointer_rtx)
10668 rtx x = plus_constant (stack_pointer_rtx, UNITS_PER_WORD);
10669 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10670 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
10671 RTX_FRAME_RELATED_P (insn) = 1;
10673 m->fs.cfa_offset -= UNITS_PER_WORD;
10676 /* When the frame pointer is the CFA, and we pop it, we are
10677 swapping back to the stack pointer as the CFA. This happens
10678 for stack frames that don't allocate other data, so we assume
10679 the stack pointer is now pointing at the return address, i.e.
10680 the function entry state, which makes the offset be 1 word. */
10681 if (reg == hard_frame_pointer_rtx)
10683 m->fs.fp_valid = false;
10684 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
10686 m->fs.cfa_reg = stack_pointer_rtx;
10687 m->fs.cfa_offset -= UNITS_PER_WORD;
10689 add_reg_note (insn, REG_CFA_DEF_CFA,
10690 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10691 GEN_INT (m->fs.cfa_offset)));
10692 RTX_FRAME_RELATED_P (insn) = 1;
10697 /* Emit code to restore saved registers using POP insns. */
/* Pop every call-saved integer register the prologue saved; SSE
   registers are excluded here (restored via moves elsewhere, since
   there is no pop insn for them).  */
10700 ix86_emit_restore_regs_using_pop (void)
10702 unsigned int regno;
10704 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10705 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
10706 ix86_emit_restore_reg_using_pop (gen_rtx_REG (Pmode, regno));
10709 /* Emit code and notes for the LEAVE instruction. */
/* LEAVE restores SP from the frame pointer and pops the frame
   pointer: afterwards SP is valid (one word below the old fp slot)
   and FP is not.  Attaches the queued CFA-restore notes, and if the
   frame pointer was the CFA, redefines the CFA as SP + sp_offset.  */
10712 ix86_emit_leave (void)
10714 struct machine_function *m = cfun->machine;
10715 rtx insn = emit_insn (ix86_gen_leave ());
10717 ix86_add_queued_cfa_restore_notes (insn);
10719 gcc_assert (m->fs.fp_valid);
10720 m->fs.sp_valid = true;
10721 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
10722 m->fs.fp_valid = false;
10724 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
10726 m->fs.cfa_reg = stack_pointer_rtx;
10727 m->fs.cfa_offset = m->fs.sp_offset;
10729 add_reg_note (insn, REG_CFA_DEF_CFA,
10730 plus_constant (stack_pointer_rtx, m->fs.sp_offset));
10731 RTX_FRAME_RELATED_P (insn) = 1;
10732 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
10737 /* Emit code to restore saved registers using MOV insns.
10738 First register is restored from CFA - CFA_OFFSET. */
/* MAYBE_EH_RETURN selects whether registers saved only for eh_return
   paths are included (passed through to ix86_save_reg).  Restores
   descend one word per register; CFA-restore notes are queued rather
   than attached, except when restoring the DRAP register itself.  */
10740 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
10741 bool maybe_eh_return)
10743 struct machine_function *m = cfun->machine;
10744 unsigned int regno;
10746 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10747 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
10749 rtx reg = gen_rtx_REG (Pmode, regno);
10752 mem = choose_baseaddr (cfa_offset);
10753 mem = gen_frame_mem (Pmode, mem);
10754 insn = emit_move_insn (reg, mem);
10756 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
10758 /* Previously we'd represented the CFA as an expression
10759 like *(%ebp - 8). We've just popped that value from
10760 the stack, which means we need to reset the CFA to
10761 the drap register. This will remain until we restore
10762 the stack pointer. */
10763 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
10764 RTX_FRAME_RELATED_P (insn) = 1;
10766 /* This means that the DRAP register is valid for addressing. */
10767 m->fs.drap_valid = true;
10770 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
10772 cfa_offset -= UNITS_PER_WORD;
10776 /* Emit code to restore saved SSE registers using MOV insns.
10777 First register is restored from CFA - CFA_OFFSET. */
/* SSE counterpart of ix86_emit_restore_regs_using_mov: restores each
   saved SSE register as a V4SFmode load from a 128-bit-aligned slot,
   queuing the CFA-restore note for each.
   NOTE(review): the per-iteration cfa_offset decrement is elided in
   this excerpt -- confirm against the full source.  */
10779 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
10780 bool maybe_eh_return)
10782 unsigned int regno;
10784 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10785 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
10787 rtx reg = gen_rtx_REG (V4SFmode, regno);
10790 mem = choose_baseaddr (cfa_offset);
10791 mem = gen_rtx_MEM (V4SFmode, mem);
/* The save slots are 128-bit aligned, so an aligned move is safe.  */
10792 set_mem_align (mem, 128);
10793 emit_move_insn (reg, mem);
10795 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
10801 /* Emit vzeroupper if needed. */
/* Emit a vzeroupper before return when -mvzeroupper is active, unless
   the function is volatile (never returns normally) or the caller
   expects a 256-bit AVX return value (in which case clearing the
   upper halves would destroy the result).  */
10804 ix86_maybe_emit_epilogue_vzeroupper (void)
10806 if (TARGET_VZEROUPPER
10807 && !TREE_THIS_VOLATILE (cfun->decl)
10808 && !cfun->machine->caller_return_avx256_p)
10809 emit_insn (gen_avx_vzeroupper (GEN_INT (call_no_avx256)));
10812 /* Restore function stack, frame, and registers. */
/* Emit the RTL epilogue.  STYLE selects the variant: 2 is the
   eh_return path (per the "style == 2" tests below); the sibcall
   path restores frame state and returns without a return insn.
   Mirrors ix86_expand_prologue's CFA tracking in cfun->machine->fs,
   and saves/restores m->fs so a later epilogue expansion starts from
   the prologue's state.
   NOTE(review): this listing is elided -- several interior lines
   (braces, else-arms, some conditions) are not shown.  */
10815 ix86_expand_epilogue (int style)
10817 struct machine_function *m = cfun->machine;
10818 struct machine_frame_state frame_state_save = m->fs;
10819 struct ix86_frame frame;
10820 bool restore_regs_via_mov;
10823 ix86_finalize_stack_realign_flags ();
10824 ix86_compute_frame_layout (&frame);
10826 m->fs.sp_valid = (!frame_pointer_needed
10827 || (current_function_sp_is_unchanging
10828 && !stack_realign_fp));
10829 gcc_assert (!m->fs.sp_valid
10830 || m->fs.sp_offset == frame.stack_pointer_offset);
10832 /* The FP must be valid if the frame pointer is present. */
10833 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
10834 gcc_assert (!m->fs.fp_valid
10835 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
10837 /* We must have *some* valid pointer to the stack frame. */
10838 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
10840 /* The DRAP is never valid at this point. */
10841 gcc_assert (!m->fs.drap_valid);
10843 /* See the comment about red zone and frame
10844 pointer usage in ix86_expand_prologue. */
10845 if (frame_pointer_needed && frame.red_zone_size)
10846 emit_insn (gen_memory_blockage ());
10848 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
10849 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
10851 /* Determine the CFA offset of the end of the red-zone. */
10852 m->fs.red_zone_offset = 0;
10853 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
10855 /* The red-zone begins below the return address. */
10856 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
10858 /* When the register save area is in the aligned portion of
10859 the stack, determine the maximum runtime displacement that
10860 matches up with the aligned frame. */
10861 if (stack_realign_drap)
10862 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
10866 /* Special care must be taken for the normal return case of a function
10867 using eh_return: the eax and edx registers are marked as saved, but
10868 not restored along this path. Adjust the save location to match. */
10869 if (crtl->calls_eh_return && style != 2)
10870 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
/* Decide between restoring call-saved registers with MOV loads or
   with POP insns; the chain below encodes per-target and per-shape
   heuristics (EH, SEH, fast prologue/epilogue, leave).  */
10872 /* EH_RETURN requires the use of moves to function properly. */
10873 if (crtl->calls_eh_return)
10874 restore_regs_via_mov = true;
10875 /* SEH requires the use of pops to identify the epilogue. */
10876 else if (TARGET_SEH)
10877 restore_regs_via_mov = false;
10878 /* If we're only restoring one register and sp is not valid then
10879 using a move instruction to restore the register since it's
10880 less work than reloading sp and popping the register. */
10881 else if (!m->fs.sp_valid && frame.nregs <= 1)
10882 restore_regs_via_mov = true;
10883 else if (TARGET_EPILOGUE_USING_MOVE
10884 && cfun->machine->use_fast_prologue_epilogue
10885 && (frame.nregs > 1
10886 || m->fs.sp_offset != frame.reg_save_offset))
10887 restore_regs_via_mov = true;
10888 else if (frame_pointer_needed
10890 && m->fs.sp_offset != frame.reg_save_offset)
10891 restore_regs_via_mov = true;
10892 else if (frame_pointer_needed
10893 && TARGET_USE_LEAVE
10894 && cfun->machine->use_fast_prologue_epilogue
10895 && frame.nregs == 1)
10896 restore_regs_via_mov = true;
10898 restore_regs_via_mov = false;
10900 if (restore_regs_via_mov || frame.nsseregs)
10902 /* Ensure that the entire register save area is addressable via
10903 the stack pointer, if we will restore via sp. */
10905 && m->fs.sp_offset > 0x7fffffff
10906 && !(m->fs.fp_valid || m->fs.drap_valid)
10907 && (frame.nsseregs + frame.nregs) != 0)
10909 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10910 GEN_INT (m->fs.sp_offset
10911 - frame.sse_reg_save_offset),
10913 m->fs.cfa_reg == stack_pointer_rtx);
10917 /* If there are any SSE registers to restore, then we have to do it
10918 via moves, since there's obviously no pop for SSE regs. */
10919 if (frame.nsseregs)
10920 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
10923 if (restore_regs_via_mov)
10928 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
10930 /* eh_return epilogues need %ecx added to the stack pointer. */
10933 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
10935 /* Stack align doesn't work with eh_return. */
10936 gcc_assert (!stack_realign_drap);
10937 /* Neither does regparm nested functions. */
10938 gcc_assert (!ix86_static_chain_on_stack);
10940 if (frame_pointer_needed)
10942 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
10943 t = plus_constant (t, m->fs.fp_offset - UNITS_PER_WORD);
10944 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
10946 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
10947 insn = emit_move_insn (hard_frame_pointer_rtx, t);
10949 /* Note that we use SA as a temporary CFA, as the return
10950 address is at the proper place relative to it. We
10951 pretend this happens at the FP restore insn because
10952 prior to this insn the FP would be stored at the wrong
10953 offset relative to SA, and after this insn we have no
10954 other reasonable register to use for the CFA. We don't
10955 bother resetting the CFA to the SP for the duration of
10956 the return insn. */
10957 add_reg_note (insn, REG_CFA_DEF_CFA,
10958 plus_constant (sa, UNITS_PER_WORD));
10959 ix86_add_queued_cfa_restore_notes (insn);
10960 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
10961 RTX_FRAME_RELATED_P (insn) = 1;
10963 m->fs.cfa_reg = sa;
10964 m->fs.cfa_offset = UNITS_PER_WORD;
10965 m->fs.fp_valid = false;
10967 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
10968 const0_rtx, style, false);
10972 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
10973 t = plus_constant (t, m->fs.sp_offset - UNITS_PER_WORD);
10974 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
10975 ix86_add_queued_cfa_restore_notes (insn);
10977 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
10978 if (m->fs.cfa_offset != UNITS_PER_WORD)
10980 m->fs.cfa_offset = UNITS_PER_WORD;
10981 add_reg_note (insn, REG_CFA_DEF_CFA,
10982 plus_constant (stack_pointer_rtx,
10984 RTX_FRAME_RELATED_P (insn) = 1;
10987 m->fs.sp_offset = UNITS_PER_WORD;
10988 m->fs.sp_valid = true;
10993 /* SEH requires that the function end with (1) a stack adjustment
10994 if necessary, (2) a sequence of pops, and (3) a return or
10995 jump instruction. Prevent insns from the function body from
10996 being scheduled into this sequence. */
10999 /* Prevent a catch region from being adjacent to the standard
11000 epilogue sequence. Unfortuantely crtl->uses_eh_lsda nor
11001 several other flags that would be interesting to test are
11003 if (flag_non_call_exceptions)
11004 emit_insn (gen_nops (const1_rtx));
11006 emit_insn (gen_blockage ());
11009 /* First step is to deallocate the stack frame so that we can
11010 pop the registers. */
11011 if (!m->fs.sp_valid)
11013 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
11014 GEN_INT (m->fs.fp_offset
11015 - frame.reg_save_offset),
11018 else if (m->fs.sp_offset != frame.reg_save_offset)
11020 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11021 GEN_INT (m->fs.sp_offset
11022 - frame.reg_save_offset),
11024 m->fs.cfa_reg == stack_pointer_rtx);
11027 ix86_emit_restore_regs_using_pop ();
11030 /* If we used a stack pointer and haven't already got rid of it,
11032 if (m->fs.fp_valid)
11034 /* If the stack pointer is valid and pointing at the frame
11035 pointer store address, then we only need a pop. */
11036 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
11037 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
11038 /* Leave results in shorter dependency chains on CPUs that are
11039 able to grok it fast. */
11040 else if (TARGET_USE_LEAVE
11041 || optimize_function_for_size_p (cfun)
11042 || !cfun->machine->use_fast_prologue_epilogue)
11043 ix86_emit_leave ();
11046 pro_epilogue_adjust_stack (stack_pointer_rtx,
11047 hard_frame_pointer_rtx,
11048 const0_rtx, style, !using_drap);
11049 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
/* DRAP teardown: recover SP from the DRAP-relative argument pointer.
   NOTE(review): the using_drap condition/header for this region is
   elided in this excerpt.  */
11055 int param_ptr_offset = UNITS_PER_WORD;
11058 gcc_assert (stack_realign_drap);
11060 if (ix86_static_chain_on_stack)
11061 param_ptr_offset += UNITS_PER_WORD;
11062 if (!call_used_regs[REGNO (crtl->drap_reg)])
11063 param_ptr_offset += UNITS_PER_WORD;
11065 insn = emit_insn (gen_rtx_SET
11066 (VOIDmode, stack_pointer_rtx,
11067 gen_rtx_PLUS (Pmode,
11069 GEN_INT (-param_ptr_offset))));
11070 m->fs.cfa_reg = stack_pointer_rtx;
11071 m->fs.cfa_offset = param_ptr_offset;
11072 m->fs.sp_offset = param_ptr_offset;
11073 m->fs.realigned = false;
11075 add_reg_note (insn, REG_CFA_DEF_CFA,
11076 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11077 GEN_INT (param_ptr_offset)));
11078 RTX_FRAME_RELATED_P (insn) = 1;
11080 if (!call_used_regs[REGNO (crtl->drap_reg)])
11081 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
11084 /* At this point the stack pointer must be valid, and we must have
11085 restored all of the registers. We may not have deallocated the
11086 entire stack frame. We've delayed this until now because it may
11087 be possible to merge the local stack deallocation with the
11088 deallocation forced by ix86_static_chain_on_stack. */
11089 gcc_assert (m->fs.sp_valid);
11090 gcc_assert (!m->fs.fp_valid);
11091 gcc_assert (!m->fs.realigned);
11092 if (m->fs.sp_offset != UNITS_PER_WORD)
11094 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11095 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
11099 ix86_add_queued_cfa_restore_notes (get_last_insn ());
11101 /* Sibcall epilogues don't want a return instruction. */
11104 m->fs = frame_state_save;
11108 /* Emit vzeroupper if needed. */
11109 ix86_maybe_emit_epilogue_vzeroupper ();
11111 if (crtl->args.pops_args && crtl->args.size)
11113 rtx popc = GEN_INT (crtl->args.pops_args);
11115 /* i386 can only pop 64K bytes. If asked to pop more, pop return
11116 address, do explicit add, and jump indirectly to the caller. */
11118 if (crtl->args.pops_args >= 65536)
11120 rtx ecx = gen_rtx_REG (SImode, CX_REG);
11123 /* There is no "pascal" calling convention in any 64bit ABI. */
11124 gcc_assert (!TARGET_64BIT);
11126 insn = emit_insn (gen_pop (ecx));
11127 m->fs.cfa_offset -= UNITS_PER_WORD;
11128 m->fs.sp_offset -= UNITS_PER_WORD;
11130 add_reg_note (insn, REG_CFA_ADJUST_CFA,
11131 copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
11132 add_reg_note (insn, REG_CFA_REGISTER,
11133 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
11134 RTX_FRAME_RELATED_P (insn) = 1;
11136 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11138 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
11141 emit_jump_insn (gen_simple_return_pop_internal (popc));
11144 emit_jump_insn (gen_simple_return_internal ());
11146 /* Restore the state back to the state from the prologue,
11147 so that it's correct for the next epilogue. */
11148 m->fs = frame_state_save;
11151 /* Reset from the function's potential modifications. */
/* TARGET_ASM_FUNCTION_EPILOGUE hook: restores the canonical PIC
   register number (the prologue may have switched it to an alternate
   register), then works around Mach-O's lack of labels at the end of
   objects by emitting a trailing nop if the function would otherwise
   end in a (deleted) label.
   NOTE(review): the Mach-O #ifdef/guard lines appear to be elided in
   this excerpt.  */
11154 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
11155 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
11157 if (pic_offset_table_rtx)
11158 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
11160 /* Mach-O doesn't support labels at the end of objects, so if
11161 it looks like we might want one, insert a NOP. */
11163 rtx insn = get_last_insn ();
11164 rtx deleted_debug_label = NULL_RTX;
/* Scan backwards over trailing notes, remembering any deleted debug
   label encountered.  */
11167 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
11169 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
11170 notes only, instead set their CODE_LABEL_NUMBER to -1,
11171 otherwise there would be code generation differences
11172 in between -g and -g0. */
11173 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
11174 deleted_debug_label = insn;
11175 insn = PREV_INSN (insn);
11180 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
11181 fputs ("\tnop\n", file);
11182 else if (deleted_debug_label)
11183 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
11184 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
11185 CODE_LABEL_NUMBER (insn) = -1;
11191 /* Return a scratch register to use in the split stack prologue. The
11192 split stack prologue is used for -fsplit-stack. It is the first
11193 instructions in the function, even before the regular prologue.
11194 The scratch register can be any caller-saved register which is not
11195 used for parameters or for the static chain. */
/* Returns the chosen hard register number, or INVALID_REGNUM (after
   issuing a sorry () diagnostic) when every candidate caller-saved
   register is taken by parameters and/or the static chain.  */
11197 static unsigned int
11198 split_stack_prologue_scratch_regno (void)
/* In 32-bit mode the choice depends on the fastcall attribute and on
   how many registers regparm reserves for parameter passing.  */
11207 is_fastcall = (lookup_attribute ("fastcall",
11208 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
11210 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
/* fastcall functions with a static chain have no free caller-saved
   register left for the scratch.  */
11214 if (DECL_STATIC_CHAIN (cfun->decl))
11216 sorry ("-fsplit-stack does not support fastcall with "
11217 "nested function");
11218 return INVALID_REGNUM;
11222 else if (regparm < 3)
11224 if (!DECL_STATIC_CHAIN (cfun->decl))
/* Two register parameters plus a static chain also exhausts the
   usable caller-saved registers.  */
11230 sorry ("-fsplit-stack does not support 2 register "
11231 " parameters for a nested function");
11232 return INVALID_REGNUM;
11239 /* FIXME: We could make this work by pushing a register
11240 around the addition and comparison. */
11241 sorry ("-fsplit-stack does not support 3 register parameters");
11242 return INVALID_REGNUM;
11247 /* A SYMBOL_REF for the function which allocates new stackspace for
/* Created lazily in ix86_expand_split_stack_prologue; GTY-marked so the
   cached rtx survives garbage collection.  */
11250 static GTY(()) rtx split_stack_fn;
11252 /* A SYMBOL_REF for the more stack function when using the large
/* Lazily-created SYMBOL_REF for "__morestack_large_model", used only
   for the large code models (see ix86_expand_split_stack_prologue).  */
11255 static GTY(()) rtx split_stack_fn_large;
11257 /* Handle -fsplit-stack. These are the first instructions in the
11258 function, even before the regular prologue. */
/* Emits the split-stack check: compare the needed stack against the
   limit stored in the TCB, and when there is not enough, call
   __morestack (or __morestack_large_model for large code models) to
   allocate a new stack segment, then continue at LABEL.  Also sets up
   the varargs scratch pointer when the function uses va_start.  */
11261 ix86_expand_split_stack_prologue (void)
11263 struct ix86_frame frame;
11264 HOST_WIDE_INT allocate;
11265 unsigned HOST_WIDE_INT args_size;
11266 rtx label, limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
11267 rtx scratch_reg = NULL_RTX;
11268 rtx varargs_label = NULL_RTX;
/* Runs only after reload; the frame layout is final at this point.  */
11271 gcc_assert (flag_split_stack && reload_completed);
11273 ix86_finalize_stack_realign_flags ();
11274 ix86_compute_frame_layout (&frame);
11275 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
11277 /* This is the label we will branch to if we have enough stack
11278 space. We expect the basic block reordering pass to reverse this
11279 branch if optimizing, so that we branch in the unlikely case. */
11280 label = gen_label_rtx ();
11282 /* We need to compare the stack pointer minus the frame size with
11283 the stack boundary in the TCB. The stack boundary always gives
11284 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
11285 can compare directly. Otherwise we need to do an addition. */
/* The TCB stack limit is addressed via an UNSPEC_STACK_CHECK memory
   reference (a segment-relative access; see the stack_check unspec).  */
11287 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
11288 UNSPEC_STACK_CHECK);
11289 limit = gen_rtx_CONST (Pmode, limit);
11290 limit = gen_rtx_MEM (Pmode, limit);
11291 if (allocate < SPLIT_STACK_AVAILABLE)
11292 current = stack_pointer_rtx;
11295 unsigned int scratch_regno;
11298 /* We need a scratch register to hold the stack pointer minus
11299 the required frame size. Since this is the very start of the
11300 function, the scratch register can be any caller-saved
11301 register which is not used for parameters. */
11302 offset = GEN_INT (- allocate);
11303 scratch_regno = split_stack_prologue_scratch_regno ();
11304 if (scratch_regno == INVALID_REGNUM)
11306 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11307 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
11309 /* We don't use ix86_gen_add3 in this case because it will
11310 want to split to lea, but when not optimizing the insn
11311 will not be split after this point. */
11312 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11313 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
/* 64-bit offset too large for an immediate: materialize it first.  */
11318 emit_move_insn (scratch_reg, offset);
11319 emit_insn (gen_adddi3 (scratch_reg, scratch_reg,
11320 stack_pointer_rtx));
11322 current = scratch_reg;
/* Branch (unsigned >=) to LABEL when there is enough stack.  */
11325 ix86_expand_branch (GEU, current, limit, label);
11326 jump_insn = get_last_insn ();
11327 JUMP_LABEL (jump_insn) = label;
11329 /* Mark the jump as very likely to be taken. */
11330 add_reg_note (jump_insn, REG_BR_PROB,
11331 GEN_INT (REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100));
/* Lazily create and cache the __morestack SYMBOL_REF.  */
11333 if (split_stack_fn == NULL_RTX)
11334 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
11335 fn = split_stack_fn;
11337 /* Get more stack space. We pass in the desired stack space and the
11338 size of the arguments to copy to the new stack. In 32-bit mode
11339 we push the parameters; __morestack will return on a new stack
11340 anyhow. In 64-bit mode we pass the parameters in r10 and
11342 allocate_rtx = GEN_INT (allocate);
11343 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
11344 call_fusage = NULL_RTX;
11349 reg10 = gen_rtx_REG (Pmode, R10_REG);
11350 reg11 = gen_rtx_REG (Pmode, R11_REG);
11352 /* If this function uses a static chain, it will be in %r10.
11353 Preserve it across the call to __morestack. */
11354 if (DECL_STATIC_CHAIN (cfun->decl))
11358 rax = gen_rtx_REG (Pmode, AX_REG);
11359 emit_move_insn (rax, reg10);
11360 use_reg (&call_fusage, rax);
11363 if (ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
11365 HOST_WIDE_INT argval;
11367 /* When using the large model we need to load the address
11368 into a register, and we've run out of registers. So we
11369 switch to a different calling convention, and we call a
11370 different function: __morestack_large. We pass the
11371 argument size in the upper 32 bits of r10 and pass the
11372 frame size in the lower 32 bits. */
/* Both values must fit in 32 bits for the packed-r10 convention.  */
11373 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
11374 gcc_assert ((args_size & 0xffffffff) == args_size);
11376 if (split_stack_fn_large == NULL_RTX)
11377 split_stack_fn_large =
11378 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
11380 if (ix86_cmodel == CM_LARGE_PIC)
/* Large PIC: compute the GOT base from RIP, then load the callee
   address out of the GOT into r11.  */
11384 label = gen_label_rtx ();
11385 emit_label (label);
11386 LABEL_PRESERVE_P (label) = 1;
11387 emit_insn (gen_set_rip_rex64 (reg10, label));
11388 emit_insn (gen_set_got_offset_rex64 (reg11, label));
11389 emit_insn (gen_adddi3 (reg10, reg10, reg11));
11390 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
11392 x = gen_rtx_CONST (Pmode, x);
11393 emit_move_insn (reg11, x);
11394 x = gen_rtx_PLUS (Pmode, reg10, reg11);
11395 x = gen_const_mem (Pmode, x);
11396 emit_move_insn (reg11, x);
11399 emit_move_insn (reg11, split_stack_fn_large);
/* Pack args_size (high 32 bits) and allocate (low 32 bits) into r10.
   The double shift avoids undefined behavior when HOST_WIDE_INT is
   only 32 bits wide.  */
11403 argval = ((args_size << 16) << 16) + allocate;
11404 emit_move_insn (reg10, GEN_INT (argval));
/* Normal 64-bit convention: frame size in r10, argument size in r11.  */
11408 emit_move_insn (reg10, allocate_rtx);
11409 emit_move_insn (reg11, GEN_INT (args_size));
11410 use_reg (&call_fusage, reg11);
11413 use_reg (&call_fusage, reg10);
/* 32-bit convention: push both arguments on the stack.  */
11417 emit_insn (gen_push (GEN_INT (args_size)));
11418 emit_insn (gen_push (allocate_rtx));
11420 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
11421 GEN_INT (UNITS_PER_WORD), constm1_rtx,
11423 add_function_usage_to (call_insn, call_fusage);
11425 /* In order to make call/return prediction work right, we now need
11426 to execute a return instruction. See
11427 libgcc/config/i386/morestack.S for the details on how this works.
11429 For flow purposes gcc must not see this as a return
11430 instruction--we need control flow to continue at the subsequent
11431 label. Therefore, we use an unspec. */
11432 gcc_assert (crtl->args.pops_args < 65536);
11433 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
11435 /* If we are in 64-bit mode and this function uses a static chain,
11436 we saved %r10 in %rax before calling _morestack. */
11437 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
11438 emit_move_insn (gen_rtx_REG (Pmode, R10_REG),
11439 gen_rtx_REG (Pmode, AX_REG));
11441 /* If this function calls va_start, we need to store a pointer to
11442 the arguments on the old stack, because they may not have been
11443 all copied to the new stack. At this point the old stack can be
11444 found at the frame pointer value used by __morestack, because
11445 __morestack has set that up before calling back to us. Here we
11446 store that pointer in a scratch register, and in
11447 ix86_expand_prologue we store the scratch register in a stack
11449 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11451 unsigned int scratch_regno;
11455 scratch_regno = split_stack_prologue_scratch_regno ();
11456 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11457 frame_reg = gen_rtx_REG (Pmode, BP_REG);
/* Stack layout seen via __morestack's frame pointer (64-bit case):  */
11461 return address within this function
11462 return address of caller of this function
11464 So we add three words to get to the stack arguments.
/* 32-bit case additionally has the two pushed __morestack args:  */
11468 return address within this function
11469 first argument to __morestack
11470 second argument to __morestack
11471 return address of caller of this function
11473 So we add five words to get to the stack arguments.
11475 words = TARGET_64BIT ? 3 : 5;
11476 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11477 gen_rtx_PLUS (Pmode, frame_reg,
11478 GEN_INT (words * UNITS_PER_WORD))));
/* Skip the "no __morestack" path below that sets the scratch
   register from the stack pointer instead.  */
11480 varargs_label = gen_label_rtx ();
11481 emit_jump_insn (gen_jump (varargs_label));
11482 JUMP_LABEL (get_last_insn ()) = varargs_label;
/* Fast path target: enough stack was available.  */
11487 emit_label (label);
11488 LABEL_NUSES (label) = 1;
11490 /* If this function calls va_start, we now have to set the scratch
11491 register for the case where we do not call __morestack. In this
11492 case we need to set it based on the stack pointer. */
11493 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11495 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11496 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11497 GEN_INT (UNITS_PER_WORD))));
11499 emit_label (varargs_label);
11500 LABEL_NUSES (varargs_label) = 1;
11504 /* We may have to tell the dataflow pass that the split stack prologue
11505 is initializing a scratch register. */
/* Implementation of the EXTRA_LIVE_ON_ENTRY/live-on-entry hook: marks
   the split-stack scratch register as live on function entry in REGS,
   since the split-stack prologue sets it before the regular prologue.  */
11508 ix86_live_on_entry (bitmap regs)
11510 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11512 gcc_assert (flag_split_stack);
11513 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
11517 /* Determine if op is suitable SUBREG RTX for address. */
/* OP is the SUBREG_REG of a SUBREG being considered as an address part.
   Accepts only integer-mode, word-sized-or-smaller, non-eliminable hard
   registers.  */
11520 ix86_address_subreg_operand (rtx op)
11522 enum machine_mode mode;
11527 mode = GET_MODE (op);
/* Only integer modes can take part in an address.  */
11529 if (GET_MODE_CLASS (mode) != MODE_INT)
11532 /* Don't allow SUBREGs that span more than a word. It can lead to spill
11533 failures when the register is one word out of a two word structure. */
11534 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
11537 /* Allow only SUBREGs of non-eliminable hard registers. */
11538 return register_no_elim_operand (op, mode);
11541 /* Extract the parts of an RTL expression that is a valid memory address
11542 for an instruction. Return 0 if the structure of the address is
11543 grossly off. Return -1 if the address contains ASHIFT, so it is not
11544 strictly valid, but still used for computing length of lea instruction. */
/* On success the base/index/displacement/scale/segment parts are stored
   into *OUT (struct ix86_address).  */
11547 ix86_decompose_address (rtx addr, struct ix86_address *out)
11549 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
11550 rtx base_reg, index_reg;
11551 HOST_WIDE_INT scale = 1;
11552 rtx scale_rtx = NULL_RTX;
11555 enum ix86_address_seg seg = SEG_DEFAULT;
11557 /* Allow zero-extended SImode addresses,
11558 they will be emitted with addr32 prefix. */
11559 if (TARGET_64BIT && GET_MODE (addr) == DImode)
11561 if (GET_CODE (addr) == ZERO_EXTEND
11562 && GET_MODE (XEXP (addr, 0)) == SImode)
11563 addr = XEXP (addr, 0);
/* (and:DI x 0xffffffff) is an equivalent zero-extension form.  */
11564 else if (GET_CODE (addr) == AND
11565 && const_32bit_mask (XEXP (addr, 1), DImode))
11567 addr = XEXP (addr, 0);
11569 /* Strip subreg. */
11570 if (GET_CODE (addr) == SUBREG
11571 && GET_MODE (SUBREG_REG (addr)) == SImode)
11572 addr = SUBREG_REG (addr);
/* A bare SUBREG is acceptable only over a valid address subreg.  */
11578 else if (GET_CODE (addr) == SUBREG)
11580 if (ix86_address_subreg_operand (SUBREG_REG (addr)))
/* PLUS: flatten the addend tree into ADDENDS[] and classify each
   operand as index*scale, segment unspec, base or displacement.  */
11585 else if (GET_CODE (addr) == PLUS)
11587 rtx addends[4], op;
11595 addends[n++] = XEXP (op, 1);
11598 while (GET_CODE (op) == PLUS);
11603 for (i = n; i >= 0; --i)
11606 switch (GET_CODE (op))
/* MULT operand: index * scale.  */
11611 index = XEXP (op, 0);
11612 scale_rtx = XEXP (op, 1);
/* ASHIFT operand: index << log2(scale); only shifts 0..3 valid.  */
11618 index = XEXP (op, 0);
11619 tmp = XEXP (op, 1);
11620 if (!CONST_INT_P (tmp))
11622 scale = INTVAL (tmp);
11623 if ((unsigned HOST_WIDE_INT) scale > 3)
11625 scale = 1 << scale;
/* UNSPEC_TP denotes a TLS segment-relative access (%fs/%gs).  */
11629 if (XINT (op, 1) == UNSPEC_TP
11630 && TARGET_TLS_DIRECT_SEG_REFS
11631 && seg == SEG_DEFAULT)
11632 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
11638 if (!ix86_address_subreg_operand (SUBREG_REG (op)))
/* MULT at top level: pure index*scale address.  */
11665 else if (GET_CODE (addr) == MULT)
11667 index = XEXP (addr, 0); /* index*scale */
11668 scale_rtx = XEXP (addr, 1);
11670 else if (GET_CODE (addr) == ASHIFT)
11672 /* We're called for lea too, which implements ashift on occasion. */
11673 index = XEXP (addr, 0);
11674 tmp = XEXP (addr, 1);
11675 if (!CONST_INT_P (tmp))
11677 scale = INTVAL (tmp);
11678 if ((unsigned HOST_WIDE_INT) scale > 3)
11680 scale = 1 << scale;
11684 disp = addr; /* displacement */
11690 else if (GET_CODE (index) == SUBREG
11691 && ix86_address_subreg_operand (SUBREG_REG (index)))
11697 /* Extract the integral value of scale. */
11700 if (!CONST_INT_P (scale_rtx))
11702 scale = INTVAL (scale_rtx);
/* Look through SUBREGs when checking which hard registers are used.  */
11705 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
11706 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
11708 /* Avoid useless 0 displacement. */
11709 if (disp == const0_rtx && (base || index))
11712 /* Allow arg pointer and stack pointer as index if there is not scaling. */
11713 if (base_reg && index_reg && scale == 1
11714 && (index_reg == arg_pointer_rtx
11715 || index_reg == frame_pointer_rtx
11716 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
/* Swap so that %esp/%ebp-like registers end up as the base.  */
11719 tmp = base, base = index, index = tmp;
11720 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
11723 /* Special case: %ebp cannot be encoded as a base without a displacement.
11727 && (base_reg == hard_frame_pointer_rtx
11728 || base_reg == frame_pointer_rtx
11729 || base_reg == arg_pointer_rtx
11730 || (REG_P (base_reg)
11731 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
11732 || REGNO (base_reg) == R13_REG))))
11735 /* Special case: on K6, [%esi] makes the instruction vector decoded.
11736 Avoid this by transforming to [%esi+0].
11737 Reload calls address legitimization without cfun defined, so we need
11738 to test cfun for being non-NULL. */
11739 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
11740 && base_reg && !index_reg && !disp
11741 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
11744 /* Special case: encode reg+reg instead of reg*2. */
11745 if (!base && index && scale == 2)
11746 base = index, base_reg = index_reg, scale = 1;
11748 /* Special case: scaling cannot be encoded without base or displacement. */
11749 if (!base && !disp && index && scale != 1)
/* Fill in the output structure.  */
11753 out->index = index;
11755 out->scale = scale;
11761 /* Return cost of the memory address x.
11762 For i386, it is better to use a complex address than let gcc copy
11763 the address into a reg and make a new pseudo. But not if the address
11764 requires to two regs - that would mean more pseudos with longer
/* Implementation of the TARGET_ADDRESS_COST hook; SPEED is unused.  */
11767 ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
11769 struct ix86_address parts;
11771 int ok = ix86_decompose_address (x, &parts);
/* Strip SUBREGs so the REGNO checks below see the real registers.  */
11775 if (parts.base && GET_CODE (parts.base) == SUBREG)
11776 parts.base = SUBREG_REG (parts.base);
11777 if (parts.index && GET_CODE (parts.index) == SUBREG)
11778 parts.index = SUBREG_REG (parts.index);
11780 /* Attempt to minimize number of registers in the address. */
11782 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
11784 && (!REG_P (parts.index)
11785 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
/* Penalize addresses needing two distinct (pseudo) registers.  */
11789 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
11791 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
11792 && parts.base != parts.index)
11795 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
11796 since it's predecode logic can't detect the length of instructions
11797 and it degenerates to vector decoded. Increase cost of such
11798 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
11799 to split such addresses or even refuse such addresses at all.
11801 Following addressing modes are affected:
11806 The first and last case may be avoidable by explicitly coding the zero in
11807 memory address, but I don't have AMD-K6 machine handy to check this
11811 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
11812 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
11813 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
11819 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
11820 this is used for to form addresses to local data when -fPIC is in
/* Returns true iff DISP is the Mach-O pic-base-offset unspec.  */
11824 darwin_local_data_pic (rtx disp)
11826 return (GET_CODE (disp) == UNSPEC
11827 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
11830 /* Determine if a given RTX is a valid constant. We already know this
11831 satisfies CONSTANT_P. */
/* Implementation of the TARGET_LEGITIMATE_CONSTANT_P hook.  Rejects
   TLS symbols, DLLIMPORT symbols, disallowed unspecs and (for vector
   modes) non-standard SSE constants.  */
11834 ix86_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
11836 switch (GET_CODE (x))
/* CONST: strip an outer (plus X const_int) wrapper first.  */
11841 if (GET_CODE (x) == PLUS)
11843 if (!CONST_INT_P (XEXP (x, 1)))
11848 if (TARGET_MACHO && darwin_local_data_pic (x))
11851 /* Only some unspecs are valid as "constants". */
11852 if (GET_CODE (x) == UNSPEC)
11853 switch (XINT (x, 1))
11856 case UNSPEC_GOTOFF:
11857 case UNSPEC_PLTOFF:
11858 return TARGET_64BIT;
/* TLS offsets are valid only for the matching TLS model.  */
11860 case UNSPEC_NTPOFF:
11861 x = XVECEXP (x, 0, 0);
11862 return (GET_CODE (x) == SYMBOL_REF
11863 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11864 case UNSPEC_DTPOFF:
11865 x = XVECEXP (x, 0, 0);
11866 return (GET_CODE (x) == SYMBOL_REF
11867 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
11872 /* We must have drilled down to a symbol. */
11873 if (GET_CODE (x) == LABEL_REF)
11875 if (GET_CODE (x) != SYMBOL_REF)
11880 /* TLS symbols are never valid. */
11881 if (SYMBOL_REF_TLS_MODEL (x))
11884 /* DLLIMPORT symbols are never valid. */
11885 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
11886 && SYMBOL_REF_DLLIMPORT_P (x))
11890 /* mdynamic-no-pic */
11891 if (MACHO_DYNAMIC_NO_PIC_P)
11892 return machopic_symbol_defined_p (x);
/* Vector/TImode constants must be standard SSE constants.  */
11897 if (GET_MODE (x) == TImode
11898 && x != CONST0_RTX (TImode)
11904 if (!standard_sse_constant_p (x))
11911 /* Otherwise we handle everything else in the move patterns. */
11915 /* Determine if it's legal to put X into the constant pool. This
11916 is not possible for the address of thread-local symbols, which
11917 is checked above. */
/* Implementation of the TARGET_CANNOT_FORCE_CONST_MEM hook: returns
   true when X must NOT be placed in the constant pool.  */
11920 ix86_cannot_force_const_mem (enum machine_mode mode, rtx x)
11922 /* We can always put integral constants and vectors in memory. */
11923 switch (GET_CODE (x))
/* Anything ix86_legitimate_constant_p rejects cannot go in the pool.  */
11933 return !ix86_legitimate_constant_p (mode, x);
11937 /* Nonzero if the constant value X is a legitimate general operand
11938 when generating PIC code. It is given that flag_pic is on and
11939 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
11942 legitimate_pic_operand_p (rtx x)
11946 switch (GET_CODE (x))
/* CONST: strip a (plus X const_int) wrapper, then inspect unspecs.  */
11949 inner = XEXP (x, 0);
11950 if (GET_CODE (inner) == PLUS
11951 && CONST_INT_P (XEXP (inner, 1)))
11952 inner = XEXP (inner, 0);
11954 /* Only some unspecs are valid as "constants". */
11955 if (GET_CODE (inner) == UNSPEC)
11956 switch (XINT (inner, 1))
11959 case UNSPEC_GOTOFF:
11960 case UNSPEC_PLTOFF:
11961 return TARGET_64BIT;
/* Local-exec TLS offsets are always usable as operands.  */
11963 x = XVECEXP (inner, 0, 0);
11964 return (GET_CODE (x) == SYMBOL_REF
11965 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11966 case UNSPEC_MACHOPIC_OFFSET:
11967 return legitimate_pic_address_disp_p (x);
/* Plain SYMBOL_REF/LABEL_REF: defer to the displacement check.  */
11975 return legitimate_pic_address_disp_p (x);
11982 /* Determine if a given CONST RTX is a valid memory displacement
/* Returns nonzero when DISP may appear as the displacement part of a
   memory address under PIC.  */
11986 legitimate_pic_address_disp_p (rtx disp)
11990 /* In 64bit mode we can allow direct addresses of symbols and labels
11991 when they are not dynamic symbols. */
11994 rtx op0 = disp, op1;
11996 switch (GET_CODE (disp))
/* CONST wrapping (plus sym const_int): accept only small offsets so
   the result stays within +/-16MB of the symbol.  */
12002 if (GET_CODE (XEXP (disp, 0)) != PLUS)
12004 op0 = XEXP (XEXP (disp, 0), 0);
12005 op1 = XEXP (XEXP (disp, 0), 1);
12006 if (!CONST_INT_P (op1)
12007 || INTVAL (op1) >= 16*1024*1024
12008 || INTVAL (op1) < -16*1024*1024)
12010 if (GET_CODE (op0) == LABEL_REF)
12012 if (GET_CODE (op0) != SYMBOL_REF)
12017 /* TLS references should always be enclosed in UNSPEC. */
12018 if (SYMBOL_REF_TLS_MODEL (op0))
/* Direct references are fine for local, near symbols outside the
   large PIC model.  */
12020 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
12021 && ix86_cmodel != CM_LARGE_PIC)
12029 if (GET_CODE (disp) != CONST)
12031 disp = XEXP (disp, 0);
12035 /* We are unsafe to allow PLUS expressions. This limit allowed distance
12036 of GOT tables. We should not need these anyway. */
12037 if (GET_CODE (disp) != UNSPEC
12038 || (XINT (disp, 1) != UNSPEC_GOTPCREL
12039 && XINT (disp, 1) != UNSPEC_GOTOFF
12040 && XINT (disp, 1) != UNSPEC_PCREL
12041 && XINT (disp, 1) != UNSPEC_PLTOFF))
12044 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
12045 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
/* 32-bit path: strip (plus disp const_int), then require a known
   PIC unspec.  */
12051 if (GET_CODE (disp) == PLUS)
12053 if (!CONST_INT_P (XEXP (disp, 1)))
12055 disp = XEXP (disp, 0);
12059 if (TARGET_MACHO && darwin_local_data_pic (disp))
12062 if (GET_CODE (disp) != UNSPEC)
12065 switch (XINT (disp, 1))
12070 /* We need to check for both symbols and labels because VxWorks loads
12071 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
12073 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
12074 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
12075 case UNSPEC_GOTOFF:
12076 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
12077 While ABI specify also 32bit relocation but we don't produce it in
12078 small PIC model at all. */
12079 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
12080 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
12082 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
/* TLS unspecs: the wrapped symbol's TLS model must match.  */
12084 case UNSPEC_GOTTPOFF:
12085 case UNSPEC_GOTNTPOFF:
12086 case UNSPEC_INDNTPOFF:
12089 disp = XVECEXP (disp, 0, 0);
12090 return (GET_CODE (disp) == SYMBOL_REF
12091 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
12092 case UNSPEC_NTPOFF:
12093 disp = XVECEXP (disp, 0, 0);
12094 return (GET_CODE (disp) == SYMBOL_REF
12095 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
12096 case UNSPEC_DTPOFF:
12097 disp = XVECEXP (disp, 0, 0);
12098 return (GET_CODE (disp) == SYMBOL_REF
12099 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
12105 /* Recognizes RTL expressions that are valid memory addresses for an
12106 instruction. The MODE argument is the machine mode for the MEM
12107 expression that wants to use this address.
12109 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
12110 convert common non-canonical forms to canonical form so that they will
/* Implementation of the TARGET_LEGITIMATE_ADDRESS_P hook.  STRICT
   selects the strict register checks (hard regs only).  */
12114 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
12115 rtx addr, bool strict)
12117 struct ix86_address parts;
12118 rtx base, index, disp;
12119 HOST_WIDE_INT scale;
/* <= 0 covers both failure (0) and the lea-only ASHIFT form (-1).  */
12121 if (ix86_decompose_address (addr, &parts) <= 0)
12122 /* Decomposition failed. */
12126 index = parts.index;
12128 scale = parts.scale;
12130 /* Validate base register. */
/* Look through a SUBREG of a register for the base check.  */
12137 else if (GET_CODE (base) == SUBREG && REG_P (SUBREG_REG (base)))
12138 reg = SUBREG_REG (base);
12140 /* Base is not a register. */
12143 if (GET_MODE (base) != SImode && GET_MODE (base) != DImode)
12146 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
12147 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
12148 /* Base is not valid. */
12152 /* Validate index register. */
12159 else if (GET_CODE (index) == SUBREG && REG_P (SUBREG_REG (index)))
12160 reg = SUBREG_REG (index);
12162 /* Index is not a register. */
12165 if (GET_MODE (index) != SImode && GET_MODE (index) != DImode)
12168 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
12169 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
12170 /* Index is not valid. */
12174 /* Index and base should have the same mode. */
12176 && GET_MODE (base) != GET_MODE (index))
12179 /* Validate scale factor. */
12183 /* Scale without index. */
/* Hardware encodes only scale factors 1, 2, 4 and 8.  */
12186 if (scale != 2 && scale != 4 && scale != 8)
12187 /* Scale is not a valid multiplier. */
12191 /* Validate displacement. */
/* CONST-wrapped unspec displacement: only a known whitelist of
   unspecs is allowed, most requiring flag_pic.  */
12194 if (GET_CODE (disp) == CONST
12195 && GET_CODE (XEXP (disp, 0)) == UNSPEC
12196 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
12197 switch (XINT (XEXP (disp, 0), 1))
12199 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
12200 used. While ABI specify also 32bit relocations, we don't produce
12201 them at all and use IP relative instead. */
12203 case UNSPEC_GOTOFF:
12204 gcc_assert (flag_pic);
12206 goto is_legitimate_pic;
12208 /* 64bit address unspec. */
12211 case UNSPEC_GOTPCREL:
12213 gcc_assert (flag_pic);
12214 goto is_legitimate_pic;
/* TLS-related displacements are checked elsewhere and accepted.  */
12216 case UNSPEC_GOTTPOFF:
12217 case UNSPEC_GOTNTPOFF:
12218 case UNSPEC_INDNTPOFF:
12219 case UNSPEC_NTPOFF:
12220 case UNSPEC_DTPOFF:
/* Split-stack TCB limit reference; valid only with -fsplit-stack.  */
12223 case UNSPEC_STACK_CHECK:
12224 gcc_assert (flag_split_stack);
12228 /* Invalid address unspec. */
12232 else if (SYMBOLIC_CONST (disp)
12236 && MACHOPIC_INDIRECT
12237 && !machopic_operand_p (disp)
/* PIC displacement checks.  */
12243 if (TARGET_64BIT && (index || base))
12245 /* foo@dtpoff(%rX) is ok. */
12246 if (GET_CODE (disp) != CONST
12247 || GET_CODE (XEXP (disp, 0)) != PLUS
12248 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
12249 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
12250 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
12251 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
12252 /* Non-constant pic memory reference. */
12255 else if ((!TARGET_MACHO || flag_pic)
12256 && ! legitimate_pic_address_disp_p (disp))
12257 /* Displacement is an invalid pic construct. */
12260 else if (MACHO_DYNAMIC_NO_PIC_P
12261 && !ix86_legitimate_constant_p (Pmode, disp))
12262 /* displacment must be referenced via non_lazy_pointer */
12266 /* This code used to verify that a symbolic pic displacement
12267 includes the pic_offset_table_rtx register.
12269 While this is good idea, unfortunately these constructs may
12270 be created by "adds using lea" optimization for incorrect
12279 This code is nonsensical, but results in addressing
12280 GOT table with pic_offset_table_rtx base. We can't
12281 just refuse it easily, since it gets matched by
12282 "addsi3" pattern, that later gets split to lea in the
12283 case output register differs from input. While this
12284 can be handled by separate addsi pattern for this case
12285 that never results in lea, this seems to be easier and
12286 correct fix for crash to disable this test. */
/* Non-PIC: displacement must simply be a legitimate constant.  */
12288 else if (GET_CODE (disp) != LABEL_REF
12289 && !CONST_INT_P (disp)
12290 && (GET_CODE (disp) != CONST
12291 || !ix86_legitimate_constant_p (Pmode, disp))
12292 && (GET_CODE (disp) != SYMBOL_REF
12293 || !ix86_legitimate_constant_p (Pmode, disp)))
12294 /* Displacement is not constant. */
12296 else if (TARGET_64BIT
12297 && !x86_64_immediate_operand (disp, VOIDmode))
12298 /* Displacement is out of range. */
12302 /* Everything looks valid. */
12306 /* Determine if a given RTX is a valid constant address. */
/* True iff X is constant and passes the strict legitimate-address
   check.  */
12309 constant_address_p (rtx x)
12311 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
12314 /* Return a unique alias set for the GOT. */
/* The alias set is created on first use and cached in a function-local
   static (-1 means "not yet created").  */
12316 static alias_set_type
12317 ix86_GOT_alias_set (void)
12319 static alias_set_type set = -1;
12321 set = new_alias_set ();
12325 /* Return a legitimate reference for ORIG (an address) using the
12326 register REG. If REG is 0, a new pseudo is generated.
12328 There are two types of references that must be handled:
12330 1. Global data references must load the address from the GOT, via
12331 the PIC reg. An insn is emitted to do this load, and the reg is
12334 2. Static data references, constant pool addresses, and code labels
12335 compute the address as an offset from the GOT, whose base is in
12336 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
12337 differentiate them from global data objects. The returned
12338 address is the PIC reg + an unspec constant.
12340 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
12341 reg also appears in the address. */
12344 legitimize_pic_address (rtx orig, rtx reg)
12347 rtx new_rtx = orig;
12351 if (TARGET_MACHO && !TARGET_64BIT)
12354 reg = gen_reg_rtx (Pmode);
12355 /* Use the generic Mach-O PIC machinery. */
12356 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
12360 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
12362 else if (TARGET_64BIT
12363 && ix86_cmodel != CM_SMALL_PIC
12364 && gotoff_operand (addr, Pmode))
12367 /* This symbol may be referenced via a displacement from the PIC
12368 base address (@GOTOFF). */
12370 if (reload_in_progress)
12371 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12372 if (GET_CODE (addr) == CONST)
12373 addr = XEXP (addr, 0);
12374 if (GET_CODE (addr) == PLUS)
12376 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
12378 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
12381 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
12382 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12384 tmpreg = gen_reg_rtx (Pmode);
12387 emit_move_insn (tmpreg, new_rtx);
12391 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
12392 tmpreg, 1, OPTAB_DIRECT);
12395 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
12397 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
12399 /* This symbol may be referenced via a displacement from the PIC
12400 base address (@GOTOFF). */
12402 if (reload_in_progress)
12403 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12404 if (GET_CODE (addr) == CONST)
12405 addr = XEXP (addr, 0);
12406 if (GET_CODE (addr) == PLUS)
12408 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
12410 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
12413 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
12414 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12415 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12419 emit_move_insn (reg, new_rtx);
12423 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
12424 /* We can't use @GOTOFF for text labels on VxWorks;
12425 see gotoff_operand. */
12426 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
12428 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
12430 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
12431 return legitimize_dllimport_symbol (addr, true);
12432 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
12433 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
12434 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
12436 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
12437 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
12441 /* For x64 PE-COFF there is no GOT table. So we use address
12443 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
12445 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
12446 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12449 reg = gen_reg_rtx (Pmode);
12450 emit_move_insn (reg, new_rtx);
12453 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
12455 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
12456 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12457 new_rtx = gen_const_mem (Pmode, new_rtx);
12458 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
12461 reg = gen_reg_rtx (Pmode);
12462 /* Use directly gen_movsi, otherwise the address is loaded
12463 into register for CSE. We don't want to CSE this addresses,
12464 instead we CSE addresses from the GOT table, so skip this. */
12465 emit_insn (gen_movsi (reg, new_rtx));
12470 /* This symbol must be referenced via a load from the
12471 Global Offset Table (@GOT). */
12473 if (reload_in_progress)
12474 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12475 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
12476 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12478 new_rtx = force_reg (Pmode, new_rtx);
12479 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12480 new_rtx = gen_const_mem (Pmode, new_rtx);
12481 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
12484 reg = gen_reg_rtx (Pmode);
12485 emit_move_insn (reg, new_rtx);
12491 if (CONST_INT_P (addr)
12492 && !x86_64_immediate_operand (addr, VOIDmode))
12496 emit_move_insn (reg, addr);
12500 new_rtx = force_reg (Pmode, addr);
12502 else if (GET_CODE (addr) == CONST)
12504 addr = XEXP (addr, 0);
12506 /* We must match stuff we generate before. Assume the only
12507 unspecs that can get here are ours. Not that we could do
12508 anything with them anyway.... */
12509 if (GET_CODE (addr) == UNSPEC
12510 || (GET_CODE (addr) == PLUS
12511 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
12513 gcc_assert (GET_CODE (addr) == PLUS);
12515 if (GET_CODE (addr) == PLUS)
12517 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
12519 /* Check first to see if this is a constant offset from a @GOTOFF
12520 symbol reference. */
12521 if (gotoff_operand (op0, Pmode)
12522 && CONST_INT_P (op1))
12526 if (reload_in_progress)
12527 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12528 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
12530 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
12531 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12532 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12536 emit_move_insn (reg, new_rtx);
12542 if (INTVAL (op1) < -16*1024*1024
12543 || INTVAL (op1) >= 16*1024*1024)
12545 if (!x86_64_immediate_operand (op1, Pmode))
12546 op1 = force_reg (Pmode, op1);
12547 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
12553 base = legitimize_pic_address (XEXP (addr, 0), reg);
12554 new_rtx = legitimize_pic_address (XEXP (addr, 1),
12555 base == reg ? NULL_RTX : reg);
12557 if (CONST_INT_P (new_rtx))
12558 new_rtx = plus_constant (base, INTVAL (new_rtx));
12561 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
12563 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
12564 new_rtx = XEXP (new_rtx, 1);
12566 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
12574 /* Load the thread pointer. If TO_REG is true, force it into a register. */
/* NOTE(review): this chunk is an elided extraction; the return-type line
   and several braces between the numbered lines are missing.  Code kept
   byte-identical; only comments added.  */
12577 get_thread_pointer (bool to_reg)
/* The thread pointer is represented as an UNSPEC_TP in ptr_mode ...  */
12579 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
/* ... and widened when Pmode differs from ptr_mode (e.g. x32).  */
12581 if (GET_MODE (tp) != Pmode)
12582 tp = convert_to_mode (Pmode, tp, 1);
/* presumably guarded by 'if (to_reg)' on an elided line — TODO confirm.  */
12585 tp = copy_addr_to_reg (tp);
12590 /* Construct the SYMBOL_REF for the tls_get_addr function. */
12592 static GTY(()) rtx ix86_tls_symbol;
/* Lazily create and cache the SYMBOL_REF.  32-bit GNU TLS uses the
   regparm entry point "___tls_get_addr"; all other configurations use
   the standard "__tls_get_addr".  */
12595 ix86_tls_get_addr (void)
12597 if (!ix86_tls_symbol)
12600 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
12601 ? "___tls_get_addr" : "__tls_get_addr")
12603 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
12606 return ix86_tls_symbol;
12609 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
12611 static GTY(()) rtx ix86_tls_module_base_symbol;
/* Lazily create and cache the symbol; used by the GNU2 local-dynamic
   TLS sequence in legitimize_tls_address.  */
12614 ix86_tls_module_base (void)
12616 if (!ix86_tls_module_base_symbol)
12618 ix86_tls_module_base_symbol
12619 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
/* Mark the symbol as global-dynamic TLS so the right relocations are
   emitted for references to it.  */
12621 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
12622 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
12625 return ix86_tls_module_base_symbol;
12628 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
12629 false if we expect this to be used for a memory address and true if
12630 we expect to load the address into a register. */
/* Legitimize TLS symbol reference X according to MODEL.  One switch arm
   per TLS access model (GD, LD, IE, LE).  NOTE(review): elided extraction;
   the switch head, braces and break statements between the numbered lines
   are missing from this view.  Code kept byte-identical.  */
12633 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
12635 rtx dest, base, off;
12636 rtx pic = NULL_RTX, tp = NULL_RTX;
/* Global dynamic: address computed via a call to __tls_get_addr (or an
   inline GNU2 sequence).  */
12641 case TLS_MODEL_GLOBAL_DYNAMIC:
12642 dest = gen_reg_rtx (Pmode);
12647 pic = pic_offset_table_rtx;
12650 pic = gen_reg_rtx (Pmode);
12651 emit_insn (gen_set_got (pic));
12655 if (TARGET_GNU2_TLS)
12658 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
12660 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
12662 tp = get_thread_pointer (true);
12663 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
/* Record the symbolic value so the optimizers can CSE the address.  */
12665 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
12669 rtx caddr = ix86_tls_get_addr ();
12673 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
/* 64-bit ABI: result of __tls_get_addr comes back in %rax.  */
12676 emit_call_insn (gen_tls_global_dynamic_64 (rax, x, caddr));
12677 insns = get_insns ();
12680 RTL_CONST_CALL_P (insns) = 1;
12681 emit_libcall_block (insns, dest, rax, x);
12684 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
/* Local dynamic: one base call per function, then DTPOFF offsets.  */
12688 case TLS_MODEL_LOCAL_DYNAMIC:
12689 base = gen_reg_rtx (Pmode);
12694 pic = pic_offset_table_rtx;
12697 pic = gen_reg_rtx (Pmode);
12698 emit_insn (gen_set_got (pic));
12702 if (TARGET_GNU2_TLS)
12704 rtx tmp = ix86_tls_module_base ();
12707 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
12709 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
12711 tp = get_thread_pointer (true);
12712 set_unique_reg_note (get_last_insn (), REG_EQUAL,
12713 gen_rtx_MINUS (Pmode, tmp, tp));
12717 rtx caddr = ix86_tls_get_addr ();
12721 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, eqv;
12724 emit_call_insn (gen_tls_local_dynamic_base_64 (rax, caddr));
12725 insns = get_insns ();
12728 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
12729 share the LD_BASE result with other LD model accesses. */
12730 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
12731 UNSPEC_TLS_LD_BASE);
12733 RTL_CONST_CALL_P (insns) = 1;
12734 emit_libcall_block (insns, base, rax, eqv);
12737 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
/* Add the symbol's DTPOFF offset to the module base.  */
12740 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
12741 off = gen_rtx_CONST (Pmode, off);
12743 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
12745 if (TARGET_GNU2_TLS)
12747 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
12749 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
/* Initial exec: load the TP offset from the GOT.  */
12753 case TLS_MODEL_INITIAL_EXEC:
12756 if (TARGET_SUN_TLS)
12758 /* The Sun linker took the AMD64 TLS spec literally
12759 and can only handle %rax as destination of the
12760 initial executable code sequence. */
12762 dest = gen_reg_rtx (Pmode);
12763 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
12768 type = UNSPEC_GOTNTPOFF;
12772 if (reload_in_progress)
12773 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12774 pic = pic_offset_table_rtx;
12775 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
12777 else if (!TARGET_ANY_GNU_TLS)
12779 pic = gen_reg_rtx (Pmode);
12780 emit_insn (gen_set_got (pic));
12781 type = UNSPEC_GOTTPOFF;
12786 type = UNSPEC_INDNTPOFF;
12789 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
12790 off = gen_rtx_CONST (Pmode, off);
12792 off = gen_rtx_PLUS (Pmode, pic, off);
12793 off = gen_const_mem (Pmode, off);
12794 set_mem_alias_set (off, ix86_GOT_alias_set ());
12796 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12798 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
12799 off = force_reg (Pmode, off);
12800 return gen_rtx_PLUS (Pmode, base, off);
/* Non-GNU 32-bit TLS: address is TP minus the (positive) offset.  */
12804 base = get_thread_pointer (true);
12805 dest = gen_reg_rtx (Pmode);
12806 emit_insn (gen_subsi3 (dest, base, off));
/* Local exec: offset from TP is a link-time constant.  */
12810 case TLS_MODEL_LOCAL_EXEC:
12811 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
12812 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12813 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
12814 off = gen_rtx_CONST (Pmode, off);
12816 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12818 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
12819 return gen_rtx_PLUS (Pmode, base, off);
12823 base = get_thread_pointer (true);
12824 dest = gen_reg_rtx (Pmode);
12825 emit_insn (gen_subsi3 (dest, base, off));
12830 gcc_unreachable ();
12836 /* Create or return the unique __imp_DECL dllimport symbol corresponding
/* Results are memoized in DLLIMPORT_MAP (a GC-managed decl -> decl map),
   so each source decl gets exactly one __imp_ indirection VAR_DECL.
   NOTE(review): elided extraction; local declarations and braces between
   the numbered lines are missing from this view.  */
12839 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
12840 htab_t dllimport_map;
12843 get_dllimport_decl (tree decl)
12845 struct tree_map *h, in;
12848 const char *prefix;
12849 size_t namelen, prefixlen;
12854 if (!dllimport_map)
12855 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
/* Look up DECL; on a hash miss the INSERT slot is filled below.  */
12857 in.hash = htab_hash_pointer (decl);
12858 in.base.from = decl;
12859 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
12860 h = (struct tree_map *) *loc;
12864 *loc = h = ggc_alloc_tree_map ();
12866 h->base.from = decl;
/* Build an artificial, external, read-only pointer VAR_DECL that will
   hold the address imported through the __imp_ symbol.  */
12867 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
12868 VAR_DECL, NULL, ptr_type_node);
12869 DECL_ARTIFICIAL (to) = 1;
12870 DECL_IGNORED_P (to) = 1;
12871 DECL_EXTERNAL (to) = 1;
12872 TREE_READONLY (to) = 1;
/* Compose "*__imp_NAME" (or "*__imp__NAME" when a user label prefix
   applies and NAME is not fastcall-prefixed).  */
12874 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
12875 name = targetm.strip_name_encoding (name);
12876 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
12877 ? "*__imp_" : "*__imp__";
12878 namelen = strlen (name);
12879 prefixlen = strlen (prefix);
12880 imp_name = (char *) alloca (namelen + prefixlen + 1);
12881 memcpy (imp_name, prefix, prefixlen);
12882 memcpy (imp_name + prefixlen, name, namelen + 1);
/* The RTL for the decl is a constant memory load through the symbol,
   sharing the GOT alias set.  */
12884 name = ggc_alloc_string (imp_name, namelen + prefixlen);
12885 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
12886 SET_SYMBOL_REF_DECL (rtl, to);
12887 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
12889 rtl = gen_const_mem (Pmode, rtl);
12890 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
12892 SET_DECL_RTL (to, rtl);
12893 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
12898 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
12899 true if we require the result be a register. */
12902 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
/* SYMBOL must carry a decl; the __imp_ indirection is built per-decl.  */
12907 gcc_assert (SYMBOL_REF_DECL (symbol));
12908 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
12910 x = DECL_RTL (imp_decl);
/* presumably guarded by 'if (want_reg)' on an elided line — TODO confirm.  */
12912 x = force_reg (Pmode, x);
12916 /* Try machine-dependent ways of modifying an illegitimate address
12917 to be legitimate. If we find one, return the new, valid address.
12918 This macro is used in only one place: `memory_address' in explow.c.
12920 OLDX is the address as it was before break_out_memory_refs was called.
12921 In some cases it is useful to look at this to decide what needs to be done.
12923 It is always safe for this macro to do nothing. It exists to recognize
12924 opportunities to optimize the output.
12926 For the 80386, we handle X+REG by loading X into a register R and
12927 using R+REG. R will go in a general reg and indexing will be used.
12928 However, if REG is a broken-out memory address or multiplication,
12929 nothing needs to be done because REG can certainly go in a general reg.
12931 When -fpic is used, special handling is needed for symbolic references.
12932 See comments by legitimize_pic_address in i386.c for details. */
/* NOTE(review): elided extraction; braces, a 'changed' flag declaration
   and several early returns between the numbered lines are missing from
   this view.  Code kept byte-identical; only comments added.  */
12935 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
12936 enum machine_mode mode)
/* TLS symbols (bare or wrapped in CONST+PLUS) are handled first.  */
12941 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
12943 return legitimize_tls_address (x, (enum tls_model) log, false);
12944 if (GET_CODE (x) == CONST
12945 && GET_CODE (XEXP (x, 0)) == PLUS
12946 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
12947 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
12949 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
12950 (enum tls_model) log, false);
12951 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
/* dllimport symbols get the __imp_ indirection.  */
12954 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
12956 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
12957 return legitimize_dllimport_symbol (x, true);
12958 if (GET_CODE (x) == CONST
12959 && GET_CODE (XEXP (x, 0)) == PLUS
12960 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
12961 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
12963 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
12964 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
12968 if (flag_pic && SYMBOLIC_CONST (x))
12969 return legitimize_pic_address (x, 0);
12972 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
12973 return machopic_indirect_data_reference (x, 0);
12976 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
12977 if (GET_CODE (x) == ASHIFT
12978 && CONST_INT_P (XEXP (x, 1))
12979 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
12982 log = INTVAL (XEXP (x, 1));
12983 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
12984 GEN_INT (1 << log));
12987 if (GET_CODE (x) == PLUS)
12989 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
12991 if (GET_CODE (XEXP (x, 0)) == ASHIFT
12992 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
12993 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
12996 log = INTVAL (XEXP (XEXP (x, 0), 1));
12997 XEXP (x, 0) = gen_rtx_MULT (Pmode,
12998 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
12999 GEN_INT (1 << log));
13002 if (GET_CODE (XEXP (x, 1)) == ASHIFT
13003 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
13004 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
13007 log = INTVAL (XEXP (XEXP (x, 1), 1));
13008 XEXP (x, 1) = gen_rtx_MULT (Pmode,
13009 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
13010 GEN_INT (1 << log));
13013 /* Put multiply first if it isn't already. */
13014 if (GET_CODE (XEXP (x, 1)) == MULT)
13016 rtx tmp = XEXP (x, 0);
13017 XEXP (x, 0) = XEXP (x, 1);
13022 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
13023 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
13024 created by virtual register instantiation, register elimination, and
13025 similar optimizations. */
13026 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
13029 x = gen_rtx_PLUS (Pmode,
13030 gen_rtx_PLUS (Pmode, XEXP (x, 0),
13031 XEXP (XEXP (x, 1), 0)),
13032 XEXP (XEXP (x, 1), 1));
13036 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
13037 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
13038 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
13039 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
13040 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
13041 && CONSTANT_P (XEXP (x, 1)))
13044 rtx other = NULL_RTX;
13046 if (CONST_INT_P (XEXP (x, 1)))
13048 constant = XEXP (x, 1);
13049 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
13051 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
13053 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
13054 other = XEXP (x, 1);
13062 x = gen_rtx_PLUS (Pmode,
13063 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
13064 XEXP (XEXP (XEXP (x, 0), 1), 0)),
13065 plus_constant (other, INTVAL (constant)));
/* After each canonicalization round, stop as soon as the address has
   become legitimate.  */
13069 if (changed && ix86_legitimate_address_p (mode, x, false))
13072 if (GET_CODE (XEXP (x, 0)) == MULT)
13075 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
13078 if (GET_CODE (XEXP (x, 1)) == MULT)
13081 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
13085 && REG_P (XEXP (x, 1))
13086 && REG_P (XEXP (x, 0)))
13089 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
13092 x = legitimize_pic_address (x, 0);
13095 if (changed && ix86_legitimate_address_p (mode, x, false))
/* Last resort: move one side of the PLUS into a fresh register.  */
13098 if (REG_P (XEXP (x, 0)))
13100 rtx temp = gen_reg_rtx (Pmode);
13101 rtx val = force_operand (XEXP (x, 1), temp);
13104 if (GET_MODE (val) != Pmode)
13105 val = convert_to_mode (Pmode, val, 1);
13106 emit_move_insn (temp, val);
13109 XEXP (x, 1) = temp;
13113 else if (REG_P (XEXP (x, 1)))
13115 rtx temp = gen_reg_rtx (Pmode);
13116 rtx val = force_operand (XEXP (x, 0), temp);
13119 if (GET_MODE (val) != Pmode)
13120 val = convert_to_mode (Pmode, val, 1);
13121 emit_move_insn (temp, val);
13124 XEXP (x, 0) = temp;
13132 /* Print an integer constant expression in assembler syntax. Addition
13133 and subtraction are the only arithmetic that may appear in these
13134 expressions. FILE is the stdio stream to write to, X is the rtx, and
13135 CODE is the operand print code from the output string. */
/* NOTE(review): elided extraction; the case labels, braces and breaks
   between the numbered lines are missing.  Code kept byte-identical.  */
13138 output_pic_addr_const (FILE *file, rtx x, int code)
13142 switch (GET_CODE (x))
13145 gcc_assert (flag_pic);
13150 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
13151 output_addr_const (file, x);
13154 const char *name = XSTR (x, 0);
13156 /* Mark the decl as referenced so that cgraph will
13157 output the function. */
13158 if (SYMBOL_REF_DECL (x))
13159 mark_decl_referenced (SYMBOL_REF_DECL (x));
13162 if (MACHOPIC_INDIRECT
13163 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
13164 name = machopic_indirection_name (x, /*stub_p=*/true);
13166 assemble_name (file, name);
/* 'P' print code requests a @PLT suffix for non-local symbols (not on
   Mach-O or 64-bit MS ABI, which have no PLT relocation syntax here).  */
13168 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
13169 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
13170 fputs ("@PLT", file);
13177 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
13178 assemble_name (asm_out_file, buf);
13182 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
13186 /* This used to output parentheses around the expression,
13187 but that does not work on the 386 (either ATT or BSD assembler). */
13188 output_pic_addr_const (file, XEXP (x, 0), code);
13192 if (GET_MODE (x) == VOIDmode)
13194 /* We can use %d if the number is <32 bits and positive. */
13195 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
13196 fprintf (file, "0x%lx%08lx",
13197 (unsigned long) CONST_DOUBLE_HIGH (x),
13198 (unsigned long) CONST_DOUBLE_LOW (x));
13200 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
13203 /* We can't handle floating point constants;
13204 TARGET_PRINT_OPERAND must handle them. */
13205 output_operand_lossage ("floating constant misused");
13209 /* Some assemblers need integer constants to appear first. */
13210 if (CONST_INT_P (XEXP (x, 0)))
13212 output_pic_addr_const (file, XEXP (x, 0), code);
13214 output_pic_addr_const (file, XEXP (x, 1), code);
13218 gcc_assert (CONST_INT_P (XEXP (x, 1)));
13219 output_pic_addr_const (file, XEXP (x, 1), code);
13221 output_pic_addr_const (file, XEXP (x, 0), code);
/* MINUS is bracketed to keep the assembler from misparsing it; the
   bracket style follows the assembler dialect.  */
13227 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
13228 output_pic_addr_const (file, XEXP (x, 0), code);
13230 output_pic_addr_const (file, XEXP (x, 1), code);
13232 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
13236 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
13238 bool f = i386_asm_output_addr_const_extra (file, x);
/* Each UNSPEC wraps exactly one operand; print it followed by the
   relocation suffix that matches the unspec kind.  */
13243 gcc_assert (XVECLEN (x, 0) == 1);
13244 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
13245 switch (XINT (x, 1))
13248 fputs ("@GOT", file);
13250 case UNSPEC_GOTOFF:
13251 fputs ("@GOTOFF", file);
13253 case UNSPEC_PLTOFF:
13254 fputs ("@PLTOFF", file);
13257 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13258 "(%rip)" : "[rip]", file);
13260 case UNSPEC_GOTPCREL:
13261 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13262 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
13264 case UNSPEC_GOTTPOFF:
13265 /* FIXME: This might be @TPOFF in Sun ld too. */
13266 fputs ("@gottpoff", file);
13269 fputs ("@tpoff", file);
13271 case UNSPEC_NTPOFF:
13273 fputs ("@tpoff", file);
13275 fputs ("@ntpoff", file);
13277 case UNSPEC_DTPOFF:
13278 fputs ("@dtpoff", file);
13280 case UNSPEC_GOTNTPOFF:
13282 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13283 "@gottpoff(%rip)": "@gottpoff[rip]", file);
13285 fputs ("@gotntpoff", file);
13287 case UNSPEC_INDNTPOFF:
13288 fputs ("@indntpoff", file);
13291 case UNSPEC_MACHOPIC_OFFSET:
13293 machopic_output_function_base_name (file);
13297 output_operand_lossage ("invalid UNSPEC as operand");
13303 output_operand_lossage ("invalid expression as operand");
13307 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
13308 We need to emit DTP-relative relocations. */
/* Emits "<ASM_LONG> sym@dtpoff" (with a ", 0" upper half in one elided
   size case).  NOTE(review): the switch on SIZE between the numbered
   lines is elided from this view.  */
13310 static void ATTRIBUTE_UNUSED
13311 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
13313 fputs (ASM_LONG, file);
13314 output_addr_const (file, x);
13315 fputs ("@dtpoff", file);
/* presumably the 8-byte case pads the upper 32 bits with zero.  */
13321 fputs (", 0", file);
13324 gcc_unreachable ();
13328 /* Return true if X is a representation of the PIC register. This copes
13329 with calls from ix86_find_base_term, where the register might have
13330 been replaced by a cselib value. */
13333 ix86_pic_register_p (rtx x)
/* A cselib VALUE compares against the PIC register via cselib's own
   equality; a plain REG compares by register number.  */
13335 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
13336 return (pic_offset_table_rtx
13337 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
13339 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
13342 /* Helper function for ix86_delegitimize_address.
13343 Attempt to delegitimize TLS local-exec accesses. */
/* Recognizes %fs/%gs-relative addresses whose displacement is an
   UNSPEC_NTPOFF and rebuilds the plain symbol (plus any base/index
   terms).  NOTE(review): elided extraction; several early 'return
   orig_x' lines and braces are missing from this view.  */
13346 ix86_delegitimize_tls_address (rtx orig_x)
13348 rtx x = orig_x, unspec;
13349 struct ix86_address addr;
13351 if (!TARGET_TLS_DIRECT_SEG_REFS)
13355 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
/* Must decompose as a seg-override address: FS on 64-bit, GS on 32-bit.  */
13357 if (ix86_decompose_address (x, &addr) == 0
13358 || addr.seg != (TARGET_64BIT ? SEG_FS : SEG_GS)
13359 || addr.disp == NULL_RTX
13360 || GET_CODE (addr.disp) != CONST)
13362 unspec = XEXP (addr.disp, 0);
13363 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
13364 unspec = XEXP (unspec, 0);
13365 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
13367 x = XVECEXP (unspec, 0, 0);
13368 gcc_assert (GET_CODE (x) == SYMBOL_REF);
/* Re-attach a constant offset that sat outside the unspec.  */
13369 if (unspec != XEXP (addr.disp, 0))
13370 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
13373 rtx idx = addr.index;
13374 if (addr.scale != 1)
13375 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
13376 x = gen_rtx_PLUS (Pmode, idx, x);
13379 x = gen_rtx_PLUS (Pmode, addr.base, x);
13380 if (MEM_P (orig_x))
13381 x = replace_equiv_address_nv (orig_x, x);
13385 /* In the name of slightly smaller debug output, and to cater to
13386 general assembler lossage, recognize PIC+GOTOFF and turn it back
13387 into a direct symbol reference.
13389 On Darwin, this is necessary to avoid a crash, because Darwin
13390 has a different PIC label for each routine but the DWARF debugging
13391 information is not associated with any particular routine, so it's
13392 necessary to remove references to the PIC label from RTL stored by
13393 the DWARF output code. */
/* NOTE(review): elided extraction; the TARGET_64BIT split, braces and
   several early returns between the numbered lines are missing.  */
13396 ix86_delegitimize_address (rtx x)
13398 rtx orig_x = delegitimize_mem_from_attrs (x);
13399 /* addend is NULL or some rtx if x is something+GOTOFF where
13400 something doesn't include the PIC register. */
13401 rtx addend = NULL_RTX;
13402 /* reg_addend is NULL or a multiple of some register. */
13403 rtx reg_addend = NULL_RTX;
13404 /* const_addend is NULL or a const_int. */
13405 rtx const_addend = NULL_RTX;
13406 /* This is the result, or NULL. */
13407 rtx result = NULL_RTX;
/* 64-bit path: strip a GOTPCREL/PCREL CONST wrapper from a MEM.  */
13416 if (GET_CODE (x) != CONST
13417 || GET_CODE (XEXP (x, 0)) != UNSPEC
13418 || (XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
13419 && XINT (XEXP (x, 0), 1) != UNSPEC_PCREL)
13420 || !MEM_P (orig_x))
13421 return ix86_delegitimize_tls_address (orig_x);
13422 x = XVECEXP (XEXP (x, 0), 0, 0);
13423 if (GET_MODE (orig_x) != GET_MODE (x))
13425 x = simplify_gen_subreg (GET_MODE (orig_x), x,
/* 32-bit path: peel the PIC register and any extra addends off
   a (plus ... (const ...)) form.  */
13433 if (GET_CODE (x) != PLUS
13434 || GET_CODE (XEXP (x, 1)) != CONST)
13435 return ix86_delegitimize_tls_address (orig_x);
13437 if (ix86_pic_register_p (XEXP (x, 0)))
13438 /* %ebx + GOT/GOTOFF */
13440 else if (GET_CODE (XEXP (x, 0)) == PLUS)
13442 /* %ebx + %reg * scale + GOT/GOTOFF */
13443 reg_addend = XEXP (x, 0);
13444 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
13445 reg_addend = XEXP (reg_addend, 1);
13446 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
13447 reg_addend = XEXP (reg_addend, 0);
13450 reg_addend = NULL_RTX;
13451 addend = XEXP (x, 0);
13455 addend = XEXP (x, 0);
13457 x = XEXP (XEXP (x, 1), 0);
13458 if (GET_CODE (x) == PLUS
13459 && CONST_INT_P (XEXP (x, 1)))
13461 const_addend = XEXP (x, 1);
13465 if (GET_CODE (x) == UNSPEC
13466 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
13467 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
13468 result = XVECEXP (x, 0, 0);
13470 if (TARGET_MACHO && darwin_local_data_pic (x)
13471 && !MEM_P (orig_x))
13472 result = XVECEXP (x, 0, 0);
13475 return ix86_delegitimize_tls_address (orig_x);
/* Reassemble the stripped-off constant and register addends.  */
13478 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
13480 result = gen_rtx_PLUS (Pmode, reg_addend, result);
13483 /* If the rest of original X doesn't involve the PIC register, add
13484 addend and subtract pic_offset_table_rtx. This can happen e.g.
13486 leal (%ebx, %ecx, 4), %ecx
13488 movl foo@GOTOFF(%ecx), %edx
13489 in which case we return (%ecx - %ebx) + foo. */
13490 if (pic_offset_table_rtx)
13491 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
13492 pic_offset_table_rtx),
13497 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
13499 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
13500 if (result == NULL_RTX)
13506 /* If X is a machine specific address (i.e. a symbol or label being
13507 referenced as a displacement from the GOT implemented using an
13508 UNSPEC), then return the base term. Otherwise return X. */
/* NOTE(review): elided extraction; a TARGET_64BIT guard and braces
   between the numbered lines appear to be missing from this view.  */
13511 ix86_find_base_term (rtx x)
13517 if (GET_CODE (x) != CONST)
13519 term = XEXP (x, 0);
/* Skip over a constant (or CONST_DOUBLE) offset added to the term.  */
13520 if (GET_CODE (term) == PLUS
13521 && (CONST_INT_P (XEXP (term, 1))
13522 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE)
13523 term = XEXP (term, 0);
13524 if (GET_CODE (term) != UNSPEC
13525 || (XINT (term, 1) != UNSPEC_GOTPCREL
13526 && XINT (term, 1) != UNSPEC_PCREL))
13529 return XVECEXP (term, 0, 0);
13532 return ix86_delegitimize_address (x);
/* Emit the assembler condition-code suffix (e.g. "e", "a", "np") for
   CODE under condition-mode MODE.  REVERSE inverts the condition; FP
   selects the fcmov-style spelling.  NOTE(review): elided extraction;
   most case labels of the switch are missing from this view.  */
13536 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
13537 int fp, FILE *file)
13539 const char *suffix;
/* FP compares are first mapped onto the equivalent integer condition.  */
13541 if (mode == CCFPmode || mode == CCFPUmode)
13543 code = ix86_fp_compare_code_to_integer (code);
13547 code = reverse_condition (code);
13598 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
13602 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
13603 Those same assemblers have the same but opposite lossage on cmov. */
13604 if (mode == CCmode)
13605 suffix = fp ? "nbe" : "a";
13606 else if (mode == CCCmode)
13609 gcc_unreachable ();
13625 gcc_unreachable ();
13629 gcc_assert (mode == CCmode || mode == CCCmode);
13646 gcc_unreachable ();
13650 /* ??? As above. */
13651 gcc_assert (mode == CCmode || mode == CCCmode);
13652 suffix = fp ? "nb" : "ae";
13655 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
13659 /* ??? As above. */
13660 if (mode == CCmode)
13662 else if (mode == CCCmode)
13663 suffix = fp ? "nb" : "ae";
13665 gcc_unreachable ();
/* UNORDERED / ORDERED map to the parity flag.  */
13668 suffix = fp ? "u" : "p";
13671 suffix = fp ? "nu" : "np";
13674 gcc_unreachable ();
13676 fputs (suffix, file);
13679 /* Print the name of register X to FILE based on its machine mode and number.
13680 If CODE is 'w', pretend the mode is HImode.
13681 If CODE is 'b', pretend the mode is QImode.
13682 If CODE is 'k', pretend the mode is SImode.
13683 If CODE is 'q', pretend the mode is DImode.
13684 If CODE is 'x', pretend the mode is V4SFmode.
13685 If CODE is 't', pretend the mode is V8SFmode.
13686 If CODE is 'h', pretend the reg is the 'high' byte register.
13687 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
13688 If CODE is 'd', duplicate the operand for AVX instruction.
/* NOTE(review): elided extraction; the switch on operand size and
   several case labels between the numbered lines are missing.  */
13692 print_reg (rtx x, int code, FILE *file)
13695 bool duplicated = code == 'd' && TARGET_AVX;
13697 gcc_assert (x == pc_rtx
13698 || (REGNO (x) != ARG_POINTER_REGNUM
13699 && REGNO (x) != FRAME_POINTER_REGNUM
13700 && REGNO (x) != FLAGS_REG
13701 && REGNO (x) != FPSR_REG
13702 && REGNO (x) != FPCR_REG));
13704 if (ASSEMBLER_DIALECT == ASM_ATT)
13709 gcc_assert (TARGET_64BIT);
13710 fputs ("rip", file);
/* Translate the print code into an operand-size value; absent a code,
   fall through to the operand's natural mode size.  */
13714 if (code == 'w' || MMX_REG_P (x))
13716 else if (code == 'b')
13718 else if (code == 'k')
13720 else if (code == 'q')
13722 else if (code == 'y')
13724 else if (code == 'h')
13726 else if (code == 'x')
13728 else if (code == 't')
13731 code = GET_MODE_SIZE (GET_MODE (x));
13733 /* Irritatingly, AMD extended registers use different naming convention
13734 from the normal registers: "r%d[bwd]" */
13735 if (REX_INT_REG_P (x))
13737 gcc_assert (TARGET_64BIT);
13739 fprint_ul (file, REGNO (x) - FIRST_REX_INT_REG + 8);
13743 error ("extended registers have no high halves");
13758 error ("unsupported operand size for extended register");
13768 if (STACK_TOP_P (x))
/* Non-FP registers get an 'e' (32-bit) or 'r' (64-bit) prefix.  */
13777 if (! ANY_FP_REG_P (x))
13778 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
13783 reg = hi_reg_name[REGNO (x)];
13786 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
13788 reg = qi_reg_name[REGNO (x)];
13791 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
13793 reg = qi_high_reg_name[REGNO (x)];
13798 gcc_assert (!duplicated);
/* Skip the '%' stored in hi_reg_name — TODO confirm; '+ 1' drops the
   first character of the table entry.  */
13800 fputs (hi_reg_name[REGNO (x)] + 1, file);
13805 gcc_unreachable ();
/* 'd' duplication: emit the register a second time for AVX syntax.  */
13811 if (ASSEMBLER_DIALECT == ASM_ATT)
13812 fprintf (file, ", %%%s", reg);
13814 fprintf (file, ", %s", reg);
13818 /* Locate some local-dynamic symbol still in use by this function
13819 so that we can print its name in some tls_local_dynamic_base
/* for_each_rtx callback: on finding a local-dynamic TLS SYMBOL_REF,
   record its name in cfun->machine->some_ld_name (the non-zero return
   that stops the walk is on an elided line — TODO confirm).  */
13823 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
13827 if (GET_CODE (x) == SYMBOL_REF
13828 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
13830 cfun->machine->some_ld_name = XSTR (x, 0);
/* Return the cached local-dynamic symbol name for the current function,
   scanning its insns on first use via get_some_local_dynamic_name_1.  */
13837 static const char *
13838 get_some_local_dynamic_name (void)
13842 if (cfun->machine->some_ld_name)
13843 return cfun->machine->some_ld_name;
/* Walk every real insn; the callback caches the first LD symbol found.  */
13845 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
13846 if (NONDEBUG_INSN_P (insn)
13847 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
13848 return cfun->machine->some_ld_name;
13853 /* Meaning of CODE:
13854 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
13855 C -- print opcode suffix for set/cmov insn.
13856 c -- like C, but print reversed condition
13857 F,f -- likewise, but for floating-point.
13858 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
13860 R -- print the prefix for register names.
13861 z -- print the opcode suffix for the size of the current operand.
13862 Z -- likewise, with special suffixes for x87 instructions.
13863 * -- print a star (in certain assembler syntax)
13864 A -- print an absolute memory reference.
13865 w -- print the operand as if it's a "word" (HImode) even if it isn't.
13866 s -- print a shift double count, followed by the assemblers argument
13868 b -- print the QImode name of the register for the indicated operand.
13869 %b0 would print %al if operands[0] is reg 0.
13870 w -- likewise, print the HImode name of the register.
13871 k -- likewise, print the SImode name of the register.
13872 q -- likewise, print the DImode name of the register.
13873 x -- likewise, print the V4SFmode name of the register.
13874 t -- likewise, print the V8SFmode name of the register.
13875 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
13876 y -- print "st(0)" instead of "st" as a register.
13877 d -- print duplicated register operand for AVX instruction.
13878 D -- print condition for SSE cmp instruction.
13879 P -- if PIC, print an @PLT suffix.
13880 p -- print raw symbol name.
13881 X -- don't print any sort of PIC '@' suffix for a symbol.
13882 & -- print some in-use local-dynamic symbol name.
13883 H -- print a memory address offset by 8; used for sse high-parts
13884 Y -- print condition for XOP pcom* instruction.
13885 + -- print a branch hint as 'cs' or 'ds' prefix
13886 ; -- print a semicolon (after prefixes due to bug in older gas).
13887 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
13888 @ -- print a segment register of thread base pointer load
/* TARGET_PRINT_OPERAND worker: print operand X (an rtx) to FILE,
   modified by print code CODE (see the operand-code table in the
   comment above this function).
   NOTE(review): this listing is incomplete -- the embedded original
   line numbers jump, so interior lines (case labels, braces, some
   statements) are missing from view.  Comments below describe only
   what the visible lines establish.  */
13892 ix86_print_operand (FILE *file, rtx x, int code)
13899 if (ASSEMBLER_DIALECT == ASM_ATT)
/* '&' -- emit some in-use local-dynamic TLS symbol name, or complain
   if there are none.  */
13905 const char *name = get_some_local_dynamic_name ();
13907 output_operand_lossage ("'%%&' used without any "
13908 "local dynamic TLS references");
13910 assemble_name (file, name);
13915 switch (ASSEMBLER_DIALECT)
13922 /* Intel syntax. For absolute addresses, registers should not
13923 be surrounded by braces. */
13927 ix86_print_operand (file, x, 0);
13934 gcc_unreachable ();
13937 ix86_print_operand (file, x, 0);
/* Several size-letter codes emit an AT&T-only register prefix or
   suffix; each is guarded by the assembler dialect.  */
13942 if (ASSEMBLER_DIALECT == ASM_ATT)
13947 if (ASSEMBLER_DIALECT == ASM_ATT)
13952 if (ASSEMBLER_DIALECT == ASM_ATT)
13957 if (ASSEMBLER_DIALECT == ASM_ATT)
13962 if (ASSEMBLER_DIALECT == ASM_ATT)
13967 if (ASSEMBLER_DIALECT == ASM_ATT)
/* Size-suffix selection keyed on the operand's machine mode.  */
13972 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
13974 /* Opcodes don't get size suffixes if using Intel opcodes. */
13975 if (ASSEMBLER_DIALECT == ASM_INTEL)
13978 switch (GET_MODE_SIZE (GET_MODE (x)))
13997 output_operand_lossage
13998 ("invalid operand size for operand code '%c'", code);
14003 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14005 (0, "non-integer operand used with operand code '%c'", code);
14009 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
14010 if (ASSEMBLER_DIALECT == ASM_INTEL)
14013 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
14015 switch (GET_MODE_SIZE (GET_MODE (x)))
14018 #ifdef HAVE_AS_IX86_FILDS
14028 #ifdef HAVE_AS_IX86_FILDQ
14031 fputs ("ll", file);
14039 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14041 /* 387 opcodes don't get size suffixes
14042 if the operands are registers. */
14043 if (STACK_REG_P (x))
14046 switch (GET_MODE_SIZE (GET_MODE (x)))
14067 output_operand_lossage
14068 ("invalid operand type used with operand code '%c'", code);
14072 output_operand_lossage
14073 ("invalid operand size for operand code '%c'", code);
14091 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
14093 ix86_print_operand (file, x, 0);
14094 fputs (", ", file);
14099 /* Little bit of braindamage here. The SSE compare instructions
14100 does use completely different names for the comparisons that the
14101 fp conditional moves. */
/* 'D' -- map an RTL comparison code to the SSE cmp condition
   mnemonic ("eq", "lt", "unord", ...).  Two variants follow; the
   selector between them is in lines missing from this listing.  */
14104 switch (GET_CODE (x))
14107 fputs ("eq", file);
14110 fputs ("eq_us", file);
14113 fputs ("lt", file);
14116 fputs ("nge", file);
14119 fputs ("le", file);
14122 fputs ("ngt", file);
14125 fputs ("unord", file);
14128 fputs ("neq", file);
14131 fputs ("neq_oq", file);
14134 fputs ("ge", file);
14137 fputs ("nlt", file);
14140 fputs ("gt", file);
14143 fputs ("nle", file);
14146 fputs ("ord", file);
14149 output_operand_lossage ("operand is not a condition code, "
14150 "invalid operand code 'D'");
14156 switch (GET_CODE (x))
14160 fputs ("eq", file);
14164 fputs ("lt", file);
14168 fputs ("le", file);
14171 fputs ("unord", file);
14175 fputs ("neq", file);
14179 fputs ("nlt", file);
14183 fputs ("nle", file);
14186 fputs ("ord", file);
14189 output_operand_lossage ("operand is not a condition code, "
14190 "invalid operand code 'D'");
/* Condition-code printing for cmov-style codes; Sun as needs an
   explicit operand-size letter after the mnemonic.  */
14196 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14197 if (ASSEMBLER_DIALECT == ASM_ATT)
14199 switch (GET_MODE (x))
14201 case HImode: putc ('w', file); break;
14203 case SFmode: putc ('l', file); break;
14205 case DFmode: putc ('q', file); break;
14206 default: gcc_unreachable ();
14213 if (!COMPARISON_P (x))
14215 output_operand_lossage ("operand is neither a constant nor a "
14216 "condition code, invalid operand code "
14220 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
14223 if (!COMPARISON_P (x))
14225 output_operand_lossage ("operand is neither a constant nor a "
14226 "condition code, invalid operand code "
14230 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14231 if (ASSEMBLER_DIALECT == ASM_ATT)
14234 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
14237 /* Like above, but reverse condition */
14239 /* Check to see if argument to %c is really a constant
14240 and not a condition code which needs to be reversed. */
14241 if (!COMPARISON_P (x))
14243 output_operand_lossage ("operand is neither a constant nor a "
14244 "condition code, invalid operand "
14248 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
14251 if (!COMPARISON_P (x))
14253 output_operand_lossage ("operand is neither a constant nor a "
14254 "condition code, invalid operand "
14258 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14259 if (ASSEMBLER_DIALECT == ASM_ATT)
14262 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
/* 'H' -- rewrite a memory operand to address its high 8-byte part.  */
14266 /* It doesn't actually matter what mode we use here, as we're
14267 only going to use this for printing. */
14268 x = adjust_address_nv (x, DImode, 8);
/* '+' -- branch hint prefix, driven by the REG_BR_PROB note on the
   insn currently being output.  */
14276 || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS)
14279 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
14282 int pred_val = INTVAL (XEXP (x, 0));
/* Only hint when the prediction is decisive (outside 45%-55%).  */
14284 if (pred_val < REG_BR_PROB_BASE * 45 / 100
14285 || pred_val > REG_BR_PROB_BASE * 55 / 100)
14287 int taken = pred_val > REG_BR_PROB_BASE / 2;
14288 int cputaken = final_forward_branch_p (current_output_insn) == 0;
14290 /* Emit hints only in the case default branch prediction
14291 heuristics would fail. */
14292 if (taken != cputaken)
14294 /* We use 3e (DS) prefix for taken branches and
14295 2e (CS) prefix for not taken branches. */
14297 fputs ("ds ; ", file);
14299 fputs ("cs ; ", file);
/* 'Y' -- XOP pcom* condition name for an RTL comparison code.  */
14307 switch (GET_CODE (x))
14310 fputs ("neq", file);
14313 fputs ("eq", file);
14317 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
14321 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
14325 fputs ("le", file);
14329 fputs ("lt", file);
14332 fputs ("unord", file);
14335 fputs ("ord", file);
14338 fputs ("ueq", file);
14341 fputs ("nlt", file);
14344 fputs ("nle", file);
14347 fputs ("ule", file);
14350 fputs ("ult", file);
14353 fputs ("une", file);
14356 output_operand_lossage ("operand is not a condition code, "
14357 "invalid operand code 'Y'");
14363 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
/* '@' -- thread-base segment register: %fs for 64-bit user code,
   %gs otherwise (the kernel reserves %gs for itself).  */
14369 if (ASSEMBLER_DIALECT == ASM_ATT)
14372 /* The kernel uses a different segment register for performance
14373 reasons; a system call would not have to trash the userspace
14374 segment register, which would be expensive. */
14375 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
14376 fputs ("fs", file);
14378 fputs ("gs", file);
/* '~' -- "i" if TARGET_AVX2 else "f" (vpermq vs vperm2f128 class).  */
14382 putc (TARGET_AVX2 ? 'i' : 'f', file);
14386 output_operand_lossage ("invalid operand code '%c'", code);
/* Fall-through operand printing: register, memory, float constants,
   then general constants.  */
14391 print_reg (x, code, file);
14393 else if (MEM_P (x))
14395 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
14396 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
14397 && GET_MODE (x) != BLKmode)
14400 switch (GET_MODE_SIZE (GET_MODE (x)))
14402 case 1: size = "BYTE"; break;
14403 case 2: size = "WORD"; break;
14404 case 4: size = "DWORD"; break;
14405 case 8: size = "QWORD"; break;
14406 case 12: size = "TBYTE"; break;
14408 if (GET_MODE (x) == XFmode)
14413 case 32: size = "YMMWORD"; break;
14415 gcc_unreachable ();
14418 /* Check for explicit size override (codes 'b', 'w', 'k',
14422 else if (code == 'w')
14424 else if (code == 'k')
14426 else if (code == 'q')
14428 else if (code == 'x')
14431 fputs (size, file);
14432 fputs (" PTR ", file);
14436 /* Avoid (%rip) for call operands. */
14437 if (CONSTANT_ADDRESS_P (x) && code == 'P'
14438 && !CONST_INT_P (x))
14439 output_addr_const (file, x);
14440 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
14441 output_operand_lossage ("invalid constraints for operand");
14443 output_address (x);
/* SFmode immediate: print the IEEE single bit pattern in hex.  */
14446 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
14451 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
14452 REAL_VALUE_TO_TARGET_SINGLE (r, l);
14454 if (ASSEMBLER_DIALECT == ASM_ATT)
14456 /* Sign extend 32bit SFmode immediate to 8 bytes. */
14458 fprintf (file, "0x%08llx", (unsigned long long) (int) l);
14460 fprintf (file, "0x%08x", (unsigned int) l);
14463 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
14468 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
14469 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
14471 if (ASSEMBLER_DIALECT == ASM_ATT)
14473 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
14476 /* These float cases don't actually occur as immediate operands. */
14477 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
14481 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
14482 fputs (dstr, file);
14487 /* We have patterns that allow zero sets of memory, for instance.
14488 In 64-bit mode, we should probably support all 8-byte vectors,
14489 since we can in fact encode that into an immediate. */
14490 if (GET_CODE (x) == CONST_VECTOR)
14492 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
/* 'P'/'p' suppress the immediate marker ('$' in AT&T, "OFFSET FLAT:"
   in Intel syntax) for raw symbol output.  */
14496 if (code != 'P' && code != 'p')
14498 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
14500 if (ASSEMBLER_DIALECT == ASM_ATT)
14503 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
14504 || GET_CODE (x) == LABEL_REF)
14506 if (ASSEMBLER_DIALECT == ASM_ATT)
14509 fputs ("OFFSET FLAT:", file);
14512 if (CONST_INT_P (x))
14513 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14514 else if (flag_pic || MACHOPIC_INDIRECT)
14515 output_pic_addr_const (file, x, code);
14517 output_addr_const (file, x);
/* TARGET_PRINT_OPERAND_PUNCT_VALID_P hook: return true for the
   punctuation operand codes that ix86_print_operand handles
   ('@', '*', '+', '&', ';', '~').  */
14522 ix86_print_operand_punct_valid_p (unsigned char code)
14524 return (code == '@' || code == '*' || code == '+'
14525 || code == '&' || code == ';' || code == '~');
14528 /* Print a memory operand whose address is ADDR. */
/* TARGET_PRINT_OPERAND_ADDRESS worker: decompose ADDR into
   base/index/disp/scale/segment and print it in the current
   assembler dialect.  NOTE(review): interior lines are missing from
   this listing (original line numbers jump); comments are limited to
   what the visible lines show.  */
14531 ix86_print_operand_address (FILE *file, rtx addr)
14533 struct ix86_address parts;
14534 rtx base, index, disp;
/* VSIB (gather) addresses arrive wrapped in an UNSPEC carrying the
   vector index and scale as extra vector elements.  */
14539 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
14541 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
14542 gcc_assert (parts.index == NULL_RTX);
14543 parts.index = XVECEXP (addr, 0, 1);
14544 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
14545 addr = XVECEXP (addr, 0, 0);
14549 ok = ix86_decompose_address (addr, &parts);
/* Strip SUBREGs off base and index so print_reg sees hard regs.  */
14553 if (parts.base && GET_CODE (parts.base) == SUBREG)
14555 rtx tmp = SUBREG_REG (parts.base);
14556 parts.base = simplify_subreg (GET_MODE (parts.base),
14557 tmp, GET_MODE (tmp), 0);
14560 if (parts.index && GET_CODE (parts.index) == SUBREG)
14562 rtx tmp = SUBREG_REG (parts.index);
14563 parts.index = simplify_subreg (GET_MODE (parts.index),
14564 tmp, GET_MODE (tmp), 0);
14568 index = parts.index;
14570 scale = parts.scale;
/* Non-default segment override (fs:/gs:).  */
14578 if (ASSEMBLER_DIALECT == ASM_ATT)
14580 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
14583 gcc_unreachable ();
14586 /* Use one byte shorter RIP relative addressing for 64bit mode. */
14587 if (TARGET_64BIT && !base && !index)
14591 if (GET_CODE (disp) == CONST
14592 && GET_CODE (XEXP (disp, 0)) == PLUS
14593 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
14594 symbol = XEXP (XEXP (disp, 0), 0);
/* RIP-relative only for labels and non-TLS symbols.  */
14596 if (GET_CODE (symbol) == LABEL_REF
14597 || (GET_CODE (symbol) == SYMBOL_REF
14598 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
14601 if (!base && !index)
14603 /* Displacement only requires special attention. */
14605 if (CONST_INT_P (disp))
14607 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
14608 fputs ("ds:", file);
14609 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
14612 output_pic_addr_const (file, disp, 0);
14614 output_addr_const (file, disp);
14620 /* Print SImode registers for zero-extended addresses to force
14621 addr32 prefix. Otherwise print DImode registers to avoid it. */
14623 code = ((GET_CODE (addr) == ZERO_EXTEND
14624 || GET_CODE (addr) == AND)
/* AT&T syntax: disp(base,index,scale).  */
14628 if (ASSEMBLER_DIALECT == ASM_ATT)
14633 output_pic_addr_const (file, disp, 0);
14634 else if (GET_CODE (disp) == LABEL_REF)
14635 output_asm_label (disp);
14637 output_addr_const (file, disp);
14642 print_reg (base, code, file);
14646 print_reg (index, vsib ? 0 : code, file);
14647 if (scale != 1 || vsib)
14648 fprintf (file, ",%d", scale);
/* Intel syntax: [base+index*scale+disp]; symbol printed first, the
   integer offset appended afterwards.  */
14654 rtx offset = NULL_RTX;
14658 /* Pull out the offset of a symbol; print any symbol itself. */
14659 if (GET_CODE (disp) == CONST
14660 && GET_CODE (XEXP (disp, 0)) == PLUS
14661 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
14663 offset = XEXP (XEXP (disp, 0), 1);
14664 disp = gen_rtx_CONST (VOIDmode,
14665 XEXP (XEXP (disp, 0), 0));
14669 output_pic_addr_const (file, disp, 0);
14670 else if (GET_CODE (disp) == LABEL_REF)
14671 output_asm_label (disp);
14672 else if (CONST_INT_P (disp))
14675 output_addr_const (file, disp);
14681 print_reg (base, code, file);
14684 if (INTVAL (offset) >= 0)
14686 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
14690 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
14697 print_reg (index, vsib ? 0 : code, file);
14698 if (scale != 1 || vsib)
14699 fprintf (file, "*%d", scale);
14706 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
/* TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA worker: print target-specific
   UNSPEC address constants (TLS relocations, Mach-O PIC offsets,
   split-stack checks).  Returns false for anything that is not an
   UNSPEC we recognize.  NOTE(review): some case bodies are missing
   from this listing.  */
14709 i386_asm_output_addr_const_extra (FILE *file, rtx x)
14713 if (GET_CODE (x) != UNSPEC)
14716 op = XVECEXP (x, 0, 0);
14717 switch (XINT (x, 1))
14719 case UNSPEC_GOTTPOFF:
14720 output_addr_const (file, op);
14721 /* FIXME: This might be @TPOFF in Sun ld. */
14722 fputs ("@gottpoff", file);
14725 output_addr_const (file, op);
14726 fputs ("@tpoff", file);
14728 case UNSPEC_NTPOFF:
14729 output_addr_const (file, op);
/* 64-bit uses @tpoff, 32-bit @ntpoff (selector line not visible).  */
14731 fputs ("@tpoff", file);
14733 fputs ("@ntpoff", file);
14735 case UNSPEC_DTPOFF:
14736 output_addr_const (file, op);
14737 fputs ("@dtpoff", file);
14739 case UNSPEC_GOTNTPOFF:
14740 output_addr_const (file, op);
14742 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14743 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
14745 fputs ("@gotntpoff", file);
14747 case UNSPEC_INDNTPOFF:
14748 output_addr_const (file, op);
14749 fputs ("@indntpoff", file);
14752 case UNSPEC_MACHOPIC_OFFSET:
14753 output_addr_const (file, op);
14755 machopic_output_function_base_name (file);
14759 case UNSPEC_STACK_CHECK:
14763 gcc_assert (flag_split_stack);
14765 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
14766 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
14768 gcc_unreachable ();
/* Compare against the TCB slot holding the stack limit.  */
14771 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
14782 /* Split one or more double-mode RTL references into pairs of half-mode
14783 references. The RTL can be REG, offsettable MEM, integer constant, or
14784 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
14785 split and "num" is its length. lo_half and hi_half are output arrays
14786 that parallel "operands". */
/* Split NUM double-mode rtxes in OPERANDS into half-mode lo/hi pairs
   written to LO_HALF[] / HI_HALF[] (see the comment above).  MEMs are
   split by address adjustment so volatile memory survives; everything
   else goes through simplify_gen_subreg.  */
14789 split_double_mode (enum machine_mode mode, rtx operands[],
14790 int num, rtx lo_half[], rtx hi_half[])
14792 enum machine_mode half_mode;
/* Half of TImode is DImode; half of DImode is SImode (the selecting
   switch lines are not visible in this listing).  */
14798 half_mode = DImode;
14801 half_mode = SImode;
14804 gcc_unreachable ();
14807 byte = GET_MODE_SIZE (half_mode);
14811 rtx op = operands[num];
14813 /* simplify_subreg refuse to split volatile memory addresses,
14814 but we still have to handle it. */
14817 lo_half[num] = adjust_address (op, half_mode, 0);
14818 hi_half[num] = adjust_address (op, half_mode, byte);
/* Non-MEM: build subregs; VOIDmode constants use MODE.  */
14822 lo_half[num] = simplify_gen_subreg (half_mode, op,
14823 GET_MODE (op) == VOIDmode
14824 ? mode : GET_MODE (op), 0);
14825 hi_half[num] = simplify_gen_subreg (half_mode, op,
14826 GET_MODE (op) == VOIDmode
14827 ? mode : GET_MODE (op), byte);
14832 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
14833 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
14834 is the expression of the binary operation. The output may either be
14835 emitted here, or returned to the caller, like all output_* functions.
14837 There is no guarantee that the operands are the same mode, as they
14838 might be within FLOAT or FLOAT_EXTEND expressions. */
14840 #ifndef SYSV386_COMPAT
14841 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
14842 wants to fix the assemblers because that causes incompatibility
14843 with gcc. No-one wants to fix gcc because that causes
14844 incompatibility with assemblers... You can use the option of
14845 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
14846 #define SYSV386_COMPAT 1
/* Emit the assembler template for a 387/SSE binary FP operation (see
   the comment above).  operands[3] carries the operation rtx; the
   chosen template accounts for which operand is on the x87 stack top,
   memory operands, and dying registers.  NOTE(review): this listing
   is missing interior lines (the mnemonic-selection assignments among
   them), so comments stick to the visible structure.  */
14850 output_387_binary_op (rtx insn, rtx *operands)
14852 static char buf[40];
14855 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
14857 #ifdef ENABLE_CHECKING
14858 /* Even if we do not want to check the inputs, this documents input
14859 constraints. Which helps in understanding the following code. */
14860 if (STACK_REG_P (operands[0])
14861 && ((REG_P (operands[1])
14862 && REGNO (operands[0]) == REGNO (operands[1])
14863 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
14864 || (REG_P (operands[2])
14865 && REGNO (operands[0]) == REGNO (operands[2])
14866 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
14867 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
14870 gcc_assert (is_sse);
/* First switch: pick base mnemonics per operation; integer-mode
   operands select the fi* (integer) variants.  */
14873 switch (GET_CODE (operands[3]))
14876 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14877 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14885 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14886 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14894 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14895 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14903 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14904 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14912 gcc_unreachable ();
/* SSE path: scalar ss/sd template; three-operand (AVX) form first,
   two-operand form below.  */
14919 strcpy (buf, ssep);
14920 if (GET_MODE (operands[0]) == SFmode)
14921 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
14923 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
14927 strcpy (buf, ssep + 1);
14928 if (GET_MODE (operands[0]) == SFmode)
14929 strcat (buf, "ss\t{%2, %0|%0, %2}");
14931 strcat (buf, "sd\t{%2, %0|%0, %2}");
/* x87 path: choose the operand suffix/popping form.  */
14937 switch (GET_CODE (operands[3]))
/* Commutative ops: canonicalize so operands[0] == operands[1].  */
14941 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
14943 rtx temp = operands[2];
14944 operands[2] = operands[1];
14945 operands[1] = temp;
14948 /* know operands[0] == operands[1]. */
14950 if (MEM_P (operands[2]))
14956 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
14958 if (STACK_TOP_P (operands[0]))
14959 /* How is it that we are storing to a dead operand[2]?
14960 Well, presumably operands[1] is dead too. We can't
14961 store the result to st(0) as st(0) gets popped on this
14962 instruction. Instead store to operands[2] (which I
14963 think has to be st(1)). st(1) will be popped later.
14964 gcc <= 2.8.1 didn't have this check and generated
14965 assembly code that the Unixware assembler rejected. */
14966 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
14968 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
14972 if (STACK_TOP_P (operands[0]))
14973 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
14975 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
/* Non-commutative ops (sub/div): direction matters.  */
14980 if (MEM_P (operands[1]))
14986 if (MEM_P (operands[2]))
14992 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
14995 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
14996 derived assemblers, confusingly reverse the direction of
14997 the operation for fsub{r} and fdiv{r} when the
14998 destination register is not st(0). The Intel assembler
14999 doesn't have this brain damage. Read !SYSV386_COMPAT to
15000 figure out what the hardware really does. */
15001 if (STACK_TOP_P (operands[0]))
15002 p = "{p\t%0, %2|rp\t%2, %0}";
15004 p = "{rp\t%2, %0|p\t%0, %2}";
15006 if (STACK_TOP_P (operands[0]))
15007 /* As above for fmul/fadd, we can't store to st(0). */
15008 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
15010 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
15015 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
15018 if (STACK_TOP_P (operands[0]))
15019 p = "{rp\t%0, %1|p\t%1, %0}";
15021 p = "{p\t%1, %0|rp\t%0, %1}";
15023 if (STACK_TOP_P (operands[0]))
15024 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
15026 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
15031 if (STACK_TOP_P (operands[0]))
15033 if (STACK_TOP_P (operands[1]))
15034 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
15036 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
15039 else if (STACK_TOP_P (operands[1]))
15042 p = "{\t%1, %0|r\t%0, %1}";
15044 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
15050 p = "{r\t%2, %0|\t%0, %2}";
15052 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
15058 gcc_unreachable ();
15065 /* Return needed mode for entity in optimize_mode_switching pass. */
/* Return the i387 control-word mode required by INSN for the
   optimize_mode_switching pass (see the comment above): UNINITIALIZED
   after calls/asm, ANY when the insn has no requirement, otherwise
   the rounding mode from the insn's i387_cw attribute.  */
15068 ix86_mode_needed (int entity, rtx insn)
15070 enum attr_i387_cw mode;
15072 /* The mode UNINITIALIZED is used to store control word after a
15073 function call or ASM pattern. The mode ANY specify that function
15074 has no requirements on the control word and make no changes in the
15075 bits we are interested in. */
15078 || (NONJUMP_INSN_P (insn)
15079 && (asm_noperands (PATTERN (insn)) >= 0
15080 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
15081 return I387_CW_UNINITIALIZED;
15083 if (recog_memoized (insn) < 0)
15084 return I387_CW_ANY;
15086 mode = get_attr_i387_cw (insn);
/* Each recognized rounding mode is returned when the corresponding
   optimization entity is being tracked (guards not visible here).  */
15091 if (mode == I387_CW_TRUNC)
15096 if (mode == I387_CW_FLOOR)
15101 if (mode == I387_CW_CEIL)
15106 if (mode == I387_CW_MASK_PM)
15111 gcc_unreachable ();
15114 return I387_CW_ANY;
15117 /* Output code to initialize control word copies used by trunc?f?i and
15118 rounding patterns. CURRENT_MODE is set to current control word,
15119 while NEW_MODE is set to new control word. */
/* Emit code that stores the current i387 control word, modifies its
   rounding-control / precision-mask bits for MODE, and saves the new
   control word into a dedicated stack slot (see the comment above).
   Two code paths: plain or/and on HImode, or movsi_insv_1 bitfield
   insertion when partial-register stalls are a concern.  */
15122 emit_i387_cw_initialization (int mode)
15124 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
15127 enum ix86_stack_slot slot;
15129 rtx reg = gen_reg_rtx (HImode);
/* fnstcw saves the live control word; work on a register copy.  */
15131 emit_insn (gen_x86_fnstcw_1 (stored_mode));
15132 emit_move_insn (reg, copy_rtx (stored_mode));
15134 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
15135 || optimize_function_for_size_p (cfun))
15139 case I387_CW_TRUNC:
15140 /* round toward zero (truncate) */
15141 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
15142 slot = SLOT_CW_TRUNC;
15145 case I387_CW_FLOOR:
15146 /* round down toward -oo */
15147 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)))
15148 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
15149 slot = SLOT_CW_FLOOR;
15153 /* round up toward +oo */
15154 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
15155 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
15156 slot = SLOT_CW_CEIL;
15159 case I387_CW_MASK_PM:
15160 /* mask precision exception for nearbyint() */
15161 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
15162 slot = SLOT_CW_MASK_PM;
15166 gcc_unreachable ();
/* Alternate path: insert the 4-bit rounding field directly.  */
15173 case I387_CW_TRUNC:
15174 /* round toward zero (truncate) */
15175 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
15176 slot = SLOT_CW_TRUNC;
15179 case I387_CW_FLOOR:
15180 /* round down toward -oo */
15181 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
15182 slot = SLOT_CW_FLOOR;
15186 /* round up toward +oo */
15187 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
15188 slot = SLOT_CW_CEIL;
15191 case I387_CW_MASK_PM:
15192 /* mask precision exception for nearbyint() */
15193 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
15194 slot = SLOT_CW_MASK_PM;
15198 gcc_unreachable ();
15202 gcc_assert (slot < MAX_386_STACK_LOCALS);
15204 new_mode = assign_386_stack_local (HImode, slot);
15205 emit_move_insn (new_mode, reg);
15208 /* Output code for INSN to convert a float to a signed int. OPERANDS
15209 are the insn operands. The output may be [HSD]Imode and the input
15210 operand may be [SDX]Fmode. */
/* Emit the fist/fistp/fisttp sequence converting an x87 float to a
   signed integer (see the comment above).  FISTTP selects the SSE3
   truncating store; otherwise the control word is swapped around the
   store via operands 2/3 when a specific rounding mode is needed.  */
15213 output_fix_trunc (rtx insn, rtx *operands, bool fisttp)
15215 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
15216 int dimode_p = GET_MODE (operands[0]) == DImode;
15217 int round_mode = get_attr_i387_cw (insn);
15219 /* Jump through a hoop or two for DImode, since the hardware has no
15220 non-popping instruction. We used to do this a different way, but
15221 that was somewhat fragile and broke with post-reload splitters. */
15222 if ((dimode_p || fisttp) && !stack_top_dies)
15223 output_asm_insn ("fld\t%y1", operands);
15225 gcc_assert (STACK_TOP_P (operands[1]));
15226 gcc_assert (MEM_P (operands[0]));
15227 gcc_assert (GET_MODE (operands[1]) != TFmode);
15230 output_asm_insn ("fisttp%Z0\t%0", operands);
/* Non-fisttp path: fldcw %3 installs the rounding mode, fldcw %2
   restores the saved one afterwards.  */
15233 if (round_mode != I387_CW_ANY)
15234 output_asm_insn ("fldcw\t%3", operands);
15235 if (stack_top_dies || dimode_p)
15236 output_asm_insn ("fistp%Z0\t%0", operands);
15238 output_asm_insn ("fist%Z0\t%0", operands);
15239 if (round_mode != I387_CW_ANY)
15240 output_asm_insn ("fldcw\t%2", operands);
15246 /* Output code for x87 ffreep insn. The OPNO argument, which may only
15247 have the values zero or one, indicates the ffreep insn's operand
15248 from the OPERANDS array. */
/* Return the template that pops x87 register OPERANDS[OPNO] (see the
   comment above): real "ffreep" when the assembler supports it,
   hand-encoded 0xdf 0xc0+i bytes otherwise, falling back to fstp.  */
static const char *
15251 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
15253 if (TARGET_USE_FFREEP)
15254 #ifdef HAVE_AS_IX86_FFREEP
15255 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
15258 static char retval[32];
15259 int regno = REGNO (operands[opno]);
15261 gcc_assert (FP_REGNO_P (regno));
15263 regno -= FIRST_STACK_REG;
/* ffreep st(i) encodes as 0xdf 0xc0+i; emit the raw bytes.  */
15265 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
15270 return opno ? "fstp\t%y1" : "fstp\t%y0";
15274 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
15275 should be used. UNORDERED_P is true when fucom should be used. */
/* Emit the FP compare for INSN (see the comment above): SSE
   [u]comiss/[u]comisd when an SSE register is involved, otherwise one
   of the x87 fcom/fucom/fcomi families, popping dead stack
   registers.  NOTE(review): a few interior lines are missing from
   this listing.  */
15278 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
15280 int stack_top_dies;
15281 rtx cmp_op0, cmp_op1;
15282 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
15286 cmp_op0 = operands[0];
15287 cmp_op1 = operands[1];
15291 cmp_op0 = operands[1];
15292 cmp_op1 = operands[2];
/* SSE path: %v prefixes the AVX "v" when applicable.  */
15297 if (GET_MODE (operands[0]) == SFmode)
15299 return "%vucomiss\t{%1, %0|%0, %1}";
15301 return "%vcomiss\t{%1, %0|%0, %1}";
15304 return "%vucomisd\t{%1, %0|%0, %1}";
15306 return "%vcomisd\t{%1, %0|%0, %1}";
15309 gcc_assert (STACK_TOP_P (cmp_op0));
15311 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
/* Compare against zero: ftst, popping st(0) if it dies.  */
15313 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
15315 if (stack_top_dies)
15317 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
15318 return output_387_ffreep (operands, 1);
15321 return "ftst\n\tfnstsw\t%0";
15324 if (STACK_REG_P (cmp_op1)
15326 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
15327 && REGNO (cmp_op1) != FIRST_STACK_REG)
15329 /* If both the top of the 387 stack dies, and the other operand
15330 is also a stack register that dies, then this must be a
15331 `fcompp' float compare */
15335 /* There is no double popping fcomi variant. Fortunately,
15336 eflags is immune from the fstp's cc clobbering. */
15338 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
15340 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
15341 return output_387_ffreep (operands, 0);
15346 return "fucompp\n\tfnstsw\t%0";
15348 return "fcompp\n\tfnstsw\t%0";
15353 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
15355 static const char * const alt[16] =
15357 "fcom%Z2\t%y2\n\tfnstsw\t%0",
15358 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
15359 "fucom%Z2\t%y2\n\tfnstsw\t%0",
15360 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
15362 "ficom%Z2\t%y2\n\tfnstsw\t%0",
15363 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
15367 "fcomi\t{%y1, %0|%0, %y1}",
15368 "fcomip\t{%y1, %0|%0, %y1}",
15369 "fucomi\t{%y1, %0|%0, %y1}",
15370 "fucomip\t{%y1, %0|%0, %y1}",
/* Build the 4-bit index matching the encoding comment above.  */
15381 mask = eflags_p << 3;
15382 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
15383 mask |= unordered_p << 1;
15384 mask |= stack_top_dies;
15386 gcc_assert (mask < 16);
/* Output one absolute jump-table element: a .long (or .quad guard on
   64-bit, lines not fully visible) referencing local label VALUE.  */
15395 ix86_output_addr_vec_elt (FILE *file, int value)
15397 const char *directive = ASM_LONG;
15401 directive = ASM_QUAD;
15403 gcc_assert (!TARGET_64BIT);
15406 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
/* Output one relative jump-table element: the difference between
   local labels VALUE and REL, or a @GOTOFF / GOT-relative form for
   PIC depending on target and assembler capabilities.  */
15410 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
15412 const char *directive = ASM_LONG;
15415 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
15416 directive = ASM_QUAD;
15418 gcc_assert (!TARGET_64BIT);
15420 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
15421 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
15422 fprintf (file, "%s%s%d-%s%d\n",
15423 directive, LPREFIX, value, LPREFIX, rel);
15424 else if (HAVE_AS_GOTOFF_IN_DATA)
15425 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
15427 else if (TARGET_MACHO)
15429 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
15430 machopic_output_function_base_name (file);
/* Fallback: label relative to the GOT symbol.  */
15435 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
15436 GOT_SYMBOL_NAME, LPREFIX, value);
15439 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
/* Emit "xor reg,reg" (with a flags clobber) or "mov $0,reg" to zero
   DEST, as the comment above describes.  Only valid post-reload.  */
15443 ix86_expand_clear (rtx dest)
15447 /* We play register width games, which are only valid after reload. */
15448 gcc_assert (reload_completed);
15450 /* Avoid HImode and its attendant prefix byte. */
15451 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
15452 dest = gen_rtx_REG (SImode, REGNO (dest));
15453 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
15455 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
15456 if (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ())
/* xor clobbers the flags, so wrap the SET in a PARALLEL.  */
15458 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15459 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
15465 /* X is an unchanging MEM. If it is a constant pool reference, return
15466 the constant pool rtx, else NULL. */
/* X is an unchanging MEM; return its constant-pool entry if the
   (delegitimized) address is a constant-pool symbol, else NULL.  */
15469 maybe_get_pool_constant (rtx x)
15471 x = ix86_delegitimize_address (XEXP (x, 0));
15473 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
15474 return get_pool_constant (x);
/* Expand a scalar move of mode MODE between OPERANDS[0] and
   OPERANDS[1], legitimizing TLS and dllimport symbols, PIC
   references, pushes and large constants along the way.
   NOTE(review): interior lines are missing from this listing;
   comments describe only the visible logic.  */
15480 ix86_expand_move (enum machine_mode mode, rtx operands[])
15483 enum tls_model model;
/* TLS symbol source: legitimize to the proper access sequence.  */
15488 if (GET_CODE (op1) == SYMBOL_REF)
15490 model = SYMBOL_REF_TLS_MODEL (op1);
15493 op1 = legitimize_tls_address (op1, model, true);
15494 op1 = force_operand (op1, op0);
15497 if (GET_MODE (op1) != mode)
15498 op1 = convert_to_mode (mode, op1, 1);
15500 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15501 && SYMBOL_REF_DLLIMPORT_P (op1))
15502 op1 = legitimize_dllimport_symbol (op1, false);
/* (const (plus symbol offset)): legitimize the symbol part, then
   re-add the offset.  */
15504 else if (GET_CODE (op1) == CONST
15505 && GET_CODE (XEXP (op1, 0)) == PLUS
15506 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
15508 rtx addend = XEXP (XEXP (op1, 0), 1);
15509 rtx symbol = XEXP (XEXP (op1, 0), 0);
15512 model = SYMBOL_REF_TLS_MODEL (symbol);
15514 tmp = legitimize_tls_address (symbol, model, true);
15515 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15516 && SYMBOL_REF_DLLIMPORT_P (symbol))
15517 tmp = legitimize_dllimport_symbol (symbol, true);
15521 tmp = force_operand (tmp, NULL);
15522 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
15523 op0, 1, OPTAB_DIRECT);
15526 if (GET_MODE (tmp) != mode)
15527 op1 = convert_to_mode (mode, tmp, 1);
/* PIC handling of symbolic operands.  */
15531 if ((flag_pic || MACHOPIC_INDIRECT)
15532 && symbolic_operand (op1, mode))
15534 if (TARGET_MACHO && !TARGET_64BIT)
15537 /* dynamic-no-pic */
15538 if (MACHOPIC_INDIRECT)
15540 rtx temp = ((reload_in_progress
15541 || ((op0 && REG_P (op0))
15543 ? op0 : gen_reg_rtx (Pmode));
15544 op1 = machopic_indirect_data_reference (op1, temp);
15546 op1 = machopic_legitimize_pic_address (op1, mode,
15547 temp == op1 ? 0 : temp);
15549 if (op0 != op1 && GET_CODE (op0) != MEM)
15551 rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
15555 if (GET_CODE (op0) == MEM)
15556 op1 = force_reg (Pmode, op1);
15560 if (GET_CODE (temp) != REG)
15561 temp = gen_reg_rtx (Pmode);
15562 temp = legitimize_pic_address (op1, temp);
15567 /* dynamic-no-pic */
15573 op1 = force_reg (mode, op1);
15574 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
15576 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
15577 op1 = legitimize_pic_address (op1, reg);
15580 if (GET_MODE (op1) != mode)
15581 op1 = convert_to_mode (mode, op1, 1);
/* MEM destination: source may need a register (no mem-to-mem moves,
   except for push operands).  */
15588 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
15589 || !push_operand (op0, mode))
15591 op1 = force_reg (mode, op1);
15593 if (push_operand (op0, mode)
15594 && ! general_no_elim_operand (op1, mode))
15595 op1 = copy_to_mode_reg (mode, op1);
15597 /* Force large constants in 64bit compilation into register
15598 to get them CSEed. */
15599 if (can_create_pseudo_p ()
15600 && (mode == DImode) && TARGET_64BIT
15601 && immediate_operand (op1, mode)
15602 && !x86_64_zext_immediate_operand (op1, VOIDmode)
15603 && !register_operand (op0, mode)
15605 op1 = copy_to_mode_reg (mode, op1);
15607 if (can_create_pseudo_p ()
15608 && FLOAT_MODE_P (mode)
15609 && GET_CODE (op1) == CONST_DOUBLE)
15611 /* If we are loading a floating point constant to a register,
15612 force the value to memory now, since we'll get better code
15613 out the back end. */
15615 op1 = validize_mem (force_const_mem (mode, op1));
15616 if (!register_operand (op0, mode))
15618 rtx temp = gen_reg_rtx (mode);
15619 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
15620 emit_move_insn (op0, temp);
15626 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
/* Expand a vector move of mode MODE: force non-zero constants into
   the constant pool, and route under-aligned SSE memory operands
   through ix86_expand_vector_move_misalign.  */
15630 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
15632 rtx op0 = operands[0], op1 = operands[1];
15633 unsigned int align = GET_MODE_ALIGNMENT (mode);
15635 /* Force constants other than zero into memory. We do not know how
15636 the instructions used to build constants modify the upper 64 bits
15637 of the register, once we have that information we may be able
15638 to handle some of them more efficiently. */
15639 if (can_create_pseudo_p ()
15640 && register_operand (op0, mode)
15641 && (CONSTANT_P (op1)
15642 || (GET_CODE (op1) == SUBREG
15643 && CONSTANT_P (SUBREG_REG (op1))))
15644 && !standard_sse_constant_p (op1))
15645 op1 = validize_mem (force_const_mem (mode, op1));
15647 /* We need to check memory alignment for SSE mode since attribute
15648 can make operands unaligned. */
15649 if (can_create_pseudo_p ()
15650 && SSE_REG_MODE_P (mode)
15651 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
15652 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
15656 /* ix86_expand_vector_move_misalign() does not like constants ... */
15657 if (CONSTANT_P (op1)
15658 || (GET_CODE (op1) == SUBREG
15659 && CONSTANT_P (SUBREG_REG (op1))))
15660 op1 = validize_mem (force_const_mem (mode, op1));
15662 /* ... nor both arguments in memory. */
15663 if (!register_operand (op0, mode)
15664 && !register_operand (op1, mode))
15665 op1 = force_reg (mode, op1);
15667 tmp[0] = op0; tmp[1] = op1;
15668 ix86_expand_vector_move_misalign (mode, tmp);
15672 /* Make operand1 a register if it isn't already. */
15673 if (can_create_pseudo_p ()
15674 && !register_operand (op0, mode)
15675 && !register_operand (op1, mode))
15677 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
15681 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
15684 /* Split 32-byte AVX unaligned load and store if needed. */
/* OP0 <- OP1 in a 32-byte AVX mode.  When the target tuning flags ask
   for it, the unaligned access is split into two 16-byte halves;
   otherwise a single 256-bit unaligned move insn is emitted.
   NOTE(review): elided lines hide the case labels of the switch.  */
15687 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
15690 rtx (*extract) (rtx, rtx, rtx);
15691 rtx (*move_unaligned) (rtx, rtx);
15692 enum machine_mode mode;
/* Select the 128-bit extract generator, the 256-bit unaligned move
   generator, and (on elided lines) the half-width MODE, keyed on the
   vector mode of OP0.  */
15694 switch (GET_MODE (op0))
15697 gcc_unreachable ();
15699 extract = gen_avx_vextractf128v32qi;
15700 move_unaligned = gen_avx_movdqu256;
15704 extract = gen_avx_vextractf128v8sf;
15705 move_unaligned = gen_avx_movups256;
15709 extract = gen_avx_vextractf128v4df;
15710 move_unaligned = gen_avx_movupd256;
/* Unaligned load: read the two 16-byte halves and re-concatenate.  */
15715 if (MEM_P (op1) && TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
15717 rtx r = gen_reg_rtx (mode);
15718 m = adjust_address (op1, mode, 0);
15719 emit_move_insn (r, m);
15720 m = adjust_address (op1, mode, 16);
15721 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
15722 emit_move_insn (op0, r);
/* Unaligned store: extract lane 0 then lane 1 into the two halves.  */
15724 else if (MEM_P (op0) && TARGET_AVX256_SPLIT_UNALIGNED_STORE)
15726 m = adjust_address (op0, mode, 0);
15727 emit_insn (extract (m, op1, const0_rtx));
15728 m = adjust_address (op0, mode, 16);
15729 emit_insn (extract (m, op1, const1_rtx));
/* Fallback: one full-width unaligned move.  */
15732 emit_insn (move_unaligned (op0, op1));
15735 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
15736 straight to ix86_expand_vector_move. */
15737 /* Code generation for scalar reg-reg moves of single and double precision data:
15738 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
15742 if (x86_sse_partial_reg_dependency == true)
15747 Code generation for scalar loads of double precision data:
15748 if (x86_sse_split_regs == true)
15749 movlpd mem, reg (gas syntax)
15753 Code generation for unaligned packed loads of single precision data
15754 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
15755 if (x86_sse_unaligned_move_optimal)
15758 if (x86_sse_partial_reg_dependency == true)
15770 Code generation for unaligned packed loads of double precision data
15771 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
15772 if (x86_sse_unaligned_move_optimal)
15775 if (x86_sse_split_regs == true)
/* Emit a misaligned vector move OPERANDS[0] <- OPERANDS[1] in MODE,
   choosing instructions by tuning flags (movups/movdqu/movupd or
   half-width load/store pairs).  NOTE(review): many lines are elided
   in this listing, including the TARGET_AVX fast path and the
   MEM_P(op1) / MEM_P(op0) branch heads.  */
15788 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
/* AVX path (guard elided): dispatch on mode class and size.  */
15797 switch (GET_MODE_CLASS (mode))
15799 case MODE_VECTOR_INT:
15801 switch (GET_MODE_SIZE (mode))
15804 /* If we're optimizing for size, movups is the smallest. */
15805 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15807 op0 = gen_lowpart (V4SFmode, op0);
15808 op1 = gen_lowpart (V4SFmode, op1);
15809 emit_insn (gen_sse_movups (op0, op1));
15812 op0 = gen_lowpart (V16QImode, op0);
15813 op1 = gen_lowpart (V16QImode, op1);
15814 emit_insn (gen_sse2_movdqu (op0, op1));
/* 32-byte integer vectors: defer to the AVX256 splitter.  */
15817 op0 = gen_lowpart (V32QImode, op0);
15818 op1 = gen_lowpart (V32QImode, op1);
15819 ix86_avx256_split_vector_move_misalign (op0, op1);
15822 gcc_unreachable ();
15825 case MODE_VECTOR_FLOAT:
15826 op0 = gen_lowpart (mode, op0);
15827 op1 = gen_lowpart (mode, op1);
15832 emit_insn (gen_sse_movups (op0, op1));
15835 ix86_avx256_split_vector_move_misalign (op0, op1);
15838 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15840 op0 = gen_lowpart (V4SFmode, op0);
15841 op1 = gen_lowpart (V4SFmode, op1);
15842 emit_insn (gen_sse_movups (op0, op1));
15845 emit_insn (gen_sse2_movupd (op0, op1));
15848 ix86_avx256_split_vector_move_misalign (op0, op1);
15851 gcc_unreachable ();
15856 gcc_unreachable ();
/* Non-AVX, unaligned LOAD (MEM_P (op1) guard elided).  */
15864 /* If we're optimizing for size, movups is the smallest. */
15865 if (optimize_insn_for_size_p ()
15866 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15868 op0 = gen_lowpart (V4SFmode, op0);
15869 op1 = gen_lowpart (V4SFmode, op1);
15870 emit_insn (gen_sse_movups (op0, op1));
15874 /* ??? If we have typed data, then it would appear that using
15875 movdqu is the only way to get unaligned data loaded with
15877 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
15879 op0 = gen_lowpart (V16QImode, op0);
15880 op1 = gen_lowpart (V16QImode, op1);
15881 emit_insn (gen_sse2_movdqu (op0, op1));
15885 if (TARGET_SSE2 && mode == V2DFmode)
15889 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
15891 op0 = gen_lowpart (V2DFmode, op0);
15892 op1 = gen_lowpart (V2DFmode, op1);
15893 emit_insn (gen_sse2_movupd (op0, op1));
15897 /* When SSE registers are split into halves, we can avoid
15898 writing to the top half twice. */
15899 if (TARGET_SSE_SPLIT_REGS)
15901 emit_clobber (op0);
15906 /* ??? Not sure about the best option for the Intel chips.
15907 The following would seem to satisfy; the register is
15908 entirely cleared, breaking the dependency chain. We
15909 then store to the upper half, with a dependency depth
15910 of one. A rumor has it that Intel recommends two movsd
15911 followed by an unpacklpd, but this is unconfirmed. And
15912 given that the dependency depth of the unpacklpd would
15913 still be one, I'm not sure why this would be better. */
15914 zero = CONST0_RTX (V2DFmode);
/* Split the 16-byte load into low/high 8-byte halves.  */
15917 m = adjust_address (op1, DFmode, 0);
15918 emit_insn (gen_sse2_loadlpd (op0, zero, m));
15919 m = adjust_address (op1, DFmode, 8);
15920 emit_insn (gen_sse2_loadhpd (op0, op0, m));
15924 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
15926 op0 = gen_lowpart (V4SFmode, op0);
15927 op1 = gen_lowpart (V4SFmode, op1);
15928 emit_insn (gen_sse_movups (op0, op1));
/* Break the false dependency on op0's previous value before the
   partial loadlps/loadhps writes below.  */
15932 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
15933 emit_move_insn (op0, CONST0_RTX (mode));
15935 emit_clobber (op0);
15937 if (mode != V4SFmode)
15938 op0 = gen_lowpart (V4SFmode, op0);
15939 m = adjust_address (op1, V2SFmode, 0);
15940 emit_insn (gen_sse_loadlps (op0, op0, m));
15941 m = adjust_address (op1, V2SFmode, 8);
15942 emit_insn (gen_sse_loadhps (op0, op0, m));
/* Non-AVX, unaligned STORE.  */
15945 else if (MEM_P (op0))
15947 /* If we're optimizing for size, movups is the smallest. */
15948 if (optimize_insn_for_size_p ()
15949 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15951 op0 = gen_lowpart (V4SFmode, op0);
15952 op1 = gen_lowpart (V4SFmode, op1);
15953 emit_insn (gen_sse_movups (op0, op1));
15957 /* ??? Similar to above, only less clear because of quote
15958 typeless stores unquote. */
15959 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
15960 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
15962 op0 = gen_lowpart (V16QImode, op0);
15963 op1 = gen_lowpart (V16QImode, op1);
15964 emit_insn (gen_sse2_movdqu (op0, op1));
15968 if (TARGET_SSE2 && mode == V2DFmode)
15970 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
15972 op0 = gen_lowpart (V2DFmode, op0);
15973 op1 = gen_lowpart (V2DFmode, op1);
15974 emit_insn (gen_sse2_movupd (op0, op1));
/* Split the 16-byte store into low/high 8-byte halves.  */
15978 m = adjust_address (op0, DFmode, 0);
15979 emit_insn (gen_sse2_storelpd (m, op1));
15980 m = adjust_address (op0, DFmode, 8);
15981 emit_insn (gen_sse2_storehpd (m, op1));
15986 if (mode != V4SFmode)
15987 op1 = gen_lowpart (V4SFmode, op1);
15989 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
15991 op0 = gen_lowpart (V4SFmode, op0);
15992 emit_insn (gen_sse_movups (op0, op1));
15996 m = adjust_address (op0, V2SFmode, 0);
15997 emit_insn (gen_sse_storelps (m, op1));
15998 m = adjust_address (op0, V2SFmode, 8);
15999 emit_insn (gen_sse_storehps (m, op1));
16004 gcc_unreachable ();
16007 /* Expand a push in MODE. This is some mode for which we do not support
16008 proper push instructions, at least from the registers that we expect
16009 the value to live in. */
/* Emulates "push X": decrements the stack pointer by the mode size,
   then stores X through the new stack pointer.  */
16012 ix86_expand_push (enum machine_mode mode, rtx x)
/* sp -= GET_MODE_SIZE (mode); expand_simple_binop may or may not
   produce its result directly in stack_pointer_rtx.  */
16016 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
16017 GEN_INT (-GET_MODE_SIZE (mode)),
16018 stack_pointer_rtx, 1, OPTAB_DIRECT);
16019 if (tmp != stack_pointer_rtx)
16020 emit_move_insn (stack_pointer_rtx, tmp);
16022 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
16024 /* When we push an operand onto stack, it has to be aligned at least
16025 at the function argument boundary. However since we don't have
16026 the argument type, we can't determine the actual argument
16028 emit_move_insn (tmp, x);
16031 /* Helper function of ix86_fixup_binary_operands to canonicalize
16032 operand order. Returns true if the operands should be swapped. */
16035 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
16038 rtx dst = operands[0];
16039 rtx src1 = operands[1];
16040 rtx src2 = operands[2];
16042 /* If the operation is not commutative, we can't do anything. */
16043 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
16046 /* Highest priority is that src1 should match dst. */
16047 if (rtx_equal_p (dst, src1))
16049 if (rtx_equal_p (dst, src2))
16052 /* Next highest priority is that immediate constants come second. */
16053 if (immediate_operand (src2, mode))
16055 if (immediate_operand (src1, mode))
16058 /* Lowest priority is that memory references should come second. */
/* NOTE(review): the return statements for each priority rule are on
   lines elided from this listing.  */
16068 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
16069 destination to use for the operation. If different from the true
16070 destination in operands[0], a copy operation will be required. */
16073 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
16076 rtx dst = operands[0];
16077 rtx src1 = operands[1];
16078 rtx src2 = operands[2];
16080 /* Canonicalize operand order. */
16081 if (ix86_swap_binary_operands_p (code, mode, operands))
16085 /* It is invalid to swap operands of different modes. */
16086 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
/* (swap of src1/src2 happens on elided lines)  */
16093 /* Both source operands cannot be in memory. */
16094 if (MEM_P (src1) && MEM_P (src2))
16096 /* Optimization: Only read from memory once. */
16097 if (rtx_equal_p (src1, src2))
16099 src2 = force_reg (mode, src2);
16103 src2 = force_reg (mode, src2);
16106 /* If the destination is memory, and we do not have matching source
16107 operands, do things in registers. */
16108 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
16109 dst = gen_reg_rtx (mode);
16111 /* Source 1 cannot be a constant. */
16112 if (CONSTANT_P (src1))
16113 src1 = force_reg (mode, src1);
16115 /* Source 1 cannot be a non-matching memory. */
16116 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
16117 src1 = force_reg (mode, src1);
16119 /* Improve address combine. */
/* Guard partially elided; forces src2 into a register for integer
   modes under a condition not fully visible here.  */
16121 && GET_MODE_CLASS (mode) == MODE_INT
16123 src2 = force_reg (mode, src2);
/* Write the (possibly replaced) sources back for the caller.  */
16125 operands[1] = src1;
16126 operands[2] = src2;
16130 /* Similarly, but assume that the destination has already been
16131 set up properly. */
/* Wrapper around ix86_fixup_binary_operands that additionally asserts
   no destination copy is required.  */
16134 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
16135 enum machine_mode mode, rtx operands[])
16137 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
16138 gcc_assert (dst == operands[0]);
16141 /* Attempt to expand a binary operator. Make the expansion closer to the
16142 actual machine, then just general_operand, which will allow 3 separate
16143 memory references (one output, two input) in a single insn. */
16146 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
16149 rtx src1, src2, dst, op, clob;
16151 dst = ix86_fixup_binary_operands (code, mode, operands);
16152 src1 = operands[1];
16153 src2 = operands[2];
16155 /* Emit the instruction. */
16157 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
16158 if (reload_in_progress)
16160 /* Reload doesn't know about the flags register, and doesn't know that
16161 it doesn't want to clobber it. We can only do this with PLUS. */
16162 gcc_assert (code == PLUS);
16165 else if (reload_completed
16167 && !rtx_equal_p (dst, src1))
16169 /* This is going to be an LEA; avoid splitting it later. */
/* Normal case: PARALLEL of the SET plus a clobber of the flags reg,
   matching the standard x86 arithmetic insn patterns.  */
16174 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16175 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
16178 /* Fix up the destination if needed. */
16179 if (dst != operands[0])
16180 emit_move_insn (operands[0], dst);
16183 /* Return TRUE or FALSE depending on whether the binary operator meets the
16184 appropriate constraints. */
16187 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
16190 rtx dst = operands[0];
16191 rtx src1 = operands[1];
16192 rtx src2 = operands[2];
16194 /* Both source operands cannot be in memory. */
16195 if (MEM_P (src1) && MEM_P (src2))
16198 /* Canonicalize operand order for commutative operators. */
16199 if (ix86_swap_binary_operands_p (code, mode, operands))
16206 /* If the destination is memory, we must have a matching source operand. */
16207 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
16210 /* Source 1 cannot be a constant. */
16211 if (CONSTANT_P (src1))
16214 /* Source 1 cannot be a non-matching memory. */
16215 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
16216 /* Support "andhi/andsi/anddi" as a zero-extending move. */
16217 return (code == AND
16220 || (TARGET_64BIT && mode == DImode))
16221 && satisfies_constraint_L (src2));
16226 /* Attempt to expand a unary operator. Make the expansion closer to the
16227 actual machine, then just general_operand, which will allow 2 separate
16228 memory references (one output, one input) in a single insn. */
16231 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
16234 int matching_memory;
16235 rtx src, dst, op, clob;
16240 /* If the destination is memory, and we do not have matching source
16241 operands, do things in registers. */
16242 matching_memory = 0;
16245 if (rtx_equal_p (dst, src))
16246 matching_memory = 1;
16248 dst = gen_reg_rtx (mode);
16251 /* When source operand is memory, destination must match. */
16252 if (MEM_P (src) && !matching_memory)
16253 src = force_reg (mode, src);
16255 /* Emit the instruction. */
16257 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
16258 if (reload_in_progress || code == NOT)
16260 /* Reload doesn't know about the flags register, and doesn't know that
16261 it doesn't want to clobber it. */
/* Only NOT is safe to emit without a flags clobber here.  */
16262 gcc_assert (code == NOT);
/* Other unary codes: PARALLEL of the SET plus a flags clobber.  */
16267 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16268 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
16271 /* Fix up the destination if needed. */
16272 if (dst != operands[0])
16273 emit_move_insn (operands[0], dst);
16276 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
16277 divisor are within the range [0-255]. */
16280 ix86_split_idivmod (enum machine_mode mode, rtx operands[],
16283 rtx end_label, qimode_label;
16284 rtx insn, div, mod;
16285 rtx scratch, tmp0, tmp1, tmp2;
16286 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
16287 rtx (*gen_zero_extend) (rtx, rtx);
16288 rtx (*gen_test_ccno_1) (rtx, rtx);
/* Pick SImode or DImode insn generators (switch head elided).  */
16293 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
16294 gen_test_ccno_1 = gen_testsi_ccno_1;
16295 gen_zero_extend = gen_zero_extendqisi2;
16298 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
16299 gen_test_ccno_1 = gen_testdi_ccno_1;
16300 gen_zero_extend = gen_zero_extendqidi2;
16303 gcc_unreachable ();
16306 end_label = gen_label_rtx ();
16307 qimode_label = gen_label_rtx ();
16309 scratch = gen_reg_rtx (mode);
16311 /* Use 8bit unsigned divmod if dividend and divisor are within
16312 the range [0-255]. */
/* (dividend | divisor) & ~0xff == 0 iff both fit in 8 bits.  */
16313 emit_move_insn (scratch, operands[2]);
16314 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
16315 scratch, 1, OPTAB_DIRECT);
16316 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
16317 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
16318 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
16319 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
16320 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
16322 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
/* 50/50 branch prediction hint for the fast-path jump.  */
16323 predict_jump (REG_BR_PROB_BASE * 50 / 100);
16324 JUMP_LABEL (insn) = qimode_label;
16326 /* Generate original signed/unsigned divmod. */
16327 div = gen_divmod4_1 (operands[0], operands[1],
16328 operands[2], operands[3]);
16331 /* Branch to the end. */
16332 emit_jump_insn (gen_jump (end_label));
16335 /* Generate 8bit unsigned divide. */
16336 emit_label (qimode_label);
16337 /* Don't use operands[0] for result of 8bit divide since not all
16338 registers support QImode ZERO_EXTRACT. */
16339 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
16340 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
16341 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
16342 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
/* Build REG_EQUAL notes describing the true (signed/unsigned)
   division; branch heads are on elided lines.  */
16346 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
16347 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
16351 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
16352 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
16355 /* Extract remainder from AH. */
16356 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
16357 if (REG_P (operands[1]))
16358 insn = emit_move_insn (operands[1], tmp1);
16361 /* Need a new scratch register since the old one has result
16363 scratch = gen_reg_rtx (mode);
16364 emit_move_insn (scratch, tmp1);
16365 insn = emit_move_insn (operands[1], scratch);
16367 set_unique_reg_note (insn, REG_EQUAL, mod);
16369 /* Zero extend quotient from AL. */
16370 tmp1 = gen_lowpart (QImode, tmp0);
16371 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
16372 set_unique_reg_note (insn, REG_EQUAL, div);
16374 emit_label (end_label);
/* Tuning knobs for the LEA-vs-ALU heuristics below (distances are in
   half-cycles, hence the shift).  */
16377 #define LEA_MAX_STALL (3)
16378 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
16380 /* Increase given DISTANCE in half-cycles according to
16381 dependencies between PREV and NEXT instructions.
16382 Add 1 half-cycle if there is no dependency and
16383 go to next cycle if there is some dependency. */
16385 static unsigned int
16386 increase_distance (rtx prev, rtx next, unsigned int distance)
16391 if (!prev || !next)
/* Round up to an even (full-cycle) boundary, then add a cycle.  */
16392 return distance + (distance & 1) + 2;
16394 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
16395 return distance + 1;
/* Dependency exists if any use in NEXT reads a reg defined by PREV.  */
16397 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
16398 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
16399 if (!DF_REF_IS_ARTIFICIAL (*def_rec)
16400 && DF_REF_REGNO (*use_rec) == DF_REF_REGNO (*def_rec))
16401 return distance + (distance & 1) + 2;
16403 return distance + 1;
16406 /* Function checks if instruction INSN defines register number
16407 REGNO1 or REGNO2. */
16410 insn_defines_reg (unsigned int regno1, unsigned int regno2,
/* Walk the df definition records of INSN; artificial defs (block
   boundary artifacts) are ignored.  */
16415 for (def_rec = DF_INSN_DEFS (insn); *def_rec; def_rec++)
16416 if (DF_REF_REG_DEF_P (*def_rec)
16417 && !DF_REF_IS_ARTIFICIAL (*def_rec)
16418 && (regno1 == DF_REF_REGNO (*def_rec)
16419 || regno2 == DF_REF_REGNO (*def_rec)))
16427 /* Function checks if instruction INSN uses register number
16428 REGNO as a part of address expression. */
16431 insn_uses_reg_mem (unsigned int regno, rtx insn)
/* DF_REF_REG_MEM_P is true only for uses inside a MEM address.  */
16435 for (use_rec = DF_INSN_USES (insn); *use_rec; use_rec++)
16436 if (DF_REF_REG_MEM_P (*use_rec) && regno == DF_REF_REGNO (*use_rec))
16442 /* Search backward for non-agu definition of register number REGNO1
16443 or register number REGNO2 in basic block starting from instruction
16444 START up to head of basic block or instruction INSN.
16446 Function puts true value into *FOUND var if definition was found
16447 and false otherwise.
16449 Distance in half-cycles between START and found instruction or head
16450 of BB is added to DISTANCE and returned. */
16453 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
16454 rtx insn, int distance,
16455 rtx start, bool *found)
16457 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
16460 enum attr_type insn_type;
/* Walk backwards from START while within the search threshold
   (loop head partially elided).  */
16466 && distance < LEA_SEARCH_THRESHOLD)
16468 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
16470 distance = increase_distance (prev, next, distance);
16471 if (insn_defines_reg (regno1, regno2, prev))
/* A non-LEA definition is the "non-AGU define" we look for;
   a LEA definition terminates the search differently (on
   elided lines).  */
16473 insn_type = get_attr_type (prev);
16474 if (insn_type != TYPE_LEA)
16483 if (prev == BB_HEAD (bb))
16486 prev = PREV_INSN (prev);
16492 /* Search backward for non-agu definition of register number REGNO1
16493 or register number REGNO2 in INSN's basic block until
16494 1. Pass LEA_SEARCH_THRESHOLD instructions, or
16495 2. Reach neighbour BBs boundary, or
16496 3. Reach agu definition.
16497 Returns the distance between the non-agu definition point and INSN.
16498 If no definition point, returns -1. */
16501 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
16504 basic_block bb = BLOCK_FOR_INSN (insn);
16506 bool found = false;
/* First scan INSN's own block backwards.  */
16508 if (insn != BB_HEAD (bb))
16509 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
16510 distance, PREV_INSN (insn),
/* Not found locally and still within budget: look into predecessor
   blocks.  A self-loop ("simple loop") is handled specially.  */
16513 if (!found && distance < LEA_SEARCH_THRESHOLD)
16517 bool simple_loop = false;
16519 FOR_EACH_EDGE (e, ei, bb->preds)
16522 simple_loop = true;
16527 distance = distance_non_agu_define_in_bb (regno1, regno2,
16529 BB_END (bb), &found);
/* Otherwise take the shortest distance over all predecessors.  */
16532 int shortest_dist = -1;
16533 bool found_in_bb = false;
16535 FOR_EACH_EDGE (e, ei, bb->preds)
16538 = distance_non_agu_define_in_bb (regno1, regno2,
16544 if (shortest_dist < 0)
16545 shortest_dist = bb_dist;
16546 else if (bb_dist > 0)
16547 shortest_dist = MIN (bb_dist, shortest_dist);
16553 distance = shortest_dist;
16557 /* get_attr_type may modify recog data. We want to make sure
16558 that recog data is valid for instruction INSN, on which
16559 distance_non_agu_define is called. INSN is unchanged here. */
16560 extract_insn_cached (insn);
/* Convert half-cycles to cycles for the caller.  */
16565 return distance >> 1;
16568 /* Return the distance in half-cycles between INSN and the next
16569 insn that uses register number REGNO in memory address added
16570 to DISTANCE. Return -1 if REGNO0 is set.
16572 Put true value into *FOUND if register usage was found and
16574 Put true value into *REDEFINED if register redefinition was
16575 found and false otherwise. */
16578 distance_agu_use_in_bb (unsigned int regno,
16579 rtx insn, int distance, rtx start,
16580 bool *found, bool *redefined)
16582 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
16587 *redefined = false;
/* Walk forwards from START while within the search threshold
   (loop head partially elided).  */
16591 && distance < LEA_SEARCH_THRESHOLD)
16593 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
16595 distance = increase_distance(prev, next, distance);
16596 if (insn_uses_reg_mem (regno, next))
16598 /* Return DISTANCE if OP0 is used in memory
16599 address in NEXT. */
16604 if (insn_defines_reg (regno, INVALID_REGNUM, next))
16606 /* Return -1 if OP0 is set in NEXT. */
16614 if (next == BB_END (bb))
16617 next = NEXT_INSN (next);
16623 /* Return the distance between INSN and the next insn that uses
16624 register number REGNO0 in memory address. Return -1 if no such
16625 a use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
16628 distance_agu_use (unsigned int regno0, rtx insn)
16630 basic_block bb = BLOCK_FOR_INSN (insn);
16632 bool found = false;
16633 bool redefined = false;
/* First scan forwards within INSN's own block.  */
16635 if (insn != BB_END (bb))
16636 distance = distance_agu_use_in_bb (regno0, insn, distance,
16638 &found, &redefined);
/* Then continue into successor blocks, mirroring
   distance_non_agu_define's predecessor walk.  */
16640 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
16644 bool simple_loop = false;
16646 FOR_EACH_EDGE (e, ei, bb->succs)
16649 simple_loop = true;
16654 distance = distance_agu_use_in_bb (regno0, insn,
16655 distance, BB_HEAD (bb),
16656 &found, &redefined);
16659 int shortest_dist = -1;
16660 bool found_in_bb = false;
16661 bool redefined_in_bb = false;
16663 FOR_EACH_EDGE (e, ei, bb->succs)
16666 = distance_agu_use_in_bb (regno0, insn,
16667 distance, BB_HEAD (e->dest),
16668 &found_in_bb, &redefined_in_bb);
16671 if (shortest_dist < 0)
16672 shortest_dist = bb_dist;
16673 else if (bb_dist > 0)
16674 shortest_dist = MIN (bb_dist, shortest_dist);
16680 distance = shortest_dist;
16684 if (!found || redefined)
/* Convert half-cycles to cycles for the caller.  */
16687 return distance >> 1;
16690 /* Define this macro to tune LEA priority vs ADD, it take effect when
16691 there is a dilemma of choosing LEA or ADD
16692 Negative value: ADD is more preferred than LEA
16694 Positive value: LEA is more preferred than ADD*/
16695 #define IX86_LEA_PRIORITY 0
16697 /* Return true if usage of lea INSN has performance advantage
16698 over a sequence of instructions. Instructions sequence has
16699 SPLIT_COST cycles higher latency than lea latency. */
16702 ix86_lea_outperforms (rtx insn, unsigned int regno0, unsigned int regno1,
16703 unsigned int regno2, unsigned int split_cost)
16705 int dist_define, dist_use;
/* Backward distance to a non-AGU def of the sources, and forward
   distance to an AGU use of the destination.  */
16707 dist_define = distance_non_agu_define (regno1, regno2, insn);
16708 dist_use = distance_agu_use (regno0, insn);
16710 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
16712 /* If there is no non AGU operand definition, no AGU
16713 operand usage and split cost is 0 then both lea
16714 and non lea variants have same priority. Currently
16715 we prefer lea for 64 bit code and non lea on 32 bit
16717 if (dist_use < 0 && split_cost == 0)
16718 return TARGET_64BIT || IX86_LEA_PRIORITY;
16723 /* With longer definitions distance lea is more preferable.
16724 Here we change it to take into account splitting cost and
16726 dist_define += split_cost + IX86_LEA_PRIORITY;
16728 /* If there is no use in memory address then we just check
16729 that split cost does not exceed AGU stall. */
16731 return dist_define >= LEA_MAX_STALL;
16733 /* If this insn has both backward non-agu dependence and forward
16734 agu dependence, the one with short distance takes effect. */
16735 return dist_define >= dist_use;
16738 /* Return true if it is legal to clobber flags by INSN and
16739 false otherwise. */
16742 ix86_ok_to_clobber_flags (rtx insn)
16744 basic_block bb = BLOCK_FOR_INSN (insn);
/* Scan forward to the end of the block: flags must not be read, and
   any redefinition of flags makes a clobber at INSN safe.  */
16750 if (NONDEBUG_INSN_P (insn))
16752 for (use = DF_INSN_USES (insn); *use; use++)
16753 if (DF_REF_REG_USE_P (*use) && DF_REF_REGNO (*use) == FLAGS_REG)
16756 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
16760 if (insn == BB_END (bb))
16763 insn = NEXT_INSN (insn);
/* Reached block end: flags may be clobbered only if not live-out.  */
16766 live = df_get_live_out(bb);
16767 return !REGNO_REG_SET_P (live, FLAGS_REG);
16770 /* Return true if we need to split op0 = op1 + op2 into a sequence of
16771 move and add to avoid AGU stalls. */
16774 ix86_avoid_lea_for_add (rtx insn, rtx operands[])
16776 unsigned int regno0 = true_regnum (operands[0]);
16777 unsigned int regno1 = true_regnum (operands[1]);
16778 unsigned int regno2 = true_regnum (operands[2]);
16780 /* Check if we need to optimize. */
16781 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
16784 /* Check it is correct to split here. */
16785 if (!ix86_ok_to_clobber_flags(insn))
16788 /* We need to split only adds with non destructive
16789 destination operand. */
16790 if (regno0 == regno1 || regno0 == regno2)
/* split_cost of 1 accounts for the extra mov in the split form.  */
16793 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1);
16796 /* Return true if we should emit lea instruction instead of mov
/* (continuation of comment elided in this listing) */
16800 ix86_use_lea_for_mov (rtx insn, rtx operands[])
16802 unsigned int regno0;
16803 unsigned int regno1;
16805 /* Check if we need to optimize. */
16806 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
16809 /* Use lea for reg to reg moves only. */
16810 if (!REG_P (operands[0]) || !REG_P (operands[1]))
16813 regno0 = true_regnum (operands[0]);
16814 regno1 = true_regnum (operands[1]);
/* regno2 = -1: a mov has no second source; split_cost 0.  */
16816 return ix86_lea_outperforms (insn, regno0, regno1, -1, 0);
16819 /* Return true if we need to split lea into a sequence of
16820 instructions to avoid AGU stalls. */
16823 ix86_avoid_lea_for_addr (rtx insn, rtx operands[])
16825 unsigned int regno0 = true_regnum (operands[0]) ;
16826 unsigned int regno1 = -1;
16827 unsigned int regno2 = -1;
16828 unsigned int split_cost = 0;
16829 struct ix86_address parts;
16832 /* Check we need to optimize. */
16833 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
16836 /* Check it is correct to split here. */
16837 if (!ix86_ok_to_clobber_flags(insn))
16840 ok = ix86_decompose_address (operands[1], &parts);
16843 /* We should not split into add if non legitimate pic
16844 operand is used as displacement. */
16845 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
16849 regno1 = true_regnum (parts.base);
16851 regno2 = true_regnum (parts.index);
16853 /* Compute how many cycles we will add to execution time
16854 if split lea into a sequence of instructions. */
16855 if (parts.base || parts.index)
16857 /* Have to use mov instruction if non destructive
16858 destination form is used. */
16859 if (regno1 != regno0 && regno2 != regno0)
16862 /* Have to add index to base if both exist. */
16863 if (parts.base && parts.index)
16866 /* Have to use shift and adds if scale is 2 or greater. */
16867 if (parts.scale > 1)
16869 if (regno0 != regno1)
16871 else if (regno2 == regno0)
16874 split_cost += parts.scale;
16877 /* Have to use add instruction with immediate if
16878 disp is non zero. */
16879 if (parts.disp && parts.disp != const0_rtx)
16882 /* Subtract the price of lea. */
/* (cost increments on the elided lines feed split_cost)  */
16886 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost);
16889 /* Emit x86 binary operand CODE in mode MODE, where the first operand
16890 matches destination. RTX includes clobber of FLAGS_REG. */
16893 ix86_emit_binop (enum rtx_code code, enum machine_mode mode,
/* dst = dst CODE src, wrapped in a PARALLEL with a flags clobber so it
   matches the standard two-address arithmetic patterns.  */
16898 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, dst, src));
16899 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16901 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
16904 /* Split lea instructions into a sequence of instructions
16905 which are executed on ALU to avoid AGU stalls.
16906 It is assumed that it is allowed to clobber flags register
16907 at lea position. */
16910 ix86_split_lea_for_addr (rtx operands[], enum machine_mode mode)
16912 unsigned int regno0 = true_regnum (operands[0]) ;
16913 unsigned int regno1 = INVALID_REGNUM;
16914 unsigned int regno2 = INVALID_REGNUM;
16915 struct ix86_address parts;
16919 ok = ix86_decompose_address (operands[1], &parts);
/* Normalize base/index to MODE via SUBREGs if needed.  */
16924 if (GET_MODE (parts.base) != mode)
16925 parts.base = gen_rtx_SUBREG (mode, parts.base, 0);
16926 regno1 = true_regnum (parts.base);
16931 if (GET_MODE (parts.index) != mode)
16932 parts.index = gen_rtx_SUBREG (mode, parts.index, 0);
16933 regno2 = true_regnum (parts.index);
16936 if (parts.scale > 1)
16938 /* Case r1 = r1 + ... */
16939 if (regno1 == regno0)
16941 /* If we have a case r1 = r1 + C * r1 then we
16942 should use multiplication which is very
16943 expensive. Assume cost model is wrong if we
16944 have such case here. */
16945 gcc_assert (regno2 != regno0);
/* scale * index expressed as repeated additions.  */
16947 for (adds = parts.scale; adds > 0; adds--)
16948 ix86_emit_binop (PLUS, mode, operands[0], parts.index);
16952 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
16953 if (regno0 != regno2)
16954 emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.index));
16956 /* Use shift for scaling. */
16957 ix86_emit_binop (ASHIFT, mode, operands[0],
16958 GEN_INT (exact_log2 (parts.scale)));
16961 ix86_emit_binop (PLUS, mode, operands[0], parts.base);
16963 if (parts.disp && parts.disp != const0_rtx)
16964 ix86_emit_binop (PLUS, mode, operands[0], parts.disp);
/* No base and no index: the address is the displacement alone.  */
16967 else if (!parts.base && !parts.index)
16969 gcc_assert(parts.disp);
16970 emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.disp));
/* Scale == 1 cases: base only, index only, or base + index
   (branch heads partially elided).  */
16976 if (regno0 != regno2)
16977 emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.index));
16979 else if (!parts.index)
16981 if (regno0 != regno1)
16982 emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.base));
16986 if (regno0 == regno1)
16988 else if (regno0 == regno2)
16992 emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.base));
16996 ix86_emit_binop (PLUS, mode, operands[0], tmp);
16999 if (parts.disp && parts.disp != const0_rtx)
17000 ix86_emit_binop (PLUS, mode, operands[0], parts.disp);
17004 /* Return true if it is ok to optimize an ADD operation to LEA
17005 operation to avoid flag register consumation. For most processors,
17006 ADD is faster than LEA. For the processors like ATOM, if the
17007 destination register of LEA holds an actual address which will be
17008 used soon, LEA is better and otherwise ADD is better. */
17011 ix86_lea_for_add_ok (rtx insn, rtx operands[])
17013 unsigned int regno0 = true_regnum (operands[0]);
17014 unsigned int regno1 = true_regnum (operands[1]);
17015 unsigned int regno2 = true_regnum (operands[2]);
17017 /* If a = b + c, (a!=b && a!=c), must use lea form. */
17018 if (regno0 != regno1 && regno0 != regno2)
/* Without AGU-stall tuning (or when optimizing for size) there is no
   reason to consult the LEA/ALU cost model.  */
17021 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
/* Otherwise ask the cost model; split_cost is 0 since an ADD needs
   no extra instructions.  */
17024 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0);
17027 /* Return true if destination reg of SET_BODY is shift count of
17031 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
17037 /* Retrieve destination of SET_BODY. */
17038 switch (GET_CODE (set_body))
17041 set_dest = SET_DEST (set_body);
/* Only a register destination can feed a shift count.  */
17042 if (!set_dest || !REG_P (set_dest))
/* PARALLEL: recurse over each element looking for a match.  */
17046 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
17047 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
17055 /* Retrieve shift count of USE_BODY. */
17056 switch (GET_CODE (use_body))
17059 shift_rtx = XEXP (use_body, 1);
/* PARALLEL use: check each element against SET_BODY.  */
17062 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
17063 if (ix86_dep_by_shift_count_body (set_body,
17064 XVECEXP (use_body, 0, i)))
/* Only shift/rotate operations carry a shift count operand.  */
17072 && (GET_CODE (shift_rtx) == ASHIFT
17073 || GET_CODE (shift_rtx) == LSHIFTRT
17074 || GET_CODE (shift_rtx) == ASHIFTRT
17075 || GET_CODE (shift_rtx) == ROTATE
17076 || GET_CODE (shift_rtx) == ROTATERT))
17078 rtx shift_count = XEXP (shift_rtx, 1);
17080 /* Return true if shift count is dest of SET_BODY. */
17081 if (REG_P (shift_count)
17082 && true_regnum (set_dest) == true_regnum (shift_count))
17089 /* Return true if destination reg of SET_INSN is shift count of
17093 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
/* Thin wrapper: extract both insn patterns and defer to the body
   checker above.  */
17095 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
17096 PATTERN (use_insn));
17099 /* Return TRUE or FALSE depending on whether the unary operator meets the
17100 appropriate constraints. */
17103 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
17104 enum machine_mode mode ATTRIBUTE_UNUSED,
17105 rtx operands[2] ATTRIBUTE_UNUSED)
17107 /* If one of operands is memory, source and destination must match. */
/* x86 unary insns (neg, not, ...) are read-modify-write on a single
   operand, so a memory operand must be both source and destination.  */
17108 if ((MEM_P (operands[0])
17109 || MEM_P (operands[1]))
17110 && ! rtx_equal_p (operands[0], operands[1]))
17115 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
17116 are ok, keeping in mind the possible movddup alternative. */
17119 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
/* Memory destination: only OK when it equals the source element that
   the (high/low) interleave would store there.  */
17121 if (MEM_P (operands[0]))
17122 return rtx_equal_p (operands[0], operands[1 + high]);
/* Two memory sources are only usable via SSE3 movddup, which requires
   both sources to be the same location.  */
17123 if (MEM_P (operands[1]) && MEM_P (operands[2]))
17124 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
17128 /* Post-reload splitter for converting an SF or DFmode value in an
17129 SSE register into an unsigned SImode. */
17132 ix86_split_convert_uns_si_sse (rtx operands[])
17134 enum machine_mode vecmode;
17135 rtx value, large, zero_or_two31, input, two31, x;
17137 large = operands[1];
17138 zero_or_two31 = operands[2];
17139 input = operands[3];
17140 two31 = operands[4];
17141 vecmode = GET_MODE (large);
17142 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
17144 /* Load up the value into the low element. We must ensure that the other
17145 elements are valid floats -- zero is the easiest such value. */
17148 if (vecmode == V4SFmode)
17149 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
17151 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
/* Register input: zero the whole vector, then move the scalar into
   the low element.  */
17155 input = gen_rtx_REG (vecmode, REGNO (input));
17156 emit_move_insn (value, CONST0_RTX (vecmode));
17157 if (vecmode == V4SFmode)
17158 emit_insn (gen_sse_movss (value, value, input));
17160 emit_insn (gen_sse2_movsd (value, value, input));
/* LARGE becomes a mask: all-ones where 2**31 <= value.  */
17163 emit_move_insn (large, two31);
17164 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
17166 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
17167 emit_insn (gen_rtx_SET (VOIDmode, large, x));
/* zero_or_two31 = (value >= 2**31) ? 2**31 : 0.  */
17169 x = gen_rtx_AND (vecmode, zero_or_two31, large);
17170 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
/* Bias large values into signed range before the signed truncation.  */
17172 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
17173 emit_insn (gen_rtx_SET (VOIDmode, value, x));
/* Turn the mask into 0x80000000 per lane (sign bit) by shifting.  */
17175 large = gen_rtx_REG (V4SImode, REGNO (large));
17176 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
17178 x = gen_rtx_REG (V4SImode, REGNO (value));
17179 if (vecmode == V4SFmode)
17180 emit_insn (gen_fix_truncv4sfv4si2 (x, value));
17182 emit_insn (gen_sse2_cvttpd2dq (x, value));
/* XOR the sign bit back in for the lanes that were biased.  */
17185 emit_insn (gen_xorv4si3 (value, value, large));
17188 /* Convert an unsigned DImode value into a DFmode, using only SSE.
17189 Expects the 64-bit DImode to be supplied in a pair of integral
17190 registers. Requires SSE2; will use SSE3 if available. For x86_32,
17191 -mfpmath=sse, !optimize_size only. */
17194 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
17196 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
17197 rtx int_xmm, fp_xmm;
17198 rtx biases, exponents;
/* Get the 64-bit integer into the low half of an XMM register,
   choosing the cheapest path the target supports.  */
17201 int_xmm = gen_reg_rtx (V4SImode);
17202 if (TARGET_INTER_UNIT_MOVES)
17203 emit_insn (gen_movdi_to_sse (int_xmm, input));
17204 else if (TARGET_SSE_SPLIT_REGS)
17206 emit_clobber (int_xmm);
17207 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
17211 x = gen_reg_rtx (V2DImode);
17212 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
17213 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
/* Exponent words for 0x1.0p52 and 0x1.0p84 double constants.  */
17216 x = gen_rtx_CONST_VECTOR (V4SImode,
17217 gen_rtvec (4, GEN_INT (0x43300000UL),
17218 GEN_INT (0x45300000UL),
17219 const0_rtx, const0_rtx));
17220 exponents = validize_mem (force_const_mem (V4SImode, x));
17222 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
17223 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
17225 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
17226 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
17227 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
17228 (0x1.0p84 + double(fp_value_hi_xmm)).
17229 Note these exponents differ by 32. */
17231 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
17233 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
17234 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
17235 real_ldexp (&bias_lo_rvt, &dconst1, 52);
17236 real_ldexp (&bias_hi_rvt, &dconst1, 84);
17237 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
17238 x = const_double_from_real_value (bias_hi_rvt, DFmode);
17239 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
17240 biases = validize_mem (force_const_mem (V2DFmode, biases));
17241 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
17243 /* Add the upper and lower DFmode values together. */
/* SSE3 horizontal add does it in one insn; otherwise shuffle + add.  */
17245 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
17248 x = copy_to_mode_reg (V2DFmode, fp_xmm);
17249 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
17250 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
/* Extract the low-element double into TARGET.  */
17253 ix86_expand_vector_extract (false, target, fp_xmm, 0);
17256 /* Not used, but eases macroization of patterns. */
17258 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
17259 rtx input ATTRIBUTE_UNUSED)
/* Deliberately unreachable: exists only so machine-description macros
   can name a uniform expander for every mode.  */
17261 gcc_unreachable ();
17264 /* Convert an unsigned SImode value into a DFmode. Only currently used
17265 for SSE, but applicable anywhere. */
17268 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
17270 REAL_VALUE_TYPE TWO31r;
/* Flip the sign bit: x + INT_MIN reinterprets unsigned [0,2**32) as
   signed [-2**31,2**31), which the signed converter can handle.  */
17273 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
17274 NULL, 1, OPTAB_DIRECT);
17276 fp = gen_reg_rtx (DFmode);
17277 emit_insn (gen_floatsidf2 (fp, x));
/* Undo the bias in the FP domain by adding 2**31 back.  */
17279 real_ldexp (&TWO31r, &dconst1, 31);
17280 x = const_double_from_real_value (TWO31r, DFmode);
17282 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
17284 emit_move_insn (target, x);
17287 /* Convert a signed DImode value into a DFmode. Only used for SSE in
17288 32-bit mode; otherwise we have a direct convert instruction. */
17291 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
17293 REAL_VALUE_TYPE TWO32r;
17294 rtx fp_lo, fp_hi, x;
17296 fp_lo = gen_reg_rtx (DFmode);
17297 fp_hi = gen_reg_rtx (DFmode);
/* Convert the signed high word, then scale it by 2**32.  */
17299 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
17301 real_ldexp (&TWO32r, &dconst1, 32);
17302 x = const_double_from_real_value (TWO32r, DFmode);
17303 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
/* Low word is treated as unsigned.  */
17305 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
/* result = fp_hi * 2**32 + fp_lo.  */
17307 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
17310 emit_move_insn (target, x);
17313 /* Convert an unsigned SImode value into a SFmode, using only SSE.
17314 For x86_32, -mfpmath=sse, !optimize_size only. */
17316 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
17318 REAL_VALUE_TYPE ONE16r;
17319 rtx fp_hi, fp_lo, int_hi, int_lo, x;
/* Split the 32-bit value into two 16-bit halves; each half converts
   exactly to float, avoiding unsigned-range trouble.  */
17321 real_ldexp (&ONE16r, &dconst1, 16);
17322 x = const_double_from_real_value (ONE16r, SFmode);
17323 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
17324 NULL, 0, OPTAB_DIRECT);
17325 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
17326 NULL, 0, OPTAB_DIRECT);
17327 fp_hi = gen_reg_rtx (SFmode);
17328 fp_lo = gen_reg_rtx (SFmode);
17329 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
17330 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
/* result = fp_hi * 2**16 + fp_lo.  */
17331 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
17333 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
17335 if (!rtx_equal_p (target, fp_hi))
17336 emit_move_insn (target, fp_hi);
17339 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
17340 a vector of unsigned ints VAL to vector of floats TARGET. */
17343 ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
17346 REAL_VALUE_TYPE TWO16r;
17347 enum machine_mode intmode = GET_MODE (val);
17348 enum machine_mode fltmode = GET_MODE (target);
17349 rtx (*cvt) (rtx, rtx);
/* Select the signed int->float converter for the vector width.  */
17351 if (intmode == V4SImode)
17352 cvt = gen_floatv4siv4sf2;
17354 cvt = gen_floatv8siv8sf2;
/* Same halving trick as the scalar version: split each lane into
   16-bit high and low halves, convert each exactly, recombine.  */
17355 tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
17356 tmp[0] = force_reg (intmode, tmp[0]);
17357 tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
17359 tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
17360 NULL_RTX, 1, OPTAB_DIRECT);
17361 tmp[3] = gen_reg_rtx (fltmode);
17362 emit_insn (cvt (tmp[3], tmp[1]));
17363 tmp[4] = gen_reg_rtx (fltmode);
17364 emit_insn (cvt (tmp[4], tmp[2]));
17365 real_ldexp (&TWO16r, &dconst1, 16);
17366 tmp[5] = const_double_from_real_value (TWO16r, SFmode);
17367 tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
/* target = hi * 2**16 + lo, per lane.  */
17368 tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
17370 tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
17372 if (tmp[7] != target)
17373 emit_move_insn (target, tmp[7]);
17376 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
17377 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
17378 This is done by doing just signed conversion if < 0x1p31, and otherwise by
17379 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
17382 ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
17384 REAL_VALUE_TYPE TWO31r;
17385 rtx two31r, tmp[4];
17386 enum machine_mode mode = GET_MODE (val);
17387 enum machine_mode scalarmode = GET_MODE_INNER (mode);
17388 enum machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
17389 rtx (*cmp) (rtx, rtx, rtx, rtx);
17392 for (i = 0; i < 3; i++)
17393 tmp[i] = gen_reg_rtx (mode);
/* Broadcast the 2**31 constant across all lanes.  */
17394 real_ldexp (&TWO31r, &dconst1, 31);
17395 two31r = const_double_from_real_value (TWO31r, scalarmode);
17396 two31r = ix86_build_const_vector (mode, 1, two31r);
17397 two31r = force_reg (mode, two31r);
/* Pick the masked-compare builder for the FP vector mode.  */
17400 case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
17401 case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
17402 case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
17403 case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
17404 default: gcc_unreachable ();
/* tmp[0] = all-ones mask where 2**31 <= val.  */
17406 tmp[3] = gen_rtx_LE (mode, two31r, val);
17407 emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
/* tmp[1] = 2**31 in the lanes that need biasing, 0 elsewhere.  */
17408 tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
/* Build the 0x80000000 correction vector for the caller to XOR in.  */
17410 if (intmode == V4SImode || TARGET_AVX2)
17411 *xorp = expand_simple_binop (intmode, ASHIFT,
17412 gen_lowpart (intmode, tmp[0]),
17413 GEN_INT (31), NULL_RTX, 0,
17417 rtx two31 = GEN_INT ((unsigned HOST_WIDE_INT) 1 << 31);
17418 two31 = ix86_build_const_vector (intmode, 1, two31);
17419 *xorp = expand_simple_binop (intmode, AND,
17420 gen_lowpart (intmode, tmp[0]),
17421 two31, NULL_RTX, 0,
/* Return val with 2**31 subtracted from the out-of-range lanes.  */
17424 return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
17428 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
17429 then replicate the value for all elements of the vector
17433 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
17437 enum machine_mode scalar_mode;
17454 n_elt = GET_MODE_NUNITS (mode);
17455 v = rtvec_alloc (n_elt);
17456 scalar_mode = GET_MODE_INNER (mode);
/* Element 0 always gets VALUE; remaining elements are either copies
   of VALUE (vect) or scalar zeros.  */
17458 RTVEC_ELT (v, 0) = value;
17460 for (i = 1; i < n_elt; ++i)
17461 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
17463 return gen_rtx_CONST_VECTOR (mode, v);
/* Unsupported vector mode.  */
17466 gcc_unreachable ();
17470 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
17471 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
17472 for an SSE register. If VECT is true, then replicate the mask for
17473 all elements of the vector register. If INVERT is true, then create
17474 a mask excluding the sign bit. */
17477 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
17479 enum machine_mode vec_mode, imode;
17480 HOST_WIDE_INT hi, lo;
17485 /* Find the sign bit, sign extended to 2*HWI. */
/* 32-bit element: sign bit is bit 31.  */
17493 mode = GET_MODE_INNER (mode);
17495 lo = 0x80000000, hi = lo < 0;
/* 64-bit element: sign bit is bit 63; split across lo/hi when
   HOST_WIDE_INT is only 32 bits wide.  */
17503 mode = GET_MODE_INNER (mode);
17505 if (HOST_BITS_PER_WIDE_INT >= 64)
17506 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
17508 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
/* TF/128-bit case: mask is built via a V2DI constant vector below.  */
17513 vec_mode = VOIDmode;
17514 if (HOST_BITS_PER_WIDE_INT >= 64)
17517 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
17524 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
/* INVERT flips the mask to select everything but the sign bit.  */
17528 lo = ~lo, hi = ~hi;
17534 mask = immed_double_const (lo, hi, imode);
17536 vec = gen_rtvec (2, v, mask);
17537 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
17538 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
17545 gcc_unreachable ();
17549 lo = ~lo, hi = ~hi;
17551 /* Force this value into the low part of a fp vector constant. */
17552 mask = immed_double_const (lo, hi, imode);
17553 mask = gen_lowpart (mode, mask);
/* Scalar (non-vector) result when no vector mode was selected.  */
17555 if (vec_mode == VOIDmode)
17556 return force_reg (mode, mask);
17558 v = ix86_build_const_vector (vec_mode, vect, mask);
17559 return force_reg (vec_mode, v);
17562 /* Generate code for floating point ABS or NEG. */
17565 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
17568 rtx mask, set, dst, src;
17569 bool use_sse = false;
17570 bool vector_mode = VECTOR_MODE_P (mode);
17571 enum machine_mode vmode = mode;
/* Decide whether the operation goes through SSE bitwise masking or
   the x87 unit, based on MODE and -mfpmath.  */
17575 else if (mode == TFmode)
17577 else if (TARGET_SSE_MATH)
17579 use_sse = SSE_FLOAT_MODE_P (mode);
17580 if (mode == SFmode)
17582 else if (mode == DFmode)
17586 /* NEG and ABS performed with SSE use bitwise mask operations.
17587 Create the appropriate mask now. */
/* ABS clears the sign bit (inverted mask & value); NEG flips it.  */
17589 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
17596 set = gen_rtx_fmt_e (code, mode, src);
17597 set = gen_rtx_SET (VOIDmode, dst, set);
/* Attach the mask as a USE so the post-reload splitter can find it.  */
17604 use = gen_rtx_USE (VOIDmode, mask);
17606 par = gen_rtvec (2, set, use);
/* Scalar case also clobbers flags.  */
17609 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17610 par = gen_rtvec (3, set, use, clob);
17612 emit_insn (gen_rtx_PARALLEL (VOIDmode, par))
17618 /* Expand a copysign operation. Special case operand 0 being a constant. */
17621 ix86_expand_copysign (rtx operands[])
17623 enum machine_mode mode, vmode;
17624 rtx dest, op0, op1, mask, nmask;
17626 dest = operands[0];
17630 mode = GET_MODE (dest);
/* Choose the vector mode used for the bitwise mask operations.  */
17632 if (mode == SFmode)
17634 else if (mode == DFmode)
/* Constant magnitude: we can fold the abs at expand time and use the
   simpler single-mask pattern.  */
17639 if (GET_CODE (op0) == CONST_DOUBLE)
17641 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
/* copysign only takes the magnitude of op0, so strip its sign.  */
17643 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
17644 op0 = simplify_unary_operation (ABS, mode, op0, mode);
17646 if (mode == SFmode || mode == DFmode)
17648 if (op0 == CONST0_RTX (mode))
17649 op0 = CONST0_RTX (vmode)
17652 rtx v = ix86_build_const_vector (vmode, false, op0);
17654 op0 = force_reg (vmode, v);
17657 else if (op0 != CONST0_RTX (mode))
17658 op0 = force_reg (mode, op0);
17660 mask = ix86_build_signbit_mask (vmode, 0, 0);
17662 if (mode == SFmode)
17663 copysign_insn = gen_copysignsf3_const;
17664 else if (mode == DFmode)
17665 copysign_insn = gen_copysigndf3_const;
17667 copysign_insn = gen_copysigntf3_const;
17669 emit_insn (copysign_insn (dest, op0, op1, mask));
/* Variable magnitude: need both the sign mask and its complement.  */
17673 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
17675 nmask = ix86_build_signbit_mask (vmode, 0, 1);
17676 mask = ix86_build_signbit_mask (vmode, 0, 0);
17678 if (mode == SFmode)
17679 copysign_insn = gen_copysignsf3_var;
17680 else if (mode == DFmode)
17681 copysign_insn = gen_copysigndf3_var;
17683 copysign_insn = gen_copysigntf3_var;
17685 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
17689 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
17690 be a constant, and so has already been expanded into a vector constant. */
17693 ix86_split_copysign_const (rtx operands[])
17695 enum machine_mode mode, vmode;
17696 rtx dest, op0, mask, x;
17698 dest = operands[0];
17700 mask = operands[3];
17702 mode = GET_MODE (dest);
17703 vmode = GET_MODE (mask);
/* dest = (sign-of-op1 & mask) | constant-magnitude.  */
17705 dest = simplify_gen_subreg (vmode, dest, mode, 0);
17706 x = gen_rtx_AND (vmode, dest, mask);
17707 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* Skip the OR when the magnitude is zero -- the AND already gives
   the right answer.  */
17709 if (op0 != CONST0_RTX (vmode))
17711 x = gen_rtx_IOR (vmode, dest, op0);
17712 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17716 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
17717 so we have to do two masks. */
17720 ix86_split_copysign_var (rtx operands[])
17722 enum machine_mode mode, vmode;
17723 rtx dest, scratch, op0, op1, mask, nmask, x;
17725 dest = operands[0];
17726 scratch = operands[1];
17729 nmask = operands[4];
17730 mask = operands[5];
17732 mode = GET_MODE (dest);
17733 vmode = GET_MODE (mask);
17735 if (rtx_equal_p (op0, op1))
17737 /* Shouldn't happen often (it's useless, obviously), but when it does
17738 we'd generate incorrect code if we continue below. */
17739 emit_move_insn (dest, op0);
/* The register-allocation alternatives below differ only in which
   of dest/scratch coincide with the mask operands.  */
17743 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
17745 gcc_assert (REGNO (op1) == REGNO (scratch));
/* scratch = sign bits of op1.  */
17747 x = gen_rtx_AND (vmode, scratch, mask);
17748 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
/* dest = magnitude bits of op0 via ANDN.  */
17751 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
17752 x = gen_rtx_NOT (vmode, dest);
17753 x = gen_rtx_AND (vmode, x, op0);
17754 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17758 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
17760 x = gen_rtx_AND (vmode, scratch, mask);
17762 else /* alternative 2,4 */
17764 gcc_assert (REGNO (mask) == REGNO (scratch));
17765 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
17766 x = gen_rtx_AND (vmode, scratch, op1);
17768 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
17770 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
17772 dest = simplify_gen_subreg (vmode, op0, mode, 0);
17773 x = gen_rtx_AND (vmode, dest, nmask);
17775 else /* alternative 3,4 */
17777 gcc_assert (REGNO (nmask) == REGNO (dest));
17779 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
17780 x = gen_rtx_AND (vmode, dest, op0);
17782 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* Combine magnitude and sign: dest |= scratch.  */
17785 x = gen_rtx_IOR (vmode, dest, scratch);
17786 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17789 /* Return TRUE or FALSE depending on whether the first SET in INSN
17790 has source and destination with matching CC modes, and that the
17791 CC mode is at least as constrained as REQ_MODE. */
17794 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
17797 enum machine_mode set_mode;
17799 set = PATTERN (insn);
/* For a PARALLEL, the COMPARE is always in the first element.  */
17800 if (GET_CODE (set) == PARALLEL)
17801 set = XVECEXP (set, 0, 0);
17802 gcc_assert (GET_CODE (set) == SET);
17803 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
17805 set_mode = GET_MODE (SET_DEST (set));
/* Dispatch on the mode the insn actually sets; each case decides which
   requested modes it can satisfy.  */
17809 if (req_mode != CCNOmode
17810 && (req_mode != CCmode
17811 || XEXP (SET_SRC (set), 1) != const0_rtx))
17815 if (req_mode == CCGCmode)
17819 if (req_mode == CCGOCmode || req_mode == CCNOmode)
17823 if (req_mode == CCZmode)
17833 if (set_mode != req_mode)
17838 gcc_unreachable ();
17841 return GET_MODE (SET_SRC (set)) == set_mode;
17844 /* Generate insn patterns to do an integer compare of OPERANDS. */
17847 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
17849 enum machine_mode cmpmode;
/* Pick the least-constrained CC mode that still captures CODE.  */
17852 cmpmode = SELECT_CC_MODE (code, op0, op1);
17853 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
17855 /* This is very simple, but making the interface the same as in the
17856 FP case makes the rest of the code easier. */
17857 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
17858 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
17860 /* Return the test that should be put into the flags user, i.e.
17861 the bcc, scc, or cmov instruction. */
17862 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
17865 /* Figure out whether to use ordered or unordered fp comparisons.
17866 Return the appropriate mode to use. */
17869 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
17871 /* ??? In order to make all comparisons reversible, we do all comparisons
17872 non-trapping when compiling for IEEE. Once gcc is able to distinguish
17873 all forms trapping and nontrapping comparisons, we can make inequality
17874 comparisons trapping again, since it results in better code when using
17875 FCOM based compares. */
/* CCFPUmode = unordered (non-trapping on NaN); CCFPmode = ordered.  */
17876 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
/* Return the condition-code mode needed to represent comparison CODE
   between OP0 and OP1; narrower CC modes give combine more freedom.  */
17880 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
17882 enum machine_mode mode = GET_MODE (op0);
17884 if (SCALAR_FLOAT_MODE_P (mode))
17886 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
17887 return ix86_fp_compare_mode (code);
17892 /* Only zero flag is needed. */
17893 case EQ: /* ZF=0 */
17894 case NE: /* ZF!=0 */
17896 /* Codes needing carry flag. */
17897 case GEU: /* CF=0 */
17898 case LTU: /* CF=1 */
17899 /* Detect overflow checks. They need just the carry flag. */
17900 if (GET_CODE (op0) == PLUS
17901 && rtx_equal_p (op1, XEXP (op0, 0)))
17905 case GTU: /* CF=0 & ZF=0 */
17906 case LEU: /* CF=1 | ZF=1 */
17907 /* Detect overflow checks. They need just the carry flag. */
17908 if (GET_CODE (op0) == MINUS
17909 && rtx_equal_p (op1, XEXP (op0, 0)))
17913 /* Codes possibly doable only with sign flag when
17914 comparing against zero. */
17915 case GE: /* SF=OF or SF=0 */
17916 case LT: /* SF<>OF or SF=1 */
17917 if (op1 == const0_rtx)
17920 /* For other cases Carry flag is not required. */
17922 /* Codes doable only with sign flag when comparing
17923 against zero, but we miss jump instruction for it
17924 so we need to use relational tests against overflow
17925 that thus needs to be zero. */
17926 case GT: /* ZF=0 & SF=OF */
17927 case LE: /* ZF=1 | SF<>OF */
17928 if (op1 == const0_rtx)
17932 /* strcmp pattern do (use flags) and combine may ask us for proper
/* Any other comparison code is not expected here.  */
17937 gcc_unreachable ();
17941 /* Return the fixed registers used for condition codes. */
17944 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
17951 /* If two condition code modes are compatible, return a condition code
17952 mode which is compatible with both. Otherwise, return
17955 static enum machine_mode
17956 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
/* Non-CC modes have no notion of CC compatibility.  */
17961 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
/* CCGC and CCGOC are mutually compatible; the merged mode is chosen
   by the (elided) code following this test.  */
17964 if ((m1 == CCGCmode && m2 == CCGOCmode)
17965 || (m1 == CCGOCmode && m2 == CCGCmode))
17971 gcc_unreachable ();
18001 /* These are only compatible with themselves, which we already
18008 /* Return a comparison we can do and that it is equivalent to
18009 swap_condition (code) apart possibly from orderedness.
18010 But, never change orderedness if TARGET_IEEE_FP, returning
18011 UNKNOWN in that case if necessary. */
18013 static enum rtx_code
18014 ix86_fp_swap_condition (enum rtx_code code)
/* Each listed code swaps into one of the opposite orderedness; under
   strict IEEE we must not change orderedness, so report UNKNOWN.  */
18018 case GT: /* GTU - CF=0 & ZF=0 */
18019 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
18020 case GE: /* GEU - CF=0 */
18021 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
18022 case UNLT: /* LTU - CF=1 */
18023 return TARGET_IEEE_FP ? UNKNOWN : GT;
18024 case UNLE: /* LEU - CF=1 | ZF=1 */
18025 return TARGET_IEEE_FP ? UNKNOWN : GE;
/* All remaining codes swap without an orderedness change.  */
18027 return swap_condition (code);
18031 /* Return cost of comparison CODE using the best strategy for performance.
18032 All following functions do use number of instructions as a cost metrics.
18033 In future this should be tweaked to compute bytes for optimize_size and
18034 take into account performance of various instructions on various CPUs. */
18037 ix86_fp_comparison_cost (enum rtx_code code)
18041 /* The cost of code using bit-twiddling on %ah. */
/* arith_cost depends on whether IEEE NaN handling needs extra insns.  */
18058 arith_cost = TARGET_IEEE_FP ? 5 : 4;
18062 arith_cost = TARGET_IEEE_FP ? 6 : 4;
18065 gcc_unreachable ();
/* Convert the chosen strategy into an instruction-count estimate.  */
18068 switch (ix86_fp_comparison_strategy (code))
18070 case IX86_FPCMP_COMI:
18071 return arith_cost > 4 ? 3 : 2;
18072 case IX86_FPCMP_SAHF:
18073 return arith_cost > 4 ? 4 : 3;
18079 /* Return strategy to use for floating-point. We assume that fcomi is always
18080 preferrable where available, since that is also true when looking at size
18081 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
18083 enum ix86_fpcmp_strategy
18084 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
18086 /* Do fcomi/sahf based test when profitable. */
18089 return IX86_FPCMP_COMI;
18091 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_function_for_size_p (cfun)))
18092 return IX86_FPCMP_SAHF;
/* Fall back to fnstsw + bit-twiddling on %ah.  */
18094 return IX86_FPCMP_ARITH;
18097 /* Swap, force into registers, or otherwise massage the two operands
18098 to a fp comparison. The operands are updated in place; the new
18099 comparison code is returned. */
18101 static enum rtx_code
18102 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
18104 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
18105 rtx op0 = *pop0, op1 = *pop1;
18106 enum machine_mode op_mode = GET_MODE (op0);
18107 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
18109 /* All of the unordered compare instructions only work on registers.
18110 The same is true of the fcomi compare instructions. The XFmode
18111 compare instructions require registers except when comparing
18112 against zero or when converting operand 1 from fixed point to
18116 && (fpcmp_mode == CCFPUmode
18117 || (op_mode == XFmode
18118 && ! (standard_80387_constant_p (op0) == 1
18119 || standard_80387_constant_p (op1) == 1)
18120 && GET_CODE (op1) != FLOAT)
18121 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI)
18123 op0 = force_reg (op_mode, op0);
18124 op1 = force_reg (op_mode, op1);
18128 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
18129 things around if they appear profitable, otherwise force op0
18130 into a register. */
18132 if (standard_80387_constant_p (op0) == 0
18134 && ! (standard_80387_constant_p (op1) == 0
/* Only swap when an orderedness-safe swapped code exists.  */
18137 enum rtx_code new_code = ix86_fp_swap_condition (code);
18138 if (new_code != UNKNOWN)
18141 tmp = op0, op0 = op1, op1 = tmp;
18147 op0 = force_reg (op_mode, op0);
18149 if (CONSTANT_P (op1))
/* Standard x87 constants (0.0, 1.0, ...) can stay as immediates;
   others go to the constant pool.  */
18151 int tmp = standard_80387_constant_p (op1);
18153 op1 = validize_mem (force_const_mem (op_mode, op1));
18157 op1 = force_reg (op_mode, op1);
18160 op1 = force_reg (op_mode, op1);
18164 /* Try to rearrange the comparison to make it cheaper. */
18165 if (ix86_fp_comparison_cost (code)
18166 > ix86_fp_comparison_cost (swap_condition (code))
18167 && (REG_P (op1) || can_create_pseudo_p ()))
18170 tmp = op0, op0 = op1, op1 = tmp;
18171 code = swap_condition (code);
18173 op0 = force_reg (op_mode, op0);
18181 /* Convert comparison codes we use to represent FP comparison to integer
18182 code that will result in proper branch. Return UNKNOWN if no such code
/* NOTE(review): mapping table elided in this view -- presumably a switch
   over CODE; confirm against the full source.  */
18186 ix86_fp_compare_code_to_integer (enum rtx_code code)
18215 /* Generate insn patterns to do a floating point compare of OPERANDS. */
18218 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
18220 enum machine_mode fpcmp_mode, intcmp_mode;
18223 fpcmp_mode = ix86_fp_compare_mode (code);
18224 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
18226 /* Do fcomi/sahf based test when profitable. */
18227 switch (ix86_fp_comparison_strategy (code))
18229 case IX86_FPCMP_COMI:
/* fcomi/ucomi sets the integer flags directly.  */
18230 intcmp_mode = fpcmp_mode;
18231 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
18232 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
18237 case IX86_FPCMP_SAHF:
/* fnstsw into a scratch HImode reg, then sahf to load EFLAGS.  */
18238 intcmp_mode = fpcmp_mode;
18239 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
18240 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
18244 scratch = gen_reg_rtx (HImode);
18245 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
18246 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
18249 case IX86_FPCMP_ARITH:
18250 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
18251 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
18252 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
18254 scratch = gen_reg_rtx (HImode);
18255 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
18257 /* In the unordered case, we have to check C2 for NaN's, which
18258 doesn't happen to work out to anything nice combination-wise.
18259 So do some bit twiddling on the value we've got in AH to come
18260 up with an appropriate set of condition codes. */
/* Status-word bits tested below: C0=0x01, C2=0x04, C3=0x40;
   0x45 covers C0|C2|C3.  */
18262 intcmp_mode = CCNOmode;
18267 if (code == GT || !TARGET_IEEE_FP)
18269 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
/* IEEE path must treat NaN (C2 set) as "unordered", hence the
   mask/add/compare dance instead of a single test.  */
18274 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
18275 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
18276 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
18277 intcmp_mode = CCmode;
18283 if (code == LT && TARGET_IEEE_FP)
18285 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
18286 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
18287 intcmp_mode = CCmode;
18292 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
18298 if (code == GE || !TARGET_IEEE_FP)
18300 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
18305 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
18306 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
18312 if (code == LE && TARGET_IEEE_FP)
18314 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
18315 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
18316 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
18317 intcmp_mode = CCmode;
18322 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
18328 if (code == EQ && TARGET_IEEE_FP)
18330 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
18331 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
18332 intcmp_mode = CCmode;
18337 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
18343 if (code == NE && TARGET_IEEE_FP)
18345 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
18346 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
/* Non-IEEE NE: just test C3.  */
18352 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
/* ORDERED / UNORDERED: test the C2 (NaN) bit alone.  */
18358 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
18362 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
18367 gcc_unreachable ();
18375 /* Return the test that should be put into the flags user, i.e.
18376 the bcc, scc, or cmov instruction. */
18377 return gen_rtx_fmt_ee (code, VOIDmode,
18378 gen_rtx_REG (intcmp_mode, FLAGS_REG),
/* Expand comparison CODE of OP0 against OP1 and return the rtx to be
   placed in the flags user (bcc/setcc/cmov).  Dispatches on the mode of
   OP0: an existing CC-mode value is used directly, scalar binary floats
   go through the FP compare expander, everything else through the
   integer compare expander.
   NOTE(review): intermediate lines (return type, braces, `ret` decl and
   final return) are elided from this excerpt.  */
18383 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
  /* OP0 already holds a flags result from an earlier comparison; just
     wrap it in the requested condition.  */
18387   if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
18388     ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
18390   else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
  /* Decimal float never reaches this path.  */
18392       gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
18393       ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
  /* Integer (or integer-like) comparison.  */
18396       ret = ix86_expand_int_compare (code, op0, op1);
/* Emit a conditional branch to LABEL taken when CODE holds for OP0
   against OP1.  Narrow modes use a single compare+jump; double-word
   modes (DImode on 32-bit, TImode on 64-bit — presumably, the mode
   switch is elided here) are split into word-sized compare+branch
   sequences.
   NOTE(review): many intermediate lines are elided from this excerpt.  */
18402 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
18404   enum machine_mode mode = GET_MODE (op0);
  /* Simple case: one compare, one conditional jump to LABEL.  */
18416       tmp = ix86_expand_compare (code, op0, op1);
18417       tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
18418 gen_rtx_LABEL_REF (VOIDmode, label),
18420       emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
18427       /* Expand DImode branch into multiple compare+branch. */
18429 rtx lo[2], hi[2], label2;
18430 enum rtx_code code1, code2, code3;
18431 enum machine_mode submode;
  /* Canonicalize: keep any constant as the second operand.  */
18433 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
18435 tmp = op0, op0 = op1, op1 = tmp;
18436 code = swap_condition (code);
  /* Split both operands into low/high word halves.  */
18439 split_double_mode (mode, &op0, 1, lo+0, hi+0);
18440 split_double_mode (mode, &op1, 1, lo+1, hi+1);
18442 submode = mode == DImode ? SImode : DImode;
18444 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
18445 avoid two branches. This costs one extra insn, so disable when
18446 optimizing for size. */
18448 if ((code == EQ || code == NE)
18449 && (!optimize_insn_for_size_p ()
18450 || hi[1] == const0_rtx || lo[1] == const0_rtx))
  /* XOR halves that are not compared against zero; zero halves need
     no XOR at all.  */
18455 if (hi[1] != const0_rtx)
18456 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
18457 NULL_RTX, 0, OPTAB_WIDEN);
18460 if (lo[1] != const0_rtx)
18461 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
18462 NULL_RTX, 0, OPTAB_WIDEN);
18464 tmp = expand_binop (submode, ior_optab, xor1, xor0,
18465 NULL_RTX, 0, OPTAB_WIDEN);
  /* Recurse on the single-word OR result.  */
18467 ix86_expand_branch (code, tmp, const0_rtx, label);
18471 /* Otherwise, if we are doing less-than or greater-or-equal-than,
18472 op1 is a constant and the low word is zero, then we can just
18473 examine the high word. Similarly for low word -1 and
18474 less-or-equal-than or greater-than. */
18476 if (CONST_INT_P (hi[1]))
18479 case LT: case LTU: case GE: case GEU:
18480 if (lo[1] == const0_rtx)
18482 ix86_expand_branch (code, hi[0], hi[1], label);
18486 case LE: case LEU: case GT: case GTU:
18487 if (lo[1] == constm1_rtx)
18489 ix86_expand_branch (code, hi[0], hi[1], label);
18497 /* Otherwise, we need two or three jumps. */
18499 label2 = gen_label_rtx ();
  /* Defaults; the switch below overrides for the strict/equality
     cases.  code3 compares the low words, which compare unsigned.  */
18502 code2 = swap_condition (code);
18503 code3 = unsigned_condition (code);
18507 case LT: case GT: case LTU: case GTU:
18510 case LE: code1 = LT; code2 = GT; break;
18511 case GE: code1 = GT; code2 = LT; break;
18512 case LEU: code1 = LTU; code2 = GTU; break;
18513 case GEU: code1 = GTU; code2 = LTU; break;
18515 case EQ: code1 = UNKNOWN; code2 = NE; break;
18516 case NE: code2 = UNKNOWN; break;
18519 gcc_unreachable ();
18524 * if (hi(a) < hi(b)) goto true;
18525 * if (hi(a) > hi(b)) goto false;
18526 * if (lo(a) < lo(b)) goto true;
  /* UNKNOWN marks a branch that can be omitted.  */
18530 if (code1 != UNKNOWN)
18531 ix86_expand_branch (code1, hi[0], hi[1], label);
18532 if (code2 != UNKNOWN)
18533 ix86_expand_branch (code2, hi[0], hi[1], label2);
18535 ix86_expand_branch (code3, lo[0], lo[1], label);
18537 if (code2 != UNKNOWN)
18538 emit_label (label2);
  /* Remaining modes must already carry a CC-mode comparison.  */
18543 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
18548 /* Split branch based on floating point condition. */
/* Emit the compare of OP1 against OP2 under CODE and a conditional jump
   selecting TARGET1/TARGET2.  When TARGET2 is not pc_rtx the condition
   is reversed so the fall-through stays TARGET2.  PUSHED, when set,
   names an operand previously spilled to the stack that must be freed.
   NOTE(review): intermediate lines are elided from this excerpt.  */
18550 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
18551 rtx target1, rtx target2, rtx tmp, rtx pushed)
18556   if (target2 != pc_rtx)
  /* Reversal must preserve the (un)ordered nature of the FP test.  */
18559       code = reverse_condition_maybe_unordered (code);
18564   condition = ix86_expand_fp_compare (code, op1, op2,
18567   /* Remove pushed operand from stack. */
18569     ix86_free_from_memory (GET_MODE (pushed));
18571   i = emit_jump_insn (gen_rtx_SET
18573 gen_rtx_IF_THEN_ELSE (VOIDmode,
18574 condition, target1, target2)));
  /* Propagate the recorded branch probability onto the new jump.  */
18575   if (split_branch_probability >= 0)
18576     add_reg_note (i, REG_BR_PROB, GEN_INT (split_branch_probability));
/* Expand a setcc: store into QImode DEST the 0/1 result of comparing
   OP0 against OP1 under CODE.
   NOTE(review): surrounding lines (return type, braces, `ret` decl)
   are elided from this excerpt.  */
18580 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
18584   gcc_assert (GET_MODE (dest) == QImode);
18586   ret = ix86_expand_compare (code, op0, op1);
  /* Retarget the comparison rtx to QImode so it matches the setcc
     destination.  */
18587   PUT_MODE (ret, QImode);
18588   emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
18591 /* Expand comparison setting or clearing carry flag. Return true when
18592 successful and set pop for the operation. */
/* The goal is to rewrite CODE(OP0, OP1) into an equivalent LTU/GEU form
   so the result lives purely in the carry flag (usable by sbb/adc).
   NOTE(review): many intermediate lines are elided from this excerpt.  */
18594 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
18596   enum machine_mode mode =
18597     GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
18599   /* Do not handle double-mode compares that go through special path. */
18600   if (mode == (TARGET_64BIT ? TImode : DImode))
18603   if (SCALAR_FLOAT_MODE_P (mode))
18605       rtx compare_op, compare_seq;
18607       gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
18609       /* Shortcut: following common codes never translate
18610 into carry flag compares. */
18611       if (code == EQ || code == NE || code == UNEQ || code == LTGT
18612 || code == ORDERED || code == UNORDERED)
18615       /* These comparisons require zero flag; swap operands so they won't. */
18616       if ((code == GT || code == UNLE || code == LE || code == UNGT)
18617 && !TARGET_IEEE_FP)
18622 code = swap_condition (code);
18625       /* Try to expand the comparison and verify that we end up with
18626 carry flag based comparison. This fails to be true only when
18627 we decide to expand comparison using arithmetic that is not
18628 too common scenario. */
18630       compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
18631       compare_seq = get_insns ();
  /* Map an FP flags mode back to the equivalent integer condition.  */
18634       if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
18635 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
18636 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
18638 code = GET_CODE (compare_op);
  /* Only carry-based conditions are acceptable here.  */
18640       if (code != LTU && code != GEU)
18643       emit_insn (compare_seq);
18648   if (!INTEGRAL_MODE_P (mode))
18657       /* Convert a==0 into (unsigned)a<1. */
18660       if (op1 != const0_rtx)
18663       code = (code == EQ ? LTU : GEU);
18666       /* Convert a>b into b<a or a>=b-1. */
18669       if (CONST_INT_P (op1))
18671 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
18672 /* Bail out on overflow. We still can swap operands but that
18673 would force loading of the constant into register. */
18674 if (op1 == const0_rtx
18675 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
18677 code = (code == GTU ? GEU : LTU);
  /* Non-constant OP1: swap operands instead (swap elided here).  */
18684 code = (code == GTU ? LTU : GEU);
18688       /* Convert a>=0 into (unsigned)a<0x80000000. */
18691       if (mode == DImode || op1 != const0_rtx)
18693       op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
18694       code = (code == LT ? GEU : LTU);
  /* Likewise a>-1 / a<=-1 against the sign-bit constant.  */
18698       if (mode == DImode || op1 != constm1_rtx)
18700       op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
18701       code = (code == LE ? GEU : LTU);
18707   /* Swapping operands may cause constant to appear as first operand. */
18708   if (!nonimmediate_operand (op0, VOIDmode))
  /* Cannot materialize a register after reload; give up.  */
18710       if (!can_create_pseudo_p ())
18712       op0 = force_reg (mode, op0);
18714   *pop = ix86_expand_compare (code, op0, op1);
18715   gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
/* Expand an integer conditional move: operands[0] = operands[1](cmp)
   ? operands[2] : operands[3].  Tries, in order: carry-flag/sbb tricks
   for two-constant arms, lea-based sequences, setcc+arith sequences,
   masking with a loaded constant, and finally a plain cmove.  Returns
   nonzero on success (exact return points elided from this excerpt).
   NOTE(review): many intermediate lines are elided from this excerpt.

   Fix: in the CONST_INT_P (operands[3]) arm below, the -1 case guarded
   on operands[3] != const0_rtx, which is trivially true (operands[3]
   is known to be constm1); the guard must protect the operand being
   overwritten, operands[2], matching the mirrored operands[2] arm.  */
18720 ix86_expand_int_movcc (rtx operands[])
18722   enum rtx_code code = GET_CODE (operands[1]), compare_code;
18723   rtx compare_seq, compare_op;
18724   enum machine_mode mode = GET_MODE (operands[0]);
18725   bool sign_bit_compare_p = false;
18726   rtx op0 = XEXP (operands[1], 0);
18727   rtx op1 = XEXP (operands[1], 1);
  /* Expand the comparison once, capture the insn sequence for reuse.  */
18730   compare_op = ix86_expand_compare (code, op0, op1);
18731   compare_seq = get_insns ();
18734   compare_code = GET_CODE (compare_op);
  /* Sign-bit tests (x<0, x>=0, x>-1, x<=-1) get shift-based codegen.  */
18736   if ((op1 == const0_rtx && (code == GE || code == LT))
18737       || (op1 == constm1_rtx && (code == GT || code == LE)))
18738     sign_bit_compare_p = true;
18740   /* Don't attempt mode expansion here -- if we had to expand 5 or 6
18741 HImode insns, we'd be swallowed in word prefix ops. */
18743   if ((mode != HImode || TARGET_FAST_PREFIX)
18744       && (mode != (TARGET_64BIT ? TImode : DImode))
18745       && CONST_INT_P (operands[2])
18746       && CONST_INT_P (operands[3]))
18748       rtx out = operands[0];
18749       HOST_WIDE_INT ct = INTVAL (operands[2]);
18750       HOST_WIDE_INT cf = INTVAL (operands[3]);
18751       HOST_WIDE_INT diff;
18754       /* Sign bit compares are better done using shifts than we do by using
18756       if (sign_bit_compare_p
18757 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
18759 /* Detect overlap between destination and compare sources. */
18762 if (!sign_bit_compare_p)
18765 bool fpcmp = false;
18767 compare_code = GET_CODE (compare_op);
18769 flags = XEXP (compare_op, 0);
18771 if (GET_MODE (flags) == CCFPmode
18772 || GET_MODE (flags) == CCFPUmode)
18776 = ix86_fp_compare_code_to_integer (compare_code);
18779 /* To simplify rest of code, restrict to the GEU case. */
18780 if (compare_code == LTU)
18782 HOST_WIDE_INT tmp = ct;
18785 compare_code = reverse_condition (compare_code);
18786 code = reverse_condition (code);
  /* Reverse the already-built compare rtx in place, FP-aware.  */
18791 PUT_CODE (compare_op,
18792 reverse_condition_maybe_unordered
18793 (GET_CODE (compare_op)));
18795 PUT_CODE (compare_op,
18796 reverse_condition (GET_CODE (compare_op)));
  /* Use a scratch if OUT aliases the compare inputs.  */
18800 if (reg_overlap_mentioned_p (out, op0)
18801 || reg_overlap_mentioned_p (out, op1))
18802 tmp = gen_reg_rtx (mode);
  /* Materialize 0/-1 from the carry flag via sbb.  */
18804 if (mode == DImode)
18805 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
18807 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
18808 flags, compare_op));
18812 if (code == GT || code == GE)
18813 code = reverse_condition (code);
18816 HOST_WIDE_INT tmp = ct;
18821 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
  /* Fold the constants into the 0/-1 mask with add/or/not/and.  */
18834 tmp = expand_simple_binop (mode, PLUS,
18836 copy_rtx (tmp), 1, OPTAB_DIRECT);
18847 tmp = expand_simple_binop (mode, IOR,
18849 copy_rtx (tmp), 1, OPTAB_DIRECT);
18851 else if (diff == -1 && ct)
18861 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
18863 tmp = expand_simple_binop (mode, PLUS,
18864 copy_rtx (tmp), GEN_INT (cf),
18865 copy_rtx (tmp), 1, OPTAB_DIRECT);
18873 * andl cf - ct, dest
18883 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
18886 tmp = expand_simple_binop (mode, AND,
18888 gen_int_mode (cf - ct, mode),
18889 copy_rtx (tmp), 1, OPTAB_DIRECT);
18891 tmp = expand_simple_binop (mode, PLUS,
18892 copy_rtx (tmp), GEN_INT (ct),
18893 copy_rtx (tmp), 1, OPTAB_DIRECT);
18896 if (!rtx_equal_p (tmp, out))
18897 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
  /* diff < 0: swap the arms and reverse the condition.  */
18904 enum machine_mode cmp_mode = GET_MODE (op0);
18907 tmp = ct, ct = cf, cf = tmp;
18910 if (SCALAR_FLOAT_MODE_P (cmp_mode))
18912 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
18914 /* We may be reversing unordered compare to normal compare, that
18915 is not valid in general (we may convert non-trapping condition
18916 to trapping one), however on i386 we currently emit all
18917 comparisons unordered. */
18918 compare_code = reverse_condition_maybe_unordered (compare_code);
18919 code = reverse_condition_maybe_unordered (code);
18923 compare_code = reverse_condition (compare_code);
18924 code = reverse_condition (code);
  /* Identify sign-bit compares against an integer constant.  */
18928 compare_code = UNKNOWN;
18929 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
18930 && CONST_INT_P (op1))
18932 if (op1 == const0_rtx
18933 && (code == LT || code == GE))
18934 compare_code = code;
18935 else if (op1 == constm1_rtx)
18939 else if (code == GT)
18944 /* Optimize dest = (op0 < 0) ? -1 : cf. */
18945 if (compare_code != UNKNOWN
18946 && GET_MODE (op0) == GET_MODE (out)
18947 && (cf == -1 || ct == -1))
18949 /* If lea code below could be used, only optimize
18950 if it results in a 2 insn sequence. */
18952 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
18953 || diff == 3 || diff == 5 || diff == 9)
18954 || (compare_code == LT && ct == -1)
18955 || (compare_code == GE && cf == -1))
18958 * notl op1 (if necessary)
18966 code = reverse_condition (code);
18969 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
18971 out = expand_simple_binop (mode, IOR,
18973 out, 1, OPTAB_DIRECT);
18974 if (out != operands[0])
18975 emit_move_insn (operands[0], out);
  /* lea-friendly diffs: compute cf + setcc*diff with lea scales.  */
18982 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
18983 || diff == 3 || diff == 5 || diff == 9)
18984 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
18986 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
18992 * lea cf(dest*(ct-cf)),dest
18996 * This also catches the degenerate setcc-only case.
19002 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
19005 /* On x86_64 the lea instruction operates on Pmode, so we need
19006 to get arithmetics done in proper mode to match. */
19008 tmp = copy_rtx (out);
19012 out1 = copy_rtx (out);
19013 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
19017 tmp = gen_rtx_PLUS (mode, tmp, out1);
19023 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
19026 if (!rtx_equal_p (tmp, out))
19029 out = force_operand (tmp, copy_rtx (out));
19031 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
19033 if (!rtx_equal_p (out, operands[0]))
19034 emit_move_insn (operands[0], copy_rtx (out));
19040 * General case: Jumpful:
19041 * xorl dest,dest cmpl op1, op2
19042 * cmpl op1, op2 movl ct, dest
19043 * setcc dest jcc 1f
19044 * decl dest movl cf, dest
19045 * andl (cf-ct),dest 1:
19048 * Size 20. Size 14.
19050 * This is reasonably steep, but branch mispredict costs are
19051 * high on modern cpus, so consider failing only if optimizing
19055 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
19056 && BRANCH_COST (optimize_insn_for_speed_p (),
19061 enum machine_mode cmp_mode = GET_MODE (op0);
19066 if (SCALAR_FLOAT_MODE_P (cmp_mode))
19068 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
19070 /* We may be reversing unordered compare to normal compare,
19071 that is not valid in general (we may convert non-trapping
19072 condition to trapping one), however on i386 we currently
19073 emit all comparisons unordered. */
19074 code = reverse_condition_maybe_unordered (code);
19078 code = reverse_condition (code);
19079 if (compare_code != UNKNOWN)
19080 compare_code = reverse_condition (compare_code);
19084 if (compare_code != UNKNOWN)
19086 /* notl op1 (if needed)
19091 For x < 0 (resp. x <= -1) there will be no notl,
19092 so if possible swap the constants to get rid of the
19094 True/false will be -1/0 while code below (store flag
19095 followed by decrement) is 0/-1, so the constants need
19096 to be exchanged once more. */
19098 if (compare_code == GE || !cf)
19100 code = reverse_condition (code);
19105 HOST_WIDE_INT tmp = cf;
19110 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
19114 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
  /* setcc produced 0/1; shift to 0/-1 then mask and offset.  */
19116 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
19118 copy_rtx (out), 1, OPTAB_DIRECT);
19121 out = expand_simple_binop (mode, AND, copy_rtx (out),
19122 gen_int_mode (cf - ct, mode),
19123 copy_rtx (out), 1, OPTAB_DIRECT);
19125 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
19126 copy_rtx (out), 1, OPTAB_DIRECT);
19127 if (!rtx_equal_p (out, operands[0]))
19128 emit_move_insn (operands[0], copy_rtx (out));
19134   if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
19136       /* Try a few things more with specific constants and a variable. */
19139       rtx var, orig_out, out, tmp;
19141       if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
19144       /* If one of the two operands is an interesting constant, load a
19145 constant with the above and mask it in with a logical operation. */
19147       if (CONST_INT_P (operands[2]))
19150 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
19151 operands[3] = constm1_rtx, op = and_optab;
19152 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
19153 operands[3] = const0_rtx, op = ior_optab;
19157       else if (CONST_INT_P (operands[3]))
19160 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
19161 operands[2] = constm1_rtx, op = and_optab;
  /* Guard the operand we overwrite (operands[2]), not operands[3],
     which is already known to be -1 here.  */
19162 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
19163 operands[2] = const0_rtx, op = ior_optab;
19170       orig_out = operands[0];
19171       tmp = gen_reg_rtx (mode);
19174       /* Recurse to get the constant loaded. */
19175       if (ix86_expand_int_movcc (operands) == 0)
19178       /* Mask in the interesting variable. */
19179       out = expand_binop (mode, op, var, tmp, orig_out, 0,
19181       if (!rtx_equal_p (out, orig_out))
19182 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
19188    * For comparison with above,
  /* Final fallback: a real cmove.  Force operands into forms the
     cmove patterns accept.  */
19198   if (! nonimmediate_operand (operands[2], mode))
19199     operands[2] = force_reg (mode, operands[2]);
19200   if (! nonimmediate_operand (operands[3], mode))
19201     operands[3] = force_reg (mode, operands[3]);
19203   if (! register_operand (operands[2], VOIDmode)
19205 || ! register_operand (operands[3], VOIDmode)))
19206     operands[2] = force_reg (mode, operands[2]);
19209       && ! register_operand (operands[3], VOIDmode))
19210     operands[3] = force_reg (mode, operands[3]);
19212   emit_insn (compare_seq);
19213   emit_insn (gen_rtx_SET (VOIDmode, operands[0],
19214 gen_rtx_IF_THEN_ELSE (mode,
19215 compare_op, operands[2],
19220 /* Swap, force into registers, or otherwise massage the two operands
19221 to an sse comparison with a mask result. Thus we differ a bit from
19222 ix86_prepare_fp_compare_args which expects to produce a flags result.
19224 The DEST operand exists to help determine whether to commute commutative
19225 operators. The POP0/POP1 operands are updated in place. The new
19226 comparison code is returned, or UNKNOWN if not implementable. */
19228 static enum rtx_code
19229 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
19230 rtx *pop0, rtx *pop1)
  /* NOTE(review): the switch skeleton and operand swaps are partly
     elided from this excerpt.  */
19238       /* AVX supports all the needed comparisons. */
19241       /* We have no LTGT as an operator. We could implement it with
19242 NE & ORDERED, but this requires an extra temporary. It's
19243 not clear that it's worth it. */
19250       /* These are supported directly. */
19257       /* AVX has 3 operand comparisons, no need to swap anything. */
19260       /* For commutative operators, try to canonicalize the destination
19261 operand to be first in the comparison - this helps reload to
19262 avoid extra moves. */
19263       if (!dest || !rtx_equal_p (dest, *pop1))
19271       /* These are not supported directly before AVX, and furthermore
19272 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
19273 comparison operands to transform into something that is
19278       code = swap_condition (code);
19282       gcc_unreachable ();
19288 /* Detect conditional moves that exactly match min/max operational
19289 semantics. Note that this is IEEE safe, as long as we don't
19290 interchange the operands.
19292 Returns FALSE if this conditional move doesn't match a MIN/MAX,
19293 and TRUE if the operation is successful and instructions are emitted. */
19296 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
19297 rtx cmp_op1, rtx if_true, rtx if_false)
19299   enum machine_mode mode;
  /* Only LT and UNGE are recognized; UNGE swaps the arms (swap partly
     elided from this excerpt) so both reduce to the LT shape.  */
19305   else if (code == UNGE)
19308       if_true = if_false;
  /* Match (a<b) ? a : b (min) or (a<b) ? b : a (max).  */
19314   if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
19316   else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
19321   mode = GET_MODE (dest);
19323   /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
19324 but MODE may be a vector mode and thus not appropriate. */
19325   if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
  /* IEEE-correct path: emit an UNSPEC min/max that keeps operand
     order significant (NaN/signed-zero semantics).  */
19327       int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
19330       if_true = force_reg (mode, if_true);
19331       v = gen_rtvec (2, if_true, if_false);
19332       tmp = gen_rtx_UNSPEC (mode, v, u);
  /* Fast-math path: plain SMIN/SMAX is fine.  */
19336       code = is_min ? SMIN : SMAX;
19337       tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
19340   emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
19344 /* Expand an sse vector comparison. Return the register with the result. */
/* Emits DEST = CODE(CMP_OP0, CMP_OP1) as a full-width mask.  OP_TRUE /
   OP_FALSE are only inspected for overlap with DEST; a fresh register
   is used when they alias (condition partly elided from this excerpt).  */
19347 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
19348 rtx op_true, rtx op_false)
19350   enum machine_mode mode = GET_MODE (dest);
19351   enum machine_mode cmp_mode = GET_MODE (cmp_op0);
  /* First operand must be a register; second may stay in memory.  */
19354   cmp_op0 = force_reg (cmp_mode, cmp_op0);
19355   if (!nonimmediate_operand (cmp_op1, cmp_mode))
19356     cmp_op1 = force_reg (cmp_mode, cmp_op1);
19359       || reg_overlap_mentioned_p (dest, op_true)
19360       || reg_overlap_mentioned_p (dest, op_false))
19361     dest = gen_reg_rtx (mode);
19363   x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
  /* If the mask mode differs from DEST's mode, convert the result.  */
19364   if (cmp_mode != mode)
19366       x = force_reg (cmp_mode, x);
19367       convert_move (dest, x, false);
19370     emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19375 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
19376 operations. This is used for both scalar and vector conditional moves. */
/* CMP is a full-width mask (all-ones per selected element).  Special
   cases avoid work for constant arms; SSE4.1/AVX targets use blendv
   (dispatch switch partly elided from this excerpt); the generic
   fallback is (CMP & OP_TRUE) | (~CMP & OP_FALSE).  */
19379 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
19381   enum machine_mode mode = GET_MODE (dest);
  /* TRUE = all-ones, FALSE = zero: the mask itself is the answer.  */
19384   if (vector_all_ones_operand (op_true, mode)
19385       && rtx_equal_p (op_false, CONST0_RTX (mode)))
19387       emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
  /* FALSE = zero: mask AND true-arm.  */
19389   else if (op_false == CONST0_RTX (mode))
19391       op_true = force_reg (mode, op_true);
19392       x = gen_rtx_AND (mode, cmp, op_true);
19393       emit_insn (gen_rtx_SET (VOIDmode, dest, x));
  /* TRUE = zero: inverted mask AND false-arm.  */
19395   else if (op_true == CONST0_RTX (mode))
19397       op_false = force_reg (mode, op_false);
19398       x = gen_rtx_NOT (mode, cmp);
19399       x = gen_rtx_AND (mode, x, op_false);
19400       emit_insn (gen_rtx_SET (VOIDmode, dest, x));
  /* TRUE = all-ones (integer modes): mask OR false-arm.  */
19402   else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode))
19404       op_false = force_reg (mode, op_false);
19405       x = gen_rtx_IOR (mode, cmp, op_false);
19406       emit_insn (gen_rtx_SET (VOIDmode, dest, x));
  /* XOP vpcmov handles the general case in one insn.  */
19408   else if (TARGET_XOP)
19410       op_true = force_reg (mode, op_true);
19412       if (!nonimmediate_operand (op_false, mode))
19413 op_false = force_reg (mode, op_false);
19415       emit_insn (gen_rtx_SET (mode, dest,
19416 gen_rtx_IF_THEN_ELSE (mode, cmp,
  /* Pick a blendv variant per mode; GEN stays NULL if none applies.  */
19422       rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
19424       if (!nonimmediate_operand (op_true, mode))
19425 op_true = force_reg (mode, op_true);
19427       op_false = force_reg (mode, op_false);
19433 gen = gen_sse4_1_blendvps;
19437 gen = gen_sse4_1_blendvpd;
  /* Integer 128-bit modes funnel through pblendvb on V16QI.  */
19445 gen = gen_sse4_1_pblendvb;
19446 dest = gen_lowpart (V16QImode, dest);
19447 op_false = gen_lowpart (V16QImode, op_false);
19448 op_true = gen_lowpart (V16QImode, op_true);
19449 cmp = gen_lowpart (V16QImode, cmp);
19454 gen = gen_avx_blendvps256;
19458 gen = gen_avx_blendvpd256;
  /* Integer 256-bit modes funnel through vpblendvb on V32QI.  */
19466 gen = gen_avx2_pblendvb;
19467 dest = gen_lowpart (V32QImode, dest);
19468 op_false = gen_lowpart (V32QImode, op_false);
19469 op_true = gen_lowpart (V32QImode, op_true);
19470 cmp = gen_lowpart (V32QImode, cmp);
19478 emit_insn (gen (dest, op_false, op_true, cmp));
  /* Generic and/andn/or fallback.  */
19481 op_true = force_reg (mode, op_true);
19483 t2 = gen_reg_rtx (mode);
19485 t3 = gen_reg_rtx (mode);
19489 x = gen_rtx_AND (mode, op_true, cmp);
19490 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
19492 x = gen_rtx_NOT (mode, cmp);
19493 x = gen_rtx_AND (mode, x, op_false);
19494 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
19496 x = gen_rtx_IOR (mode, t3, t2);
19497 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19502 /* Expand a floating-point conditional move. Return true if successful. */
/* operands[0] = operands[1](cmp of op0,op1) ? operands[2] : operands[3].
   SSE math goes through the mask/blend machinery; otherwise an fcmov is
   emitted, synthesizing a setcc+NE compare when the condition is not
   directly representable by fcmov.
   NOTE(review): some intermediate lines are elided from this excerpt.  */
19505 ix86_expand_fp_movcc (rtx operands[])
19507   enum machine_mode mode = GET_MODE (operands[0]);
19508   enum rtx_code code = GET_CODE (operands[1]);
19509   rtx tmp, compare_op;
19510   rtx op0 = XEXP (operands[1], 0);
19511   rtx op1 = XEXP (operands[1], 1);
19513   if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
19515       enum machine_mode cmode;
19517       /* Since we've no cmove for sse registers, don't force bad register
19518 allocation just to gain access to it. Deny movcc when the
19519 comparison mode doesn't match the move mode. */
19520       cmode = GET_MODE (op0);
19521       if (cmode == VOIDmode)
19522 cmode = GET_MODE (op1);
19526       code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
19527       if (code == UNKNOWN)
  /* Try the min/max shortcut first; fall back to cmp + movcc.  */
19530       if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
19531 operands[2], operands[3]))
19534       tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
19535 operands[2], operands[3]);
19536       ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
19540   /* The floating point conditional move instructions don't directly
19541 support conditions resulting from a signed integer comparison. */
19543   compare_op = ix86_expand_compare (code, op0, op1);
19544   if (!fcmov_comparison_operator (compare_op, VOIDmode))
  /* Materialize the condition as a QImode 0/1 and test that != 0,
     which fcmov can consume.  */
19546       tmp = gen_reg_rtx (QImode);
19547       ix86_expand_setcc (tmp, code, op0, op1);
19549       compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
19552   emit_insn (gen_rtx_SET (VOIDmode, operands[0],
19553 gen_rtx_IF_THEN_ELSE (mode, compare_op,
19554 operands[2], operands[3])));
19559 /* Expand a floating-point vector conditional move; a vcond operation
19560 rather than a movcc operation. */
/* operands[0] = operands[3](cmp of operands[4],[5]) ? operands[1]
   : operands[2].  LTGT/UNEQ are synthesized from two compares combined
   with AND/IOR (the `code = AND/IOR` assignments are elided from this
   excerpt); other codes use one compare plus the movcc machinery.  */
19563 ix86_expand_fp_vcond (rtx operands[])
19565   enum rtx_code code = GET_CODE (operands[3]);
19568   code = ix86_prepare_sse_fp_compare_args (operands[0], code,
19569 &operands[4], &operands[5]);
19570   if (code == UNKNOWN)
19573   switch (GET_CODE (operands[3]))
  /* LTGT = ORDERED & NE.  */
19576       temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
19577 operands[5], operands[0], operands[0]);
19578       cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
19579 operands[5], operands[1], operands[2]);
  /* UNEQ = UNORDERED | EQ.  */
19583       temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
19584 operands[5], operands[0], operands[0]);
19585       cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
19586 operands[5], operands[1], operands[2]);
19590       gcc_unreachable ();
  /* `code` here holds the AND/IOR combiner set in the switch above.  */
19592       cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
19594       ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
  /* Single-compare path: min/max shortcut, else compare + select.  */
19598   if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
19599 operands[5], operands[1], operands[2]))
19602   cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
19603 operands[1], operands[2]);
19604   ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
19608 /* Expand a signed/unsigned integral vector conditional move. */
/* operands[0] = operands[3](cmp of operands[4],[5]) ? operands[1]
   : operands[2].  Hardware only provides EQ/GT (and no unsigned
   compares), so the condition is canonicalized and unsigned compares
   are rewritten via sign-bit flipping or saturating subtraction.
   NOTE(review): many intermediate lines are elided from this excerpt.  */
19611 ix86_expand_int_vcond (rtx operands[])
19613   enum machine_mode data_mode = GET_MODE (operands[0]);
19614   enum machine_mode mode = GET_MODE (operands[4]);
19615   enum rtx_code code = GET_CODE (operands[3]);
  /* NEGATE set means the arms are swapped (operands[1+negate] below).  */
19616   bool negate = false;
19619   cop0 = operands[4];
19620   cop1 = operands[5];
19622   /* XOP supports all of the comparisons on all vector int types. */
19625   /* Canonicalize the comparison to EQ, GT, GTU. */
  /* NE/LE/LEU: reverse and negate.  */
19636 code = reverse_condition (code);
  /* GE/GEU: reverse+negate, then fall through to the swap below.  */
19642 code = reverse_condition (code);
  /* LT/LTU: swap condition and operands.  */
19648 code = swap_condition (code);
19649 x = cop0, cop0 = cop1, cop1 = x;
19653       gcc_unreachable ();
19656   /* Only SSE4.1/SSE4.2 supports V2DImode. */
19657   if (mode == V2DImode)
19662 /* SSE4.1 supports EQ. */
19663 if (!TARGET_SSE4_1)
19669 /* SSE4.2 supports GT/GTU. */
19670 if (!TARGET_SSE4_2)
19675 gcc_unreachable ();
19679       /* Unsigned parallel compare is not supported by the hardware.
19680 Play some tricks to turn this into a signed comparison
19684       cop0 = force_reg (mode, cop0);
19694     rtx (*gen_sub3) (rtx, rtx, rtx);
19698       case V8SImode: gen_sub3 = gen_subv8si3; break;
19699       case V4DImode: gen_sub3 = gen_subv4di3; break;
19700       case V4SImode: gen_sub3 = gen_subv4si3; break;
19701       case V2DImode: gen_sub3 = gen_subv2di3; break;
19703 gcc_unreachable ();
19705     /* Subtract (-(INT MAX) - 1) from both operands to make
  /* Flipping the sign bit of both sides turns unsigned GT into
     signed GT.  */
19707     mask = ix86_build_signbit_mask (mode, true, false);
19708     t1 = gen_reg_rtx (mode);
19709     emit_insn (gen_sub3 (t1, cop0, mask));
19711     t2 = gen_reg_rtx (mode);
19712     emit_insn (gen_sub3 (t2, cop1, mask));
19724     /* Perform a parallel unsigned saturating subtraction. */
  /* a GTU b  <=>  (a -us b) != 0; combined with the EQ compare
     below against zero (negation elided from this excerpt).  */
19725     x = gen_reg_rtx (mode);
19726     emit_insn (gen_rtx_SET (VOIDmode, x,
19727 gen_rtx_US_MINUS (mode, cop0, cop1)));
19730     cop1 = CONST0_RTX (mode);
19736     gcc_unreachable ();
19741   /* Allow the comparison to be done in one mode, but the movcc to
19742 happen in another mode. */
19743   if (data_mode == mode)
19745       x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
19746 operands[1+negate], operands[2-negate]);
  /* Same size, different mode: compare in MODE, view as DATA_MODE.  */
19750       gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
19751       x = ix86_expand_sse_cmp (gen_lowpart (mode, operands[0]),
19753 operands[1+negate], operands[2-negate]);
19754       x = gen_lowpart (data_mode, x);
19757   ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
19758 operands[2-negate]);
19762 /* Expand a variable vector permutation. */
19765 ix86_expand_vec_perm (rtx operands[])
19767 rtx target = operands[0];
19768 rtx op0 = operands[1];
19769 rtx op1 = operands[2];
19770 rtx mask = operands[3];
19771 rtx t1, t2, t3, t4, vt, vt2, vec[32];
19772 enum machine_mode mode = GET_MODE (op0);
19773 enum machine_mode maskmode = GET_MODE (mask);
19775 bool one_operand_shuffle = rtx_equal_p (op0, op1);
19777 /* Number of elements in the vector. */
19778 w = GET_MODE_NUNITS (mode);
19779 e = GET_MODE_UNIT_SIZE (mode);
19780 gcc_assert (w <= 32);
19784 if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
19786 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
19787 an constant shuffle operand. With a tiny bit of effort we can
19788 use VPERMD instead. A re-interpretation stall for V4DFmode is
19789 unfortunate but there's no avoiding it.
19790 Similarly for V16HImode we don't have instructions for variable
19791 shuffling, while for V32QImode we can use after preparing suitable
19792 masks vpshufb; vpshufb; vpermq; vpor. */
19794 if (mode == V16HImode)
19796 maskmode = mode = V32QImode;
19802 maskmode = mode = V8SImode;
19806 t1 = gen_reg_rtx (maskmode);
19808 /* Replicate the low bits of the V4DImode mask into V8SImode:
19810 t1 = { A A B B C C D D }. */
19811 for (i = 0; i < w / 2; ++i)
19812 vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
19813 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
19814 vt = force_reg (maskmode, vt);
19815 mask = gen_lowpart (maskmode, mask);
19816 if (maskmode == V8SImode)
19817 emit_insn (gen_avx2_permvarv8si (t1, vt, mask));
19819 emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
19821 /* Multiply the shuffle indicies by two. */
19822 t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
19825 /* Add one to the odd shuffle indicies:
19826 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
19827 for (i = 0; i < w / 2; ++i)
19829 vec[i * 2] = const0_rtx;
19830 vec[i * 2 + 1] = const1_rtx;
19832 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
19833 vt = force_const_mem (maskmode, vt);
19834 t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
19837 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
19838 operands[3] = mask = t1;
19839 target = gen_lowpart (mode, target);
19840 op0 = gen_lowpart (mode, op0);
19841 op1 = gen_lowpart (mode, op1);
19847 /* The VPERMD and VPERMPS instructions already properly ignore
19848 the high bits of the shuffle elements. No need for us to
19849 perform an AND ourselves. */
19850 if (one_operand_shuffle)
19851 emit_insn (gen_avx2_permvarv8si (target, mask, op0));
19854 t1 = gen_reg_rtx (V8SImode);
19855 t2 = gen_reg_rtx (V8SImode);
19856 emit_insn (gen_avx2_permvarv8si (t1, mask, op0));
19857 emit_insn (gen_avx2_permvarv8si (t2, mask, op1));
19863 mask = gen_lowpart (V8SFmode, mask);
19864 if (one_operand_shuffle)
19865 emit_insn (gen_avx2_permvarv8sf (target, mask, op0));
19868 t1 = gen_reg_rtx (V8SFmode);
19869 t2 = gen_reg_rtx (V8SFmode);
19870 emit_insn (gen_avx2_permvarv8sf (t1, mask, op0));
19871 emit_insn (gen_avx2_permvarv8sf (t2, mask, op1));
19877 /* By combining the two 128-bit input vectors into one 256-bit
19878 input vector, we can use VPERMD and VPERMPS for the full
19879 two-operand shuffle. */
19880 t1 = gen_reg_rtx (V8SImode);
19881 t2 = gen_reg_rtx (V8SImode);
19882 emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
19883 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
19884 emit_insn (gen_avx2_permvarv8si (t1, t2, t1));
19885 emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
19889 t1 = gen_reg_rtx (V8SFmode);
19890 t2 = gen_reg_rtx (V8SFmode);
19891 mask = gen_lowpart (V4SFmode, mask);
19892 emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
19893 emit_insn (gen_avx_vec_concatv8sf (t2, mask, mask));
19894 emit_insn (gen_avx2_permvarv8sf (t1, t2, t1));
19895 emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
19899 t1 = gen_reg_rtx (V32QImode);
19900 t2 = gen_reg_rtx (V32QImode);
19901 t3 = gen_reg_rtx (V32QImode);
19902 vt2 = GEN_INT (128);
19903 for (i = 0; i < 32; i++)
19905 vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
19906 vt = force_reg (V32QImode, vt);
19907 for (i = 0; i < 32; i++)
19908 vec[i] = i < 16 ? vt2 : const0_rtx;
19909 vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
19910 vt2 = force_reg (V32QImode, vt2);
19911 /* From mask create two adjusted masks, which contain the same
19912 bits as mask in the low 7 bits of each vector element.
19913 The first mask will have the most significant bit clear
19914 if it requests element from the same 128-bit lane
19915 and MSB set if it requests element from the other 128-bit lane.
19916 The second mask will have the opposite values of the MSB,
19917 and additionally will have its 128-bit lanes swapped.
19918 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
19919 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
19920 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
19921 stands for other 12 bytes. */
19922 /* The bit whether element is from the same lane or the other
19923 lane is bit 4, so shift it up by 3 to the MSB position. */
19924 emit_insn (gen_ashlv4di3 (gen_lowpart (V4DImode, t1),
19925 gen_lowpart (V4DImode, mask),
19927 /* Clear MSB bits from the mask just in case it had them set. */
19928 emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
19929 /* After this t1 will have MSB set for elements from other lane. */
19930 emit_insn (gen_xorv32qi3 (t1, t1, vt2));
19931 /* Clear bits other than MSB. */
19932 emit_insn (gen_andv32qi3 (t1, t1, vt));
19933 /* Or in the lower bits from mask into t3. */
19934 emit_insn (gen_iorv32qi3 (t3, t1, t2));
19935 /* And invert MSB bits in t1, so MSB is set for elements from the same
19937 emit_insn (gen_xorv32qi3 (t1, t1, vt));
19938 /* Swap 128-bit lanes in t3. */
19939 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t3),
19940 gen_lowpart (V4DImode, t3),
19941 const2_rtx, GEN_INT (3),
19942 const0_rtx, const1_rtx));
19943 /* And or in the lower bits from mask into t1. */
19944 emit_insn (gen_iorv32qi3 (t1, t1, t2));
19945 if (one_operand_shuffle)
19947 /* Each of these shuffles will put 0s in places where
19948 element from the other 128-bit lane is needed, otherwise
19949 will shuffle in the requested value. */
19950 emit_insn (gen_avx2_pshufbv32qi3 (t3, op0, t3));
19951 emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
19952 /* For t3 the 128-bit lanes are swapped again. */
19953 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t3),
19954 gen_lowpart (V4DImode, t3),
19955 const2_rtx, GEN_INT (3),
19956 const0_rtx, const1_rtx));
19957 /* And oring both together leads to the result. */
19958 emit_insn (gen_iorv32qi3 (target, t1, t3));
19962 t4 = gen_reg_rtx (V32QImode);
19963 /* Similarly to the above one_operand_shuffle code,
19964 just for repeated twice for each operand. merge_two:
19965 code will merge the two results together. */
19966 emit_insn (gen_avx2_pshufbv32qi3 (t4, op0, t3));
19967 emit_insn (gen_avx2_pshufbv32qi3 (t3, op1, t3));
19968 emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
19969 emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
19970 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t4),
19971 gen_lowpart (V4DImode, t4),
19972 const2_rtx, GEN_INT (3),
19973 const0_rtx, const1_rtx));
19974 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t3),
19975 gen_lowpart (V4DImode, t3),
19976 const2_rtx, GEN_INT (3),
19977 const0_rtx, const1_rtx));
19978 emit_insn (gen_iorv32qi3 (t4, t2, t4));
19979 emit_insn (gen_iorv32qi3 (t3, t1, t3));
19985 gcc_assert (GET_MODE_SIZE (mode) <= 16);
19992 /* The XOP VPPERM insn supports three inputs. By ignoring the
19993 one_operand_shuffle special case, we avoid creating another
19994 set of constant vectors in memory. */
19995 one_operand_shuffle = false;
19997 /* mask = mask & {2*w-1, ...} */
19998 vt = GEN_INT (2*w - 1);
20002 /* mask = mask & {w-1, ...} */
20003 vt = GEN_INT (w - 1);
20006 for (i = 0; i < w; i++)
20008 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
20009 mask = expand_simple_binop (maskmode, AND, mask, vt,
20010 NULL_RTX, 0, OPTAB_DIRECT);
20012 /* For non-QImode operations, convert the word permutation control
20013 into a byte permutation control. */
20014 if (mode != V16QImode)
20016 mask = expand_simple_binop (maskmode, ASHIFT, mask,
20017 GEN_INT (exact_log2 (e)),
20018 NULL_RTX, 0, OPTAB_DIRECT);
20020 /* Convert mask to vector of chars. */
20021 mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
20023 /* Replicate each of the input bytes into byte positions:
20024 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
20025 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
20026 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
20027 for (i = 0; i < 16; ++i)
20028 vec[i] = GEN_INT (i/e * e);
20029 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
20030 vt = force_const_mem (V16QImode, vt);
20032 emit_insn (gen_xop_pperm (mask, mask, mask, vt));
20034 emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
20036 /* Convert it into the byte positions by doing
20037 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
20038 for (i = 0; i < 16; ++i)
20039 vec[i] = GEN_INT (i % e);
20040 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
20041 vt = force_const_mem (V16QImode, vt);
20042 emit_insn (gen_addv16qi3 (mask, mask, vt));
20045 /* The actual shuffle operations all operate on V16QImode. */
20046 op0 = gen_lowpart (V16QImode, op0);
20047 op1 = gen_lowpart (V16QImode, op1);
20048 target = gen_lowpart (V16QImode, target);
20052 emit_insn (gen_xop_pperm (target, op0, op1, mask));
20054 else if (one_operand_shuffle)
20056 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
20063 /* Shuffle the two input vectors independently. */
20064 t1 = gen_reg_rtx (V16QImode);
20065 t2 = gen_reg_rtx (V16QImode);
20066 emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
20067 emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
20070 /* Then merge them together. The key is whether any given control
20071 element contained a bit set that indicates the second word. */
20072 mask = operands[3];
20074 if (maskmode == V2DImode && !TARGET_SSE4_1)
20076 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
20077 more shuffle to convert the V2DI input mask into a V4SI
20078 input mask. At which point the masking that expand_int_vcond
20079 will work as desired. */
20080 rtx t3 = gen_reg_rtx (V4SImode);
20081 emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
20082 const0_rtx, const0_rtx,
20083 const2_rtx, const2_rtx));
20085 maskmode = V4SImode;
20089 for (i = 0; i < w; i++)
20091 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
20092 vt = force_reg (maskmode, vt);
20093 mask = expand_simple_binop (maskmode, AND, mask, vt,
20094 NULL_RTX, 0, OPTAB_DIRECT);
20096 xops[0] = gen_lowpart (mode, operands[0]);
20097 xops[1] = gen_lowpart (mode, t2);
20098 xops[2] = gen_lowpart (mode, t1);
20099 xops[3] = gen_rtx_EQ (maskmode, mask, vt);
20102 ok = ix86_expand_int_vcond (xops);
20107 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
20108 true if we should do zero extension, else sign extension. HIGH_P is
20109 true if we want the N/2 high elements, else the low elements. */
20112 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
20114 enum machine_mode imode = GET_MODE (operands[1]);
   /* One-operand path: choose a widening sign/zero extend expander.
      For 256-bit inputs also choose the extract that yields the
      requested high or low half; EXTRACT stays NULL otherwise.  */
20119 rtx (*unpack)(rtx, rtx);
20120 rtx (*extract)(rtx, rtx) = NULL;
20121 enum machine_mode halfmode = BLKmode;
   /* AVX2 expanders, V32QI -> V16HI.  */
20127 unpack = gen_avx2_zero_extendv16qiv16hi2;
20129 unpack = gen_avx2_sign_extendv16qiv16hi2;
20130 halfmode = V16QImode;
20132 = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
   /* AVX2 expanders, V16HI -> V8SI.  */
20136 unpack = gen_avx2_zero_extendv8hiv8si2;
20138 unpack = gen_avx2_sign_extendv8hiv8si2;
20139 halfmode = V8HImode;
20141 = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
   /* AVX2 expanders, V8SI -> V4DI.  */
20145 unpack = gen_avx2_zero_extendv4siv4di2;
20147 unpack = gen_avx2_sign_extendv4siv4di2;
20148 halfmode = V4SImode;
20150 = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
   /* SSE4.1 pmovzx/pmovsx style expanders for 128-bit inputs.  */
20154 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
20156 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
20160 unpack = gen_sse4_1_zero_extendv4hiv4si2;
20162 unpack = gen_sse4_1_sign_extendv4hiv4si2;
20166 unpack = gen_sse4_1_zero_extendv2siv2di2;
20168 unpack = gen_sse4_1_sign_extendv2siv2di2;
20171 gcc_unreachable ();
   /* 256-bit source: extract the requested half first, then widen.  */
20174 if (GET_MODE_SIZE (imode) == 32)
20176 tmp = gen_reg_rtx (halfmode);
20177 emit_insn (extract (tmp, operands[1]));
20181 /* Shift higher 8 bytes to lower 8 bytes. */
20182 tmp = gen_reg_rtx (imode);
20183 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, tmp),
20184 gen_lowpart (V1TImode, operands[1]),
20190 emit_insn (unpack (operands[0], tmp));
   /* Fallback: synthesize the extension with two-operand interleave
      (punpckh/punpckl) expanders.  */
20194 rtx (*unpack)(rtx, rtx, rtx);
20200 unpack = gen_vec_interleave_highv16qi;
20202 unpack = gen_vec_interleave_lowv16qi;
20206 unpack = gen_vec_interleave_highv8hi;
20208 unpack = gen_vec_interleave_lowv8hi;
20212 unpack = gen_vec_interleave_highv4si;
20214 unpack = gen_vec_interleave_lowv4si;
20217 gcc_unreachable ();
20220 dest = gen_lowpart (imode, operands[0]);
   /* For zero extension interleave with zero; for sign extension
      interleave with a mask that is all-ones exactly where OP[1] is
      negative (computed as 0 > OP[1]).  */
20223 tmp = force_reg (imode, CONST0_RTX (imode));
20225 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
20226 operands[1], pc_rtx, pc_rtx);
20228 emit_insn (unpack (dest, operands[1], tmp));
20232 /* Expand conditional increment or decrement using adc/sbb instructions.
20233 The default case using setcc followed by the conditional move can be
20234 done by generic code. */
20236 ix86_expand_int_addcc (rtx operands[])
20238 enum rtx_code code = GET_CODE (operands[1]);
20240 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
20242 rtx val = const0_rtx;
20243 bool fpcmp = false;
20244 enum machine_mode mode;
20245 rtx op0 = XEXP (operands[1], 0);
20246 rtx op1 = XEXP (operands[1], 1);
   /* Only increment/decrement by exactly one is handled here;
      anything else falls back to the generic expansion.  */
20248 if (operands[3] != const1_rtx
20249 && operands[3] != constm1_rtx)
   /* The comparison must be representable via the carry flag.  */
20251 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
20253 code = GET_CODE (compare_op);
20255 flags = XEXP (compare_op, 0);
20257 if (GET_MODE (flags) == CCFPmode
20258 || GET_MODE (flags) == CCFPUmode)
   /* Floating-point flag modes: translate the condition code to its
      integer flag equivalent before selecting adc vs. sbb.  */
20261 code = ix86_fp_compare_code_to_integer (code);
20268 PUT_CODE (compare_op,
20269 reverse_condition_maybe_unordered
20270 (GET_CODE (compare_op)));
20272 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
20275 mode = GET_MODE (operands[0]);
20277 /* Construct either adc or sbb insn. */
   /* Use sbb when LTU coincides with a -1 addend (carry subtracts),
      adc otherwise; the generator is selected by MODE below.  */
20278 if ((code == LTU) == (operands[3] == constm1_rtx))
20283 insn = gen_subqi3_carry;
20286 insn = gen_subhi3_carry;
20289 insn = gen_subsi3_carry;
20292 insn = gen_subdi3_carry;
20295 gcc_unreachable ();
20303 insn = gen_addqi3_carry;
20306 insn = gen_addhi3_carry;
20309 insn = gen_addsi3_carry;
20312 insn = gen_adddi3_carry;
20315 gcc_unreachable ();
20318 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
20324 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
20325 but works for floating point parameters and nonoffsetable memories.
20326 For pushes, it returns just stack offsets; the values will be saved
20327 in the right order. Maximally three parts are generated. */
20330 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
   /* Number of 32-bit (or, on 64-bit, 64-bit) pieces OPERAND splits
      into; XFmode is special-cased to three 32-bit words.  */
20335 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
20337 size = (GET_MODE_SIZE (mode) + 4) / 8;
20339 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
20340 gcc_assert (size >= 2 && size <= 4);
20342 /* Optimize constant pool reference to immediates. This is used by fp
20343 moves, that force all constants to memory to allow combining. */
20344 if (MEM_P (operand) && MEM_READONLY_P (operand))
20346 rtx tmp = maybe_get_pool_constant (operand);
20351 if (MEM_P (operand) && !offsettable_memref_p (operand))
20353 /* The only non-offsetable memories we handle are pushes. */
20354 int ok = push_operand (operand, VOIDmode);
20358 operand = copy_rtx (operand);
20359 PUT_MODE (operand, Pmode);
20360 parts[0] = parts[1] = parts[2] = parts[3] = operand;
20364 if (GET_CODE (operand) == CONST_VECTOR)
20366 enum machine_mode imode = int_mode_for_mode (mode);
20367 /* Caution: if we looked through a constant pool memory above,
20368 the operand may actually have a different mode now. That's
20369 ok, since we want to pun this all the way back to an integer. */
20370 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
20371 gcc_assert (operand != NULL);
20377 if (mode == DImode)
20378 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
   /* Wider modes: split by hand into SImode pieces.  */
20383 if (REG_P (operand))
20385 gcc_assert (reload_completed);
20386 for (i = 0; i < size; i++)
20387 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
20389 else if (offsettable_memref_p (operand))
20391 operand = adjust_address (operand, SImode, 0);
20392 parts[0] = operand;
20393 for (i = 1; i < size; i++)
20394 parts[i] = adjust_address (operand, SImode, 4 * i);
20396 else if (GET_CODE (operand) == CONST_DOUBLE)
20401 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
   /* Convert the FP constant to its target representation and build
      immediate SImode parts from the resulting words.  */
20405 real_to_target (l, &r, mode);
20406 parts[3] = gen_int_mode (l[3], SImode);
20407 parts[2] = gen_int_mode (l[2], SImode);
20410 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
20411 parts[2] = gen_int_mode (l[2], SImode);
20414 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
20417 gcc_unreachable ();
20419 parts[1] = gen_int_mode (l[1], SImode);
20420 parts[0] = gen_int_mode (l[0], SImode);
20423 gcc_unreachable ();
   /* 64-bit target: pieces are DImode, except XFmode's upper part.  */
20428 if (mode == TImode)
20429 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
20430 if (mode == XFmode || mode == TFmode)
20432 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
20433 if (REG_P (operand))
20435 gcc_assert (reload_completed);
20436 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
20437 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
20439 else if (offsettable_memref_p (operand))
20441 operand = adjust_address (operand, DImode, 0);
20442 parts[0] = operand;
20443 parts[1] = adjust_address (operand, upper_mode, 8);
20445 else if (GET_CODE (operand) == CONST_DOUBLE)
20450 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
20451 real_to_target (l, &r, mode);
20453 /* Do not use shift by 32 to avoid warning on 32bit systems. */
20454 if (HOST_BITS_PER_WIDE_INT >= 64)
20457 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
20458 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
20461 parts[0] = immed_double_const (l[0], l[1], DImode);
20463 if (upper_mode == SImode)
20464 parts[1] = gen_int_mode (l[2], SImode);
20465 else if (HOST_BITS_PER_WIDE_INT >= 64)
20468 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
20469 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
20472 parts[1] = immed_double_const (l[2], l[3], DImode);
20475 gcc_unreachable ();
20482 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
20483 Return false when normal moves are needed; true when all required
20484 insns have been emitted. Operands 2-4 contain the input values
20485 in the correct order; operands 5-7 contain the output values. */
20488 ix86_split_long_move (rtx operands[])
20493 int collisions = 0;
20494 enum machine_mode mode = GET_MODE (operands[0]);
20495 bool collisionparts[4];
20497 /* The DFmode expanders may ask us to move double.
20498 For 64bit target this is single move. By hiding the fact
20499 here we simplify i386.md splitters. */
20500 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
20502 /* Optimize constant pool reference to immediates. This is used by
20503 fp moves, that force all constants to memory to allow combining. */
20505 if (MEM_P (operands[1])
20506 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
20507 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
20508 operands[1] = get_pool_constant (XEXP (operands[1], 0));
20509 if (push_operand (operands[0], VOIDmode))
20511 operands[0] = copy_rtx (operands[0]);
20512 PUT_MODE (operands[0], Pmode);
20515 operands[0] = gen_lowpart (DImode, operands[0]);
20516 operands[1] = gen_lowpart (DImode, operands[1]);
20517 emit_move_insn (operands[0], operands[1]);
20521 /* The only non-offsettable memory we handle is push. */
20522 if (push_operand (operands[0], VOIDmode))
20525 gcc_assert (!MEM_P (operands[0])
20526 || offsettable_memref_p (operands[0]));
   /* PART[1][*] are the source pieces, PART[0][*] the destination.  */
20528 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
20529 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
20531 /* When emitting push, take care for source operands on the stack. */
20532 if (push && MEM_P (operands[1])
20533 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
20535 rtx src_base = XEXP (part[1][nparts - 1], 0);
20537 /* Compensate for the stack decrement by 4. */
20538 if (!TARGET_64BIT && nparts == 3
20539 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
20540 src_base = plus_constant (src_base, 4);
20542 /* src_base refers to the stack pointer and is
20543 automatically decreased by emitted push. */
20544 for (i = 0; i < nparts; i++)
20545 part[1][i] = change_address (part[1][i],
20546 GET_MODE (part[1][i]), src_base);
20549 /* We need to do copy in the right order in case an address register
20550 of the source overlaps the destination. */
20551 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
20555 for (i = 0; i < nparts; i++)
20558 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
20559 if (collisionparts[i])
20563 /* Collision in the middle part can be handled by reordering. */
20564 if (collisions == 1 && nparts == 3 && collisionparts [1])
20566 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
20567 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
20569 else if (collisions == 1
20571 && (collisionparts [1] || collisionparts [2]))
20573 if (collisionparts [1])
20575 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
20576 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
20580 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
20581 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
20585 /* If there are more collisions, we can't handle it by reordering.
20586 Do an lea to the last part and use only one colliding move. */
20587 else if (collisions > 1)
20593 base = part[0][nparts - 1];
20595 /* Handle the case when the last part isn't valid for lea.
20596 Happens in 64-bit mode storing the 12-byte XFmode. */
20597 if (GET_MODE (base) != Pmode)
20598 base = gen_rtx_REG (Pmode, REGNO (base));
20600 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
20601 part[1][0] = replace_equiv_address (part[1][0], base);
20602 for (i = 1; i < nparts; i++)
20604 tmp = plus_constant (base, UNITS_PER_WORD * i);
20605 part[1][i] = replace_equiv_address (part[1][i], tmp);
   /* Push path: emit the parts from highest to lowest address.  */
20616 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
20617 emit_insn (gen_addsi3 (stack_pointer_rtx,
20618 stack_pointer_rtx, GEN_INT (-4)));
20619 emit_move_insn (part[0][2], part[1][2]);
20621 else if (nparts == 4)
20623 emit_move_insn (part[0][3], part[1][3]);
20624 emit_move_insn (part[0][2], part[1][2]);
20629 /* In 64bit mode we don't have 32bit push available. In case this is
20630 register, it is OK - we will just use larger counterpart. We also
20631 retype memory - these comes from attempt to avoid REX prefix on
20632 moving of second half of TFmode value. */
20633 if (GET_MODE (part[1][1]) == SImode)
20635 switch (GET_CODE (part[1][1]))
20638 part[1][1] = adjust_address (part[1][1], DImode, 0);
20642 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
20646 gcc_unreachable ();
20649 if (GET_MODE (part[1][0]) == SImode)
20650 part[1][0] = part[1][1];
20653 emit_move_insn (part[0][1], part[1][1]);
20654 emit_move_insn (part[0][0], part[1][0]);
20658 /* Choose correct order to not overwrite the source before it is copied. */
20659 if ((REG_P (part[0][0])
20660 && REG_P (part[1][1])
20661 && (REGNO (part[0][0]) == REGNO (part[1][1])
20663 && REGNO (part[0][0]) == REGNO (part[1][2]))
20665 && REGNO (part[0][0]) == REGNO (part[1][3]))))
20667 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
   /* Reverse order: copy high parts first.  */
20669 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
20671 operands[2 + i] = part[0][j];
20672 operands[6 + i] = part[1][j];
20677 for (i = 0; i < nparts; i++)
20679 operands[2 + i] = part[0][i];
20680 operands[6 + i] = part[1][i];
20684 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
20685 if (optimize_insn_for_size_p ())
20687 for (j = 0; j < nparts - 1; j++)
20688 if (CONST_INT_P (operands[6 + j])
20689 && operands[6 + j] != const0_rtx
20690 && REG_P (operands[2 + j]))
20691 for (i = j; i < nparts - 1; i++)
20692 if (CONST_INT_P (operands[7 + i])
20693 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
20694 operands[7 + i] = operands[2 + j];
20697 for (i = 0; i < nparts; i++)
20698 emit_move_insn (operands[2 + i], operands[6 + i]);
20703 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
20704 left shift by a constant, either using a single shift or
20705 a sequence of add instructions. */
20708 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
20710 rtx (*insn)(rtx, rtx, rtx);
   /* A small COUNT is cheaper as repeated self-adds (x += x doubles
      the value), unless optimizing for size.  */
20713 || (count * ix86_cost->add <= ix86_cost->shift_const
20714 && !optimize_insn_for_size_p ()))
   /* MODE is the full double-word mode being split; OPERAND is a
      half-mode piece, so DImode selects the SImode generator.  */
20716 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
20717 while (count-- > 0)
20718 emit_insn (insn (operand, operand, operand));
20722 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
20723 emit_insn (insn (operand, operand, GEN_INT (count)));
/* Split a double-word left shift into half-mode operations.  MODE is
   the mode being split (DImode -> SImode halves, otherwise DImode
   halves).  SCRATCH, if non-NULL, enables the cmove-based fixup for
   variable shift counts.  */
20728 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
20730 rtx (*gen_ashl3)(rtx, rtx, rtx);
20731 rtx (*gen_shld)(rtx, rtx, rtx);
20732 int half_width = GET_MODE_BITSIZE (mode) >> 1;
20734 rtx low[2], high[2];
   /* Constant shift count: fully resolve the split at expand time.  */
20737 if (CONST_INT_P (operands[2]))
20739 split_double_mode (mode, operands, 2, low, high);
20740 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
   /* Shifting by at least a half word moves LOW into HIGH and zeroes
      LOW; any remainder is a shift on HIGH alone.  */
20742 if (count >= half_width)
20744 emit_move_insn (high[0], low[1]);
20745 emit_move_insn (low[0], const0_rtx);
20747 if (count > half_width)
20748 ix86_expand_ashl_const (high[0], count - half_width, mode);
20752 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
20754 if (!rtx_equal_p (operands[0], operands[1]))
20755 emit_move_insn (operands[0], operands[1]);
   /* shld feeds bits from LOW into HIGH; LOW is shifted separately.  */
20757 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
20758 ix86_expand_ashl_const (low[0], count, mode);
20763 split_double_mode (mode, operands, 1, low, high);
20765 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
20767 if (operands[1] == const1_rtx)
20769 /* Assuming we've chosen a QImode capable registers, then 1 << N
20770 can be done with two 32/64-bit shifts, no branches, no cmoves. */
20771 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
20773 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
20775 ix86_expand_clear (low[0]);
20776 ix86_expand_clear (high[0]);
20777 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
   /* Set the low byte of exactly one half to 1 depending on whether
      the count's half-width bit is set (ZF from the test above).  */
20779 d = gen_lowpart (QImode, low[0]);
20780 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
20781 s = gen_rtx_EQ (QImode, flags, const0_rtx);
20782 emit_insn (gen_rtx_SET (VOIDmode, d, s));
20784 d = gen_lowpart (QImode, high[0]);
20785 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
20786 s = gen_rtx_NE (QImode, flags, const0_rtx);
20787 emit_insn (gen_rtx_SET (VOIDmode, d, s));
20790 /* Otherwise, we can get the same results by manually performing
20791 a bit extract operation on bit 5/6, and then performing the two
20792 shifts. The two methods of getting 0/1 into low/high are exactly
20793 the same size. Avoiding the shift in the bit extract case helps
20794 pentium4 a bit; no one else seems to care much either way. */
20797 enum machine_mode half_mode;
20798 rtx (*gen_lshr3)(rtx, rtx, rtx);
20799 rtx (*gen_and3)(rtx, rtx, rtx);
20800 rtx (*gen_xor3)(rtx, rtx, rtx);
20801 HOST_WIDE_INT bits;
20804 if (mode == DImode)
20806 half_mode = SImode;
20807 gen_lshr3 = gen_lshrsi3;
20808 gen_and3 = gen_andsi3;
20809 gen_xor3 = gen_xorsi3;
20814 half_mode = DImode;
20815 gen_lshr3 = gen_lshrdi3;
20816 gen_and3 = gen_anddi3;
20817 gen_xor3 = gen_xordi3;
20821 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
20822 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
20824 x = gen_lowpart (half_mode, operands[2]);
20825 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
   /* Isolate the half-width bit of the count into HIGH (0 or 1),
      and put its complement into LOW.  */
20827 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
20828 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
20829 emit_move_insn (low[0], high[0]);
20830 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
20833 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
20834 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
20838 if (operands[1] == constm1_rtx)
20840 /* For -1 << N, we can avoid the shld instruction, because we
20841 know that we're shifting 0...31/63 ones into a -1. */
20842 emit_move_insn (low[0], constm1_rtx);
20843 if (optimize_insn_for_size_p ())
20844 emit_move_insn (high[0], low[0]);
20846 emit_move_insn (high[0], constm1_rtx);
20850 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
20852 if (!rtx_equal_p (operands[0], operands[1]))
20853 emit_move_insn (operands[0], operands[1]);
20855 split_double_mode (mode, operands, 1, low, high);
20856 emit_insn (gen_shld (high[0], low[0], operands[2]));
20859 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
   /* Variable count: fix up the halves when count >= half_width,
      either with cmove (SCRATCH available) or a branch sequence.  */
20861 if (TARGET_CMOVE && scratch)
20863 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
20864 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
20866 ix86_expand_clear (scratch);
20867 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
20871 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
20872 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
20874 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
/* Split a double-word arithmetic right shift into half-mode
   operations.  MODE is the mode being split; SCRATCH, if non-NULL,
   enables the cmove-based fixup for variable counts.  */
20879 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
20881 rtx (*gen_ashr3)(rtx, rtx, rtx)
20882 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
20883 rtx (*gen_shrd)(rtx, rtx, rtx);
20884 int half_width = GET_MODE_BITSIZE (mode) >> 1;
20886 rtx low[2], high[2];
20889 if (CONST_INT_P (operands[2]))
20891 split_double_mode (mode, operands, 2, low, high);
20892 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
   /* Shift by width-1 replicates the sign bit into both halves.  */
20894 if (count == GET_MODE_BITSIZE (mode) - 1)
20896 emit_move_insn (high[0], high[1]);
20897 emit_insn (gen_ashr3 (high[0], high[0],
20898 GEN_INT (half_width - 1)));
20899 emit_move_insn (low[0], high[0]);
   /* Shift by at least a half word: HIGH moves into LOW and the new
      HIGH is the sign extension of the old HIGH.  */
20902 else if (count >= half_width)
20904 emit_move_insn (low[0], high[1]);
20905 emit_move_insn (high[0], low[0]);
20906 emit_insn (gen_ashr3 (high[0], high[0],
20907 GEN_INT (half_width - 1)));
20909 if (count > half_width)
20910 emit_insn (gen_ashr3 (low[0], low[0],
20911 GEN_INT (count - half_width)));
20915 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
20917 if (!rtx_equal_p (operands[0], operands[1]))
20918 emit_move_insn (operands[0], operands[1]);
20920 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
20921 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
   /* Variable count path.  */
20926 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
20928 if (!rtx_equal_p (operands[0], operands[1]))
20929 emit_move_insn (operands[0], operands[1]);
20931 split_double_mode (mode, operands, 1, low, high);
20933 emit_insn (gen_shrd (low[0], high[0], operands[2]));
20934 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
20936 if (TARGET_CMOVE && scratch)
20938 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
20939 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
   /* SCRATCH holds the sign extension of HIGH for the fixup.  */
20941 emit_move_insn (scratch, high[0]);
20942 emit_insn (gen_ashr3 (scratch, scratch,
20943 GEN_INT (half_width - 1)));
20944 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
20949 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
20950 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
20952 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
/* Split a double-word logical right shift into half-mode operations.
   MODE is the mode being split; SCRATCH, if non-NULL, enables the
   cmove-based fixup for variable counts.  */
20958 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
20960 rtx (*gen_lshr3)(rtx, rtx, rtx)
20961 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
20962 rtx (*gen_shrd)(rtx, rtx, rtx);
20963 int half_width = GET_MODE_BITSIZE (mode) >> 1;
20965 rtx low[2], high[2];
20968 if (CONST_INT_P (operands[2]))
20970 split_double_mode (mode, operands, 2, low, high);
20971 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
   /* Shift by at least a half word: HIGH moves into LOW and the new
      HIGH is zero (logical shift).  */
20973 if (count >= half_width)
20975 emit_move_insn (low[0], high[1]);
20976 ix86_expand_clear (high[0]);
20978 if (count > half_width)
20979 emit_insn (gen_lshr3 (low[0], low[0],
20980 GEN_INT (count - half_width)));
20984 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
20986 if (!rtx_equal_p (operands[0], operands[1]))
20987 emit_move_insn (operands[0], operands[1]);
20989 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
20990 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
   /* Variable count path.  */
20995 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
20997 if (!rtx_equal_p (operands[0], operands[1]))
20998 emit_move_insn (operands[0], operands[1]);
21000 split_double_mode (mode, operands, 1, low, high);
21002 emit_insn (gen_shrd (low[0], high[0], operands[2]));
21003 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
   /* Heuristic: fix up count >= half_width with cmove when possible.  */
21005 if (TARGET_CMOVE && scratch)
21007 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
21008 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
21010 ix86_expand_clear (scratch);
21011 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
21016 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
21017 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
21019 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
21024 /* Predict just emitted jump instruction to be taken with probability PROB. */
21026 predict_jump (int prob)
21028 rtx insn = get_last_insn ();
21029 gcc_assert (JUMP_P (insn));
   /* PROB is expressed on the REG_BR_PROB_BASE scale.  */
21030 add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
21033 /* Helper function for the string operations below. Test VARIABLE whether
21034 it is aligned to VALUE bytes. If true, jump to the label. */
21036 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
21038 rtx label = gen_label_rtx ();
21039 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
   /* Mask off the alignment bits of VARIABLE; a zero result means the
      required alignment holds.  */
21040 if (GET_MODE (variable) == DImode)
21041 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
21043 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
21044 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
   /* Branch prediction strength differs between the two uses; the
      selecting condition (EPILOGUE) is elided in this extract.  */
21047 predict_jump (REG_BR_PROB_BASE * 50 / 100);
21049 predict_jump (REG_BR_PROB_BASE * 90 / 100);
21053 /* Adjust COUNTER by the VALUE. */
21055 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
21057 rtx (*gen_add)(rtx, rtx, rtx)
21058 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
   /* Decrement COUNTREG by adding the negated VALUE.  */
21060 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
21063 /* Zero extend possibly SImode EXP to Pmode register. */
/* NOTE(review): listing elides the return type/braces and the final
   `return r;` line here.  */
21065 ix86_zero_extend_to_Pmode (rtx exp)
/* Constants have VOIDmode; just load them into a Pmode register.  */
21068 if (GET_MODE (exp) == VOIDmode)
21069 return force_reg (Pmode, exp);
/* Already pointer-width: copy into a fresh pseudo.  */
21070 if (GET_MODE (exp) == Pmode)
21071 return copy_to_mode_reg (Pmode, exp);
/* Otherwise SImode on a 64-bit target: zero-extend SI -> DI.  */
21072 r = gen_reg_rtx (Pmode);
21073 emit_insn (gen_zero_extendsidi2 (r, exp));
21077 /* Divide COUNTREG by SCALE. */
/* NOTE(review): listing elides lines here, including the presumable
   early-out for SCALE == 1 and the final `return sc;`.  */
21079 scale_counter (rtx countreg, int scale)
/* Constant count: fold the division at expand time.  */
21081 if (CONST_INT_P (countreg))
21082 return GEN_INT (INTVAL (countreg) / scale);
21083 gcc_assert (REG_P (countreg));
/* SCALE is a power of two here (exact_log2), so divide via a
   logical right shift.  */
21084 21089 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
21090 GEN_INT (exact_log2 (scale)),
21091 NULL, 1, OPTAB_DIRECT);
21095 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
21096 DImode for constant loop counts. */
/* NOTE(review): listing elides the return statements for the two
   trailing branches (presumably Pmode/DImode and SImode).  */
21098 static enum machine_mode
21099 counter_mode (rtx count_exp)
/* A register count keeps its own mode.  */
21101 if (GET_MODE (count_exp) != VOIDmode)
21102 return GET_MODE (count_exp);
21103 if (!CONST_INT_P (count_exp))
/* Constant that does not fit in 32 bits forces a wider counter.  */
21105 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
21110 /* Helper function for expand_set_or_movmem_via_loop.
21112 When SRCPTR is non-NULL, output simple loop to move memory
21113 pointer to SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
21114 overall size is COUNT specified in bytes. When SRCPTR is NULL, output the
21115 equivalent loop to set memory by VALUE (supposed to be in MODE).
21117 The size is rounded down to whole number of chunk size moved at once.
21118 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info.
21120 If ITER isn't NULL, than it'll be used in the generated loop without
21121 initialization (that allows to generate several consequent loops using the
21123 If CHANGE_PTRS is specified, DESTPTR and SRCPTR would be increased by
21124 iterator value at the end of the function (as if they iterate in the loop).
21125 Otherwise, their vaules'll stay unchanged.
21127 If EXPECTED_SIZE isn't -1, than it's used to compute branch-probabilities on
21128 the loop backedge. When expected size is unknown (it's -1), the probability
21131 Return value is rtx of iterator, used in the loop - it could be reused in
21132 consequent calls of this function. */
/* NOTE(review): this numbered listing is heavily elided inside the body
   (declarations of size/x_addr/y_addr/tmpreg/i, braces, else branches,
   jump labels); the comments below annotate only the surviving lines.  */
21134 expand_set_or_movmem_via_loop_with_iter (rtx destmem, rtx srcmem,
21135 rtx destptr, rtx srcptr, rtx value,
21136 rtx count, rtx iter,
21137 enum machine_mode mode, int unroll,
21138 int expected_size, bool change_ptrs)
21140 rtx out_label, top_label, tmp;
21141 enum machine_mode iter_mode = counter_mode (count);
/* Bytes processed per loop iteration, and a mask rounding COUNT down
   to a multiple of that chunk size.  */
21142 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
21143 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
/* Non-NULL ITER means the caller shares the iterator across loops.  */
21148 bool reuse_iter = (iter != NULL_RTX);
21150 top_label = gen_label_rtx ();
21151 out_label = gen_label_rtx ();
21153 iter = gen_reg_rtx (iter_mode);
/* size = count & ~(piece_size - 1): whole chunks only.  */
21155 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
21156 NULL, 1, OPTAB_DIRECT);
21157 /* Those two should combine. */
21158 if (piece_size == const1_rtx)
/* Skip the loop entirely when no full chunk fits; predicted rare.  */
21160 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
21162 predict_jump (REG_BR_PROB_BASE * 10 / 100);
21165 emit_move_insn (iter, const0_rtx);
21167 emit_label (top_label);
/* Compute dest (and, for copies, src) addresses as ptr + iter.  */
21169 tmp = convert_modes (Pmode, iter_mode, iter, true);
21170 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
21172 adjust_automodify_address_nv (copy_rtx (destmem), mode, x_addr, 0);
21176 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
21178 adjust_automodify_address_nv (copy_rtx (srcmem), mode, y_addr, 0);
21180 /* When unrolling for chips that reorder memory reads and writes,
21181 we can save registers by using single temporary.
21182 Also using 4 temporaries is overkill in 32bit mode. */
/* Deliberately disabled branch (`&& 0`): single-temporary variant.  */
21183 if (!TARGET_64BIT && 0)
21185 for (i = 0; i < unroll; i++)
21190 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
21192 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
21194 emit_move_insn (destmem, srcmem);
/* Live variant: load all UNROLL pieces first, then store them, to
   decouple reads from writes.  */
21200 gcc_assert (unroll <= 4);
21201 for (i = 0; i < unroll; i++)
21203 tmpreg[i] = gen_reg_rtx (mode);
21207 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
21209 emit_move_insn (tmpreg[i], srcmem);
21211 for (i = 0; i < unroll; i++)
21216 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
21218 emit_move_insn (destmem, tmpreg[i]);
/* memset path (SRCPTR == NULL): store VALUE UNROLL times.  */
21223 for (i = 0; i < unroll; i++)
21227 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
21228 emit_move_insn (destmem, value);
/* iter += piece_size; loop while iter < size.  */
21231 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
21232 true, OPTAB_LIB_WIDEN);
21234 emit_move_insn (iter, tmp);
21236 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
/* Backedge probability from the expected iteration count.  */
21238 if (expected_size != -1)
21240 expected_size /= GET_MODE_SIZE (mode) * unroll;
21241 if (expected_size == 0)
21243 else if (expected_size > REG_BR_PROB_BASE)
21244 predict_jump (REG_BR_PROB_BASE - 1);
21246 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
/* Unknown trip count: assume the loop usually iterates (80%).  */
21249 predict_jump (REG_BR_PROB_BASE * 80 / 100);
/* When CHANGE_PTRS, advance DESTPTR (and SRCPTR) by the bytes done.  */
21252 iter = ix86_zero_extend_to_Pmode (iter);
21253 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
21254 true, OPTAB_LIB_WIDEN);
21255 if (tmp != destptr)
21256 emit_move_insn (destptr, tmp);
21259 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
21260 true, OPTAB_LIB_WIDEN);
21262 emit_move_insn (srcptr, tmp);
21265 emit_label (out_label);
21269 /* When SRCPTR is non-NULL, output simple loop to move memory
21270 pointer to SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
21271 overall size is COUNT specified in bytes. When SRCPTR is NULL, output the
21272 equivalent loop to set memory by VALUE (supposed to be in MODE).
21274 The size is rounded down to whole number of chunk size moved at once.
21275 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
/* Thin convenience wrapper: fresh iterator (NULL_RTX) and pointers
   advanced at loop exit (change_ptrs == true).  */
21278 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
21279 rtx destptr, rtx srcptr, rtx value,
21280 rtx count, enum machine_mode mode, int unroll,
21283 expand_set_or_movmem_via_loop_with_iter (destmem, srcmem,
21284 destptr, srcptr, value,
21285 count, NULL_RTX, mode, unroll,
21286 expected_size, true);
21289 /* Output "rep; mov" instruction.
21290 Arguments have same meaning as for previous function */
/* NOTE(review): listing elides lines (local declarations, braces, the
   mode promotion after the QImode check, and the trailing args of
   gen_rep_mov).  */
21292 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
21293 rtx destptr, rtx srcptr,
21295 enum machine_mode mode)
21300 HOST_WIDE_INT rounded_count;
21302 /* If the size is known, it is shorter to use rep movs. */
/* Byte mode with a count divisible by 4 — presumably upgraded to
   SImode rep movs; the actual assignment is elided in this listing.  */
21303 if (mode == QImode && CONST_INT_P (count)
21304 && !(INTVAL (count) & 3))
/* Make both MEMs BLKmode with the string pointers as addresses so the
   rep pattern's operands describe the accessed memory.  */
21307 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
21308 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
21309 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
21310 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
/* Element count = byte count / element size, widened to Pmode.  */
21311 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
/* Build the final-pointer expressions ptr + count*size (shift only
   needed for element sizes > 1).  */
21312 if (mode != QImode)
21314 destexp = gen_rtx_ASHIFT (Pmode, countreg,
21315 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
21316 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
21317 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
21318 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
21319 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
21323 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
21324 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
/* Known constant count: record the exact transfer size (rounded down
   to whole elements) on fresh MEM copies for alias analysis.  */
21326 if (CONST_INT_P (count))
21328 rounded_count = (INTVAL (count)
21329 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
21330 destmem = shallow_copy_rtx (destmem);
21331 srcmem = shallow_copy_rtx (srcmem);
21332 set_mem_size (destmem, rounded_count);
21333 set_mem_size (srcmem, rounded_count);
/* Variable count: any previously recorded size is no longer valid.  */
21337 if (MEM_SIZE_KNOWN_P (destmem))
21338 clear_mem_size (destmem);
21339 if (MEM_SIZE_KNOWN_P (srcmem))
21340 clear_mem_size (srcmem);
21342 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
21346 /* Output "rep; stos" instruction.
21347 Arguments have same meaning as for previous function */
/* NOTE(review): listing elides lines (the orig_value parameter line,
   local declarations, braces).  */
21349 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
21350 rtx count, enum machine_mode mode,
21355 HOST_WIDE_INT rounded_count;
/* Describe the destination as BLKmode addressed by DESTPTR.  */
21357 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
21358 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
/* The stored value must live in a register of the element mode.  */
21359 value = force_reg (mode, gen_lowpart (mode, value));
/* Element count = byte count / element size, widened to Pmode.  */
21360 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
/* Final-pointer expression destptr + count*size.  */
21361 if (mode != QImode)
21363 destexp = gen_rtx_ASHIFT (Pmode, countreg,
21364 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
21365 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
21368 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
/* Zero-fill with a constant count: record exact size for aliasing.  */
21369 if (orig_value == const0_rtx && CONST_INT_P (count))
21371 rounded_count = (INTVAL (count)
21372 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
21373 destmem = shallow_copy_rtx (destmem);
21374 set_mem_size (destmem, rounded_count);
21376 else if (MEM_SIZE_KNOWN_P (destmem))
21377 clear_mem_size (destmem);
21378 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
/* Emit one strmov (movs) of MODE at byte OFFSET from SRCPTR to DESTPTR,
   deriving properly-attributed MEMs from SRCMEM/DESTMEM.  The strmov
   pattern also advances both pointer registers by the element size.  */
21382 emit_strmov (rtx destmem, rtx srcmem,
21383 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
21385 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
21386 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
21387 emit_insn (gen_strmov (destptr, dest, srcptr, src));
21390 /* Emit strset instuction. If RHS is constant, and vector mode will be used,
21391 then move this constant to a vector register before emitting strset. */
/* Stores VALUE (already in MODE) at byte OFFSET from DESTPTR; the
   strset pattern also advances DESTPTR by the element size.  */
21393 emit_strset (rtx destmem, rtx value,
21394 rtx destptr, enum machine_mode mode, int offset)
21396 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
21397 emit_insn (gen_strset (destptr, dest, value));
21400 /* Output code to copy (COUNT % MAX_SIZE) bytes from SRCPTR to DESTPTR.
21401 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
/* NOTE(review): this numbered listing elides lines inside the body
   (declarations, braces, else keywords, early returns); the comments
   below annotate only the surviving lines.  */
21403 expand_movmem_epilogue (rtx destmem, rtx srcmem,
21404 rtx destptr, rtx srcptr, rtx count, int max_size)
/* --- Constant count: emit straight-line moves, widest mode first. --- */
21407 if (CONST_INT_P (count))
21409 HOST_WIDE_INT countval = INTVAL (count);
21412 int remainder_size = countval % max_size;
21413 enum machine_mode move_mode = Pmode;
21415 /* Firstly, try to move data with the widest possible mode.
21416 Remaining part we'll move using Pmode and narrower modes. */
/* Prefer 16-byte SSE moves, then 8-byte, when MAX_SIZE allows.  */
21419 if (max_size >= GET_MODE_SIZE (V4SImode))
21420 move_mode = V4SImode;
21421 else if (max_size >= GET_MODE_SIZE (DImode))
21422 move_mode = DImode;
21425 while (remainder_size >= GET_MODE_SIZE (move_mode))
21427 emit_strmov (destmem, srcmem, destptr, srcptr, move_mode, offset);
21428 offset += GET_MODE_SIZE (move_mode);
21429 remainder_size -= GET_MODE_SIZE (move_mode);
21432 /* Move the remaining part of epilogue - its size might be
21433 a size of the widest mode. */
/* Second pass after move_mode is narrowed (narrowing elided in this
   listing) — mops up what the wide pass left.  */
21435 while (remainder_size >= GET_MODE_SIZE (move_mode))
21437 emit_strmov (destmem, srcmem, destptr, srcptr, move_mode, offset);
21438 offset += GET_MODE_SIZE (move_mode);
21439 remainder_size -= GET_MODE_SIZE (move_mode);
/* Final 4/2/1-byte tail moves.  */
21442 if (remainder_size >= 4)
21444 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
21446 remainder_size -= 4;
21448 if (remainder_size >= 2)
21450 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
21452 remainder_size -= 2;
21454 if (remainder_size >= 1)
21456 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
21458 remainder_size -= 1;
21460 gcc_assert (remainder_size == 0);
/* --- Variable count, large MAX_SIZE: fall back to a byte loop over
   count & (max_size - 1). --- */
21465 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
21466 count, 1, OPTAB_DIRECT);
21467 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
21468 count, QImode, 1, 4);
21472 /* When there are stringops, we can cheaply increase dest and src pointers.
21473 Otherwise we save code size by maintaining offset (zero is readily
21474 available from preceding rep operation) and using x86 addressing modes.
/* --- Variable count, small MAX_SIZE, stringop variant: test each
   power-of-two bit of COUNT and emit a conditional strmov. --- */
21476 if (TARGET_SINGLE_STRINGOP)
21480 rtx label = ix86_expand_aligntest (count, 4, true);
21481 src = change_address (srcmem, SImode, srcptr);
21482 dest = change_address (destmem, SImode, destptr);
21483 emit_insn (gen_strmov (destptr, dest, srcptr, src));
21484 emit_label (label);
21485 LABEL_NUSES (label) = 1;
21489 rtx label = ix86_expand_aligntest (count, 2, true);
21490 src = change_address (srcmem, HImode, srcptr);
21491 dest = change_address (destmem, HImode, destptr);
21492 emit_insn (gen_strmov (destptr, dest, srcptr, src));
21493 emit_label (label);
21494 LABEL_NUSES (label) = 1;
21498 rtx label = ix86_expand_aligntest (count, 1, true);
21499 src = change_address (srcmem, QImode, srcptr);
21500 dest = change_address (destmem, QImode, destptr);
21501 emit_insn (gen_strmov (destptr, dest, srcptr, src));
21502 emit_label (label);
21503 LABEL_NUSES (label) = 1;
/* --- Non-stringop variant: keep a running OFFSET register and use
   ptr+offset addressing instead of advancing the pointers. --- */
21508 rtx offset = force_reg (Pmode, const0_rtx);
21513 rtx label = ix86_expand_aligntest (count, 4, true);
21514 src = change_address (srcmem, SImode, srcptr);
21515 dest = change_address (destmem, SImode, destptr);
21516 emit_move_insn (dest, src);
21517 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
21518 true, OPTAB_LIB_WIDEN);
21520 emit_move_insn (offset, tmp);
21521 emit_label (label);
21522 LABEL_NUSES (label) = 1;
21526 rtx label = ix86_expand_aligntest (count, 2, true);
21527 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
21528 src = change_address (srcmem, HImode, tmp);
21529 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
21530 dest = change_address (destmem, HImode, tmp);
21531 emit_move_insn (dest, src);
21532 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
21533 true, OPTAB_LIB_WIDEN);
21535 emit_move_insn (offset, tmp);
21536 emit_label (label);
21537 LABEL_NUSES (label) = 1;
/* Last possible byte: no need to bump OFFSET afterwards.  */
21541 rtx label = ix86_expand_aligntest (count, 1, true);
21542 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
21543 src = change_address (srcmem, QImode, tmp);
21544 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
21545 dest = change_address (destmem, QImode, tmp);
21546 emit_move_insn (dest, src);
21547 emit_label (label);
21548 LABEL_NUSES (label) = 1;
21553 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
/* Variable-count memset tail: mask COUNT down to the residue and run a
   single-byte store loop over it.  */
21555 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
21556 rtx count, int max_size)
/* count &= max_size - 1 (MAX_SIZE is a power of two here).  */
21559 expand_simple_binop (counter_mode (count), AND, count,
21560 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
21561 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
21562 gen_lowpart (QImode, value), count, QImode,
21566 /* Output code to set with VALUE at most (COUNT % MAX_SIZE) bytes starting from
21568 DESTMEM provides MEMrtx to feed proper aliasing info.
21569 PROMOTED_TO_GPR_VALUE is rtx representing a GPR containing broadcasted VALUE.
21570 PROMOTED_TO_VECTOR_VALUE is rtx representing a vector register containing
21572 PROMOTED_TO_GPR_VALUE and PROMOTED_TO_VECTOR_VALUE could be NULL if the
21573 promotion hasn't been generated before. */
/* NOTE(review): this numbered listing elides lines inside the body
   (declarations, braces, else keywords, the offset updates); the
   comments below annotate only the surviving lines.  */
21575 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx promoted_to_vector_value,
21576 rtx promoted_to_gpr_value, rtx value, rtx count,
/* --- Constant count: straight-line stores, widest mode first. --- */
21579 if (CONST_INT_P (count))
21581 HOST_WIDE_INT countval = INTVAL (count);
21584 int remainder_size = countval % max_size;
21585 enum machine_mode move_mode = Pmode;
21587 /* Firstly, try to move data with the widest possible mode.
21588 Remaining part we'll move using Pmode and narrower modes. */
/* 16-byte stores using the pre-broadcast vector value, if any.  */
21590 if (promoted_to_vector_value)
21591 while (remainder_size >= 16)
21593 if (GET_MODE (destmem) != move_mode)
21594 destmem = adjust_automodify_address_nv (destmem, move_mode,
21596 emit_strset (destmem, promoted_to_vector_value, destptr,
21597 move_mode, offset)
21600 remainder_size -= 16;
21603 /* Move the remaining part of epilogue - its size might be
21604 a size of the widest mode. */
/* Word-sized stores, broadcasting VALUE into a GPR on demand.  */
21605 while (remainder_size >= GET_MODE_SIZE (Pmode))
21607 if (!promoted_to_gpr_value)
21608 promoted_to_gpr_value = promote_duplicated_reg (Pmode, value);
21609 emit_strset (destmem, promoted_to_gpr_value, destptr, Pmode, offset);
21610 offset += GET_MODE_SIZE (Pmode);
21611 remainder_size -= GET_MODE_SIZE (Pmode);
/* Broadcast just wide enough for the 4/2/1-byte tail.  */
21614 if (!promoted_to_gpr_value && remainder_size > 1)
21615 promoted_to_gpr_value = promote_duplicated_reg (remainder_size >= 4
21616 ? SImode : HImode, value);
21617 if (remainder_size >= 4)
21619 emit_strset (destmem, gen_lowpart (SImode, promoted_to_gpr_value), destptr,
21622 remainder_size -= 4;
21624 if (remainder_size >= 2)
21626 emit_strset (destmem, gen_lowpart (HImode, promoted_to_gpr_value), destptr,
21629 remainder_size -= 2;
/* A lone byte can use the unpromoted VALUE directly.  */
21631 if (remainder_size >= 1)
21633 emit_strset (destmem,
21634 promoted_to_gpr_value ? gen_lowpart (QImode, promoted_to_gpr_value) : value,
21638 remainder_size -= 1;
21640 gcc_assert (remainder_size == 0);
21644 /* count isn't const. */
/* --- Variable count, large MAX_SIZE: byte-store loop. --- */
21647 expand_setmem_epilogue_via_loop (destmem, destptr, value, count,
/* --- Variable count, small MAX_SIZE: test each bit of COUNT and emit
   conditional stores, largest chunk first. --- */
21652 if (!promoted_to_gpr_value)
21653 promoted_to_gpr_value = promote_duplicated_reg_to_size (value,
21654 GET_MODE_SIZE (Pmode),
21655 GET_MODE_SIZE (Pmode),
21656 GET_MODE_SIZE (Pmode));
/* 16-byte chunk: one vector store, or two DImode / four SImode
   strsets when no vector value is available.  */
21660 rtx label = ix86_expand_aligntest (count, 16, true);
21661 if (TARGET_SSE && promoted_to_vector_value)
21663 destmem = change_address (destmem,
21664 GET_MODE (promoted_to_vector_value),
21666 emit_insn (gen_strset (destptr, destmem, promoted_to_vector_value));
21668 else if (TARGET_64BIT)
21670 destmem = change_address (destmem, DImode, destptr);
21671 emit_insn (gen_strset (destptr, destmem, promoted_to_gpr_value));
21672 emit_insn (gen_strset (destptr, destmem, promoted_to_gpr_value));
21676 destmem = change_address (destmem, SImode, destptr);
21677 emit_insn (gen_strset (destptr, destmem, promoted_to_gpr_value));
21678 emit_insn (gen_strset (destptr, destmem, promoted_to_gpr_value));
21679 emit_insn (gen_strset (destptr, destmem, promoted_to_gpr_value));
21680 emit_insn (gen_strset (destptr, destmem, promoted_to_gpr_value));
21682 emit_label (label);
21683 LABEL_NUSES (label) = 1;
/* 8-byte chunk.  */
21687 rtx label = ix86_expand_aligntest (count, 8, true);
21690 destmem = change_address (destmem, DImode, destptr);
21691 emit_insn (gen_strset (destptr, destmem, promoted_to_gpr_value));
21693 /* FIXME: When this hunk it output, IRA classifies promoted_to_vector_value
/* Disabled branch (`&& 0`) — see the FIXME above.  */
21695 else if (TARGET_SSE && promoted_to_vector_value && 0)
21697 destmem = change_address (destmem, V2SImode, destptr);
21698 emit_insn (gen_strset (destptr, destmem,
21699 gen_lowpart (V2SImode, promoted_to_vector_value)));
21703 destmem = change_address (destmem, SImode, destptr);
21704 emit_insn (gen_strset (destptr, destmem, promoted_to_gpr_value));
21705 emit_insn (gen_strset (destptr, destmem, promoted_to_gpr_value));
21707 emit_label (label);
21708 LABEL_NUSES (label) = 1;
/* 4-, 2- and 1-byte chunks.  */
21712 rtx label = ix86_expand_aligntest (count, 4, true);
21713 destmem = change_address (destmem, SImode, destptr);
21714 emit_insn (gen_strset (destptr, destmem,
21715 gen_lowpart (SImode, promoted_to_gpr_value)));
21716 emit_label (label);
21717 LABEL_NUSES (label) = 1;
21721 rtx label = ix86_expand_aligntest (count, 2, true);
21722 destmem = change_address (destmem, HImode, destptr);
21723 emit_insn (gen_strset (destptr, destmem,
21724 gen_lowpart (HImode, promoted_to_gpr_value)));
21725 emit_label (label);
21726 LABEL_NUSES (label) = 1;
21730 rtx label = ix86_expand_aligntest (count, 1, true);
21731 destmem = change_address (destmem, QImode, destptr);
21732 emit_insn (gen_strset (destptr, destmem,
21733 gen_lowpart (QImode, promoted_to_gpr_value)));
21734 emit_label (label);
21735 LABEL_NUSES (label) = 1;
21739 /* Copy enough from DEST to SRC to align DEST known to by aligned by ALIGN to
21740 DESIRED_ALIGNMENT. */
/* For each alignment step 1/2/4/8 that DEST might still be missing,
   conditionally copy one element of that size and shrink COUNT.
   ix86_expand_aligntest returns the label jumped to when the bit is
   already clear (i.e. already aligned at that step).
   NOTE(review): listing elides braces and else keywords.  */
21742 expand_movmem_prologue (rtx destmem, rtx srcmem,
21743 rtx destptr, rtx srcptr, rtx count,
21744 int align, int desired_alignment)
21746 if (align <= 1 && desired_alignment > 1)
21748 rtx label = ix86_expand_aligntest (destptr, 1, false);
21749 srcmem = adjust_automodify_address_nv (srcmem, QImode, srcptr, 0);
21750 destmem = adjust_automodify_address_nv (destmem, QImode, destptr, 0);
21751 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
21752 ix86_adjust_counter (count, 1);
21753 emit_label (label);
21754 LABEL_NUSES (label) = 1;
21756 if (align <= 2 && desired_alignment > 2)
21758 rtx label = ix86_expand_aligntest (destptr, 2, false);
21759 srcmem = adjust_automodify_address_nv (srcmem, HImode, srcptr, 0);
21760 destmem = adjust_automodify_address_nv (destmem, HImode, destptr, 0);
21761 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
21762 ix86_adjust_counter (count, 2);
21763 emit_label (label);
21764 LABEL_NUSES (label) = 1;
21766 if (align <= 4 && desired_alignment > 4)
21768 rtx label = ix86_expand_aligntest (destptr, 4, false);
21769 srcmem = adjust_automodify_address_nv (srcmem, SImode, srcptr, 0);
21770 destmem = adjust_automodify_address_nv (destmem, SImode, destptr, 0);
21771 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
21772 ix86_adjust_counter (count, 4);
21773 emit_label (label);
21774 LABEL_NUSES (label) = 1;
21776 if (align <= 8 && desired_alignment > 8)
21778 rtx label = ix86_expand_aligntest (destptr, 8, false);
/* 8-byte step: one DImode move where available, else two SImode.  */
21780 if (TARGET_64BIT || TARGET_SSE)
21781 srcmem = adjust_automodify_address_nv (srcmem, DImode, srcptr, 0);
21782 destmem = adjust_automodify_address_nv (destmem, DImode, destptr, 0);
21783 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
21787 srcmem = adjust_automodify_address_nv (srcmem, SImode, srcptr, 0);
21788 destmem = adjust_automodify_address_nv (destmem, SImode, destptr, 0);
21789 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
21790 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
21792 ix86_adjust_counter (count, 8);
21793 emit_label (label);
21794 LABEL_NUSES (label) = 1;
/* Larger alignments are not handled by this ladder.  */
21796 gcc_assert (desired_alignment <= 16);
21799 /* Copy enough from DST to SRC to align DST known to DESIRED_ALIGN.
21800 ALIGN_BYTES is how many bytes need to be copied. */
/* Constant-count variant of the prologue: ALIGN_BYTES is known, so the
   bit pattern of ALIGN_BYTES decides unconditionally which element
   sizes to copy (no runtime alignment tests).  Also propagates the
   improved MEM_ALIGN/MEM_SIZE attributes and returns the adjusted
   source MEM through *SRCP.
   NOTE(review): listing elides lines (declarations of src/off, the off
   updates, src_align assignments, braces).  */
21802 expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
21803 int desired_align, int align_bytes)
21806 rtx orig_dst = dst;
21807 rtx orig_src = src;
/* How many bytes until SRC reaches DESIRED_ALIGN, or negative if its
   alignment offset is unknown.  */
21809 int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
21810 if (src_align_bytes >= 0)
21811 src_align_bytes = desired_align - src_align_bytes;
21812 if (align_bytes & 1)
21814 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
21815 src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
21817 emit_insn (gen_strmov (destreg, dst, srcreg, src));
21819 if (align_bytes & 2)
21821 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
21822 src = adjust_automodify_address_nv (src, HImode, srcreg, off);
/* After the 1-byte copy DST is 2-aligned; record it.  Mark SRC too
   when its misalignment matches DST's.  */
21823 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
21824 set_mem_align (dst, 2 * BITS_PER_UNIT);
21825 if (src_align_bytes >= 0
21826 && (src_align_bytes & 1) == (align_bytes & 1)
21827 && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
21828 set_mem_align (src, 2 * BITS_PER_UNIT);
21830 emit_insn (gen_strmov (destreg, dst, srcreg, src));
21832 if (align_bytes & 4)
21834 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
21835 src = adjust_automodify_address_nv (src, SImode, srcreg, off);
21836 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
21837 set_mem_align (dst, 4 * BITS_PER_UNIT);
/* SRC alignment is the largest power of two whose residue matches
   DST's (assignments elided in this listing).  */
21838 if (src_align_bytes >= 0)
21840 unsigned int src_align = 0;
21841 if ((src_align_bytes & 3) == (align_bytes & 3))
21843 else if ((src_align_bytes & 1) == (align_bytes & 1))
21845 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
21846 set_mem_align (src, src_align * BITS_PER_UNIT)
21849 emit_insn (gen_strmov (destreg, dst, srcreg, src));
21851 if (align_bytes & 8)
/* 8-byte step: one DImode move where available, else two SImode.  */
21853 if (TARGET_64BIT || TARGET_SSE)
21855 dst = adjust_automodify_address_nv (dst, DImode, destreg, off);
21856 src = adjust_automodify_address_nv (src, DImode, srcreg, off);
21857 emit_insn (gen_strmov (destreg, dst, srcreg, src));
21861 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
21862 src = adjust_automodify_address_nv (src, SImode, srcreg, off);
21863 emit_insn (gen_strmov (destreg, dst, srcreg, src));
21864 emit_insn (gen_strmov (destreg, dst, srcreg, src));
21866 if (MEM_ALIGN (dst) < 8 * BITS_PER_UNIT)
21867 set_mem_align (dst, 8 * BITS_PER_UNIT);
21868 if (src_align_bytes >= 0)
21870 unsigned int src_align = 0;
21871 if ((src_align_bytes & 7) == (align_bytes & 7))
21873 else if ((src_align_bytes & 3) == (align_bytes & 3))
21875 else if ((src_align_bytes & 1) == (align_bytes & 1))
21877 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
21878 set_mem_align (src, src_align * BITS_PER_UNIT);
/* Finally switch both MEMs back to BLKmode at the new offset and
   record the achieved alignments and reduced sizes.  */
21882 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
21883 src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
21884 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
21885 set_mem_align (dst, desired_align * BITS_PER_UNIT);
21886 if (src_align_bytes >= 0)
21888 unsigned int src_align = 0;
21889 if ((src_align_bytes & 15) == (align_bytes & 15))
21891 else if ((src_align_bytes & 7) == (align_bytes & 7))
21893 else if ((src_align_bytes & 3) == (align_bytes & 3))
21895 else if ((src_align_bytes & 1) == (align_bytes & 1))
21897 if (src_align > (unsigned int) desired_align)
21898 src_align = desired_align;
21899 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
21900 set_mem_align (src, src_align * BITS_PER_UNIT);
21902 if (MEM_SIZE_KNOWN_P (orig_dst))
21903 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
21904 if (MEM_SIZE_KNOWN_P (orig_src))
21905 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
21910 /* Set enough from DEST to align DEST known to by aligned by ALIGN to
21911 DESIRED_ALIGNMENT. */
/* memset counterpart of expand_movmem_prologue: for each alignment step
   1/2/4/8 still possibly missing, conditionally store one element of
   that size (VALUE narrowed via gen_lowpart) and shrink COUNT.
   NOTE(review): listing elides braces and else keywords.  */
21913 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
21914 int align, int desired_alignment)
21916 if (align <= 1 && desired_alignment > 1)
21918 rtx label = ix86_expand_aligntest (destptr, 1, false);
21919 destmem = adjust_automodify_address_nv (destmem, QImode, destptr, 0);
21920 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
21921 ix86_adjust_counter (count, 1);
21922 emit_label (label);
21923 LABEL_NUSES (label) = 1;
21925 if (align <= 2 && desired_alignment > 2)
21927 rtx label = ix86_expand_aligntest (destptr, 2, false);
21928 destmem = adjust_automodify_address_nv (destmem, HImode, destptr, 0);
21929 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
21930 ix86_adjust_counter (count, 2);
21931 emit_label (label);
21932 LABEL_NUSES (label) = 1;
21934 if (align <= 4 && desired_alignment > 4)
21936 rtx label = ix86_expand_aligntest (destptr, 4, false);
21937 destmem = adjust_automodify_address_nv (destmem, SImode, destptr, 0);
21938 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
21939 ix86_adjust_counter (count, 4);
21940 emit_label (label);
21941 LABEL_NUSES (label) = 1;
/* 8-byte step is done as two SImode stores.  */
21943 if (align <= 8 && desired_alignment > 8)
21945 rtx label = ix86_expand_aligntest (destptr, 8, false);
21946 destmem = adjust_automodify_address_nv (destmem, SImode, destptr, 0);
21947 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
21948 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
21949 ix86_adjust_counter (count, 8);
21950 emit_label (label);
21951 LABEL_NUSES (label) = 1;
/* Larger alignments are not handled by this ladder.  */
21953 gcc_assert (desired_alignment <= 16);
21956 /* Set enough from DST to align DST known to by aligned by ALIGN to
21957 DESIRED_ALIGN. ALIGN_BYTES is how many bytes need to be stored. */
/* Constant-count memset prologue: the bit pattern of ALIGN_BYTES
   decides unconditionally which element sizes to store; updates
   MEM_ALIGN/MEM_SIZE attributes as alignment is established and
   returns the adjusted DST.
   NOTE(review): listing elides lines (declaration of off, off updates,
   braces, the return statement).  */
21959 expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
21960 int desired_align, int align_bytes)
21963 rtx orig_dst = dst;
21964 if (align_bytes & 1)
21966 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
21968 emit_insn (gen_strset (destreg, dst,
21969 gen_lowpart (QImode, value)));
21971 if (align_bytes & 2)
21973 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
21974 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
21975 set_mem_align (dst, 2 * BITS_PER_UNIT);
21977 emit_insn (gen_strset (destreg, dst,
21978 gen_lowpart (HImode, value)));
21980 if (align_bytes & 4)
21982 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
21983 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
21984 set_mem_align (dst, 4 * BITS_PER_UNIT);
21986 emit_insn (gen_strset (destreg, dst,
21987 gen_lowpart (SImode, value)));
/* 8-byte step done as two SImode stores.  */
21989 if (align_bytes & 8)
21991 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
21992 emit_insn (gen_strset (destreg, dst,
21993 gen_lowpart (SImode, value)));
21995 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
21996 emit_insn (gen_strset (destreg, dst,
21997 gen_lowpart (SImode, value)));
21998 if (MEM_ALIGN (dst) < 8 * BITS_PER_UNIT)
21999 set_mem_align (dst, 8 * BITS_PER_UNIT);
/* Back to BLKmode at the new offset; record achieved alignment and
   the reduced remaining size.  */
22002 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
22003 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
22004 set_mem_align (dst, desired_align * BITS_PER_UNIT);
22005 if (MEM_SIZE_KNOWN_P (orig_dst))
22006 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
22010 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
/* NOTE(review): this numbered listing elides lines (braces, else
   keywords, some returns and assignments); comments below annotate
   only the surviving lines.  */
22011 static enum stringop_alg
22012 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
22013 int *dynamic_check, bool align_unknown)
22015 const struct stringop_algs * algs;
22016 bool optimize_for_speed;
22017 /* Algorithms using the rep prefix want at least edi and ecx;
22018 additionally, memset wants eax and memcpy wants esi. Don't
22019 consider such algorithms if the user has appropriated those
22020 registers for their own purposes. */
22021 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
22023 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
/* True unless ALG is a rep-prefix variant we cannot use.  */
22025 #define ALG_USABLE_P(alg) (rep_prefix_usable \
22026 || (alg != rep_prefix_1_byte \
22027 && alg != rep_prefix_4_byte \
22028 && alg != rep_prefix_8_byte))
22029 const struct processor_costs *cost;
22031 /* Even if the string operation call is cold, we still might spend a lot
22032 of time processing large blocks. */
22033 if (optimize_function_for_size_p (cfun)
22034 || (optimize_insn_for_size_p ()
22035 && expected_size != -1 && expected_size < 256))
22036 optimize_for_speed = false;
22038 optimize_for_speed = true;
22040 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
22042 *dynamic_check = -1;
/* Pick the per-CPU cost table for memset vs. memcpy, indexed by
   whether alignment is known and by 32/64-bit mode.  */
22044 algs = &cost->memset[align_unknown][TARGET_64BIT != 0];
22046 algs = &cost->memcpy[align_unknown][TARGET_64BIT != 0];
/* -mstringop-strategy= overrides everything when usable.  */
22047 if (ix86_stringop_alg != no_stringop && ALG_USABLE_P (ix86_stringop_alg))
22048 return ix86_stringop_alg;
22049 /* rep; movq or rep; movl is the smallest variant. */
22050 else if (!optimize_for_speed)
22052 if (!count || (count & 3))
22053 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
22055 return rep_prefix_usable ? rep_prefix_4_byte : loop;
22057 /* Very tiny blocks are best handled via the loop, REP is expensive to setup.
22059 else if (expected_size != -1 && expected_size < 4)
22060 return loop_1_byte;
/* Known expected size: walk the cost table for the first bucket that
   covers it and whose algorithm we may use.  */
22061 else if (expected_size != -1)
22064 enum stringop_alg alg = libcall;
22065 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
22067 /* We get here if the algorithms that were not libcall-based
22068 were rep-prefix based and we are unable to use rep prefixes
22069 based on global register usage. Break out of the loop and
22070 use the heuristic below. */
22071 if (algs->size[i].max == 0)
22073 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
22075 enum stringop_alg candidate = algs->size[i].alg;
22077 if (candidate != libcall && ALG_USABLE_P (candidate))
22079 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
22080 last non-libcall inline algorithm. */
22081 if (TARGET_INLINE_ALL_STRINGOPS)
22083 /* When the current size is best to be copied by a libcall,
22084 but we are still forced to inline, run the heuristic below
22085 that will pick code for medium sized blocks. */
22086 if (alg != libcall)
22090 else if (ALG_USABLE_P (candidate))
22094 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
22096 /* When asked to inline the call anyway, try to pick meaningful choice.
22097 We look for maximal size of block that is faster to copy by hand and
22098 take blocks of at most of that size guessing that average size will
22099 be roughly half of the block.
22101 If this turns out to be bad, we might simply specify the preferred
22102 choice in ix86_costs. */
22103 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
22104 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
22107 enum stringop_alg alg;
22109 bool any_alg_usable_p = true;
22110 bool only_libcall_fits = true;
/* Find the largest bucket whose non-libcall algorithm is usable.  */
22112 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
22114 enum stringop_alg candidate = algs->size[i].alg;
22115 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
22117 if (candidate != libcall && candidate
22118 && ALG_USABLE_P (candidate))
22120 max = algs->size[i].max;
22121 only_libcall_fits = false;
22124 /* If there aren't any usable algorithms, then recursing on
22125 smaller sizes isn't going to find anything. Just return the
22126 simple byte-at-a-time copy loop. */
22127 if (!any_alg_usable_p || only_libcall_fits)
22129 /* Pick something reasonable. */
22130 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
22131 *dynamic_check = 128;
22132 return loop_1_byte;
/* Recurse with half the max size as the expected size; the recursive
   call must not itself request a dynamic check.  */
22136 alg = decide_alg (count, max / 2, memset, dynamic_check, align_unknown);
22137 gcc_assert (*dynamic_check == -1);
22138 gcc_assert (alg != libcall);
22139 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
22140 *dynamic_check = max;
/* Default: the table's unknown-size choice, or a libcall.  */
22143 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
22144 #undef ALG_USABLE_P
22147 /* Decide on alignment.  We know that the operand is already aligned to ALIGN
22148 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
/* NOTE(review): this listing is an elided excerpt -- the return-type line,
   several switch case labels, break statements and braces are not visible.
   Code is kept byte-identical below; only annotations are added.  */
22150 decide_alignment (int align,
22151 enum stringop_alg alg,
/* DESIRED_ALIGN accumulates the alignment preferred by the chosen
   algorithm; it starts at 0 and is raised per-algorithm below.  */
22154 int desired_align = 0;
22158 gcc_unreachable ();
/* Word-sized copy loops prefer natural word (Pmode) alignment.  */
22160 desired_align = GET_MODE_SIZE (Pmode);
22162 case unrolled_loop:
22163 desired_align = GET_MODE_SIZE (Pmode);
/* 16-byte alignment -- presumably under the sse_loop case (label elided
   from this excerpt); SSE moves want 16-byte aligned operands.
   TODO(review): confirm against the full source.  */
22166 desired_align = 16;
22168 case rep_prefix_8_byte:
22171 case rep_prefix_4_byte:
22172 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
22173 copying whole cacheline at once. */
22174 if (TARGET_PENTIUMPRO)
22179 case rep_prefix_1_byte:
22180 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
22181 copying whole cacheline at once. */
22182 if (TARGET_PENTIUMPRO)
/* Never lower the alignment the operand already has, and do not bother
   aligning blocks known to be tiny (< 4 bytes).  */
22196 if (desired_align < align)
22197 desired_align = align;
22198 if (expected_size != -1 && expected_size < 4)
22199 desired_align = align;
22200 return desired_align;
22203 /* Return the smallest power of 2 greater than VAL. */
/* NOTE(review): the function body is elided from this excerpt; only the
   signature line below is visible.  */
22205 smallest_pow2_greater_than (int val)
22213 /* Expand string move (memcpy) operation. Use i386 string operations
22214 when profitable. expand_setmem contains similar code. The code
22215 depends upon architecture, block size and alignment, but always has
22216 the same overall structure:
22218 1) Prologue guard: Conditional that jumps up to epilogues for small
22219 blocks that can be handled by epilogue alone. This is faster
22220 but also needed for correctness, since prologue assumes the block
22221 is larger than the desired alignment.
22223 Optional dynamic check for size and libcall for large
22224 blocks is emitted here too, with -minline-stringops-dynamically.
22226 2) Prologue: copy first few bytes in order to get destination
22227 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
22228 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
22229 copied. We emit either a jump tree on power of two sized
22230 blocks, or a byte loop.
22232 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
22233 with specified algorithm.
22235 4) Epilogue: code copying tail of the block that is too small to be
22236 handled by main body (or up to size guarded by prologue guard). */
/* NOTE(review): this listing is an elided excerpt -- the return type, many
   braces, else branches, switch case labels and break statements are not
   visible.  The code is kept byte-identical; only annotations are added.  */
22239 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
22240 rtx expected_align_exp, rtx expected_size_exp)
22246 rtx jump_around_label = NULL;
/* COUNT is the compile-time byte count (0 when unknown); EXPECTED_SIZE may
   come from profile feedback even when COUNT is unknown.  */
22247 HOST_WIDE_INT align = 1;
22248 unsigned HOST_WIDE_INT count = 0;
22249 HOST_WIDE_INT expected_size = -1;
22250 int size_needed = 0, epilogue_size_needed;
22251 int desired_align = 0, align_bytes = 0;
22252 enum stringop_alg alg;
22254 bool need_zero_guard = false;
22255 bool align_unknown;
22257 enum machine_mode move_mode;
22258 rtx loop_iter = NULL_RTX;
22259 int dst_offset, src_offset;
22261 if (CONST_INT_P (align_exp))
22262 align = INTVAL (align_exp);
22263 /* i386 can do misaligned access on reasonably increased cost. */
22264 if (CONST_INT_P (expected_align_exp)
22265 && INTVAL (expected_align_exp) > align)
22266 align = INTVAL (expected_align_exp);
22267 /* ALIGN is the minimum of destination and source alignment, but we care here
22268 just about destination alignment. */
22269 else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
22270 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
22272 if (CONST_INT_P (count_exp))
22273 count = expected_size = INTVAL (count_exp);
22274 if (CONST_INT_P (expected_size_exp) && count == 0)
22275 expected_size = INTVAL (expected_size_exp);
22277 /* Make sure we don't need to care about overflow later on. */
22278 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
22281 /* Step 0: Decide on preferred algorithm, desired alignment and
22282 size of chunks to be copied by main loop. */
/* Alignment is treated as "unknown" unless both operands have a known
   offset from a MOVE_MAX boundary and those offsets agree.  */
22283 dst_offset = get_mem_align_offset (dst, MOVE_MAX*BITS_PER_UNIT);
22284 src_offset = get_mem_align_offset (src, MOVE_MAX*BITS_PER_UNIT);
22285 align_unknown = (dst_offset < 0
22287 || src_offset != dst_offset);
22288 alg = decide_alg (count, expected_size, false, &dynamic_check, align_unknown);
22289 desired_align = decide_alignment (align, alg, expected_size);
22291 desired_align = align;
22295 if (!TARGET_ALIGN_STRINGOPS)
22296 align = desired_align;
22298 if (alg == libcall)
22300 gcc_assert (alg != no_stringop);
/* Materialize the count and the destination/source address registers.  */
22302 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
22303 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
22304 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
/* Per-algorithm selection of chunk mode, unroll factor and zero guard
   (several case labels and breaks elided in this excerpt).  */
22309 gcc_unreachable ();
22311 need_zero_guard = true;
22314 size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
22316 case unrolled_loop:
22317 need_zero_guard = true;
22319 unroll_factor = TARGET_64BIT ? 4 : 2;
22320 size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
22323 need_zero_guard = true;
22324 /* Use SSE instructions, if possible. */
22325 move_mode = align_unknown ? DImode : V4SImode;
22326 unroll_factor = TARGET_64BIT ? 4 : 2;
22327 size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
22329 case rep_prefix_8_byte:
22332 case rep_prefix_4_byte:
22335 case rep_prefix_1_byte:
22339 need_zero_guard = true;
22344 epilogue_size_needed = size_needed;
22346 /* Step 1: Prologue guard. */
22348 /* Alignment code needs count to be in register. */
22349 if (CONST_INT_P (count_exp) && desired_align > align)
22351 if (INTVAL (count_exp) > desired_align
22352 && INTVAL (count_exp) > size_needed)
22355 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
22356 if (align_bytes <= 0)
22359 align_bytes = desired_align - align_bytes;
22361 if (align_bytes == 0)
22362 count_exp = force_reg (counter_mode (count_exp), count_exp);
22364 gcc_assert (desired_align >= 1 && align >= 1);
22366 /* Ensure that alignment prologue won't copy past end of block. */
22367 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
22369 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
22370 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
22371 Make sure it is power of 2. */
22372 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
22376 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
22378 /* If main algorithm works on QImode, no epilogue is needed.
22379 For small sizes just don't align anything. */
22380 if (size_needed == 1)
22381 desired_align = align;
22388 /* SSE and unrolled algs re-use iteration counter in the epilogue. */
22389 if (alg == sse_loop || alg == unrolled_loop)
22391 loop_iter = gen_reg_rtx (counter_mode (count_exp));
22392 emit_move_insn (loop_iter, const0_rtx);
/* Jump straight to the epilogue when the whole block fits in it.  */
22394 label = gen_label_rtx ();
22395 emit_cmp_and_jump_insns (count_exp,
22396 GEN_INT (epilogue_size_needed),
22397 LTU, 0, counter_mode (count_exp), 1, label);
22398 if (expected_size == -1 || expected_size < epilogue_size_needed)
22399 predict_jump (REG_BR_PROB_BASE * 60 / 100);
22401 predict_jump (REG_BR_PROB_BASE * 20 / 100);
22405 /* Emit code to decide on runtime whether library call or inline should be
22407 if (dynamic_check != -1)
22409 if (CONST_INT_P (count_exp))
/* Constant count: the libcall-vs-inline decision can be made now.  */
22411 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
22413 emit_block_move_via_libcall (dst, src, count_exp, false);
22414 count_exp = const0_rtx;
22420 rtx hot_label = gen_label_rtx ();
22421 jump_around_label = gen_label_rtx ();
22422 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
22423 LEU, 0, GET_MODE (count_exp), 1, hot_label);
22424 predict_jump (REG_BR_PROB_BASE * 90 / 100);
22425 emit_block_move_via_libcall (dst, src, count_exp, false);
22426 emit_jump (jump_around_label);
22427 emit_label (hot_label);
22431 /* Step 2: Alignment prologue. */
22433 if (desired_align > align)
22435 if (align_bytes == 0)
22437 /* Except for the first move in epilogue, we no longer know
22438 constant offset in aliasing info. It doesn't seem worth
22439 the pain to maintain it for the first move, so throw away
22441 src = change_address (src, BLKmode, srcreg);
22442 dst = change_address (dst, BLKmode, destreg);
22443 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
22445 set_mem_align (src, desired_align*BITS_PER_UNIT);
22446 set_mem_align (dst, desired_align*BITS_PER_UNIT);
22450 /* If we know how many bytes need to be stored before dst is
22451 sufficiently aligned, maintain aliasing info accurately. */
22452 dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
22453 desired_align, align_bytes);
22454 count_exp = plus_constant (count_exp, -align_bytes);
22455 count -= align_bytes;
22457 if (need_zero_guard
22458 && (count < (unsigned HOST_WIDE_INT) size_needed
22459 || (align_bytes == 0
22460 && count < ((unsigned HOST_WIDE_INT) size_needed
22461 + desired_align - align))))
22463 /* It is possible that we copied enough so the main loop will not
22465 gcc_assert (size_needed > 1);
22466 if (label == NULL_RTX)
22467 label = gen_label_rtx ();
22468 emit_cmp_and_jump_insns (count_exp,
22469 GEN_INT (size_needed),
22470 LTU, 0, counter_mode (count_exp), 1, label);
22471 if (expected_size == -1
22472 || expected_size < (desired_align - align) / 2 + size_needed)
22473 predict_jump (REG_BR_PROB_BASE * 20 / 100);
22475 predict_jump (REG_BR_PROB_BASE * 60 / 100);
22478 if (label && size_needed == 1)
22480 emit_label (label);
22481 LABEL_NUSES (label) = 1;
22483 epilogue_size_needed = 1;
22485 else if (label == NULL_RTX)
22486 epilogue_size_needed = size_needed;
22488 /* Step 3: Main loop. */
/* Dispatch on the chosen algorithm (case labels partially elided).  */
22494 gcc_unreachable ();
22496 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
22497 count_exp, QImode, 1, expected_size);
22500 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
22501 count_exp, Pmode, 1, expected_size);
22504 case unrolled_loop:
22505 /* In some cases we want to use the same iterator in several adjacent
22506 loops, so here we save loop iterator rtx and don't update addresses. */
22507 loop_iter = expand_set_or_movmem_via_loop_with_iter (dst, src, destreg,
22509 count_exp, loop_iter,
22512 expected_size, false);
22514 case rep_prefix_8_byte:
22515 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
22518 case rep_prefix_4_byte:
22519 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
22522 case rep_prefix_1_byte:
22523 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
22527 /* Adjust properly the offset of src and dest memory for aliasing. */
22528 if (CONST_INT_P (count_exp))
22530 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
22531 (count / size_needed) * size_needed);
22532 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
22533 (count / size_needed) * size_needed);
22537 src = change_address (src, BLKmode, srcreg);
22538 dst = change_address (dst, BLKmode, destreg);
22541 /* Step 4: Epilogue to copy the remaining bytes. */
22545 /* When the main loop is done, COUNT_EXP might hold original count,
22546 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
22547 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
22548 bytes. Compensate if needed. */
22550 if (size_needed < epilogue_size_needed)
22553 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
22554 GEN_INT (size_needed - 1), count_exp, 1,
22556 if (tmp != count_exp)
22557 emit_move_insn (count_exp, tmp);
22559 emit_label (label);
22560 LABEL_NUSES (label) = 1;
22563 /* We haven't updated addresses, so we'll do it now.
22564 Also, if the epilogue seems to be big, we'll generate a loop (not
22565 unrolled) in it. We'll do it only if alignment is unknown, because in
22566 this case in epilogue we have to perform memmove by bytes, which is very
22568 if (alg == sse_loop || alg == unrolled_loop)
22571 if (align_unknown && unroll_factor > 1)
22573 /* Reduce epilogue's size by creating not-unrolled loop. If we won't
22574 do this, we can have very big epilogue - when alignment is statically
22575 unknown we'll have the epilogue byte by byte which may be very slow. */
22576 loop_iter = expand_set_or_movmem_via_loop_with_iter (dst, src, destreg,
22577 srcreg, NULL, count_exp,
22578 loop_iter, move_mode, 1,
22579 expected_size, false);
22580 src = change_address (src, BLKmode, srcreg);
22581 dst = change_address (dst, BLKmode, destreg);
22582 epilogue_size_needed = GET_MODE_SIZE (move_mode);
/* Advance the address registers by the byte count already processed by
   the main loop (LOOP_ITER).  */
22584 tmp = expand_simple_binop (Pmode, PLUS, destreg, loop_iter, destreg,
22585 true, OPTAB_LIB_WIDEN);
22586 if (tmp != destreg)
22587 emit_move_insn (destreg, tmp);
22589 tmp = expand_simple_binop (Pmode, PLUS, srcreg, loop_iter, srcreg,
22590 true, OPTAB_LIB_WIDEN);
22592 emit_move_insn (srcreg, tmp);
22594 if (count_exp != const0_rtx && epilogue_size_needed > 1)
22595 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
22596 epilogue_size_needed);
22598 if (jump_around_label)
22599 emit_label (jump_around_label);
22603 /* Helper function for memcpy. For QImode value 0xXY produce
22604 0xXYXYXYXY of wide specified by MODE. This is essentially
22605 a * 0x10101010, but we can do slightly better than
22606 synth_mult by unwinding the sequence by hand on CPUs with
/* NOTE(review): elided excerpt -- the tail of the comment above, the
   return type and several braces/else lines are missing from view; the
   code is kept byte-identical, annotations only.  */
22609 promote_duplicated_reg (enum machine_mode mode, rtx val)
22611 enum machine_mode valmode = GET_MODE (val);
/* Number of shift+or doubling steps: 3 for DImode (8->16->32->64 bits),
   2 for SImode -- used in the cost comparison below.  */
22613 int nops = mode == DImode ? 3 : 2;
22615 if (VECTOR_MODE_P (mode))
22617 enum machine_mode inner = GET_MODE_INNER (mode);
22618 rtx promoted_val, vec_reg;
22619 if (CONST_INT_P (val))
22620 return ix86_build_const_vector (mode, true, val);
/* Promote in the scalar inner mode first, then broadcast into a vector
   register (dup patterns for V2DImode/V4SImode; others unreachable).  */
22622 promoted_val = promote_duplicated_reg (inner, val);
22623 vec_reg = gen_reg_rtx (mode);
22627 emit_insn (gen_vec_dupv2di (vec_reg, promoted_val));
22630 emit_insn (gen_vec_dupv4si (vec_reg, promoted_val));
22633 gcc_unreachable ();
22639 gcc_assert (mode == SImode || mode == DImode);
/* On 32-bit targets a DImode broadcast is built via V4SImode and then
   reinterpreted as V2DImode.  */
22640 if (mode == DImode && !TARGET_64BIT)
22642 rtx vec_reg = promote_duplicated_reg (V4SImode, val);
22643 vec_reg = convert_to_mode (V2DImode, vec_reg, 1);
22646 if (val == const0_rtx)
22647 return copy_to_mode_reg (mode, const0_rtx);
/* Constant byte: compute the replicated pattern at compile time.  */
22648 if (CONST_INT_P (val))
22650 HOST_WIDE_INT v = INTVAL (val) & 255;
22654 if (mode == DImode)
22655 v |= (v << 16) << 16;
22656 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
22659 if (valmode == VOIDmode)
22661 if (valmode != QImode)
22662 val = gen_lowpart (QImode, val);
22663 if (mode == QImode)
/* Cost model: a multiply by the replicated-ones constant vs. a sequence
   of shift+or steps; pick the cheaper per ix86_cost.  */
22665 if (!TARGET_PARTIAL_REG_STALL)
22667 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
22668 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
22669 <= (ix86_cost->shift_const + ix86_cost->add) * nops
22670 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
22672 rtx reg = convert_modes (mode, QImode, val, true);
22673 tmp = promote_duplicated_reg (mode, const1_rtx);
22674 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
/* Fallback: build the pattern with insv / shift+or doubling steps.  */
22679 rtx reg = convert_modes (mode, QImode, val, true);
22681 if (!TARGET_PARTIAL_REG_STALL)
22682 if (mode == SImode)
22683 emit_insn (gen_movsi_insv_1 (reg, reg));
22685 emit_insn (gen_movdi_insv_1 (reg, reg));
22688 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
22689 NULL, 1, OPTAB_DIRECT);
22691 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
22693 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
22694 NULL, 1, OPTAB_DIRECT);
22695 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
/* NOTE(review): a line is elided after the SImode test below -- presumably
   an early `return reg;`, since shifting SImode by 32 would be invalid.
   Verify against the full source.  */
22696 if (mode == SImode)
22698 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
22699 NULL, 1, OPTAB_DIRECT);
22700 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
22705 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
22706 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
22707 alignment from ALIGN to DESIRED_ALIGN. */
/* NOTE(review): elided excerpt -- the return type, braces and some
   else/conditional lines are missing; code kept byte-identical.  */
22709 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
22711 rtx promoted_val = NULL_RTX;
/* Chunks wider than 8 bytes require an SSE vector promotion
   (V2DImode or V4SImode, selection line elided).  */
22713 if (size_needed > 8 || (desired_align > align && desired_align > 8))
22715 /* We want to promote to vector register, so we expect that at least SSE
22717 gcc_assert (TARGET_SSE);
22719 /* In case of promotion to vector register, we expect that val is a
22720 constant or already promoted to GPR value. */
22721 gcc_assert (GET_MODE (val) == Pmode || CONSTANT_P (val));
22723 promoted_val = promote_duplicated_reg (V2DImode, val);
22725 promoted_val = promote_duplicated_reg (V4SImode, val);
/* Otherwise pick the narrowest GPR mode wide enough for both the main
   loop chunk and the alignment prologue.  */
22727 else if (size_needed > 4 || (desired_align > align && desired_align > 4))
22729 gcc_assert (TARGET_64BIT);
22730 promoted_val = promote_duplicated_reg (DImode, val);
22732 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
22733 promoted_val = promote_duplicated_reg (SImode, val);
22734 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
22735 promoted_val = promote_duplicated_reg (HImode, val);
22737 promoted_val = val;
22739 return promoted_val;
22742 /* Expand string clear operation (bzero). Use i386 string operations when
22743 profitable. See expand_movmem comment for explanation of individual
22744 steps performed. */
/* NOTE(review): this listing is an elided excerpt -- the return type, many
   braces, else branches, switch case labels and break statements are not
   visible.  The code is kept byte-identical; only annotations are added.  */
22746 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
22747 rtx expected_align_exp, rtx expected_size_exp)
22752 rtx jump_around_label = NULL;
22753 HOST_WIDE_INT align = 1;
22754 unsigned HOST_WIDE_INT count = 0;
22755 HOST_WIDE_INT expected_size = -1;
22756 int size_needed = 0, epilogue_size_needed;
22757 int desired_align = 0, align_bytes = 0;
22758 enum stringop_alg alg;
/* The fill value is promoted once into a GPR-wide pattern and, for the
   SSE path, additionally into a vector register.  */
22759 rtx gpr_promoted_val = NULL;
22760 rtx vec_promoted_val = NULL;
22762 bool need_zero_guard = false;
22763 bool align_unknown;
22764 unsigned int unroll_factor;
22765 enum machine_mode move_mode;
22766 rtx loop_iter = NULL_RTX;
22768 if (CONST_INT_P (align_exp))
22769 align = INTVAL (align_exp);
22770 /* i386 can do misaligned access on reasonably increased cost. */
22771 if (CONST_INT_P (expected_align_exp)
22772 && INTVAL (expected_align_exp) > align)
22773 align = INTVAL (expected_align_exp);
22774 if (CONST_INT_P (count_exp))
22775 count = expected_size = INTVAL (count_exp);
22776 if (CONST_INT_P (expected_size_exp) && count == 0)
22777 expected_size = INTVAL (expected_size_exp);
22779 /* Make sure we don't need to care about overflow later on. */
22780 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
22783 /* Step 0: Decide on preferred algorithm, desired alignment and
22784 size of chunks to be copied by main loop. */
/* NOTE(review): this condition looks inverted relative to the variable's
   name -- it is true precisely when ALIGN_EXP *is* a known positive
   constant.  Compare with ix86_expand_movmem, which derives align_unknown
   from get_mem_align_offset.  Verify against the full source.  */
22786 align_unknown = CONST_INT_P (align_exp) && INTVAL (align_exp) > 0;
22787 alg = decide_alg (count, expected_size, true, &dynamic_check, align_unknown);
22788 desired_align = decide_alignment (align, alg, expected_size);
22792 if (!TARGET_ALIGN_STRINGOPS)
22793 align = desired_align;
22795 if (alg == libcall)
22797 gcc_assert (alg != no_stringop);
22799 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
22800 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
/* Per-algorithm selection of chunk mode, unroll factor and zero guard
   (several case labels and breaks elided in this excerpt).  */
22805 gcc_unreachable ();
22807 need_zero_guard = true;
22809 size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
22811 case unrolled_loop:
22812 need_zero_guard = true;
22815 /* Select maximal available 1,2 or 4 unroll factor. */
22816 while (GET_MODE_SIZE (move_mode) * unroll_factor * 2 < count
22817 && unroll_factor < 4)
22818 unroll_factor *= 2;
22819 size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
22822 need_zero_guard = true;
22823 move_mode = TARGET_64BIT ? V2DImode : V4SImode;
22825 /* Select maximal available 1,2 or 4 unroll factor. */
22826 while (GET_MODE_SIZE (move_mode) * unroll_factor * 2 < count
22827 && unroll_factor < 4)
22828 unroll_factor *= 2;
22829 size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
22831 case rep_prefix_8_byte:
22834 case rep_prefix_4_byte:
22837 case rep_prefix_1_byte:
22841 need_zero_guard = true;
22845 epilogue_size_needed = size_needed;
22847 /* Step 1: Prologue guard. */
22849 /* Alignment code needs count to be in register. */
22850 if (CONST_INT_P (count_exp) && desired_align > align)
22852 if (INTVAL (count_exp) > desired_align
22853 && INTVAL (count_exp) > size_needed)
22856 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
22857 if (align_bytes <= 0)
22860 align_bytes = desired_align - align_bytes;
22862 if (align_bytes == 0)
/* SImode counter suffices unless the known count exceeds 32 bits.  */
22864 enum machine_mode mode = SImode;
22865 if (TARGET_64BIT && (count & ~0xffffffff))
22867 count_exp = force_reg (mode, count_exp);
22870 /* Do the cheap promotion to allow better CSE across the
22871 main loop and epilogue (ie one load of the big constant in the
22872 front of all code. */
22873 if (CONST_INT_P (val_exp))
22874 gpr_promoted_val = promote_duplicated_reg_to_size (val_exp,
22875 GET_MODE_SIZE (Pmode),
22876 GET_MODE_SIZE (Pmode),
22878 /* Ensure that alignment prologue won't copy past end of block. */
22879 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
22881 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
22882 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
22883 Make sure it is power of 2. */
22884 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
22888 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
22890 /* If main algorithm works on QImode, no epilogue is needed.
22891 For small sizes just don't align anything. */
22892 if (size_needed == 1)
22893 desired_align = align;
22900 /* SSE and unrolled_loop algs re-use iteration counter in the epilogue. */
22901 if (alg == sse_loop || alg == unrolled_loop)
22903 loop_iter = gen_reg_rtx (counter_mode (count_exp));
22904 emit_move_insn (loop_iter, const0_rtx);
/* Jump straight to the epilogue when the whole block fits in it.  */
22906 label = gen_label_rtx ();
22907 emit_cmp_and_jump_insns (count_exp,
22908 GEN_INT (epilogue_size_needed),
22909 LTU, 0, counter_mode (count_exp), 1, label);
22910 if (expected_size == -1 || expected_size <= epilogue_size_needed)
22911 predict_jump (REG_BR_PROB_BASE * 60 / 100);
22913 predict_jump (REG_BR_PROB_BASE * 20 / 100);
/* Runtime size check: large blocks go through the library call.  */
22916 if (dynamic_check != -1)
22918 rtx hot_label = gen_label_rtx ();
22919 jump_around_label = gen_label_rtx ();
22920 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
22921 LEU, 0, counter_mode (count_exp), 1, hot_label);
22922 predict_jump (REG_BR_PROB_BASE * 90 / 100);
22923 set_storage_via_libcall (dst, count_exp, val_exp, false);
22924 emit_jump (jump_around_label);
22925 emit_label (hot_label);
22928 /* Step 2: Alignment prologue. */
22930 /* Do the expensive promotion once we branched off the small blocks. */
22931 if (!gpr_promoted_val)
22932 gpr_promoted_val = promote_duplicated_reg_to_size (val_exp,
22933 GET_MODE_SIZE (Pmode),
22934 GET_MODE_SIZE (Pmode),
22936 gcc_assert (desired_align >= 1 && align >= 1);
22938 if (desired_align > align)
22940 if (align_bytes == 0)
22942 /* Except for the first move in epilogue, we no longer know
22943 constant offset in aliasing info. It doesn't seem worth
22944 the pain to maintain it for the first move, so throw away
22946 dst = change_address (dst, BLKmode, destreg);
22947 expand_setmem_prologue (dst, destreg, gpr_promoted_val, count_exp, align,
22949 set_mem_align (dst, desired_align*BITS_PER_UNIT);
22953 /* If we know how many bytes need to be stored before dst is
22954 sufficiently aligned, maintain aliasing info accurately. */
22955 dst = expand_constant_setmem_prologue (dst, destreg, gpr_promoted_val,
22956 desired_align, align_bytes);
22957 count_exp = plus_constant (count_exp, -align_bytes);
22958 count -= align_bytes;
22959 if (count < (unsigned HOST_WIDE_INT) size_needed)
22962 if (need_zero_guard
22963 && (count < (unsigned HOST_WIDE_INT) size_needed
22964 || (align_bytes == 0
22965 && count < ((unsigned HOST_WIDE_INT) size_needed
22966 + desired_align - align))))
22968 /* It is possible that we copied enough so the main loop will not
22970 gcc_assert (size_needed > 1);
22971 if (label == NULL_RTX)
22972 label = gen_label_rtx ();
22973 emit_cmp_and_jump_insns (count_exp,
22974 GEN_INT (size_needed),
22975 LTU, 0, counter_mode (count_exp), 1, label);
22976 if (expected_size == -1
22977 || expected_size < (desired_align - align) / 2 + size_needed)
22978 predict_jump (REG_BR_PROB_BASE * 20 / 100);
22980 predict_jump (REG_BR_PROB_BASE * 60 / 100);
22983 if (label && size_needed == 1)
22985 emit_label (label);
22986 LABEL_NUSES (label) = 1;
/* Byte-sized path: fall back to the raw (unpromoted) value.  */
22988 gpr_promoted_val = val_exp;
22989 epilogue_size_needed = 1;
22991 else if (label == NULL_RTX)
22992 epilogue_size_needed = size_needed;
22994 /* Step 3: Main loop. */
/* Dispatch on the chosen algorithm (case labels partially elided).  */
23000 gcc_unreachable ();
23002 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, val_exp,
23003 count_exp, QImode, 1, expected_size);
23006 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, gpr_promoted_val,
23007 count_exp, Pmode, 1, expected_size);
23009 case unrolled_loop:
23010 loop_iter = expand_set_or_movmem_via_loop_with_iter (dst, NULL, destreg,
23011 NULL, gpr_promoted_val, count_exp,
23012 loop_iter, move_mode, unroll_factor,
23013 expected_size, false);
/* Presumably the sse_loop case (label elided): promote the fill pattern
   into a vector register, then run the vectorized loop.  */
23017 promote_duplicated_reg_to_size (gpr_promoted_val,
23018 GET_MODE_SIZE (move_mode),
23019 desired_align, align);
23020 loop_iter = expand_set_or_movmem_via_loop_with_iter (dst, NULL, destreg,
23021 NULL, vec_promoted_val, count_exp,
23022 loop_iter, move_mode, unroll_factor,
23023 expected_size, false);
23025 case rep_prefix_8_byte:
23026 gcc_assert (TARGET_64BIT);
23027 expand_setmem_via_rep_stos (dst, destreg, gpr_promoted_val, count_exp,
23030 case rep_prefix_4_byte:
23031 expand_setmem_via_rep_stos (dst, destreg, gpr_promoted_val, count_exp,
23034 case rep_prefix_1_byte:
23035 expand_setmem_via_rep_stos (dst, destreg, gpr_promoted_val, count_exp,
23039 /* Adjust properly the offset of src and dest memory for aliasing. */
23040 if (CONST_INT_P (count_exp))
23041 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
23042 (count / size_needed) * size_needed);
23044 dst = change_address (dst, BLKmode, destreg);
23046 /* Step 4: Epilogue to copy the remaining bytes. */
23050 /* When the main loop is done, COUNT_EXP might hold original count,
23051 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
23052 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
23053 bytes. Compensate if needed. */
23055 if (size_needed < epilogue_size_needed)
23058 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
23059 GEN_INT (size_needed - 1), count_exp, 1,
23061 if (tmp != count_exp)
23062 emit_move_insn (count_exp, tmp);
23064 emit_label (label);
23065 LABEL_NUSES (label) = 1;
23066 /* We cannot rely on the fact that the promoted value is known. */
23067 vec_promoted_val = 0;
23068 gpr_promoted_val = 0;
23071 if (alg == unrolled_loop || alg == sse_loop)
23074 if (align_unknown && unroll_factor > 1
23075 && epilogue_size_needed >= GET_MODE_SIZE (move_mode)
23076 && vec_promoted_val)
23078 /* Reduce epilogue's size by creating not-unrolled loop. If we won't
23079 do this, we can have very big epilogue - when alignment is statically
23080 unknown we'll have the epilogue byte by byte which may be very slow. */
23081 loop_iter = expand_set_or_movmem_via_loop_with_iter (dst, NULL, destreg,
23082 NULL, vec_promoted_val, count_exp,
23083 loop_iter, move_mode, 1,
23084 expected_size, false);
23085 dst = change_address (dst, BLKmode, destreg);
23086 epilogue_size_needed = GET_MODE_SIZE (move_mode);
/* Advance the destination register by the byte count already processed
   by the main loop (LOOP_ITER).  */
23088 tmp = expand_simple_binop (Pmode, PLUS, destreg, loop_iter, destreg,
23089 true, OPTAB_LIB_WIDEN);
23090 if (tmp != destreg)
23091 emit_move_insn (destreg, tmp);
23093 if (count_exp == const0_rtx)
23095 else if (!gpr_promoted_val && epilogue_size_needed > 1)
23096 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
23097 epilogue_size_needed);
23100 if (epilogue_size_needed > 1)
23101 expand_setmem_epilogue (dst, destreg, vec_promoted_val, gpr_promoted_val,
23102 val_exp, count_exp, epilogue_size_needed);
23104 if (jump_around_label)
23105 emit_label (jump_around_label);
23109 /* Expand the appropriate insns for doing strlen if not just doing
23112 out = result, initialized with the start address
23113 align_rtx = alignment of the address.
23114 scratch = scratch register, initialized with the startaddress when
23115 not aligned, otherwise undefined
23117 This is just the body. It needs the initializations mentioned above and
23118 some address computing at the end. These things are done in i386.md. */
/* Emit the unrolled inline strlen body.  OUT holds the string address on
   entry and, on exit, the address of the terminating zero byte
   (memchr-style, per the caller's comment below); SRC is the source MEM;
   ALIGN_RTX is the known alignment.  First aligns the pointer to a 4-byte
   boundary byte-by-byte, then scans four bytes per iteration using the
   classic (x + 0xFEFEFEFF) & ~x & 0x80808080 zero-byte detection formula.
   NOTE(review): this excerpt is elided — braces, jump targets and some
   statements are missing between the visible lines; verify against the
   full file before editing.  */
23121 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
23125 rtx align_2_label = NULL_RTX;
23126 rtx align_3_label = NULL_RTX;
23127 rtx align_4_label = gen_label_rtx ();
23128 rtx end_0_label = gen_label_rtx ();
23130 rtx tmpreg = gen_reg_rtx (SImode);
23131 rtx scratch = gen_reg_rtx (SImode);
23135 if (CONST_INT_P (align_rtx))
23136 align = INTVAL (align_rtx);
23138 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
23140 /* Is there a known alignment and is it less than 4? */
23143 rtx scratch1 = gen_reg_rtx (Pmode);
23144 emit_move_insn (scratch1, out);
23145 /* Is there a known alignment and is it not 2? */
23148 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
23149 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
23151 /* Leave just the 3 lower bits. */
23152 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
23153 NULL_RTX, 0, OPTAB_WIDEN);
/* Dispatch on (addr & 3): 0 -> already aligned, 2 -> two bytes to go,
   >2 -> three-byte case; fall through handles alignment 1.  */
23155 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
23156 Pmode, 1, align_4_label);
23157 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
23158 Pmode, 1, align_2_label);
23159 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
23160 Pmode, 1, align_3_label);
23164 /* Since the alignment is 2, we have to check 2 or 0 bytes;
23165 check if is aligned to 4 - byte. */
23167 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
23168 NULL_RTX, 0, OPTAB_WIDEN);
23170 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
23171 Pmode, 1, align_4_label);
23174 mem = change_address (src, QImode, out);
23176 /* Now compare the bytes. */
23178 /* Compare the first n unaligned byte on a byte per byte basis. */
23179 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
23180 QImode, 1, end_0_label);
23182 /* Increment the address. */
23183 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
23185 /* Not needed with an alignment of 2 */
23188 emit_label (align_2_label);
23190 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
23193 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
23195 emit_label (align_3_label);
23198 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
23201 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
23204 /* Generate loop to check 4 bytes at a time. It is not a good idea to
23205 align this loop. It gives only huge programs, but does not help to
23207 emit_label (align_4_label);
23209 mem = change_address (src, SImode, out);
23210 emit_move_insn (scratch, mem);
23211 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
23213 /* This formula yields a nonzero result iff one of the bytes is zero.
23214 This saves three branches inside loop and many cycles. */
23216 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
23217 emit_insn (gen_one_cmplsi2 (scratch, scratch));
23218 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch))23219 emit_insn (gen_andsi3 (tmpreg, tmpreg,
23220 gen_int_mode (0x80808080, SImode)));
23221 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
/* A zero byte was found somewhere in the last word; the code below
   locates which of the four bytes it was and fixes OUT accordingly.
   The cmove path (presumably TARGET_CMOVE — the condition is elided
   here) avoids branches; the other path uses an explicit jump.  */
23226 rtx reg = gen_reg_rtx (SImode);
23227 rtx reg2 = gen_reg_rtx (Pmode);
23228 emit_move_insn (reg, tmpreg);
23229 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
23231 /* If zero is not in the first two bytes, move two bytes forward. */
23232 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
23233 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
23234 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
23235 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
23236 gen_rtx_IF_THEN_ELSE (SImode, tmp,
23239 /* Emit lea manually to avoid clobbering of flags. */
23240 emit_insn (gen_rtx_SET (SImode, reg2,
23241 gen_rtx_PLUS (Pmode, out, const2_rtx)));
23243 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
23244 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
23245 emit_insn (gen_rtx_SET (VOIDmode, out,
23246 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
23252 rtx end_2_label = gen_label_rtx ();
23253 /* Is zero in the first two bytes? */
23255 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
23256 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
23257 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
23258 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
23259 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
23261 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
23262 JUMP_LABEL (tmp) = end_2_label;
23264 /* Not in the first two. Move two bytes forward. */
23265 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
23266 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
23268 emit_label (end_2_label);
23272 /* Avoid branch in fixing the byte. */
/* add %al,%al sets carry iff bit 7 (the zero-byte marker for the low
   byte) was set; sbb then subtracts 3 or 2 from OUT accordingly.  */
23273 tmpreg = gen_lowpart (QImode, tmpreg);
23274 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
23275 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
23276 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
23277 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
23279 emit_label (end_0_label);
23282 /* Expand strlen. */
/* Expand the strlen builtin inline.  OUT receives the length, SRC is the
   string MEM, EOSCHAR the terminator (only const0_rtx enables the
   unrolled path), ALIGN the known alignment.  Two strategies: the
   unrolled SImode scan (ix86_expand_strlensi_unroll_1) when profitable,
   otherwise a repne-scasb sequence via gen_strlenqi_1.  Return value is
   elided in this excerpt; presumably true on success — confirm against
   the full file.  */
23285 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
23287 rtx addr, scratch1, scratch2, scratch3, scratch4;
23289 /* The generic case of strlen expander is long. Avoid it's
23290 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
23292 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
23293 && !TARGET_INLINE_ALL_STRINGOPS
23294 && !optimize_insn_for_size_p ()
23295 && (!CONST_INT_P (align) || INTVAL (align) < 4))
23298 addr = force_reg (Pmode, XEXP (src, 0));
23299 scratch1 = gen_reg_rtx (Pmode);
23301 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
23302 && !optimize_insn_for_size_p ())
23304 /* Well it seems that some optimizer does not combine a call like
23305 foo(strlen(bar), strlen(bar));
23306 when the move and the subtraction is done here. It does calculate
23307 the length just once when these instructions are done inside of
23308 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
23309 often used and I use one fewer register for the lifetime of
23310 output_strlen_unroll() this is better. */
23312 emit_move_insn (out, addr);
23314 ix86_expand_strlensi_unroll_1 (out, src, align);
23316 /* strlensi_unroll_1 returns the address of the zero at the end of
23317 the string, like memchr(), so compute the length by subtracting
23318 the start address. */
23319 emit_insn (ix86_gen_sub3 (out, out, addr));
23325 /* Can't use this if the user has appropriated eax, ecx, or edi. */
23326 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
/* scasb path: scratch4 = -1 is the ECX count; the UNSPEC ties the
   operands together for the strlenqi_1 pattern.  */
23329 scratch2 = gen_reg_rtx (Pmode);
23330 scratch3 = gen_reg_rtx (Pmode);
23331 scratch4 = force_reg (Pmode, constm1_rtx);
23333 emit_move_insn (scratch3, addr);
23334 eoschar = force_reg (QImode, eoschar);
23336 src = replace_equiv_address_nv (src, scratch3);
23338 /* If .md starts supporting :P, this can be done in .md. */
23339 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
23340 scratch4), UNSPEC_SCAS);
23341 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
/* length = ~(end_ptr) + (-1) == -(end_ptr) - 1 relative to scan;
   converts the scas result into the string length.  */
23342 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
23343 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
23348 /* For given symbol (function) construct code to compute address of it's PLT
23349 entry in large x86-64 PIC model. */
/* For the large x86-64 PIC code model, compute the address of SYMBOL's
   PLT entry at runtime: tmp = PLTOFF(symbol) + PIC register.  Only valid
   for SYMBOL_REFs under CM_LARGE_PIC (asserted).  Returns TMP (the
   return statement is elided in this excerpt).  */
23351 construct_plt_address (rtx symbol)
23353 rtx tmp = gen_reg_rtx (Pmode);
23354 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
23356 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
23357 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
23359 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
23360 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
/* Emit a call to FNADDR with argument size CALLARG1; RETVAL receives the
   result (may be NULL for void calls), POP is the callee-pop amount,
   SIBCALL selects a sibling call.  Builds a PARALLEL collecting the call,
   optional stack-pop SET, MS->SYSV clobbers, and an optional
   UNSPEC_CALL_NEEDS_VZEROUPPER marker that is split off later by
   ix86_split_call_vzeroupper.  NOTE(review): excerpt is elided; several
   conditions and braces are missing between visible lines.  */
23365 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
23367 rtx pop, bool sibcall)
23369 /* We need to represent that SI and DI registers are clobbered
23371 static int clobbered_registers[] = {
23372 XMM6_REG, XMM7_REG, XMM8_REG,
23373 XMM9_REG, XMM10_REG, XMM11_REG,
23374 XMM12_REG, XMM13_REG, XMM14_REG,
23375 XMM15_REG, SI_REG, DI_REG
/* +3 leaves room for the call SET, the stack-pop SET and the
   vzeroupper UNSPEC beyond the register clobbers.  */
23377 rtx vec[ARRAY_SIZE (clobbered_registers) + 3];
23378 rtx use = NULL, call;
23379 unsigned int vec_len;
23381 if (pop == const0_rtx)
23383 gcc_assert (!TARGET_64BIT || !pop);
23385 if (TARGET_MACHO && !TARGET_64BIT)
23388 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
23389 fnaddr = machopic_indirect_call_target (fnaddr);
23394 /* Static functions and indirect calls don't need the pic register. */
23395 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
23396 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
23397 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
23398 use_reg (&use, pic_offset_table_rtx);
/* x86-64 varargs: AL carries the number of SSE registers used.  */
23401 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
23403 rtx al = gen_rtx_REG (QImode, AX_REG);
23404 emit_move_insn (al, callarg2);
23405 use_reg (&use, al);
23408 if (ix86_cmodel == CM_LARGE_PIC
23410 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
23411 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode)23412 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
23414 ? !sibcall_insn_operand (XEXP (fnaddr, 0), Pmode)
23415 : !call_insn_operand (XEXP (fnaddr, 0), Pmode))
23417 fnaddr = XEXP (fnaddr, 0);
23418 if (GET_MODE (fnaddr) != Pmode)
23419 fnaddr = convert_to_mode (Pmode, fnaddr, 1);
23420 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (Pmode, fnaddr));
23424 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
23426 call = gen_rtx_SET (VOIDmode, retval, call);
23427 vec[vec_len++] = call;
23431 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
23432 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
23433 vec[vec_len++] = pop;
/* callarg2 == -2 presumably marks a call that needs no MS->SYSV
   transition clobbers — confirm against the full file.  */
23436 if (TARGET_64BIT_MS_ABI
23437 && (!callarg2 || INTVAL (callarg2) != -2))
23441 vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
23442 UNSPEC_MS_TO_SYSV_CALL);
23444 for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
23446 = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
23448 gen_rtx_REG (SSE_REGNO_P (clobbered_registers[i])
23450 clobbered_registers[i]));
23453 /* Add UNSPEC_CALL_NEEDS_VZEROUPPER decoration. */
23454 if (TARGET_VZEROUPPER)
23457 if (cfun->machine->callee_pass_avx256_p)
23459 if (cfun->machine->callee_return_avx256_p)
23460 avx256 = callee_return_pass_avx256;
23462 avx256 = callee_pass_avx256;
23464 else if (cfun->machine->callee_return_avx256_p)
23465 avx256 = callee_return_avx256;
23467 avx256 = call_no_avx256;
23469 if (reload_completed)
23470 emit_insn (gen_avx_vzeroupper (GEN_INT (avx256)));
23472 vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode,
23473 gen_rtvec (1, GEN_INT (avx256)),
23474 UNSPEC_CALL_NEEDS_VZEROUPPER);
23478 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
23479 call = emit_call_insn (call);
23481 CALL_INSN_FUNCTION_USAGE (call) = use;
/* Split a call INSN decorated with UNSPEC_CALL_NEEDS_VZEROUPPER: drop the
   trailing UNSPEC from the PARALLEL, emit an avx_vzeroupper with the
   recorded VZEROUPPER state, then re-emit the call.  */
23487 ix86_split_call_vzeroupper (rtx insn, rtx vzeroupper)
23489 rtx pat = PATTERN (insn);
23490 rtvec vec = XVEC (pat, 0);
23491 int len = GET_NUM_ELEM (vec) - 1;
23493 /* Strip off the last entry of the parallel. */
23494 gcc_assert (GET_CODE (RTVEC_ELT (vec, len)) == UNSPEC);
23495 gcc_assert (XINT (RTVEC_ELT (vec, len), 1) == UNSPEC_CALL_NEEDS_VZEROUPPER);
23497 pat = RTVEC_ELT (vec, 0);
/* With more than one remaining element, rebuild the PARALLEL without
   the stripped UNSPEC (the single-element branch above is elided).  */
23499 pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (len, &RTVEC_ELT (vec, 0)));
23501 emit_insn (gen_avx_vzeroupper (vzeroupper));
23502 emit_call_insn (pat);
23505 /* Output the assembly for a call instruction. */
/* Return the assembler template for call/sibcall INSN whose target
   operand is CALL_OP.  Sibling calls become jmp (REX.W-prefixed under
   SEH so the unwinder can recognize the indirect-branch epilogue);
   regular calls may need a trailing nop for SEH unwind correctness.  */
23508 ix86_output_call_insn (rtx insn, rtx call_op)
23510 bool direct_p = constant_call_address_operand (call_op, Pmode);
23511 bool seh_nop_p = false;
23514 if (SIBLING_CALL_P (insn))
23518 /* SEH epilogue detection requires the indirect branch case
23519 to include REX.W. */
23520 else if (TARGET_SEH)
23521 xasm = "rex.W jmp %A0";
23525 output_asm_insn (xasm, &call_op);
23529 /* SEH unwinding can require an extra nop to be emitted in several
23530 circumstances. Determine if we have one of those. */
23535 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
23537 /* If we get to another real insn, we don't need the nop. */
23541 /* If we get to the epilogue note, prevent a catch region from
23542 being adjacent to the standard epilogue sequence. If non-
23543 call-exceptions, we'll have done this during epilogue emission. */
23544 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
23545 && !flag_non_call_exceptions
23546 && !can_throw_internal (insn))
23553 /* If we didn't find a real insn following the call, prevent the
23554 unwinder from looking into the next function. */
/* %P0 prints a direct symbol; %A0 an indirect operand.  */
23560 xasm = "call\t%P0";
23562 xasm = "call\t%A0";
23564 output_asm_insn (xasm, &call_op);
23572 /* Clear stack slot assignments remembered from previous functions.
23573 This is called from INIT_EXPANDERS once before RTL is emitted for each
/* Allocate and initialize the per-function machine_function record.
   Called via init_machine_status at the start of each function's RTL
   generation.  -1 marks use_fast_prologue_epilogue_nregs as not yet
   computed.  The return statement is elided in this excerpt.  */
static struct machine_function *
23577 ix86_init_machine_status (void)
23579 struct machine_function *f;
23581 f = ggc_alloc_cleared_machine_function ();
23582 f->use_fast_prologue_epilogue_nregs = -1;
23583 f->tls_descriptor_call_expanded_p = 0;
23584 f->call_abi = ix86_abi;
23589 /* Return a MEM corresponding to a stack slot with mode MODE.
23590 Allocate a new slot if necessary.
23592 The RTL for a function can have several slots available: N is
23593 which slot to use. */
/* Return a MEM for stack slot N in mode MODE, reusing a previously
   allocated slot from the ix86_stack_locals cache when (mode, n)
   matches, otherwise allocating a fresh one and pushing it onto the
   list.  */
23596 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
23598 struct stack_local_entry *s;
23600 gcc_assert (n < MAX_386_STACK_LOCALS);
23602 /* Virtual slot is valid only before vregs are instantiated. */
23603 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
23605 for (s = ix86_stack_locals; s; s = s->next)
23606 if (s->mode == mode && s->n == n)
/* copy_rtx so each use gets a distinct MEM that can carry its own
   flags.  */
23607 return validize_mem (copy_rtx (s->rtl));
23609 s = ggc_alloc_stack_local_entry ();
23612 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
23614 s->next = ix86_stack_locals;
23615 ix86_stack_locals = s;
23616 return validize_mem (s->rtl);
23619 /* Calculate the length of the memory address in the instruction encoding.
23620 Includes addr32 prefix, does not include the one-byte modrm, opcode,
23621 or other prefixes. */
/* Return the encoded length in bytes of the memory address ADDR:
   displacement bytes, SIB byte if required, and the addr32 prefix —
   but not the modrm byte, opcode, or other prefixes.  Encodes the
   x86 special cases: esp/r12 force a SIB byte, ebp/r13 force a
   displacement.  NOTE(review): excerpt is elided — the declaration/
   return plumbing between visible lines is missing.  */
23624 memory_address_length (rtx addr)
23626 struct ix86_address parts;
23627 rtx base, index, disp;
23631 if (GET_CODE (addr) == PRE_DEC
23632 || GET_CODE (addr) == POST_INC
23633 || GET_CODE (addr) == PRE_MODIFY
23634 || GET_CODE (addr) == POST_MODIFY)
23637 ok = ix86_decompose_address (addr, &parts);
23640 if (parts.base && GET_CODE (parts.base) == SUBREG)
23641 parts.base = SUBREG_REG (parts.base);
23642 if (parts.index && GET_CODE (parts.index) == SUBREG)
23643 parts.index = SUBREG_REG (parts.index);
23646 index = parts.index;
23649 /* Add length of addr32 prefix. */
23650 len = (GET_CODE (addr) == ZERO_EXTEND
23651 || GET_CODE (addr) == AND);
23654 - esp as the base always wants an index,
23655 - ebp as the base always wants a displacement,
23656 - r12 as the base always wants an index,
23657 - r13 as the base always wants a displacement. */
23659 /* Register Indirect. */
23660 if (base && !index && !disp)
23662 /* esp (for its index) and ebp (for its displacement) need
23663 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
23666 && (addr == arg_pointer_rtx
23667 || addr == frame_pointer_rtx
23668 || REGNO (addr) == SP_REG
23669 || REGNO (addr) == BP_REG
23670 || REGNO (addr) == R12_REG
23671 || REGNO (addr) == R13_REG))
23675 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
23676 is not disp32, but disp32(%rip), so for disp32
23677 SIB byte is needed, unless print_operand_address
23678 optimizes it into disp32(%rip) or (%rip) is implied
23680 else if (disp && !base && !index)
23687 if (GET_CODE (disp) == CONST)
23688 symbol = XEXP (disp, 0);
23689 if (GET_CODE (symbol) == PLUS
23690 && CONST_INT_P (XEXP (symbol, 1)))
23691 symbol = XEXP (symbol, 0);
/* A symbol that is not a label, a non-TLS symbol, or one of these
   RIP-relative unspecs cannot use (%rip) addressing, so it needs
   the SIB-byte disp32 form.  */
23693 if (GET_CODE (symbol) != LABEL_REF
23694 && (GET_CODE (symbol) != SYMBOL_REF
23695 || SYMBOL_REF_TLS_MODEL (symbol) != 0)
23696 && (GET_CODE (symbol) != UNSPEC
23697 || (XINT (symbol, 1) != UNSPEC_GOTPCREL
23698 && XINT (symbol, 1) != UNSPEC_PCREL
23699 && XINT (symbol, 1) != UNSPEC_GOTNTPOFF)))
23706 /* Find the length of the displacement constant. */
/* satisfies_constraint_K is a signed 8-bit immediate: disp8 form.  */
23709 if (base && satisfies_constraint_K (disp))
23714 /* ebp always wants a displacement. Similarly r13. */
23715 else if (base && REG_P (base)
23716 && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
23719 /* An index requires the two-byte modrm form.... */
23721 /* ...like esp (or r12), which always wants an index. */
23722 || base == arg_pointer_rtx
23723 || base == frame_pointer_rtx
23724 || (base && REG_P (base)
23725 && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
23742 /* Compute default value for "length_immediate" attribute. When SHORTFORM
23743 is set, expect that insn have 8bit immediate alternative. */
/* Compute the default value of the "length_immediate" insn attribute:
   the number of bytes the constant operand of INSN occupies in the
   encoding.  With SHORTFORM, assume the insn has an imm8 alternative,
   so constants fitting in a sign-extended byte count as 1.  */
23745 ix86_attr_length_immediate_default (rtx insn, bool shortform)
23749 extract_insn_cached (insn);
23750 for (i = recog_data.n_operands - 1; i >= 0; --i)
23751 if (CONSTANT_P (recog_data.operand[i]))
23753 enum attr_mode mode = get_attr_mode (insn);
23756 if (shortform && CONST_INT_P (recog_data.operand[i]))
23758 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
/* Truncate to the insn's operand mode first so e.g. 0xffff in
   HImode is recognized as -1 and fits in imm8.  */
23765 ival = trunc_int_for_mode (ival, HImode);
23768 ival = trunc_int_for_mode (ival, SImode);
23773 if (IN_RANGE (ival, -128, 127))
23790 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
23795 fatal_insn ("unknown insn mode", insn);
23800 /* Compute default value for "length_address" attribute. */
/* Compute the default value of the "length_address" insn attribute:
   the encoded length of INSN's memory-address operand, or of the LEA
   source address.  Returns 0 when no memory operand is found (the
   fall-through return is elided in this excerpt).  */
23802 ix86_attr_length_address_default (rtx insn)
23806 if (get_attr_type (insn) == TYPE_LEA)
23808 rtx set = PATTERN (insn), addr;
23810 if (GET_CODE (set) == PARALLEL)
23811 set = XVECEXP (set, 0, 0);
23813 gcc_assert (GET_CODE (set) == SET);
23815 addr = SET_SRC (set);
/* 32-bit LEA on 64-bit: strip the zero_extend/subreg wrapper to get
   at the underlying address.  */
23816 if (TARGET_64BIT && get_attr_mode (insn) == MODE_SI)
23818 if (GET_CODE (addr) == ZERO_EXTEND)
23819 addr = XEXP (addr, 0);
23820 if (GET_CODE (addr) == SUBREG)
23821 addr = SUBREG_REG (addr);
23824 return memory_address_length (addr);
23827 extract_insn_cached (insn);
23828 for (i = recog_data.n_operands - 1; i >= 0; --i)
23829 if (MEM_P (recog_data.operand[i]))
23831 constrain_operands_cached (reload_completed);
23832 if (which_alternative != -1)
23834 const char *constraints = recog_data.constraints[i];
23835 int alt = which_alternative;
/* Walk to the constraint string for the chosen alternative; an 'X'
   constraint means the operand is ignored in this alternative.  */
23837 while (*constraints == '=' || *constraints == '+')
23840 while (*constraints++ != ',')
23842 /* Skip ignored operands. */
23843 if (*constraints == 'X')
23846 return memory_address_length (XEXP (recog_data.operand[i], 0));
23851 /* Compute default value for "length_vex" attribute. It includes
23852 2 or 3 byte VEX prefix and 1 opcode byte. */
/* Compute the default value of the "length_vex" attribute: the size of
   the VEX prefix plus one opcode byte, i.e. 3 for the 2-byte VEX form
   or 4 for the 3-byte form.  HAS_0F_OPCODE and HAS_VEX_W describe the
   insn's encoding requirements.  */
23855 ix86_attr_length_vex_default (rtx insn, bool has_0f_opcode, bool has_vex_w)
23859 /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
23860 byte VEX prefix. */
23861 if (!has_0f_opcode || has_vex_w)
23864 /* We can always use 2 byte VEX prefix in 32bit. */
23868 extract_insn_cached (insn);
23870 for (i = recog_data.n_operands - 1; i >= 0; --i)
23871 if (REG_P (recog_data.operand[i]))
23873 /* REX.W bit uses 3 byte VEX prefix. */
23874 if (GET_MODE (recog_data.operand[i]) == DImode
23875 && GENERAL_REG_P (recog_data.operand[i]))
23880 /* REX.X or REX.B bits use 3 byte VEX prefix. */
23881 if (MEM_P (recog_data.operand[i])
23882 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
23889 /* Return the maximum number of instructions a cpu can issue. */
/* Scheduler hook: return the maximum number of instructions the target
   CPU can issue per cycle.  The per-case return values are elided in
   this excerpt; the groups presumably return 2/3/4-ish rates by
   micro-architecture — confirm against the full switch.  */
23892 ix86_issue_rate (void)
23896 case PROCESSOR_PENTIUM:
23897 case PROCESSOR_ATOM:
23901 case PROCESSOR_PENTIUMPRO:
23902 case PROCESSOR_PENTIUM4:
23903 case PROCESSOR_CORE2_32:
23904 case PROCESSOR_CORE2_64:
23905 case PROCESSOR_COREI7_32:
23906 case PROCESSOR_COREI7_64:
23907 case PROCESSOR_ATHLON:
23909 case PROCESSOR_AMDFAM10:
23910 case PROCESSOR_NOCONA:
23911 case PROCESSOR_GENERIC32:
23912 case PROCESSOR_GENERIC64:
23913 case PROCESSOR_BDVER1:
23914 case PROCESSOR_BDVER2:
23915 case PROCESSOR_BTVER1:
23923 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
23924 by DEP_INSN and nothing set by DEP_INSN. */
23927 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
23931 /* Simplify the test for uninteresting insns. */
23932 if (insn_type != TYPE_SETCC
23933 && insn_type != TYPE_ICMOV
23934 && insn_type != TYPE_FCMOV
23935 && insn_type != TYPE_IBR)
23938 if ((set = single_set (dep_insn)) != 0)
23940 set = SET_DEST (set);
23943 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
23944 && XVECLEN (PATTERN (dep_insn), 0) == 2
23945 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
23946 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
23948 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
23949 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
23954 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
23957 /* This test is true if the dependent insn reads the flags but
23958 not any other potentially set register. */
23959 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
23962 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
23968 /* Return true iff USE_INSN has a memory address with operands set by
/* Return true iff USE_INSN has a memory operand whose address uses a
   register or memory modified by SET_INSN (an address-generation
   interlock).  Only the first MEM operand found is checked; the
   no-memory-operand fall-through return is elided in this excerpt.  */
23972 ix86_agi_dependent (rtx set_insn, rtx use_insn)
23975 extract_insn_cached (use_insn);
23976 for (i = recog_data.n_operands - 1; i >= 0; --i)
23977 if (MEM_P (recog_data.operand[i]))
23979 rtx addr = XEXP (recog_data.operand[i], 0);
23980 return modified_in_p (addr, set_insn) != 0;
/* TARGET_SCHED_ADJUST_COST hook: adjust the latency COST of the
   dependence LINK between DEP_INSN (producer) and INSN (consumer) for
   the current -mtune CPU.  NOTE(review): excerpt is elided; most return
   and cost-adjustment statements between the visible lines are
   missing.  */
23986 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
23988 enum attr_type insn_type, dep_insn_type;
23989 enum attr_memory memory;
23991 int dep_insn_code_number;
23993 /* Anti and output dependencies have zero cost on all CPUs. */
23994 if (REG_NOTE_KIND (link) != 0)
23997 dep_insn_code_number = recog_memoized (dep_insn);
23999 /* If we can't recognize the insns, we can't really do anything. */
24000 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
24003 insn_type = get_attr_type (insn);
24004 dep_insn_type = get_attr_type (dep_insn);
24008 case PROCESSOR_PENTIUM:
24009 /* Address Generation Interlock adds a cycle of latency. */
24010 if (insn_type == TYPE_LEA)
24012 rtx addr = PATTERN (insn);
24014 if (GET_CODE (addr) == PARALLEL)
24015 addr = XVECEXP (addr, 0, 0);
24017 gcc_assert (GET_CODE (addr) == SET);
24019 addr = SET_SRC (addr);
24020 if (modified_in_p (addr, dep_insn))
24023 else if (ix86_agi_dependent (dep_insn, insn))
24026 /* ??? Compares pair with jump/setcc. */
24027 if (ix86_flags_dependent (insn, dep_insn, insn_type))
24030 /* Floating point stores require value to be ready one cycle earlier. */
24031 if (insn_type == TYPE_FMOV
24032 && get_attr_memory (insn) == MEMORY_STORE
24033 && !ix86_agi_dependent (dep_insn, insn))
24037 case PROCESSOR_PENTIUMPRO:
24038 memory = get_attr_memory (insn);
24040 /* INT->FP conversion is expensive. */
24041 if (get_attr_fp_int_src (dep_insn))
24044 /* There is one cycle extra latency between an FP op and a store. */
24045 if (insn_type == TYPE_FMOV
24046 && (set = single_set (dep_insn)) != NULL_RTX
24047 && (set2 = single_set (insn)) != NULL_RTX
24048 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
24049 && MEM_P (SET_DEST (set2)))
24052 /* Show ability of reorder buffer to hide latency of load by executing
24053 in parallel with previous instruction in case
24054 previous instruction is not needed to compute the address. */
24055 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
24056 && !ix86_agi_dependent (dep_insn, insn))
24058 /* Claim moves to take one cycle, as core can issue one load
24059 at time and the next load can start cycle later. */
24060 if (dep_insn_type == TYPE_IMOV
24061 || dep_insn_type == TYPE_FMOV)
/* The next case's label is elided here (presumably K6-class).  */
24069 memory = get_attr_memory (insn);
24071 /* The esp dependency is resolved before the instruction is really
24073 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
24074 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
24077 /* INT->FP conversion is expensive. */
24078 if (get_attr_fp_int_src (dep_insn))
24081 /* Show ability of reorder buffer to hide latency of load by executing
24082 in parallel with previous instruction in case
24083 previous instruction is not needed to compute the address. */
24084 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
24085 && !ix86_agi_dependent (dep_insn, insn))
24087 /* Claim moves to take one cycle, as core can issue one load
24088 at time and the next load can start cycle later. */
24089 if (dep_insn_type == TYPE_IMOV
24090 || dep_insn_type == TYPE_FMOV)
24099 case PROCESSOR_ATHLON:
24101 case PROCESSOR_AMDFAM10:
24102 case PROCESSOR_BDVER1:
24103 case PROCESSOR_BDVER2:
24104 case PROCESSOR_BTVER1:
24105 case PROCESSOR_ATOM:
24106 case PROCESSOR_GENERIC32:
24107 case PROCESSOR_GENERIC64:
24108 memory = get_attr_memory (insn);
24110 /* Show ability of reorder buffer to hide latency of load by executing
24111 in parallel with previous instruction in case
24112 previous instruction is not needed to compute the address. */
24113 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
24114 && !ix86_agi_dependent (dep_insn, insn))
24116 enum attr_unit unit = get_attr_unit (insn);
24119 /* Because of the difference between the length of integer and
24120 floating unit pipeline preparation stages, the memory operands
24121 for floating point are cheaper.
24123 ??? For Athlon it the difference is most probably 2. */
24124 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
24127 loadcost = TARGET_ATHLON ? 2 : 0;
24129 if (cost >= loadcost)
24142 /* How many alternative schedules to try. This should be as wide as the
24143 scheduling freedom in the DFA, but no wider. Making this value too
24144 large results extra work for the scheduler. */
/* TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD hook: how many
   alternative schedules to try per cycle.  Should match the DFA's
   scheduling freedom, no wider.  Per-case returns for Pentium/PPro are
   elided in this excerpt; Core 2/i7 use the issue rate.  */
24147 ia32_multipass_dfa_lookahead (void)
24151 case PROCESSOR_PENTIUM:
24154 case PROCESSOR_PENTIUMPRO:
24158 case PROCESSOR_CORE2_32:
24159 case PROCESSOR_CORE2_64:
24160 case PROCESSOR_COREI7_32:
24161 case PROCESSOR_COREI7_64:
24162 /* Generally, we want haifa-sched:max_issue() to look ahead as far
24163 as many instructions can be executed on a cycle, i.e.,
24164 issue_rate. I wonder why tuning for many CPUs does not do this. */
24165 return ix86_issue_rate ();
24174 /* Model decoder of Core 2/i7.
24175 Below hooks for multipass scheduling (see haifa-sched.c:max_issue)
24176 track the instruction fetch block boundaries and make sure that long
24177 (9+ bytes) instructions are assigned to D0. */
24179 /* Maximum length of an insn that can be handled by
24180 a secondary decoder unit. '8' for Core 2/i7. */
24181 static int core2i7_secondary_decoder_max_insn_size;
24183 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
24184 '16' for Core 2/i7. */
24185 static int core2i7_ifetch_block_size;
24187 /* Maximum number of instructions decoder can handle per cycle.
24188 '6' for Core 2/i7. */
24189 static int core2i7_ifetch_block_max_insns;
24191 typedef struct ix86_first_cycle_multipass_data_ *
24192 ix86_first_cycle_multipass_data_t;
24193 typedef const struct ix86_first_cycle_multipass_data_ *
24194 const_ix86_first_cycle_multipass_data_t;
24196 /* A variable to store target state across calls to max_issue within
24198 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
24199 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
24201 /* Initialize DATA. */
/* TARGET_SCHED_FIRST_CYCLE_MULTIPASS_INIT hook: reset the Core 2/i7
   decoder-model state in _DATA — empty ifetch block and no recorded
   ready_try changes.  */
24203 core2i7_first_cycle_multipass_init (void *_data)
24205 ix86_first_cycle_multipass_data_t data
24206 = (ix86_first_cycle_multipass_data_t) _data;
24208 data->ifetch_block_len = 0;
24209 data->ifetch_block_n_insns = 0;
24210 data->ready_try_change = NULL;
24211 data->ready_try_change_size = 0;
24214 /* Advancing the cycle; reset ifetch block counts. */
/* TARGET_SCHED_DFA_POST_ADVANCE_CYCLE hook: a new cycle starts, so the
   decoder fetches a fresh ifetch block — reset its byte and insn
   counters (after sanity-checking the old count).  */
24216 core2i7_dfa_post_advance_cycle (void)
24218 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
24220 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
24222 data->ifetch_block_len = 0;
24223 data->ifetch_block_n_insns = 0;
24226 static int min_insn_size (rtx);
24228 /* Filter out insns from ready_try that the core will not be able to issue
24229 on current cycle due to decoder. */
/* Mask out entries of READY_TRY that the Core 2/i7 decoder cannot issue
   this cycle: insns too long for a secondary decoder (unless this is the
   first insn of the cycle), insns that overflow the 16-byte ifetch
   block, or a full decoder.  Changes are recorded in
   data->ready_try_change so they can be reverted on backtrack.
   NOTE(review): the surrounding loop over N_READY is elided in this
   excerpt.  */
24231 core2i7_first_cycle_multipass_filter_ready_try
24232 (const_ix86_first_cycle_multipass_data_t data,
24233 char *ready_try, int n_ready, bool first_cycle_insn_p)
24240 if (ready_try[n_ready])
24243 insn = get_ready_element (n_ready);
24244 insn_size = min_insn_size (insn);
24246 if (/* If this is a too long an insn for a secondary decoder ... */
24247 (!first_cycle_insn_p
24248 && insn_size > core2i7_secondary_decoder_max_insn_size)
24249 /* ... or it would not fit into the ifetch block ... */
24250 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
24251 /* ... or the decoder is full already ... */
24252 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
24253 /* ... mask the insn out. */
24255 ready_try[n_ready] = 1;
24257 if (data->ready_try_change)
24258 SET_BIT (data->ready_try_change, n_ready);
24263 /* Prepare for a new round of multipass lookahead scheduling. */
/* TARGET_SCHED_FIRST_CYCLE_MULTIPASS_BEGIN hook: start a new lookahead
   round by copying the decoder state left over from the previous round,
   then filter READY_TRY against decoder limits.  */
24265 core2i7_first_cycle_multipass_begin (void *_data, char *ready_try, int n_ready,
24266 bool first_cycle_insn_p)
24268 ix86_first_cycle_multipass_data_t data
24269 = (ix86_first_cycle_multipass_data_t) _data;
24270 const_ix86_first_cycle_multipass_data_t prev_data
24271 = ix86_first_cycle_multipass_data;
24273 /* Restore the state from the end of the previous round. */
24274 data->ifetch_block_len = prev_data->ifetch_block_len;
24275 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
24277 /* Filter instructions that cannot be issued on current cycle due to
24278 decoder restrictions. */
24279 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
24280 first_cycle_insn_p);
24283 /* INSN is being issued in current solution. Account for its impact on
24284 the decoder model. */
/* TARGET_SCHED_FIRST_CYCLE_MULTIPASS_ISSUE hook: INSN was chosen in the
   current solution.  Account for its bytes/slot in the decoder model,
   (re)allocate the ready_try_change bitmap used for backtracking, and
   re-filter READY_TRY with the updated state.  */
24286 core2i7_first_cycle_multipass_issue (void *_data, char *ready_try, int n_ready,
24287 rtx insn, const void *_prev_data)
24289 ix86_first_cycle_multipass_data_t data
24290 = (ix86_first_cycle_multipass_data_t) _data;
24291 const_ix86_first_cycle_multipass_data_t prev_data
24292 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
24294 int insn_size = min_insn_size (insn);
24296 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
24297 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
24298 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
24299 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
24301 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
24302 if (!data->ready_try_change)
24304 data->ready_try_change = sbitmap_alloc (n_ready);
24305 data->ready_try_change_size = n_ready;
24307 else if (data->ready_try_change_size < n_ready)
24309 data->ready_try_change = sbitmap_resize (data->ready_try_change,
24311 data->ready_try_change_size = n_ready;
24313 sbitmap_zero (data->ready_try_change);
24315 /* Filter out insns from ready_try that the core will not be able to issue
24316 on current cycle due to decoder. */
24317 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
24321 /* Revert the effect on ready_try. */
/* TARGET_SCHED_FIRST_CYCLE_MULTIPASS_BACKTRACK hook: undo the
   ready_try masking recorded in data->ready_try_change when the
   scheduler abandons this solution branch.  (The loop body clearing
   each recorded ready_try entry is elided in this excerpt.)  */
24323 core2i7_first_cycle_multipass_backtrack (const void *_data,
24325 int n_ready ATTRIBUTE_UNUSED)
24327 const_ix86_first_cycle_multipass_data_t data
24328 = (const_ix86_first_cycle_multipass_data_t) _data;
24329 unsigned int i = 0;
24330 sbitmap_iterator sbi;
24332 gcc_assert (sbitmap_last_set_bit (data->ready_try_change) < n_ready);
24333 EXECUTE_IF_SET_IN_SBITMAP (data->ready_try_change, 0, i, sbi)
24339 /* Save the result of multipass lookahead scheduling for the next round. */
/* TARGET_SCHED_FIRST_CYCLE_MULTIPASS_END hook: persist this round's
   decoder state into the static ix86_first_cycle_multipass_data so the
   next round's _begin can pick it up.  */
24341 core2i7_first_cycle_multipass_end (const void *_data)
24343 const_ix86_first_cycle_multipass_data_t data
24344 = (const_ix86_first_cycle_multipass_data_t) _data;
24345 ix86_first_cycle_multipass_data_t next_data
24346 = ix86_first_cycle_multipass_data;
24350 next_data->ifetch_block_len = data->ifetch_block_len;
24351 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
24355 /* Deallocate target data. */
/* TARGET_SCHED_FIRST_CYCLE_MULTIPASS_FINI hook: free the backtracking
   bitmap and reset its bookkeeping.  */
24357 core2i7_first_cycle_multipass_fini (void *_data)
24359 ix86_first_cycle_multipass_data_t data
24360 = (ix86_first_cycle_multipass_data_t) _data;
24362 if (data->ready_try_change)
24364 sbitmap_free (data->ready_try_change);
24365 data->ready_try_change = NULL;
24366 data->ready_try_change_size = 0;
24370 /* Prepare for scheduling pass. */
/* TARGET_SCHED_INIT_GLOBAL hook: install the Core 2/i7 decoder-model
   scheduling hooks and parameters when tuning for those CPUs; clear
   them for every other CPU so the time-critical scheduler paths pay
   nothing.  */
24372 ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
24373 int verbose ATTRIBUTE_UNUSED,
24374 int max_uid ATTRIBUTE_UNUSED)
24376 /* Install scheduling hooks for current CPU. Some of these hooks are used
24377 in time-critical parts of the scheduler, so we only set them up when
24378 they are actually used. */
24381 case PROCESSOR_CORE2_32:
24382 case PROCESSOR_CORE2_64:
24383 case PROCESSOR_COREI7_32:
24384 case PROCESSOR_COREI7_64:
24385 targetm.sched.dfa_post_advance_cycle
24386 = core2i7_dfa_post_advance_cycle;
24387 targetm.sched.first_cycle_multipass_init
24388 = core2i7_first_cycle_multipass_init;
24389 targetm.sched.first_cycle_multipass_begin
24390 = core2i7_first_cycle_multipass_begin;
24391 targetm.sched.first_cycle_multipass_issue
24392 = core2i7_first_cycle_multipass_issue;
24393 targetm.sched.first_cycle_multipass_backtrack
24394 = core2i7_first_cycle_multipass_backtrack;
24395 targetm.sched.first_cycle_multipass_end
24396 = core2i7_first_cycle_multipass_end;
24397 targetm.sched.first_cycle_multipass_fini
24398 = core2i7_first_cycle_multipass_fini;
24400 /* Set decoder parameters. */
24401 core2i7_secondary_decoder_max_insn_size = 8;
24402 core2i7_ifetch_block_size = 16;
24403 core2i7_ifetch_block_max_insns = 6;
/* Default case: disable all decoder-model hooks.  */
24407 targetm.sched.dfa_post_advance_cycle = NULL;
24408 targetm.sched.first_cycle_multipass_init = NULL;
24409 targetm.sched.first_cycle_multipass_begin = NULL;
24410 targetm.sched.first_cycle_multipass_issue = NULL;
24411 targetm.sched.first_cycle_multipass_backtrack = NULL;
24412 targetm.sched.first_cycle_multipass_end = NULL;
24413 targetm.sched.first_cycle_multipass_fini = NULL;
24419 /* Compute the alignment given to a constant that is being placed in memory.
24420 EXP is the constant and ALIGN is the alignment that the object would
24422 The value of this function is used instead of that alignment to align
/* Raises the alignment for FP/vector/integer constants (the `align < 64`
   and `align < 128` guards suggest the elided returns bump DFmode data
   to 64 bits and 128-bit modes to 128 bits), and word-aligns long string
   constants when not optimizing for size.
   NOTE(review): the return statements for the first two branches and the
   final fallthrough `return align;` are elided from this listing.  */
24426 ix86_constant_alignment (tree exp, int align)
24428 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
24429 || TREE_CODE (exp) == INTEGER_CST)
24431 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
24433 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
/* Word-align strings of length >= 31 so block copies run faster.  */
24436 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
24437 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
24438 return BITS_PER_WORD;
24443 /* Compute the alignment for a static variable.
24444 TYPE is the data type, and ALIGN is the alignment that
24445 the object would ordinarily have. The value of this function is used
24446 instead of that alignment to align the object. */
/* NOTE(review): the return statements inside each branch are elided from
   this listing; the `align < N` guards indicate each branch returns the
   corresponding N-bit alignment.  */
24449 ix86_data_alignment (tree type, int align)
/* Cap the boost: word alignment when optimizing for size, otherwise up
   to 256 bits (bounded by what the object file format supports).  */
24451 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
/* Large aggregates (size >= max_align bits, or with a nonzero high word
   of the size, i.e. huge) get the maximum alignment.  */
24453 if (AGGREGATE_TYPE_P (type)
24454 && TYPE_SIZE (type)
24455 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
24456 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
24457 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
24458 && align < max_align)
24461 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
24462 to 16byte boundary. */
24465 if (AGGREGATE_TYPE_P (type)
24466 && TYPE_SIZE (type)
24467 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
24468 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
24469 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
/* Per-type-class boosts: arrays/complex/records/scalars whose (element)
   mode is DFmode go to 64-bit alignment, 128-bit modes to 128-bit.  */
24473 if (TREE_CODE (type) == ARRAY_TYPE)
24475 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
24477 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
24480 else if (TREE_CODE (type) == COMPLEX_TYPE)
24483 if (TYPE_MODE (type) == DCmode && align < 64)
24485 if ((TYPE_MODE (type) == XCmode
24486 || TYPE_MODE (type) == TCmode) && align < 128)
24489 else if ((TREE_CODE (type) == RECORD_TYPE
24490 || TREE_CODE (type) == UNION_TYPE
24491 || TREE_CODE (type) == QUAL_UNION_TYPE)
24492 && TYPE_FIELDS (type))
/* For records, decide based on the mode of the first field.  */
24494 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
24496 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
24499 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
24500 || TREE_CODE (type) == INTEGER_TYPE)
24502 if (TYPE_MODE (type) == DFmode && align < 64)
24504 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
24511 /* Compute the alignment for a local variable or a stack slot. EXP is
24512 the data type or decl itself, MODE is the widest mode available and
24513 ALIGN is the alignment that the object would ordinarily have. The
24514 value of this macro is used instead of that alignment to align the
/* NOTE(review): this listing is elided — the declaration of the local
   `type`/`decl` variables, several early returns, and the per-branch
   return statements are missing; verify against the full source.  */
24518 ix86_local_alignment (tree exp, enum machine_mode mode,
24519 unsigned int align)
/* If EXP is a decl, look through it to its type; otherwise EXP is
   already a type (or NULL for a bare stack slot in MODE).  */
24523 if (exp && DECL_P (exp))
24525 type = TREE_TYPE (exp);
24534 /* Don't do dynamic stack realignment for long long objects with
24535 -mpreferred-stack-boundary=2. */
24538 && ix86_preferred_stack_boundary < 64
24539 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
24540 && (!type || !TYPE_USER_ALIGN (type))
24541 && (!decl || !DECL_USER_ALIGN (decl)))
24544 /* If TYPE is NULL, we are allocating a stack slot for caller-save
24545 register in MODE. We will return the largest alignment of XF
/* Caller-save slots in XFmode only need DFmode alignment here.  */
24549 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
24550 align = GET_MODE_ALIGNMENT (DFmode);
24554 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
24555 to 16byte boundary. Exact wording is:
24557 An array uses the same alignment as its elements, except that a local or
24558 global array variable of length at least 16 bytes or
24559 a C99 variable-length array variable always has alignment of at least 16 bytes.
24561 This was added to allow use of aligned SSE instructions at arrays. This
24562 rule is meant for static storage (where compiler can not do the analysis
24563 by itself). We follow it for automatic variables only when convenient.
24564 We fully control everything in the function compiled and functions from
24565 other unit can not rely on the alignment.
24567 Exclude va_list type. It is the common case of local array where
24568 we can not benefit from the alignment. */
24569 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
/* 16-byte-align large local aggregates, except va_list (see above).  */
24572 if (AGGREGATE_TYPE_P (type)
24573 && (va_list_type_node == NULL_TREE
24574 || (TYPE_MAIN_VARIANT (type)
24575 != TYPE_MAIN_VARIANT (va_list_type_node)))
24576 && TYPE_SIZE (type)
24577 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
24578 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
24579 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
/* Same per-type-class boosts as ix86_data_alignment: DFmode (element)
   modes to 64 bits, 128-bit modes to 128 bits.  */
24582 if (TREE_CODE (type) == ARRAY_TYPE)
24584 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
24586 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
24589 else if (TREE_CODE (type) == COMPLEX_TYPE)
24591 if (TYPE_MODE (type) == DCmode && align < 64)
24593 if ((TYPE_MODE (type) == XCmode
24594 || TYPE_MODE (type) == TCmode) && align < 128)
24597 else if ((TREE_CODE (type) == RECORD_TYPE
24598 || TREE_CODE (type) == UNION_TYPE
24599 || TREE_CODE (type) == QUAL_UNION_TYPE)
24600 && TYPE_FIELDS (type))
24602 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
24604 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
24607 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
24608 || TREE_CODE (type) == INTEGER_TYPE)
24611 if (TYPE_MODE (type) == DFmode && align < 64)
24613 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
24619 /* Compute the minimum required alignment for dynamic stack realignment
24620 purposes for a local variable, parameter or a stack slot. EXP is
24621 the data type or decl itself, MODE is its mode and ALIGN is the
24622 alignment that the object would ordinarily have. */
/* NOTE(review): local variable declarations and the return statements
   are elided from this listing; verify against the full source.  */
24625 ix86_minimum_alignment (tree exp, enum machine_mode mode,
24626 unsigned int align)
24630 if (exp && DECL_P (exp))
24632 type = TREE_TYPE (exp);
/* Only the 32-bit, 64-bit-aligned, -mpreferred-stack-boundary=2 case
   needs special treatment; everything else keeps ALIGN unchanged.  */
24641 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
24644 /* Don't do dynamic stack realignment for long long objects with
24645 -mpreferred-stack-boundary=2. */
24646 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
24647 && (!type || !TYPE_USER_ALIGN (type))
24648 && (!decl || !DECL_USER_ALIGN (decl)))
24654 /* Find a location for the static chain incoming to a nested function.
24655 This is a register, unless all free registers are used by arguments. */
/* Implements TARGET_STATIC_CHAIN.  Returns an rtx for where the static
   chain lives: R10 in 64-bit mode, ECX by default in 32-bit mode, EAX
   for fastcall/thiscall, and a stack slot (via ESI at the alternate
   entry point) for regparm-3 functions.
   NOTE(review): the return type line, local declarations (fntype, ccvt,
   regno), braces, and the regno assignments in each branch are elided
   from this listing.  */
24658 ix86_static_chain (const_tree fndecl, bool incoming_p)
/* A function with no static chain needs no location at all.  */
24662 if (!DECL_STATIC_CHAIN (fndecl))
24667 /* We always use R10 in 64-bit mode. */
24675 /* By default in 32-bit mode we use ECX to pass the static chain. */
24678 fntype = TREE_TYPE (fndecl);
24679 ccvt = ix86_get_callcvt (fntype);
24680 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) != 0)
24682 /* Fastcall functions use ecx/edx for arguments, which leaves
24683 us with EAX for the static chain.
24684 Thiscall functions use ecx for arguments, which also
24685 leaves us with EAX for the static chain. */
24688 else if (ix86_function_regparm (fntype, fndecl) == 3)
24690 /* For regparm 3, we have no free call-clobbered registers in
24691 which to store the static chain. In order to implement this,
24692 we have the trampoline push the static chain to the stack.
24693 However, we can't push a value below the return address when
24694 we call the nested function directly, so we have to use an
24695 alternate entry point. For this we use ESI, and have the
24696 alternate entry point push ESI, so that things appear the
24697 same once we're executing the nested function. */
24700 if (fndecl == current_function_decl)
24701 ix86_static_chain_on_stack = true;
/* The chain sits at arg_pointer - 8: below the return address and the
   saved ESI pushed by the alternate entry point.  */
24702 return gen_frame_mem (SImode,
24703 plus_constant (arg_pointer_rtx, -8));
24709 return gen_rtx_REG (Pmode, regno);
24712 /* Emit RTL insns to initialize the variable parts of a trampoline.
24713 FNDECL is the decl of the target address; M_TRAMP is a MEM for
24714 the trampoline, and CHAIN_VALUE is an RTX for the static chain
24715 to be passed to the target function. */
/* Implements TARGET_TRAMPOLINE_INIT.  Writes the trampoline's machine
   code bytes: 64-bit uses mov-imm to r11 (function address) and r10
   (static chain) followed by jmp *r11; 32-bit uses mov-imm to the
   static-chain register (or a push for the regparm-3 stack case)
   followed by a rel32 jmp.
   NOTE(review): the return type line, local declarations (mem, fnaddr,
   offset, opcode, chain, disp), the TARGET_64BIT conditionals, the
   offset increments, and several braces are elided from this listing —
   the opcode bytes below cannot be re-derived from this view alone.  */
24718 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
24724 fnaddr = XEXP (DECL_RTL (fndecl), 0);
24730 /* Load the function address to r11. Try to load address using
24731 the shorter movl instead of movabs. We may want to support
24732 movq for kernel mode, but kernel does not use trampolines at
/* Address fits in 32 bits zero-extended: emit movl $imm32, %r11d
   (0x41 0xbb = REX.B + mov-imm r11d).  */
24734 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
24736 fnaddr = copy_to_mode_reg (DImode, fnaddr);
24738 mem = adjust_address (m_tramp, HImode, offset);
24739 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
24741 mem = adjust_address (m_tramp, SImode, offset + 2);
24742 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
/* Otherwise emit the full movabs $imm64, %r11 (0x49 0xbb).  */
24747 mem = adjust_address (m_tramp, HImode, offset);
24748 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
24750 mem = adjust_address (m_tramp, DImode, offset + 2);
24751 emit_move_insn (mem, fnaddr);
24755 /* Load static chain using movabs to r10. Use the
24756 shorter movl instead of movabs for x32. */
24768 mem = adjust_address (m_tramp, HImode, offset);
24769 emit_move_insn (mem, gen_int_mode (opcode, HImode));
24771 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
24772 emit_move_insn (mem, chain_value);
24775 /* Jump to r11; the last (unused) byte is a nop, only there to
24776 pad the write out to a single 32-bit store. */
24777 mem = adjust_address (m_tramp, SImode, offset);
24778 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
24785 /* Depending on the static chain location, either load a register
24786 with a constant, or push the constant to the stack. All of the
24787 instructions are the same size. */
24788 chain = ix86_static_chain (fndecl, true);
/* Pick the mov-imm opcode matching the chain register (0xb8 + regno):
   0xb8 = mov $imm, %eax; 0xb9 = mov $imm, %ecx.  */
24791 switch (REGNO (chain))
24794 opcode = 0xb8; break;
24796 opcode = 0xb9; break;
24798 gcc_unreachable ();
24804 mem = adjust_address (m_tramp, QImode, offset);
24805 emit_move_insn (mem, gen_int_mode (opcode, QImode));
24807 mem = adjust_address (m_tramp, SImode, offset + 1);
24808 emit_move_insn (mem, chain_value);
/* 0xe9 = jmp rel32; the displacement is computed at run time below.  */
24811 mem = adjust_address (m_tramp, QImode, offset);
24812 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
24814 mem = adjust_address (m_tramp, SImode, offset + 1);
24816 /* Compute offset from the end of the jmp to the target function.
24817 In the case in which the trampoline stores the static chain on
24818 the stack, we need to skip the first insn which pushes the
24819 (call-saved) register static chain; this push is 1 byte. */
24821 disp = expand_binop (SImode, sub_optab, fnaddr,
24822 plus_constant (XEXP (m_tramp, 0),
24823 offset - (MEM_P (chain) ? 1 : 0)),
24824 NULL_RTX, 1, OPTAB_DIRECT);
24825 emit_move_insn (mem, disp);
/* Sanity-check we never wrote past the reserved trampoline area.  */
24828 gcc_assert (offset <= TRAMPOLINE_SIZE);
/* On targets that need it, flip the stack to executable so the freshly
   written trampoline code can actually run.  */
24830 #ifdef HAVE_ENABLE_EXECUTE_STACK
24831 #ifdef CHECK_EXECUTE_STACK_ENABLED
24832 if (CHECK_EXECUTE_STACK_ENABLED)
24834 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
24835 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
24839 /* The following file contains several enumerations and data structures
24840 built from the definitions in i386-builtin-types.def. */
24842 #include "i386-builtin-types.inc"
24844 /* Table for the ix86 builtin non-function types. */
24845 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
24847 /* Retrieve an element from the above table, building some of
24848 the types lazily. */
/* Maps an ix86_builtin_type code to its tree, memoizing the result in
   ix86_builtin_type_tab.  Primitive types are pre-populated elsewhere;
   vector types (codes up to IX86_BT_LAST_VECT) and pointer types are
   built on first use from the *_vect_base / *_ptr_base tables generated
   by i386-builtin-types.def.
   NOTE(review): the return type line, the `itype`/`quals` declarations,
   the early return for an already-cached type, and the final
   `return type;` are elided from this listing.  */
24851 ix86_get_builtin_type (enum ix86_builtin_type tcode)
24853 unsigned int index;
24856 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
24858 type = ix86_builtin_type_tab[(int) tcode];
24862 gcc_assert (tcode > IX86_BT_LAST_PRIM);
24863 if (tcode <= IX86_BT_LAST_VECT)
24865 enum machine_mode mode;
/* Vector type: recurse for the element type, then build the vector
   with the mode recorded in the generated table.  */
24867 index = tcode - IX86_BT_LAST_PRIM - 1;
24868 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
24869 mode = ix86_builtin_type_vect_mode[index];
24871 type = build_vector_type_for_mode (itype, mode);
/* Pointer type: codes up to IX86_BT_LAST_PTR point to non-const
   targets; the rest point to const-qualified targets.  */
24877 index = tcode - IX86_BT_LAST_VECT - 1;
24878 if (tcode <= IX86_BT_LAST_PTR)
24879 quals = TYPE_UNQUALIFIED;
24881 quals = TYPE_QUAL_CONST;
24883 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
24884 if (quals != TYPE_UNQUALIFIED)
24885 itype = build_qualified_type (itype, quals);
24887 type = build_pointer_type (itype);
/* Cache for subsequent lookups.  */
24890 ix86_builtin_type_tab[(int) tcode] = type;
24894 /* Table for the ix86 builtin function types. */
24895 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
24897 /* Retrieve an element from the above table, building some of
24898 the types lazily. */
/* Maps an ix86_builtin_func_type code to a FUNCTION_TYPE tree, memoized
   in ix86_builtin_func_type_tab.  Direct codes are built from the
   generated ix86_builtin_func_start / ix86_builtin_func_args tables;
   alias codes simply reuse the type of their base code.
   NOTE(review): the return type line, the early return for a cached
   type, the loop-variable declaration, and the final `return type;`
   are elided from this listing.  */
24901 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
24905 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
24907 type = ix86_builtin_func_type_tab[(int) tcode];
24911 if (tcode <= IX86_BT_LAST_FUNC)
/* Args for TCODE occupy [start, after) in the generated table:
   entry START is the return type, the rest are parameters.  */
24913 unsigned start = ix86_builtin_func_start[(int) tcode];
24914 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
24915 tree rtype, atype, args = void_list_node;
24918 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
/* Walk the parameters backwards so the tree_cons chain ends up in
   declaration order.  */
24919 for (i = after - 1; i > start; --i)
24921 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
24922 args = tree_cons (NULL, atype, args);
24925 type = build_function_type (rtype, args);
/* Alias code: resolve to the base code's function type.  */
24929 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
24930 enum ix86_builtin_func_type icode;
24932 icode = ix86_builtin_func_alias_base[index];
24933 type = ix86_get_builtin_func_type (icode);
/* Cache for subsequent lookups.  */
24936 ix86_builtin_func_type_tab[(int) tcode] = type;
24941 /* Codes for all the SSE/MMX builtins. */
24944 IX86_BUILTIN_ADDPS,
24945 IX86_BUILTIN_ADDSS,
24946 IX86_BUILTIN_DIVPS,
24947 IX86_BUILTIN_DIVSS,
24948 IX86_BUILTIN_MULPS,
24949 IX86_BUILTIN_MULSS,
24950 IX86_BUILTIN_SUBPS,
24951 IX86_BUILTIN_SUBSS,
24953 IX86_BUILTIN_CMPEQPS,
24954 IX86_BUILTIN_CMPLTPS,
24955 IX86_BUILTIN_CMPLEPS,
24956 IX86_BUILTIN_CMPGTPS,
24957 IX86_BUILTIN_CMPGEPS,
24958 IX86_BUILTIN_CMPNEQPS,
24959 IX86_BUILTIN_CMPNLTPS,
24960 IX86_BUILTIN_CMPNLEPS,
24961 IX86_BUILTIN_CMPNGTPS,
24962 IX86_BUILTIN_CMPNGEPS,
24963 IX86_BUILTIN_CMPORDPS,
24964 IX86_BUILTIN_CMPUNORDPS,
24965 IX86_BUILTIN_CMPEQSS,
24966 IX86_BUILTIN_CMPLTSS,
24967 IX86_BUILTIN_CMPLESS,
24968 IX86_BUILTIN_CMPNEQSS,
24969 IX86_BUILTIN_CMPNLTSS,
24970 IX86_BUILTIN_CMPNLESS,
24971 IX86_BUILTIN_CMPNGTSS,
24972 IX86_BUILTIN_CMPNGESS,
24973 IX86_BUILTIN_CMPORDSS,
24974 IX86_BUILTIN_CMPUNORDSS,
24976 IX86_BUILTIN_COMIEQSS,
24977 IX86_BUILTIN_COMILTSS,
24978 IX86_BUILTIN_COMILESS,
24979 IX86_BUILTIN_COMIGTSS,
24980 IX86_BUILTIN_COMIGESS,
24981 IX86_BUILTIN_COMINEQSS,
24982 IX86_BUILTIN_UCOMIEQSS,
24983 IX86_BUILTIN_UCOMILTSS,
24984 IX86_BUILTIN_UCOMILESS,
24985 IX86_BUILTIN_UCOMIGTSS,
24986 IX86_BUILTIN_UCOMIGESS,
24987 IX86_BUILTIN_UCOMINEQSS,
24989 IX86_BUILTIN_CVTPI2PS,
24990 IX86_BUILTIN_CVTPS2PI,
24991 IX86_BUILTIN_CVTSI2SS,
24992 IX86_BUILTIN_CVTSI642SS,
24993 IX86_BUILTIN_CVTSS2SI,
24994 IX86_BUILTIN_CVTSS2SI64,
24995 IX86_BUILTIN_CVTTPS2PI,
24996 IX86_BUILTIN_CVTTSS2SI,
24997 IX86_BUILTIN_CVTTSS2SI64,
24999 IX86_BUILTIN_MAXPS,
25000 IX86_BUILTIN_MAXSS,
25001 IX86_BUILTIN_MINPS,
25002 IX86_BUILTIN_MINSS,
25004 IX86_BUILTIN_LOADUPS,
25005 IX86_BUILTIN_STOREUPS,
25006 IX86_BUILTIN_MOVSS,
25008 IX86_BUILTIN_MOVHLPS,
25009 IX86_BUILTIN_MOVLHPS,
25010 IX86_BUILTIN_LOADHPS,
25011 IX86_BUILTIN_LOADLPS,
25012 IX86_BUILTIN_STOREHPS,
25013 IX86_BUILTIN_STORELPS,
25015 IX86_BUILTIN_MASKMOVQ,
25016 IX86_BUILTIN_MOVMSKPS,
25017 IX86_BUILTIN_PMOVMSKB,
25019 IX86_BUILTIN_MOVNTPS,
25020 IX86_BUILTIN_MOVNTQ,
25022 IX86_BUILTIN_LOADDQU,
25023 IX86_BUILTIN_STOREDQU,
25025 IX86_BUILTIN_PACKSSWB,
25026 IX86_BUILTIN_PACKSSDW,
25027 IX86_BUILTIN_PACKUSWB,
25029 IX86_BUILTIN_PADDB,
25030 IX86_BUILTIN_PADDW,
25031 IX86_BUILTIN_PADDD,
25032 IX86_BUILTIN_PADDQ,
25033 IX86_BUILTIN_PADDSB,
25034 IX86_BUILTIN_PADDSW,
25035 IX86_BUILTIN_PADDUSB,
25036 IX86_BUILTIN_PADDUSW,
25037 IX86_BUILTIN_PSUBB,
25038 IX86_BUILTIN_PSUBW,
25039 IX86_BUILTIN_PSUBD,
25040 IX86_BUILTIN_PSUBQ,
25041 IX86_BUILTIN_PSUBSB,
25042 IX86_BUILTIN_PSUBSW,
25043 IX86_BUILTIN_PSUBUSB,
25044 IX86_BUILTIN_PSUBUSW,
25047 IX86_BUILTIN_PANDN,
25051 IX86_BUILTIN_PAVGB,
25052 IX86_BUILTIN_PAVGW,
25054 IX86_BUILTIN_PCMPEQB,
25055 IX86_BUILTIN_PCMPEQW,
25056 IX86_BUILTIN_PCMPEQD,
25057 IX86_BUILTIN_PCMPGTB,
25058 IX86_BUILTIN_PCMPGTW,
25059 IX86_BUILTIN_PCMPGTD,
25061 IX86_BUILTIN_PMADDWD,
25063 IX86_BUILTIN_PMAXSW,
25064 IX86_BUILTIN_PMAXUB,
25065 IX86_BUILTIN_PMINSW,
25066 IX86_BUILTIN_PMINUB,
25068 IX86_BUILTIN_PMULHUW,
25069 IX86_BUILTIN_PMULHW,
25070 IX86_BUILTIN_PMULLW,
25072 IX86_BUILTIN_PSADBW,
25073 IX86_BUILTIN_PSHUFW,
25075 IX86_BUILTIN_PSLLW,
25076 IX86_BUILTIN_PSLLD,
25077 IX86_BUILTIN_PSLLQ,
25078 IX86_BUILTIN_PSRAW,
25079 IX86_BUILTIN_PSRAD,
25080 IX86_BUILTIN_PSRLW,
25081 IX86_BUILTIN_PSRLD,
25082 IX86_BUILTIN_PSRLQ,
25083 IX86_BUILTIN_PSLLWI,
25084 IX86_BUILTIN_PSLLDI,
25085 IX86_BUILTIN_PSLLQI,
25086 IX86_BUILTIN_PSRAWI,
25087 IX86_BUILTIN_PSRADI,
25088 IX86_BUILTIN_PSRLWI,
25089 IX86_BUILTIN_PSRLDI,
25090 IX86_BUILTIN_PSRLQI,
25092 IX86_BUILTIN_PUNPCKHBW,
25093 IX86_BUILTIN_PUNPCKHWD,
25094 IX86_BUILTIN_PUNPCKHDQ,
25095 IX86_BUILTIN_PUNPCKLBW,
25096 IX86_BUILTIN_PUNPCKLWD,
25097 IX86_BUILTIN_PUNPCKLDQ,
25099 IX86_BUILTIN_SHUFPS,
25101 IX86_BUILTIN_RCPPS,
25102 IX86_BUILTIN_RCPSS,
25103 IX86_BUILTIN_RSQRTPS,
25104 IX86_BUILTIN_RSQRTPS_NR,
25105 IX86_BUILTIN_RSQRTSS,
25106 IX86_BUILTIN_RSQRTF,
25107 IX86_BUILTIN_SQRTPS,
25108 IX86_BUILTIN_SQRTPS_NR,
25109 IX86_BUILTIN_SQRTSS,
25111 IX86_BUILTIN_UNPCKHPS,
25112 IX86_BUILTIN_UNPCKLPS,
25114 IX86_BUILTIN_ANDPS,
25115 IX86_BUILTIN_ANDNPS,
25117 IX86_BUILTIN_XORPS,
25120 IX86_BUILTIN_LDMXCSR,
25121 IX86_BUILTIN_STMXCSR,
25122 IX86_BUILTIN_SFENCE,
25124 /* 3DNow! Original */
25125 IX86_BUILTIN_FEMMS,
25126 IX86_BUILTIN_PAVGUSB,
25127 IX86_BUILTIN_PF2ID,
25128 IX86_BUILTIN_PFACC,
25129 IX86_BUILTIN_PFADD,
25130 IX86_BUILTIN_PFCMPEQ,
25131 IX86_BUILTIN_PFCMPGE,
25132 IX86_BUILTIN_PFCMPGT,
25133 IX86_BUILTIN_PFMAX,
25134 IX86_BUILTIN_PFMIN,
25135 IX86_BUILTIN_PFMUL,
25136 IX86_BUILTIN_PFRCP,
25137 IX86_BUILTIN_PFRCPIT1,
25138 IX86_BUILTIN_PFRCPIT2,
25139 IX86_BUILTIN_PFRSQIT1,
25140 IX86_BUILTIN_PFRSQRT,
25141 IX86_BUILTIN_PFSUB,
25142 IX86_BUILTIN_PFSUBR,
25143 IX86_BUILTIN_PI2FD,
25144 IX86_BUILTIN_PMULHRW,
25146 /* 3DNow! Athlon Extensions */
25147 IX86_BUILTIN_PF2IW,
25148 IX86_BUILTIN_PFNACC,
25149 IX86_BUILTIN_PFPNACC,
25150 IX86_BUILTIN_PI2FW,
25151 IX86_BUILTIN_PSWAPDSI,
25152 IX86_BUILTIN_PSWAPDSF,
25155 IX86_BUILTIN_ADDPD,
25156 IX86_BUILTIN_ADDSD,
25157 IX86_BUILTIN_DIVPD,
25158 IX86_BUILTIN_DIVSD,
25159 IX86_BUILTIN_MULPD,
25160 IX86_BUILTIN_MULSD,
25161 IX86_BUILTIN_SUBPD,
25162 IX86_BUILTIN_SUBSD,
25164 IX86_BUILTIN_CMPEQPD,
25165 IX86_BUILTIN_CMPLTPD,
25166 IX86_BUILTIN_CMPLEPD,
25167 IX86_BUILTIN_CMPGTPD,
25168 IX86_BUILTIN_CMPGEPD,
25169 IX86_BUILTIN_CMPNEQPD,
25170 IX86_BUILTIN_CMPNLTPD,
25171 IX86_BUILTIN_CMPNLEPD,
25172 IX86_BUILTIN_CMPNGTPD,
25173 IX86_BUILTIN_CMPNGEPD,
25174 IX86_BUILTIN_CMPORDPD,
25175 IX86_BUILTIN_CMPUNORDPD,
25176 IX86_BUILTIN_CMPEQSD,
25177 IX86_BUILTIN_CMPLTSD,
25178 IX86_BUILTIN_CMPLESD,
25179 IX86_BUILTIN_CMPNEQSD,
25180 IX86_BUILTIN_CMPNLTSD,
25181 IX86_BUILTIN_CMPNLESD,
25182 IX86_BUILTIN_CMPORDSD,
25183 IX86_BUILTIN_CMPUNORDSD,
25185 IX86_BUILTIN_COMIEQSD,
25186 IX86_BUILTIN_COMILTSD,
25187 IX86_BUILTIN_COMILESD,
25188 IX86_BUILTIN_COMIGTSD,
25189 IX86_BUILTIN_COMIGESD,
25190 IX86_BUILTIN_COMINEQSD,
25191 IX86_BUILTIN_UCOMIEQSD,
25192 IX86_BUILTIN_UCOMILTSD,
25193 IX86_BUILTIN_UCOMILESD,
25194 IX86_BUILTIN_UCOMIGTSD,
25195 IX86_BUILTIN_UCOMIGESD,
25196 IX86_BUILTIN_UCOMINEQSD,
25198 IX86_BUILTIN_MAXPD,
25199 IX86_BUILTIN_MAXSD,
25200 IX86_BUILTIN_MINPD,
25201 IX86_BUILTIN_MINSD,
25203 IX86_BUILTIN_ANDPD,
25204 IX86_BUILTIN_ANDNPD,
25206 IX86_BUILTIN_XORPD,
25208 IX86_BUILTIN_SQRTPD,
25209 IX86_BUILTIN_SQRTSD,
25211 IX86_BUILTIN_UNPCKHPD,
25212 IX86_BUILTIN_UNPCKLPD,
25214 IX86_BUILTIN_SHUFPD,
25216 IX86_BUILTIN_LOADUPD,
25217 IX86_BUILTIN_STOREUPD,
25218 IX86_BUILTIN_MOVSD,
25220 IX86_BUILTIN_LOADHPD,
25221 IX86_BUILTIN_LOADLPD,
25223 IX86_BUILTIN_CVTDQ2PD,
25224 IX86_BUILTIN_CVTDQ2PS,
25226 IX86_BUILTIN_CVTPD2DQ,
25227 IX86_BUILTIN_CVTPD2PI,
25228 IX86_BUILTIN_CVTPD2PS,
25229 IX86_BUILTIN_CVTTPD2DQ,
25230 IX86_BUILTIN_CVTTPD2PI,
25232 IX86_BUILTIN_CVTPI2PD,
25233 IX86_BUILTIN_CVTSI2SD,
25234 IX86_BUILTIN_CVTSI642SD,
25236 IX86_BUILTIN_CVTSD2SI,
25237 IX86_BUILTIN_CVTSD2SI64,
25238 IX86_BUILTIN_CVTSD2SS,
25239 IX86_BUILTIN_CVTSS2SD,
25240 IX86_BUILTIN_CVTTSD2SI,
25241 IX86_BUILTIN_CVTTSD2SI64,
25243 IX86_BUILTIN_CVTPS2DQ,
25244 IX86_BUILTIN_CVTPS2PD,
25245 IX86_BUILTIN_CVTTPS2DQ,
25247 IX86_BUILTIN_MOVNTI,
25248 IX86_BUILTIN_MOVNTPD,
25249 IX86_BUILTIN_MOVNTDQ,
25251 IX86_BUILTIN_MOVQ128,
25254 IX86_BUILTIN_MASKMOVDQU,
25255 IX86_BUILTIN_MOVMSKPD,
25256 IX86_BUILTIN_PMOVMSKB128,
25258 IX86_BUILTIN_PACKSSWB128,
25259 IX86_BUILTIN_PACKSSDW128,
25260 IX86_BUILTIN_PACKUSWB128,
25262 IX86_BUILTIN_PADDB128,
25263 IX86_BUILTIN_PADDW128,
25264 IX86_BUILTIN_PADDD128,
25265 IX86_BUILTIN_PADDQ128,
25266 IX86_BUILTIN_PADDSB128,
25267 IX86_BUILTIN_PADDSW128,
25268 IX86_BUILTIN_PADDUSB128,
25269 IX86_BUILTIN_PADDUSW128,
25270 IX86_BUILTIN_PSUBB128,
25271 IX86_BUILTIN_PSUBW128,
25272 IX86_BUILTIN_PSUBD128,
25273 IX86_BUILTIN_PSUBQ128,
25274 IX86_BUILTIN_PSUBSB128,
25275 IX86_BUILTIN_PSUBSW128,
25276 IX86_BUILTIN_PSUBUSB128,
25277 IX86_BUILTIN_PSUBUSW128,
25279 IX86_BUILTIN_PAND128,
25280 IX86_BUILTIN_PANDN128,
25281 IX86_BUILTIN_POR128,
25282 IX86_BUILTIN_PXOR128,
25284 IX86_BUILTIN_PAVGB128,
25285 IX86_BUILTIN_PAVGW128,
25287 IX86_BUILTIN_PCMPEQB128,
25288 IX86_BUILTIN_PCMPEQW128,
25289 IX86_BUILTIN_PCMPEQD128,
25290 IX86_BUILTIN_PCMPGTB128,
25291 IX86_BUILTIN_PCMPGTW128,
25292 IX86_BUILTIN_PCMPGTD128,
25294 IX86_BUILTIN_PMADDWD128,
25296 IX86_BUILTIN_PMAXSW128,
25297 IX86_BUILTIN_PMAXUB128,
25298 IX86_BUILTIN_PMINSW128,
25299 IX86_BUILTIN_PMINUB128,
25301 IX86_BUILTIN_PMULUDQ,
25302 IX86_BUILTIN_PMULUDQ128,
25303 IX86_BUILTIN_PMULHUW128,
25304 IX86_BUILTIN_PMULHW128,
25305 IX86_BUILTIN_PMULLW128,
25307 IX86_BUILTIN_PSADBW128,
25308 IX86_BUILTIN_PSHUFHW,
25309 IX86_BUILTIN_PSHUFLW,
25310 IX86_BUILTIN_PSHUFD,
25312 IX86_BUILTIN_PSLLDQI128,
25313 IX86_BUILTIN_PSLLWI128,
25314 IX86_BUILTIN_PSLLDI128,
25315 IX86_BUILTIN_PSLLQI128,
25316 IX86_BUILTIN_PSRAWI128,
25317 IX86_BUILTIN_PSRADI128,
25318 IX86_BUILTIN_PSRLDQI128,
25319 IX86_BUILTIN_PSRLWI128,
25320 IX86_BUILTIN_PSRLDI128,
25321 IX86_BUILTIN_PSRLQI128,
25323 IX86_BUILTIN_PSLLDQ128,
25324 IX86_BUILTIN_PSLLW128,
25325 IX86_BUILTIN_PSLLD128,
25326 IX86_BUILTIN_PSLLQ128,
25327 IX86_BUILTIN_PSRAW128,
25328 IX86_BUILTIN_PSRAD128,
25329 IX86_BUILTIN_PSRLW128,
25330 IX86_BUILTIN_PSRLD128,
25331 IX86_BUILTIN_PSRLQ128,
25333 IX86_BUILTIN_PUNPCKHBW128,
25334 IX86_BUILTIN_PUNPCKHWD128,
25335 IX86_BUILTIN_PUNPCKHDQ128,
25336 IX86_BUILTIN_PUNPCKHQDQ128,
25337 IX86_BUILTIN_PUNPCKLBW128,
25338 IX86_BUILTIN_PUNPCKLWD128,
25339 IX86_BUILTIN_PUNPCKLDQ128,
25340 IX86_BUILTIN_PUNPCKLQDQ128,
25342 IX86_BUILTIN_CLFLUSH,
25343 IX86_BUILTIN_MFENCE,
25344 IX86_BUILTIN_LFENCE,
25345 IX86_BUILTIN_PAUSE,
25347 IX86_BUILTIN_BSRSI,
25348 IX86_BUILTIN_BSRDI,
25349 IX86_BUILTIN_RDPMC,
25350 IX86_BUILTIN_RDTSC,
25351 IX86_BUILTIN_RDTSCP,
25352 IX86_BUILTIN_ROLQI,
25353 IX86_BUILTIN_ROLHI,
25354 IX86_BUILTIN_RORQI,
25355 IX86_BUILTIN_RORHI,
25358 IX86_BUILTIN_ADDSUBPS,
25359 IX86_BUILTIN_HADDPS,
25360 IX86_BUILTIN_HSUBPS,
25361 IX86_BUILTIN_MOVSHDUP,
25362 IX86_BUILTIN_MOVSLDUP,
25363 IX86_BUILTIN_ADDSUBPD,
25364 IX86_BUILTIN_HADDPD,
25365 IX86_BUILTIN_HSUBPD,
25366 IX86_BUILTIN_LDDQU,
25368 IX86_BUILTIN_MONITOR,
25369 IX86_BUILTIN_MWAIT,
25372 IX86_BUILTIN_PHADDW,
25373 IX86_BUILTIN_PHADDD,
25374 IX86_BUILTIN_PHADDSW,
25375 IX86_BUILTIN_PHSUBW,
25376 IX86_BUILTIN_PHSUBD,
25377 IX86_BUILTIN_PHSUBSW,
25378 IX86_BUILTIN_PMADDUBSW,
25379 IX86_BUILTIN_PMULHRSW,
25380 IX86_BUILTIN_PSHUFB,
25381 IX86_BUILTIN_PSIGNB,
25382 IX86_BUILTIN_PSIGNW,
25383 IX86_BUILTIN_PSIGND,
25384 IX86_BUILTIN_PALIGNR,
25385 IX86_BUILTIN_PABSB,
25386 IX86_BUILTIN_PABSW,
25387 IX86_BUILTIN_PABSD,
25389 IX86_BUILTIN_PHADDW128,
25390 IX86_BUILTIN_PHADDD128,
25391 IX86_BUILTIN_PHADDSW128,
25392 IX86_BUILTIN_PHSUBW128,
25393 IX86_BUILTIN_PHSUBD128,
25394 IX86_BUILTIN_PHSUBSW128,
25395 IX86_BUILTIN_PMADDUBSW128,
25396 IX86_BUILTIN_PMULHRSW128,
25397 IX86_BUILTIN_PSHUFB128,
25398 IX86_BUILTIN_PSIGNB128,
25399 IX86_BUILTIN_PSIGNW128,
25400 IX86_BUILTIN_PSIGND128,
25401 IX86_BUILTIN_PALIGNR128,
25402 IX86_BUILTIN_PABSB128,
25403 IX86_BUILTIN_PABSW128,
25404 IX86_BUILTIN_PABSD128,
25406 /* AMDFAM10 - SSE4A New Instructions. */
25407 IX86_BUILTIN_MOVNTSD,
25408 IX86_BUILTIN_MOVNTSS,
25409 IX86_BUILTIN_EXTRQI,
25410 IX86_BUILTIN_EXTRQ,
25411 IX86_BUILTIN_INSERTQI,
25412 IX86_BUILTIN_INSERTQ,
25415 IX86_BUILTIN_BLENDPD,
25416 IX86_BUILTIN_BLENDPS,
25417 IX86_BUILTIN_BLENDVPD,
25418 IX86_BUILTIN_BLENDVPS,
25419 IX86_BUILTIN_PBLENDVB128,
25420 IX86_BUILTIN_PBLENDW128,
25425 IX86_BUILTIN_INSERTPS128,
25427 IX86_BUILTIN_MOVNTDQA,
25428 IX86_BUILTIN_MPSADBW128,
25429 IX86_BUILTIN_PACKUSDW128,
25430 IX86_BUILTIN_PCMPEQQ,
25431 IX86_BUILTIN_PHMINPOSUW128,
25433 IX86_BUILTIN_PMAXSB128,
25434 IX86_BUILTIN_PMAXSD128,
25435 IX86_BUILTIN_PMAXUD128,
25436 IX86_BUILTIN_PMAXUW128,
25438 IX86_BUILTIN_PMINSB128,
25439 IX86_BUILTIN_PMINSD128,
25440 IX86_BUILTIN_PMINUD128,
25441 IX86_BUILTIN_PMINUW128,
25443 IX86_BUILTIN_PMOVSXBW128,
25444 IX86_BUILTIN_PMOVSXBD128,
25445 IX86_BUILTIN_PMOVSXBQ128,
25446 IX86_BUILTIN_PMOVSXWD128,
25447 IX86_BUILTIN_PMOVSXWQ128,
25448 IX86_BUILTIN_PMOVSXDQ128,
25450 IX86_BUILTIN_PMOVZXBW128,
25451 IX86_BUILTIN_PMOVZXBD128,
25452 IX86_BUILTIN_PMOVZXBQ128,
25453 IX86_BUILTIN_PMOVZXWD128,
25454 IX86_BUILTIN_PMOVZXWQ128,
25455 IX86_BUILTIN_PMOVZXDQ128,
25457 IX86_BUILTIN_PMULDQ128,
25458 IX86_BUILTIN_PMULLD128,
25460 IX86_BUILTIN_ROUNDSD,
25461 IX86_BUILTIN_ROUNDSS,
25463 IX86_BUILTIN_ROUNDPD,
25464 IX86_BUILTIN_ROUNDPS,
25466 IX86_BUILTIN_FLOORPD,
25467 IX86_BUILTIN_CEILPD,
25468 IX86_BUILTIN_TRUNCPD,
25469 IX86_BUILTIN_RINTPD,
25470 IX86_BUILTIN_ROUNDPD_AZ,
25472 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX,
25473 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
25474 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,
25476 IX86_BUILTIN_FLOORPS,
25477 IX86_BUILTIN_CEILPS,
25478 IX86_BUILTIN_TRUNCPS,
25479 IX86_BUILTIN_RINTPS,
25480 IX86_BUILTIN_ROUNDPS_AZ,
25482 IX86_BUILTIN_FLOORPS_SFIX,
25483 IX86_BUILTIN_CEILPS_SFIX,
25484 IX86_BUILTIN_ROUNDPS_AZ_SFIX,
25486 IX86_BUILTIN_PTESTZ,
25487 IX86_BUILTIN_PTESTC,
25488 IX86_BUILTIN_PTESTNZC,
25490 IX86_BUILTIN_VEC_INIT_V2SI,
25491 IX86_BUILTIN_VEC_INIT_V4HI,
25492 IX86_BUILTIN_VEC_INIT_V8QI,
25493 IX86_BUILTIN_VEC_EXT_V2DF,
25494 IX86_BUILTIN_VEC_EXT_V2DI,
25495 IX86_BUILTIN_VEC_EXT_V4SF,
25496 IX86_BUILTIN_VEC_EXT_V4SI,
25497 IX86_BUILTIN_VEC_EXT_V8HI,
25498 IX86_BUILTIN_VEC_EXT_V2SI,
25499 IX86_BUILTIN_VEC_EXT_V4HI,
25500 IX86_BUILTIN_VEC_EXT_V16QI,
25501 IX86_BUILTIN_VEC_SET_V2DI,
25502 IX86_BUILTIN_VEC_SET_V4SF,
25503 IX86_BUILTIN_VEC_SET_V4SI,
25504 IX86_BUILTIN_VEC_SET_V8HI,
25505 IX86_BUILTIN_VEC_SET_V4HI,
25506 IX86_BUILTIN_VEC_SET_V16QI,
25508 IX86_BUILTIN_VEC_PACK_SFIX,
25509 IX86_BUILTIN_VEC_PACK_SFIX256,
25512 IX86_BUILTIN_CRC32QI,
25513 IX86_BUILTIN_CRC32HI,
25514 IX86_BUILTIN_CRC32SI,
25515 IX86_BUILTIN_CRC32DI,
25517 IX86_BUILTIN_PCMPESTRI128,
25518 IX86_BUILTIN_PCMPESTRM128,
25519 IX86_BUILTIN_PCMPESTRA128,
25520 IX86_BUILTIN_PCMPESTRC128,
25521 IX86_BUILTIN_PCMPESTRO128,
25522 IX86_BUILTIN_PCMPESTRS128,
25523 IX86_BUILTIN_PCMPESTRZ128,
25524 IX86_BUILTIN_PCMPISTRI128,
25525 IX86_BUILTIN_PCMPISTRM128,
25526 IX86_BUILTIN_PCMPISTRA128,
25527 IX86_BUILTIN_PCMPISTRC128,
25528 IX86_BUILTIN_PCMPISTRO128,
25529 IX86_BUILTIN_PCMPISTRS128,
25530 IX86_BUILTIN_PCMPISTRZ128,
25532 IX86_BUILTIN_PCMPGTQ,
25534 /* AES instructions */
25535 IX86_BUILTIN_AESENC128,
25536 IX86_BUILTIN_AESENCLAST128,
25537 IX86_BUILTIN_AESDEC128,
25538 IX86_BUILTIN_AESDECLAST128,
25539 IX86_BUILTIN_AESIMC128,
25540 IX86_BUILTIN_AESKEYGENASSIST128,
25542 /* PCLMUL instruction */
25543 IX86_BUILTIN_PCLMULQDQ128,
25546 IX86_BUILTIN_ADDPD256,
25547 IX86_BUILTIN_ADDPS256,
25548 IX86_BUILTIN_ADDSUBPD256,
25549 IX86_BUILTIN_ADDSUBPS256,
25550 IX86_BUILTIN_ANDPD256,
25551 IX86_BUILTIN_ANDPS256,
25552 IX86_BUILTIN_ANDNPD256,
25553 IX86_BUILTIN_ANDNPS256,
25554 IX86_BUILTIN_BLENDPD256,
25555 IX86_BUILTIN_BLENDPS256,
25556 IX86_BUILTIN_BLENDVPD256,
25557 IX86_BUILTIN_BLENDVPS256,
25558 IX86_BUILTIN_DIVPD256,
25559 IX86_BUILTIN_DIVPS256,
25560 IX86_BUILTIN_DPPS256,
25561 IX86_BUILTIN_HADDPD256,
25562 IX86_BUILTIN_HADDPS256,
25563 IX86_BUILTIN_HSUBPD256,
25564 IX86_BUILTIN_HSUBPS256,
25565 IX86_BUILTIN_MAXPD256,
25566 IX86_BUILTIN_MAXPS256,
25567 IX86_BUILTIN_MINPD256,
25568 IX86_BUILTIN_MINPS256,
25569 IX86_BUILTIN_MULPD256,
25570 IX86_BUILTIN_MULPS256,
25571 IX86_BUILTIN_ORPD256,
25572 IX86_BUILTIN_ORPS256,
25573 IX86_BUILTIN_SHUFPD256,
25574 IX86_BUILTIN_SHUFPS256,
25575 IX86_BUILTIN_SUBPD256,
25576 IX86_BUILTIN_SUBPS256,
25577 IX86_BUILTIN_XORPD256,
25578 IX86_BUILTIN_XORPS256,
25579 IX86_BUILTIN_CMPSD,
25580 IX86_BUILTIN_CMPSS,
25581 IX86_BUILTIN_CMPPD,
25582 IX86_BUILTIN_CMPPS,
25583 IX86_BUILTIN_CMPPD256,
25584 IX86_BUILTIN_CMPPS256,
25585 IX86_BUILTIN_CVTDQ2PD256,
25586 IX86_BUILTIN_CVTDQ2PS256,
25587 IX86_BUILTIN_CVTPD2PS256,
25588 IX86_BUILTIN_CVTPS2DQ256,
25589 IX86_BUILTIN_CVTPS2PD256,
25590 IX86_BUILTIN_CVTTPD2DQ256,
25591 IX86_BUILTIN_CVTPD2DQ256,
25592 IX86_BUILTIN_CVTTPS2DQ256,
25593 IX86_BUILTIN_EXTRACTF128PD256,
25594 IX86_BUILTIN_EXTRACTF128PS256,
25595 IX86_BUILTIN_EXTRACTF128SI256,
25596 IX86_BUILTIN_VZEROALL,
25597 IX86_BUILTIN_VZEROUPPER,
25598 IX86_BUILTIN_VPERMILVARPD,
25599 IX86_BUILTIN_VPERMILVARPS,
25600 IX86_BUILTIN_VPERMILVARPD256,
25601 IX86_BUILTIN_VPERMILVARPS256,
25602 IX86_BUILTIN_VPERMILPD,
25603 IX86_BUILTIN_VPERMILPS,
25604 IX86_BUILTIN_VPERMILPD256,
25605 IX86_BUILTIN_VPERMILPS256,
25606 IX86_BUILTIN_VPERMIL2PD,
25607 IX86_BUILTIN_VPERMIL2PS,
25608 IX86_BUILTIN_VPERMIL2PD256,
25609 IX86_BUILTIN_VPERMIL2PS256,
25610 IX86_BUILTIN_VPERM2F128PD256,
25611 IX86_BUILTIN_VPERM2F128PS256,
25612 IX86_BUILTIN_VPERM2F128SI256,
25613 IX86_BUILTIN_VBROADCASTSS,
25614 IX86_BUILTIN_VBROADCASTSD256,
25615 IX86_BUILTIN_VBROADCASTSS256,
25616 IX86_BUILTIN_VBROADCASTPD256,
25617 IX86_BUILTIN_VBROADCASTPS256,
25618 IX86_BUILTIN_VINSERTF128PD256,
25619 IX86_BUILTIN_VINSERTF128PS256,
25620 IX86_BUILTIN_VINSERTF128SI256,
25621 IX86_BUILTIN_LOADUPD256,
25622 IX86_BUILTIN_LOADUPS256,
25623 IX86_BUILTIN_STOREUPD256,
25624 IX86_BUILTIN_STOREUPS256,
25625 IX86_BUILTIN_LDDQU256,
25626 IX86_BUILTIN_MOVNTDQ256,
25627 IX86_BUILTIN_MOVNTPD256,
25628 IX86_BUILTIN_MOVNTPS256,
25629 IX86_BUILTIN_LOADDQU256,
25630 IX86_BUILTIN_STOREDQU256,
25631 IX86_BUILTIN_MASKLOADPD,
25632 IX86_BUILTIN_MASKLOADPS,
25633 IX86_BUILTIN_MASKSTOREPD,
25634 IX86_BUILTIN_MASKSTOREPS,
25635 IX86_BUILTIN_MASKLOADPD256,
25636 IX86_BUILTIN_MASKLOADPS256,
25637 IX86_BUILTIN_MASKSTOREPD256,
25638 IX86_BUILTIN_MASKSTOREPS256,
25639 IX86_BUILTIN_MOVSHDUP256,
25640 IX86_BUILTIN_MOVSLDUP256,
25641 IX86_BUILTIN_MOVDDUP256,
25643 IX86_BUILTIN_SQRTPD256,
25644 IX86_BUILTIN_SQRTPS256,
25645 IX86_BUILTIN_SQRTPS_NR256,
25646 IX86_BUILTIN_RSQRTPS256,
25647 IX86_BUILTIN_RSQRTPS_NR256,
25649 IX86_BUILTIN_RCPPS256,
25651 IX86_BUILTIN_ROUNDPD256,
25652 IX86_BUILTIN_ROUNDPS256,
25654 IX86_BUILTIN_FLOORPD256,
25655 IX86_BUILTIN_CEILPD256,
25656 IX86_BUILTIN_TRUNCPD256,
25657 IX86_BUILTIN_RINTPD256,
25658 IX86_BUILTIN_ROUNDPD_AZ256,
25660 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
25661 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
25662 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,
25664 IX86_BUILTIN_FLOORPS256,
25665 IX86_BUILTIN_CEILPS256,
25666 IX86_BUILTIN_TRUNCPS256,
25667 IX86_BUILTIN_RINTPS256,
25668 IX86_BUILTIN_ROUNDPS_AZ256,
25670 IX86_BUILTIN_FLOORPS_SFIX256,
25671 IX86_BUILTIN_CEILPS_SFIX256,
25672 IX86_BUILTIN_ROUNDPS_AZ_SFIX256,
25674 IX86_BUILTIN_UNPCKHPD256,
25675 IX86_BUILTIN_UNPCKLPD256,
25676 IX86_BUILTIN_UNPCKHPS256,
25677 IX86_BUILTIN_UNPCKLPS256,
25679 IX86_BUILTIN_SI256_SI,
25680 IX86_BUILTIN_PS256_PS,
25681 IX86_BUILTIN_PD256_PD,
25682 IX86_BUILTIN_SI_SI256,
25683 IX86_BUILTIN_PS_PS256,
25684 IX86_BUILTIN_PD_PD256,
25686 IX86_BUILTIN_VTESTZPD,
25687 IX86_BUILTIN_VTESTCPD,
25688 IX86_BUILTIN_VTESTNZCPD,
25689 IX86_BUILTIN_VTESTZPS,
25690 IX86_BUILTIN_VTESTCPS,
25691 IX86_BUILTIN_VTESTNZCPS,
25692 IX86_BUILTIN_VTESTZPD256,
25693 IX86_BUILTIN_VTESTCPD256,
25694 IX86_BUILTIN_VTESTNZCPD256,
25695 IX86_BUILTIN_VTESTZPS256,
25696 IX86_BUILTIN_VTESTCPS256,
25697 IX86_BUILTIN_VTESTNZCPS256,
25698 IX86_BUILTIN_PTESTZ256,
25699 IX86_BUILTIN_PTESTC256,
25700 IX86_BUILTIN_PTESTNZC256,
25702 IX86_BUILTIN_MOVMSKPD256,
25703 IX86_BUILTIN_MOVMSKPS256,
25706 IX86_BUILTIN_MPSADBW256,
25707 IX86_BUILTIN_PABSB256,
25708 IX86_BUILTIN_PABSW256,
25709 IX86_BUILTIN_PABSD256,
25710 IX86_BUILTIN_PACKSSDW256,
25711 IX86_BUILTIN_PACKSSWB256,
25712 IX86_BUILTIN_PACKUSDW256,
25713 IX86_BUILTIN_PACKUSWB256,
25714 IX86_BUILTIN_PADDB256,
25715 IX86_BUILTIN_PADDW256,
25716 IX86_BUILTIN_PADDD256,
25717 IX86_BUILTIN_PADDQ256,
25718 IX86_BUILTIN_PADDSB256,
25719 IX86_BUILTIN_PADDSW256,
25720 IX86_BUILTIN_PADDUSB256,
25721 IX86_BUILTIN_PADDUSW256,
25722 IX86_BUILTIN_PALIGNR256,
25723 IX86_BUILTIN_AND256I,
25724 IX86_BUILTIN_ANDNOT256I,
25725 IX86_BUILTIN_PAVGB256,
25726 IX86_BUILTIN_PAVGW256,
25727 IX86_BUILTIN_PBLENDVB256,
25728 IX86_BUILTIN_PBLENDVW256,
25729 IX86_BUILTIN_PCMPEQB256,
25730 IX86_BUILTIN_PCMPEQW256,
25731 IX86_BUILTIN_PCMPEQD256,
25732 IX86_BUILTIN_PCMPEQQ256,
25733 IX86_BUILTIN_PCMPGTB256,
25734 IX86_BUILTIN_PCMPGTW256,
25735 IX86_BUILTIN_PCMPGTD256,
25736 IX86_BUILTIN_PCMPGTQ256,
25737 IX86_BUILTIN_PHADDW256,
25738 IX86_BUILTIN_PHADDD256,
25739 IX86_BUILTIN_PHADDSW256,
25740 IX86_BUILTIN_PHSUBW256,
25741 IX86_BUILTIN_PHSUBD256,
25742 IX86_BUILTIN_PHSUBSW256,
25743 IX86_BUILTIN_PMADDUBSW256,
25744 IX86_BUILTIN_PMADDWD256,
25745 IX86_BUILTIN_PMAXSB256,
25746 IX86_BUILTIN_PMAXSW256,
25747 IX86_BUILTIN_PMAXSD256,
25748 IX86_BUILTIN_PMAXUB256,
25749 IX86_BUILTIN_PMAXUW256,
25750 IX86_BUILTIN_PMAXUD256,
25751 IX86_BUILTIN_PMINSB256,
25752 IX86_BUILTIN_PMINSW256,
25753 IX86_BUILTIN_PMINSD256,
25754 IX86_BUILTIN_PMINUB256,
25755 IX86_BUILTIN_PMINUW256,
25756 IX86_BUILTIN_PMINUD256,
25757 IX86_BUILTIN_PMOVMSKB256,
25758 IX86_BUILTIN_PMOVSXBW256,
25759 IX86_BUILTIN_PMOVSXBD256,
25760 IX86_BUILTIN_PMOVSXBQ256,
25761 IX86_BUILTIN_PMOVSXWD256,
25762 IX86_BUILTIN_PMOVSXWQ256,
25763 IX86_BUILTIN_PMOVSXDQ256,
25764 IX86_BUILTIN_PMOVZXBW256,
25765 IX86_BUILTIN_PMOVZXBD256,
25766 IX86_BUILTIN_PMOVZXBQ256,
25767 IX86_BUILTIN_PMOVZXWD256,
25768 IX86_BUILTIN_PMOVZXWQ256,
25769 IX86_BUILTIN_PMOVZXDQ256,
25770 IX86_BUILTIN_PMULDQ256,
25771 IX86_BUILTIN_PMULHRSW256,
25772 IX86_BUILTIN_PMULHUW256,
25773 IX86_BUILTIN_PMULHW256,
25774 IX86_BUILTIN_PMULLW256,
25775 IX86_BUILTIN_PMULLD256,
25776 IX86_BUILTIN_PMULUDQ256,
25777 IX86_BUILTIN_POR256,
25778 IX86_BUILTIN_PSADBW256,
25779 IX86_BUILTIN_PSHUFB256,
25780 IX86_BUILTIN_PSHUFD256,
25781 IX86_BUILTIN_PSHUFHW256,
25782 IX86_BUILTIN_PSHUFLW256,
25783 IX86_BUILTIN_PSIGNB256,
25784 IX86_BUILTIN_PSIGNW256,
25785 IX86_BUILTIN_PSIGND256,
25786 IX86_BUILTIN_PSLLDQI256,
25787 IX86_BUILTIN_PSLLWI256,
25788 IX86_BUILTIN_PSLLW256,
25789 IX86_BUILTIN_PSLLDI256,
25790 IX86_BUILTIN_PSLLD256,
25791 IX86_BUILTIN_PSLLQI256,
25792 IX86_BUILTIN_PSLLQ256,
25793 IX86_BUILTIN_PSRAWI256,
25794 IX86_BUILTIN_PSRAW256,
25795 IX86_BUILTIN_PSRADI256,
25796 IX86_BUILTIN_PSRAD256,
25797 IX86_BUILTIN_PSRLDQI256,
25798 IX86_BUILTIN_PSRLWI256,
25799 IX86_BUILTIN_PSRLW256,
25800 IX86_BUILTIN_PSRLDI256,
25801 IX86_BUILTIN_PSRLD256,
25802 IX86_BUILTIN_PSRLQI256,
25803 IX86_BUILTIN_PSRLQ256,
25804 IX86_BUILTIN_PSUBB256,
25805 IX86_BUILTIN_PSUBW256,
25806 IX86_BUILTIN_PSUBD256,
25807 IX86_BUILTIN_PSUBQ256,
25808 IX86_BUILTIN_PSUBSB256,
25809 IX86_BUILTIN_PSUBSW256,
25810 IX86_BUILTIN_PSUBUSB256,
25811 IX86_BUILTIN_PSUBUSW256,
25812 IX86_BUILTIN_PUNPCKHBW256,
25813 IX86_BUILTIN_PUNPCKHWD256,
25814 IX86_BUILTIN_PUNPCKHDQ256,
25815 IX86_BUILTIN_PUNPCKHQDQ256,
25816 IX86_BUILTIN_PUNPCKLBW256,
25817 IX86_BUILTIN_PUNPCKLWD256,
25818 IX86_BUILTIN_PUNPCKLDQ256,
25819 IX86_BUILTIN_PUNPCKLQDQ256,
25820 IX86_BUILTIN_PXOR256,
25821 IX86_BUILTIN_MOVNTDQA256,
25822 IX86_BUILTIN_VBROADCASTSS_PS,
25823 IX86_BUILTIN_VBROADCASTSS_PS256,
25824 IX86_BUILTIN_VBROADCASTSD_PD256,
25825 IX86_BUILTIN_VBROADCASTSI256,
25826 IX86_BUILTIN_PBLENDD256,
25827 IX86_BUILTIN_PBLENDD128,
25828 IX86_BUILTIN_PBROADCASTB256,
25829 IX86_BUILTIN_PBROADCASTW256,
25830 IX86_BUILTIN_PBROADCASTD256,
25831 IX86_BUILTIN_PBROADCASTQ256,
25832 IX86_BUILTIN_PBROADCASTB128,
25833 IX86_BUILTIN_PBROADCASTW128,
25834 IX86_BUILTIN_PBROADCASTD128,
25835 IX86_BUILTIN_PBROADCASTQ128,
25836 IX86_BUILTIN_VPERMVARSI256,
25837 IX86_BUILTIN_VPERMDF256,
25838 IX86_BUILTIN_VPERMVARSF256,
25839 IX86_BUILTIN_VPERMDI256,
25840 IX86_BUILTIN_VPERMTI256,
25841 IX86_BUILTIN_VEXTRACT128I256,
25842 IX86_BUILTIN_VINSERT128I256,
25843 IX86_BUILTIN_MASKLOADD,
25844 IX86_BUILTIN_MASKLOADQ,
25845 IX86_BUILTIN_MASKLOADD256,
25846 IX86_BUILTIN_MASKLOADQ256,
25847 IX86_BUILTIN_MASKSTORED,
25848 IX86_BUILTIN_MASKSTOREQ,
25849 IX86_BUILTIN_MASKSTORED256,
25850 IX86_BUILTIN_MASKSTOREQ256,
25851 IX86_BUILTIN_PSLLVV4DI,
25852 IX86_BUILTIN_PSLLVV2DI,
25853 IX86_BUILTIN_PSLLVV8SI,
25854 IX86_BUILTIN_PSLLVV4SI,
25855 IX86_BUILTIN_PSRAVV8SI,
25856 IX86_BUILTIN_PSRAVV4SI,
25857 IX86_BUILTIN_PSRLVV4DI,
25858 IX86_BUILTIN_PSRLVV2DI,
25859 IX86_BUILTIN_PSRLVV8SI,
25860 IX86_BUILTIN_PSRLVV4SI,
25862 IX86_BUILTIN_GATHERSIV2DF,
25863 IX86_BUILTIN_GATHERSIV4DF,
25864 IX86_BUILTIN_GATHERDIV2DF,
25865 IX86_BUILTIN_GATHERDIV4DF,
25866 IX86_BUILTIN_GATHERSIV4SF,
25867 IX86_BUILTIN_GATHERSIV8SF,
25868 IX86_BUILTIN_GATHERDIV4SF,
25869 IX86_BUILTIN_GATHERDIV8SF,
25870 IX86_BUILTIN_GATHERSIV2DI,
25871 IX86_BUILTIN_GATHERSIV4DI,
25872 IX86_BUILTIN_GATHERDIV2DI,
25873 IX86_BUILTIN_GATHERDIV4DI,
25874 IX86_BUILTIN_GATHERSIV4SI,
25875 IX86_BUILTIN_GATHERSIV8SI,
25876 IX86_BUILTIN_GATHERDIV4SI,
25877 IX86_BUILTIN_GATHERDIV8SI,
25879 /* Alternate 4 element gather for the vectorizer where
25880 all operands are 32-byte wide. */
25881 IX86_BUILTIN_GATHERALTSIV4DF,
25882 IX86_BUILTIN_GATHERALTDIV8SF,
25883 IX86_BUILTIN_GATHERALTSIV4DI,
25884 IX86_BUILTIN_GATHERALTDIV8SI,
25886 /* TFmode support builtins. */
25888 IX86_BUILTIN_HUGE_VALQ,
25889 IX86_BUILTIN_FABSQ,
25890 IX86_BUILTIN_COPYSIGNQ,
25892 /* Vectorizer support builtins. */
25893 IX86_BUILTIN_CPYSGNPS,
25894 IX86_BUILTIN_CPYSGNPD,
25895 IX86_BUILTIN_CPYSGNPS256,
25896 IX86_BUILTIN_CPYSGNPD256,
25898 /* FMA4 instructions. */
25899 IX86_BUILTIN_VFMADDSS,
25900 IX86_BUILTIN_VFMADDSD,
25901 IX86_BUILTIN_VFMADDPS,
25902 IX86_BUILTIN_VFMADDPD,
25903 IX86_BUILTIN_VFMADDPS256,
25904 IX86_BUILTIN_VFMADDPD256,
25905 IX86_BUILTIN_VFMADDSUBPS,
25906 IX86_BUILTIN_VFMADDSUBPD,
25907 IX86_BUILTIN_VFMADDSUBPS256,
25908 IX86_BUILTIN_VFMADDSUBPD256,
25910 /* FMA3 instructions. */
25911 IX86_BUILTIN_VFMADDSS3,
25912 IX86_BUILTIN_VFMADDSD3,
25914 /* XOP instructions. */
25915 IX86_BUILTIN_VPCMOV,
25916 IX86_BUILTIN_VPCMOV_V2DI,
25917 IX86_BUILTIN_VPCMOV_V4SI,
25918 IX86_BUILTIN_VPCMOV_V8HI,
25919 IX86_BUILTIN_VPCMOV_V16QI,
25920 IX86_BUILTIN_VPCMOV_V4SF,
25921 IX86_BUILTIN_VPCMOV_V2DF,
25922 IX86_BUILTIN_VPCMOV256,
25923 IX86_BUILTIN_VPCMOV_V4DI256,
25924 IX86_BUILTIN_VPCMOV_V8SI256,
25925 IX86_BUILTIN_VPCMOV_V16HI256,
25926 IX86_BUILTIN_VPCMOV_V32QI256,
25927 IX86_BUILTIN_VPCMOV_V8SF256,
25928 IX86_BUILTIN_VPCMOV_V4DF256,
25930 IX86_BUILTIN_VPPERM,
25932 IX86_BUILTIN_VPMACSSWW,
25933 IX86_BUILTIN_VPMACSWW,
25934 IX86_BUILTIN_VPMACSSWD,
25935 IX86_BUILTIN_VPMACSWD,
25936 IX86_BUILTIN_VPMACSSDD,
25937 IX86_BUILTIN_VPMACSDD,
25938 IX86_BUILTIN_VPMACSSDQL,
25939 IX86_BUILTIN_VPMACSSDQH,
25940 IX86_BUILTIN_VPMACSDQL,
25941 IX86_BUILTIN_VPMACSDQH,
25942 IX86_BUILTIN_VPMADCSSWD,
25943 IX86_BUILTIN_VPMADCSWD,
25945 IX86_BUILTIN_VPHADDBW,
25946 IX86_BUILTIN_VPHADDBD,
25947 IX86_BUILTIN_VPHADDBQ,
25948 IX86_BUILTIN_VPHADDWD,
25949 IX86_BUILTIN_VPHADDWQ,
25950 IX86_BUILTIN_VPHADDDQ,
25951 IX86_BUILTIN_VPHADDUBW,
25952 IX86_BUILTIN_VPHADDUBD,
25953 IX86_BUILTIN_VPHADDUBQ,
25954 IX86_BUILTIN_VPHADDUWD,
25955 IX86_BUILTIN_VPHADDUWQ,
25956 IX86_BUILTIN_VPHADDUDQ,
25957 IX86_BUILTIN_VPHSUBBW,
25958 IX86_BUILTIN_VPHSUBWD,
25959 IX86_BUILTIN_VPHSUBDQ,
25961 IX86_BUILTIN_VPROTB,
25962 IX86_BUILTIN_VPROTW,
25963 IX86_BUILTIN_VPROTD,
25964 IX86_BUILTIN_VPROTQ,
25965 IX86_BUILTIN_VPROTB_IMM,
25966 IX86_BUILTIN_VPROTW_IMM,
25967 IX86_BUILTIN_VPROTD_IMM,
25968 IX86_BUILTIN_VPROTQ_IMM,
25970 IX86_BUILTIN_VPSHLB,
25971 IX86_BUILTIN_VPSHLW,
25972 IX86_BUILTIN_VPSHLD,
25973 IX86_BUILTIN_VPSHLQ,
25974 IX86_BUILTIN_VPSHAB,
25975 IX86_BUILTIN_VPSHAW,
25976 IX86_BUILTIN_VPSHAD,
25977 IX86_BUILTIN_VPSHAQ,
25979 IX86_BUILTIN_VFRCZSS,
25980 IX86_BUILTIN_VFRCZSD,
25981 IX86_BUILTIN_VFRCZPS,
25982 IX86_BUILTIN_VFRCZPD,
25983 IX86_BUILTIN_VFRCZPS256,
25984 IX86_BUILTIN_VFRCZPD256,
25986 IX86_BUILTIN_VPCOMEQUB,
25987 IX86_BUILTIN_VPCOMNEUB,
25988 IX86_BUILTIN_VPCOMLTUB,
25989 IX86_BUILTIN_VPCOMLEUB,
25990 IX86_BUILTIN_VPCOMGTUB,
25991 IX86_BUILTIN_VPCOMGEUB,
25992 IX86_BUILTIN_VPCOMFALSEUB,
25993 IX86_BUILTIN_VPCOMTRUEUB,
25995 IX86_BUILTIN_VPCOMEQUW,
25996 IX86_BUILTIN_VPCOMNEUW,
25997 IX86_BUILTIN_VPCOMLTUW,
25998 IX86_BUILTIN_VPCOMLEUW,
25999 IX86_BUILTIN_VPCOMGTUW,
26000 IX86_BUILTIN_VPCOMGEUW,
26001 IX86_BUILTIN_VPCOMFALSEUW,
26002 IX86_BUILTIN_VPCOMTRUEUW,
26004 IX86_BUILTIN_VPCOMEQUD,
26005 IX86_BUILTIN_VPCOMNEUD,
26006 IX86_BUILTIN_VPCOMLTUD,
26007 IX86_BUILTIN_VPCOMLEUD,
26008 IX86_BUILTIN_VPCOMGTUD,
26009 IX86_BUILTIN_VPCOMGEUD,
26010 IX86_BUILTIN_VPCOMFALSEUD,
26011 IX86_BUILTIN_VPCOMTRUEUD,
26013 IX86_BUILTIN_VPCOMEQUQ,
26014 IX86_BUILTIN_VPCOMNEUQ,
26015 IX86_BUILTIN_VPCOMLTUQ,
26016 IX86_BUILTIN_VPCOMLEUQ,
26017 IX86_BUILTIN_VPCOMGTUQ,
26018 IX86_BUILTIN_VPCOMGEUQ,
26019 IX86_BUILTIN_VPCOMFALSEUQ,
26020 IX86_BUILTIN_VPCOMTRUEUQ,
26022 IX86_BUILTIN_VPCOMEQB,
26023 IX86_BUILTIN_VPCOMNEB,
26024 IX86_BUILTIN_VPCOMLTB,
26025 IX86_BUILTIN_VPCOMLEB,
26026 IX86_BUILTIN_VPCOMGTB,
26027 IX86_BUILTIN_VPCOMGEB,
26028 IX86_BUILTIN_VPCOMFALSEB,
26029 IX86_BUILTIN_VPCOMTRUEB,
26031 IX86_BUILTIN_VPCOMEQW,
26032 IX86_BUILTIN_VPCOMNEW,
26033 IX86_BUILTIN_VPCOMLTW,
26034 IX86_BUILTIN_VPCOMLEW,
26035 IX86_BUILTIN_VPCOMGTW,
26036 IX86_BUILTIN_VPCOMGEW,
26037 IX86_BUILTIN_VPCOMFALSEW,
26038 IX86_BUILTIN_VPCOMTRUEW,
26040 IX86_BUILTIN_VPCOMEQD,
26041 IX86_BUILTIN_VPCOMNED,
26042 IX86_BUILTIN_VPCOMLTD,
26043 IX86_BUILTIN_VPCOMLED,
26044 IX86_BUILTIN_VPCOMGTD,
26045 IX86_BUILTIN_VPCOMGED,
26046 IX86_BUILTIN_VPCOMFALSED,
26047 IX86_BUILTIN_VPCOMTRUED,
26049 IX86_BUILTIN_VPCOMEQQ,
26050 IX86_BUILTIN_VPCOMNEQ,
26051 IX86_BUILTIN_VPCOMLTQ,
26052 IX86_BUILTIN_VPCOMLEQ,
26053 IX86_BUILTIN_VPCOMGTQ,
26054 IX86_BUILTIN_VPCOMGEQ,
26055 IX86_BUILTIN_VPCOMFALSEQ,
26056 IX86_BUILTIN_VPCOMTRUEQ,
26058 /* LWP instructions. */
26059 IX86_BUILTIN_LLWPCB,
26060 IX86_BUILTIN_SLWPCB,
26061 IX86_BUILTIN_LWPVAL32,
26062 IX86_BUILTIN_LWPVAL64,
26063 IX86_BUILTIN_LWPINS32,
26064 IX86_BUILTIN_LWPINS64,
26068 /* BMI instructions. */
26069 IX86_BUILTIN_BEXTR32,
26070 IX86_BUILTIN_BEXTR64,
26073 /* TBM instructions. */
26074 IX86_BUILTIN_BEXTRI32,
26075 IX86_BUILTIN_BEXTRI64,
26077 /* BMI2 instructions. */
26078 IX86_BUILTIN_BZHI32,
26079 IX86_BUILTIN_BZHI64,
26080 IX86_BUILTIN_PDEP32,
26081 IX86_BUILTIN_PDEP64,
26082 IX86_BUILTIN_PEXT32,
26083 IX86_BUILTIN_PEXT64,
26085 /* FSGSBASE instructions. */
26086 IX86_BUILTIN_RDFSBASE32,
26087 IX86_BUILTIN_RDFSBASE64,
26088 IX86_BUILTIN_RDGSBASE32,
26089 IX86_BUILTIN_RDGSBASE64,
26090 IX86_BUILTIN_WRFSBASE32,
26091 IX86_BUILTIN_WRFSBASE64,
26092 IX86_BUILTIN_WRGSBASE32,
26093 IX86_BUILTIN_WRGSBASE64,
26095 /* RDRND instructions. */
26096 IX86_BUILTIN_RDRAND16_STEP,
26097 IX86_BUILTIN_RDRAND32_STEP,
26098 IX86_BUILTIN_RDRAND64_STEP,
26100 /* F16C instructions. */
26101 IX86_BUILTIN_CVTPH2PS,
26102 IX86_BUILTIN_CVTPH2PS256,
26103 IX86_BUILTIN_CVTPS2PH,
26104 IX86_BUILTIN_CVTPS2PH256,
26106 /* CFString built-in for darwin */
26107 IX86_BUILTIN_CFSTRING,
26112 /* Table for the ix86 builtin decls.  Indexed by IX86_BUILTIN_* code; an
   entry is NULL_TREE when the builtin's declaration has been deferred by
   def_builtin (see set_and_not_built_p below).  */
26113 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
26115 /* Table of all of the builtin functions that are possible with different ISA's
26116    but are waiting to be built until a function is declared to use that
   ISA.  */
26118 struct builtin_isa {
26119   const char *name;		/* function name */
26120   enum ix86_builtin_func_type tcode; /* type to use in the declaration */
26121   HOST_WIDE_INT isa;		/* isa_flags this builtin is defined for */
26122   bool const_p;		/* true if the declaration is constant */
26123   bool set_and_not_built_p;	/* true if recorded here but the decl has not
				   been built yet (set by def_builtin when it
				   defers creation; cleared once built) */
26126 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
26129 /* Add an ix86 target builtin function with CODE, NAME and TYPE.  Save the MASK
26130    of which isa_flags to use in the ix86_builtins_isa array.  Stores the
26131    function decl in the ix86_builtins array.  Returns the function decl or
26132    NULL_TREE, if the builtin was not added.
26134    If the front end has a special hook for builtin functions, delay adding
26135    builtin functions that aren't in the current ISA until the ISA is changed
26136    with function specific optimization.  Doing so, can save about 300K for the
26137    default compiler.  When the builtin is expanded, check at that time whether
   it is valid.
26140    If the front end doesn't have a special hook, record all builtins, even if
26141    it isn't an instruction set in the current ISA in case the user uses
26142    function specific options for a different ISA, so that we don't get scope
26143    errors if a builtin is added in the middle of a function scope.  */
26146 def_builtin (HOST_WIDE_INT mask, const char *name,
26147 	     enum ix86_builtin_func_type tcode,
26148 	     enum ix86_builtins code)
26150   tree decl = NULL_TREE;
  /* A builtin tagged OPTION_MASK_ISA_64BIT is only registered on 64-bit
     targets; on 32-bit targets it is silently skipped and NULL_TREE
     returned.  */
26152   if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
      /* Record the full mask (including the 64-bit bit) so the deferred
	 path in ix86_add_new_builtins can test it later.  */
26154       ix86_builtins_isa[(int) code].isa = mask;
26156       mask &= ~OPTION_MASK_ISA_64BIT;
26158 	  || (mask & ix86_isa_flags) != 0
26159 	  || (lang_hooks.builtin_function
26160 	      == lang_hooks.builtin_function_ext_scope))
	  /* The builtin is available in the current ISA (or the front end
	     can add builtins at extended scope later): build the decl now
	     and mark it as built.  */
26163 	  tree type = ix86_get_builtin_func_type (tcode);
26164 	  decl = add_builtin_function (name, type, code, BUILT_IN_MD,
26166 	  ix86_builtins[(int) code] = decl;
26167 	  ix86_builtins_isa[(int) code].set_and_not_built_p = false;
	  /* Otherwise defer: stash everything needed to build the decl
	     lazily in ix86_add_new_builtins once the ISA is enabled.  */
26171 	  ix86_builtins[(int) code] = NULL_TREE;
26172 	  ix86_builtins_isa[(int) code].tcode = tcode;
26173 	  ix86_builtins_isa[(int) code].name = name;
26174 	  ix86_builtins_isa[(int) code].const_p = false;
26175 	  ix86_builtins_isa[(int) code].set_and_not_built_p = true;
26182 /* Like def_builtin, but also marks the function decl "const".  */
26185 def_builtin_const (HOST_WIDE_INT mask, const char *name,
26186 		   enum ix86_builtin_func_type tcode, enum ix86_builtins code)
26188   tree decl = def_builtin (mask, name, tcode, code);
  /* If the decl was built, mark it read-only ("const") directly ...  */
26190     TREE_READONLY (decl) = 1;
  /* ... otherwise the builtin was deferred: record const-ness so
     ix86_add_new_builtins applies TREE_READONLY when it builds the decl.  */
26192     ix86_builtins_isa[(int) code].const_p = true;
26197 /* Add any new builtin functions for a given ISA that may not have been
26198    declared.  This saves a bit of space compared to adding all of the
26199    declarations to the tree, even if we didn't use them.  */
26202 ix86_add_new_builtins (HOST_WIDE_INT isa)
  /* Walk every builtin code and build the decls that def_builtin deferred
     and whose required ISA bits are now enabled.  */
26206   for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
26208       if ((ix86_builtins_isa[i].isa & isa) != 0
26209 	  && ix86_builtins_isa[i].set_and_not_built_p)
26213 	  /* Don't define the builtin again.  */
26214 	  ix86_builtins_isa[i].set_and_not_built_p = false;
26216 	  type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
	  /* Use the extended-scope hook so the builtin becomes visible even
	     when added in the middle of a function scope.  */
26217 	  decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
26218 						 type, i, BUILT_IN_MD, NULL,
26221 	  ix86_builtins[i] = decl;
	  /* Honor const-ness recorded by def_builtin_const at defer time.  */
26222 	  if (ix86_builtins_isa[i].const_p)
26223 	    TREE_READONLY (decl) = 1;
26228 /* Bits for builtin_description.flag.  */
26230 /* Set when we don't support the comparison natively, and should
26231    swap_comparison in order to support it.  */
26232 #define BUILTIN_DESC_SWAP_OPERANDS 1
26234 /* One table entry per builtin: ties a builtin code to the insn pattern
   that expands it.  (NOTE(review): per the comment above, the full
   definition also carries a 'flag' field for BUILTIN_DESC_SWAP_OPERANDS;
   it is not visible in this extract.)  */
26234 struct builtin_description
26236   const HOST_WIDE_INT mask;	/* OPTION_MASK_ISA_* bits required */
26237   const enum insn_code icode;	/* CODE_FOR_* insn pattern to emit */
26238   const char *const name;	/* "__builtin_ia32_*" source-level name */
26239   const enum ix86_builtins code; /* IX86_BUILTIN_* enumerator */
26240   const enum rtx_code comparison; /* comparison code, or UNKNOWN */
26244 static const struct builtin_description bdesc_comi[] =
/* Scalar COMI/UCOMI compare builtins.  The rtx comparison code records the
   predicate each builtin tests; UNEQ/UNLT/UNLE/LTGT are the
   unordered-aware codes — NOTE(review): confirm the mapping onto EFLAGS
   tests against the comi expander.  */
26246   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
26247   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
26248   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
26249   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
26250   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
26251   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
26252   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
26253   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
26254   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
26255   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
26256   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
26257   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
26258   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
26259   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
26260   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
26261   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
26262   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
26263   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
26264   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
26265   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
26266   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
26267   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
26268   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
26269   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
26272 static const struct builtin_description bdesc_pcmpestr[] =
/* SSE4.2 packed-compare string builtins with explicit lengths (PCMPESTR*).
   The last field selects the flags mode (CCAmode etc.) for the a/c/o/s/z
   flag-reading variants; 0 for the index/mask-producing forms.  */
26275   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
26276   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
26277   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
26278   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
26279   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
26280   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
26281   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
26284 static const struct builtin_description bdesc_pcmpistr[] =
/* SSE4.2 packed-compare string builtins with implicit (NUL-terminated)
   lengths (PCMPISTR*).  Same flags-mode convention as bdesc_pcmpestr.  */
26287   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
26288   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
26289   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
26290   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
26291   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
26292   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
26293   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
26296 /* Special builtins with variable number of arguments.  Entries whose mask
   is ~OPTION_MASK_ISA_64BIT are presumably available regardless of ISA
   flags — NOTE(review): confirm against how def_builtin tests the mask.  */
26297 static const struct builtin_description bdesc_special_args[] =
26299   { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtsc, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
26300   { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtscp, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
26301   { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
  /* MMX */
26304   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
  /* 3DNow! */
26307   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
  /* SSE */
26310   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
26311   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
26312   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
26314   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
26315   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
26316   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
26317   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
26319   /* SSE or 3DNow!A */
26320   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
26321   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
  /* SSE2 */
26324   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
26325   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
26326   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
26327   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
26328   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
26329   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
26330   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
26331   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
26332   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
26334   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
26335   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
  /* SSE3 */
26338   { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
  /* SSE4.1 */
26341   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
  /* SSE4A */
26344   { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
26345   { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  /* AVX */
26348   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
26349   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
26351   { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
26352   { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
26353   { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
26354   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
26355   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
26357   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
26358   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
26359   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
26360   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
26361   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
26362   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
26363   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
26365   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
26366   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
26367   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
26369   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
26370   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
26371   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
26372   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
26373   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
26374   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
26375   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
26376   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
  /* AVX2 */
26379   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
26380   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
26381   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
26382   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
26383   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
26384   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
26385   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
26386   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
26387   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },
  /* LWP */
26389   { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
26390   { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
26391   { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
26392   { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
26393   { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
26394   { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
  /* FSGSBASE (64-bit only) */
26397   { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
26398   { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
26399   { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
26400   { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
26401   { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
26402   { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
26403   { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
26404   { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
26407 /* Builtins with variable number of arguments. */
26408 static const struct builtin_description bdesc_args[] =
26410 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
26411 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
26412 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdpmc, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
26413 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
26414 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
26415 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
26416 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
26419 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
26420 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
26421 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
26422 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
26423 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
26424 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
26426 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
26427 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
26428 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
26429 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
26430 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
26431 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
26432 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
26433 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
26435 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
26436 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
26438 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
26439 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
26440 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
26441 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
26443 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
26444 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
26445 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
26446 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
26447 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
26448 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
26450 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
26451 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
26452 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
26453 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
26454 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
26455 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
26457 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
26458 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
26459 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
26461 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
26463 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
26464 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
26465 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
26466 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
26467 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
26468 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
26470 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
26471 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
26472 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
26473 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
26474 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
26475 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
26477 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
26478 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
26479 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
26480 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
26483 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
26484 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
26485 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
26486 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
26488 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
26489 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
26490 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
26491 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
26492 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
26493 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
26494 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
26495 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
26496 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
26497 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
26498 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
26499 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
26500 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
26501 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
26502 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
26505 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
26506 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
26507 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
26508 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
26509 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
26510 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
26513 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
26514 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
26515 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
26516 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
26517 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
26518 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
26519 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
26520 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
26521 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
26522 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
26523 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
26524 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
26526 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
26528 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
26529 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
26530 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
26531 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
26532 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
26533 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
26534 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
26535 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
26537 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
26538 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
26539 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
26540 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
26541 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
26542 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
26543 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
26544 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
26545 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
26546 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
26547 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP},
26548 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
26549 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
26550 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
26551 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
26552 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
26553 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
26554 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
26555 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
26556 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
26557 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
26558 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
26560 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
26561 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
26562 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
26563 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
26565 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
26566 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
26567 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
26568 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
26570 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
26572 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
26573 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
26574 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
26575 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
26576 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
26578 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
26579 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
26580 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, V4SF_FTYPE_V4SF_DI },
26582 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
26584 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
26585 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
26586 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
26588 /* SSE MMX or 3Dnow!A */
26589 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
26590 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
26591 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
26593 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
26594 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
26595 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
26596 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
26598 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
26599 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
26601 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
26604 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
26606 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
26607 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
26608 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
26609 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
26610 { OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
26612 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
26613 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
26614 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
26615 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
26616 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
26618 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
26620 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
26621 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
26622 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
26623 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
26625 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
26626 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
26627 { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
26629 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
26630 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
26631 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
26632 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
26633 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
26634 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
26635 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
26636 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
26638 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
26639 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
26640 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
26641 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
26642 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP},
26643 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
26644 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
26645 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
26646 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
26647 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
26648 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
26649 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
26650 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
26651 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
26652 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
26653 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
26654 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
26655 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
26656 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
26657 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
26659 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
26660 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
26661 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
26662 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
26664 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
26665 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
26666 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
26667 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
26669 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
26671 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
26672 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
26673 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
26675 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
26677 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
26678 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
26679 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
26680 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
26681 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
26682 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
26683 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
26684 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
26686 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
26687 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
26688 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
26689 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
26690 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
26691 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
26692 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
26693 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
26695 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
26696 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN,(int) V8HI_FTYPE_V8HI_V8HI },
26698 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
26699 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
26700 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
26701 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
26703 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
26704 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
26706 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
26707 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
26708 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
26709 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
26710 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
26711 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
26713 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
26714 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
26715 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
26716 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
26718 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
26719 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
26720 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
26721 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
26722 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
26723 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
26724 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
26725 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
26727 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
26728 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
26729 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
26731 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
26732 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
26734 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
26735 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
26737 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
26739 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
26740 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
26741 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
26742 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
26744 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
26745 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
26746 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
26747 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
26748 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
26749 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
26750 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
26752 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
26753 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
26754 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
26755 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
26756 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
26757 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
26758 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
26760 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
26761 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
26762 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
26763 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
26765 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
26766 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
26767 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
26769 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
26771 { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
26772 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
26774 { OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
26777 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
26778 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
26781 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF},
26782 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
26784 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
26785 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
26786 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
26787 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
26788 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
26789 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
26792 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
26793 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
26794 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
26795 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
26796 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
26797 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
26799 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
26800 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
26801 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
26802 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
26803 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
26804 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
26805 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
26806 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
26807 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
26808 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
26809 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
26810 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
26811 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
26812 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
26813 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
26814 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
26815 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
26816 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
26817 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
26818 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
26819 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
26820 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
26821 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
26822 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
26825 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
26826 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
26829 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
26830 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
26831 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
26832 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
26833 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
26834 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
26835 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
26836 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
26837 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
26838 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
26840 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
26841 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
26842 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
26843 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
26844 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
26845 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
26846 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
26847 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
26848 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
26849 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
26850 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
26851 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
26852 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
26854 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
26855 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
26856 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
26857 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
26858 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
26859 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
26860 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
26861 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
26862 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
26863 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
26864 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
26865 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
26868 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
26869 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
26870 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
26871 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
26873 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
26874 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
26875 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
26876 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
26878 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
26879 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
26881 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
26882 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
26884 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
26885 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
26886 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
26887 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
26889 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
26890 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },
26892 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
26893 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },
26895 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
26896 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
26897 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
26900 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
26901 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
26902 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
26903 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
26904 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
26907 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
26908 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
26909 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
26910 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
26913 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
26914 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
26916 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
26917 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
26918 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
26919 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
26922 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
26925 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
26926 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
26927 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
26928 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
26929 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
26930 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
26931 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
26932 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
26933 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
26934 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
26935 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
26936 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
26937 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
26938 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
26939 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
26940 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
26941 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
26942 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
26943 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
26944 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
26945 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
26946 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
26947 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
26948 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
26949 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
26950 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
26952 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
26953 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
26954 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
26955 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
26957 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
26958 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
26959 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
26960 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
26961 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
26962 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
26963 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
26964 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
26965 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
26966 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
26967 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
26968 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
26969 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
26970 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
26971 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
26972 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
26973 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
26974 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
26975 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
26976 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
26977 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
26978 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
26979 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
26980 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
26981 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
26982 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
26983 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
26984 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
26985 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
26986 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
26987 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
26988 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
26989 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
26990 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
26992 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
26993 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
26994 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
26996 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
26997 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
26998 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
26999 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
27000 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
27002 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
27004 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
27005 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
27007 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
27008 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
27009 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
27010 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
27012 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
27013 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
27015 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
27016 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
27018 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
27019 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
27020 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
27021 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
27023 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
27024 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },
27026 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
27027 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
27029 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
27030 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
27031 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
27032 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
27034 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
27035 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
27036 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
27037 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
27038 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
27039 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
27041 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
27042 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
27043 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
27044 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
27045 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
27046 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
27047 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
27048 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
27049 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
27050 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
27051 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
27052 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
27053 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
27054 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
27055 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
27057 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
27058 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
27060 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
27061 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
27063 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256 ", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
27066 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
27067 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
27068 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
27069 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
27070 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
27071 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
27072 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
27073 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
27074 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
27075 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
27076 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
27077 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
27078 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
27079 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
27080 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
27081 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
27082 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
27083 { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
27084 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
27085 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
27086 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
27087 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
27088 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
27089 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
27090 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
27091 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
27092 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
27093 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
27094 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
27095 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
27096 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
27097 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
27098 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
27099 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
27100 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
27101 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
27102 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
27103 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
27104 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
27105 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
27106 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
27107 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3 , "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
27108 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
27109 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
27110 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3 , "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
27111 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
27112 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
27113 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3 , "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
27114 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
27115 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
27116 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3 , "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
27117 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
27118 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
27119 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2 , "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
27120 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2 , "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
27121 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2 , "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
27122 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2 , "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
27123 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2 , "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
27124 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
27125 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2 , "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
27126 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2 , "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
27127 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2 , "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
27128 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2 , "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
27129 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2 , "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
27130 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mulv4siv4di3 , "__builtin_ia32_pmuldq256" , IX86_BUILTIN_PMULDQ256 , UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
27131 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_umulhrswv16hi3 , "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
27132 { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
27133 { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256" , IX86_BUILTIN_PMULHW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
27134 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256" , IX86_BUILTIN_PMULLW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
27135 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256" , IX86_BUILTIN_PMULLD256 , UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
27136 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_umulv4siv4di3 , "__builtin_ia32_pmuludq256" , IX86_BUILTIN_PMULUDQ256 , UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
27137 { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
27138 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
27139 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
27140 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
27141 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
27142 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
27143 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
27144 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
27145 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3 , "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
27146 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
27147 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
27148 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
27149 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
27150 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
27151 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
27152 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
27153 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
27154 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
27155 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
27156 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
27157 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
27158 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
27159 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
27160 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
27161 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
27162 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
27163 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
27164 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
27165 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
27166 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
27167 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
27168 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
27169 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
27170 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
27171 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
27172 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
27173 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
27174 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
27175 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
27176 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
27177 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
27178 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
27179 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
27180 { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
27181 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
27182 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
27183 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
27184 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
27185 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
27186 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
27187 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
27188 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
27189 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
27190 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
27191 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
27192 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
27193 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
27194 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
27195 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
27196 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
27197 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
27198 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
27199 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
27200 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_extracti128, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
27201 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_inserti128, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
27202 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
27203 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
27204 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
27205 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
27206 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
27207 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
27208 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
27209 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
27210 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
27211 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
27213 { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
27216 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
27217 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
27218 { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
27221 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
27222 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
27225 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
27226 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
27227 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
27228 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
27231 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
27232 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
27233 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
27234 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
27235 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
27236 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
27239 /* FMA4 and XOP. */
/* Shorthand aliases for the ix86 function-type enums used by the
   bdesc_multi_arg builtin table below.  The naming scheme is
   MULTI_ARG_<N>_<TYPE>[<WIDTH>][_<SUFFIX>]:
     <N>      - number of vector operands taken by the builtin;
     <TYPE>   - element type: SF/DF = float/double, QI/HI/SI/DI =
                8/16/32/64-bit integer;
     <WIDTH>  - a trailing "2" denotes the 256-bit (AVX-sized) variant
                of the corresponding 128-bit type;
     <SUFFIX> - IMM = last operand is an immediate, CMP = comparison
                code operand, TF = true/false (PCOM_TRUE/PCOM_FALSE)
                comparison form, and combinations such as SI_DI denote
                a widening operation (e.g. V4SI inputs, V2DI result).  */

/* Four-operand forms (vpermil2 and friends): two vectors, a selector
   vector and an integer immediate.  */
27240 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
27241 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
27242 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
27243 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT

/* Three-operand forms (FMA, pcmov, pmacs* multiply-accumulate).  */
27244 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
27245 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
27246 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
27247 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
27248 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
27249 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
27250 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
27251 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
27252 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
27253 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
27254 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
27255 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
27256 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
27257 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI

/* Two-operand forms: plain binary ops, immediate-shift/rotate (_IMM),
   comparisons taking an rtx comparison code (_CMP), and the
   always-true/always-false comparison variants (_TF).  */
27258 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
27259 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
27260 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
27261 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
27262 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
27263 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
27264 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
27265 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
27266 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
27267 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
27268 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
27269 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
27270 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
27271 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
27272 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
27273 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
27274 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
27275 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
27276 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
27277 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF

/* One-operand forms: unary ops (vfrcz*) and the widening horizontal
   adds/subs (vphadd*/vphsub*), whose result element is wider than the
   input element (e.g. _QI_HI: V16QI input, V8HI result).  */
27278 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
27279 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
27280 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
27281 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
27282 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
27283 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
27284 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
27285 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
27286 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
27287 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
27288 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
27289 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
27290 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
27291 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
27293 static const struct builtin_description bdesc_multi_arg[] =
27295 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
27296 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
27297 UNKNOWN, (int)MULTI_ARG_3_SF },
27298 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
27299 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
27300 UNKNOWN, (int)MULTI_ARG_3_DF },
27302 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
27303 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
27304 UNKNOWN, (int)MULTI_ARG_3_SF },
27305 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
27306 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
27307 UNKNOWN, (int)MULTI_ARG_3_DF },
27309 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
27310 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
27311 UNKNOWN, (int)MULTI_ARG_3_SF },
27312 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
27313 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
27314 UNKNOWN, (int)MULTI_ARG_3_DF },
27315 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
27316 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
27317 UNKNOWN, (int)MULTI_ARG_3_SF2 },
27318 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
27319 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
27320 UNKNOWN, (int)MULTI_ARG_3_DF2 },
27322 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
27323 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
27324 UNKNOWN, (int)MULTI_ARG_3_SF },
27325 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
27326 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
27327 UNKNOWN, (int)MULTI_ARG_3_DF },
27328 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
27329 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
27330 UNKNOWN, (int)MULTI_ARG_3_SF2 },
27331 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
27332 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
27333 UNKNOWN, (int)MULTI_ARG_3_DF2 },
27335 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
27336 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
27337 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
27338 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
27339 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi",IX86_BUILTIN_VPCMOV_V16QI,UNKNOWN, (int)MULTI_ARG_3_QI },
27340 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
27341 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
27343 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
27344 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
27345 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
27346 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
27347 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
27348 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
27349 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
27351 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
27353 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
27354 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
27355 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
27356 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
27357 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
27358 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
27359 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
27360 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
27361 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
27362 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
27363 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
27364 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
27366 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
27367 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
27368 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
27369 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
27370 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
27371 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
27372 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
27373 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
27374 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
27375 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
27376 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
27377 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
27378 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
27379 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
27380 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
27381 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
27383 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
27384 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
27385 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
27386 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
27387 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
27388 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
27390 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
27391 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
27392 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
27393 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
27394 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
27395 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
27396 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
27397 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
27398 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
27399 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
27400 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
27401 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
27402 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
27403 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
27404 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
27406 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
27407 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
27408 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
27409 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
27410 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
27411 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
27412 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
27414 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
27415 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
27416 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
27417 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
27418 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
27419 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
27420 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
27422 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
27423 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
27424 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
27425 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
27426 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
27427 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
27428 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
27430 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
27431 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
27432 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
27433 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
27434 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
27435 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
27436 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
27438 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
27439 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
27440 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
27441 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
27442 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
27443 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
27444 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
27446 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
27447 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
27448 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
27449 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
27450 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
27451 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
27452 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
27454 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
27455 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
27456 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
27457 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
27458 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
27459 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
27460 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
27462 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
27463 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
27464 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
27465 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
27466 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
27467 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
27468 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
27470 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
27471 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
27472 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
27473 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
27474 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
27475 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
27476 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
27477 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
27479 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
27480 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
27481 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
27482 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
27483 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
27484 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
27485 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
27486 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
27488 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
27489 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
27490 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
27491 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
27495 /* TM vector builtins. */
27497 /* Reuse the existing x86-specific `struct builtin_description' because
27498 we're lazy. Add casts to make them fit. */
/* Descriptor table for the transactional-memory (ITM) vector
   load/store/log builtins.  Each entry pairs an OPTION_MASK_ISA_*
   requirement (MMX for 64-bit, SSE for 128-bit, AVX for 256-bit
   vectors) with the generic BUILT_IN_TM_* code cast into the
   ix86_builtins enum, plus a function-type enum.  CODE_FOR_nothing is
   used because these are not backed by a named insn pattern; they are
   registered through ix86_init_tm_builtins below.
   NOTE(review): the extract's line numbering skips entries and the
   initializer's closing "};" is not visible -- lines appear elided.  */
27499 static const struct builtin_description bdesc_tm[] =
27501 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
27502 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
27503 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
27504 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
27505 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
27506 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
27507 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
27509 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
27510 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
27511 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
27512 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
27513 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
27514 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
27515 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
27517 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
27518 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
27519 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
27520 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
27521 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
27522 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
27523 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
27525 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
27526 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
27527 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
27530 /* TM callbacks. */
27532 /* Return the builtin decl needed to load a vector of TYPE. */
/* Maps a vector TYPE's bit size (64/128/256, per the table below) to
   the matching BUILT_IN_TM_LOAD_M* decl; non-vector types fall through.
   NOTE(review): the "static tree" return-type line, braces, and the
   "case" labels of the switch are not visible here -- lines elided by
   the extraction.  */
27535 ix86_builtin_tm_load (tree type)
27537 if (TREE_CODE (type) == VECTOR_TYPE)
27539 switch (tree_low_cst (TYPE_SIZE (type), 1))
27542 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
27544 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
27546 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
27552 /* Return the builtin decl needed to store a vector of TYPE. */
/* Store-side twin of ix86_builtin_tm_load: dispatches on the vector
   TYPE's bit size to the matching BUILT_IN_TM_STORE_M* decl.
   NOTE(review): return-type line, braces, and "case" labels are not
   visible in this extract -- lines elided.  */
27555 ix86_builtin_tm_store (tree type)
27557 if (TREE_CODE (type) == VECTOR_TYPE)
27559 switch (tree_low_cst (TYPE_SIZE (type), 1))
27562 return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
27564 return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
27566 return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
27572 /* Initialize the transactional memory vector load/store builtins. */
/* Walks bdesc_tm and registers each entry whose ISA mask is enabled
   (or unconditionally for front ends with extended-scope builtin
   registration).  Attributes are cloned from the scalar TM_LOAD_1 /
   TM_STORE_1 / TM_LOG builtins so the vector variants behave the same.
   NOTE(review): several lines (declaration of `decl' and `i', loop
   increment, some braces, and an `else' arm setting `attrs' for the
   log case) are not visible in this extract.  */
27575 ix86_init_tm_builtins (void)
27577 enum ix86_builtin_func_type ftype;
27578 const struct builtin_description *d;
27581 tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
27582 tree attrs_log, attrs_type_log;
27587 /* Use whatever attributes a normal TM load has. */
27588 decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
27589 attrs_load = DECL_ATTRIBUTES (decl);
27590 attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
27591 /* Use whatever attributes a normal TM store has. */
27592 decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
27593 attrs_store = DECL_ATTRIBUTES (decl);
27594 attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
27595 /* Use whatever attributes a normal TM log has. */
27596 decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
27597 attrs_log = DECL_ATTRIBUTES (decl);
27598 attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));
27600 for (i = 0, d = bdesc_tm;
27601 i < ARRAY_SIZE (bdesc_tm);
27604 if ((d->mask & ix86_isa_flags) != 0
27605 || (lang_hooks.builtin_function
27606 == lang_hooks.builtin_function_ext_scope))
27608 tree type, attrs, attrs_type;
27609 enum built_in_function code = (enum built_in_function) d->code;
27611 ftype = (enum ix86_builtin_func_type) d->flag;
27612 type = ix86_get_builtin_func_type (ftype);
27614 if (BUILTIN_TM_LOAD_P (code))
27616 attrs = attrs_load;
27617 attrs_type = attrs_type_load;
27619 else if (BUILTIN_TM_STORE_P (code))
27621 attrs = attrs_store;
27622 attrs_type = attrs_type_store;
27627 attrs_type = attrs_type_log;
27629 decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
27630 /* The builtin without the prefix for
27631 calling it directly. */
27632 d->name + strlen ("__builtin_"),
27634 /* add_builtin_function() will set the DECL_ATTRIBUTES, now
27635 set the TYPE_ATTRIBUTES. */
27636 decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);
27638 set_builtin_decl (code, decl, false);
27643 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
27644 in the current target ISA to allow the user to compile particular modules
27645 with different target specific options that differ from the command line
/* Registers every MMX/SSE/AVX/AES/PCLMUL/RDRND/AVX2/XOP builtin:
   first the table-driven groups (bdesc_special_args, bdesc_args,
   pcmpestr/pcmpistr, comi, multi-arg), then the explicitly listed
   special builtins (MXCSR access, maskmov, clflush/mfence, monitor/
   mwait, AES, PCLMUL, RDRAND steps, AVX2 gathers, vec_init/vec_ext/
   vec_set patterns).  NOTE(review): loop increments, braces and a few
   lines are elided from this extract.  */
27648 ix86_init_mmx_sse_builtins (void)
27650 const struct builtin_description * d;
27651 enum ix86_builtin_func_type ftype;
27654 /* Add all special builtins with variable number of operands. */
27655 for (i = 0, d = bdesc_special_args;
27656 i < ARRAY_SIZE (bdesc_special_args);
27662 ftype = (enum ix86_builtin_func_type) d->flag;
27663 def_builtin (d->mask, d->name, ftype, d->code);
27666 /* Add all builtins with variable number of operands. */
27667 for (i = 0, d = bdesc_args;
27668 i < ARRAY_SIZE (bdesc_args);
27674 ftype = (enum ix86_builtin_func_type) d->flag;
27675 def_builtin_const (d->mask, d->name, ftype, d->code);
27678 /* pcmpestr[im] insns. */
27679 for (i = 0, d = bdesc_pcmpestr;
27680 i < ARRAY_SIZE (bdesc_pcmpestr);
27683 if (d->code == IX86_BUILTIN_PCMPESTRM128)
27684 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
27686 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
27687 def_builtin_const (d->mask, d->name, ftype, d->code);
27690 /* pcmpistr[im] insns. */
27691 for (i = 0, d = bdesc_pcmpistr;
27692 i < ARRAY_SIZE (bdesc_pcmpistr);
27695 if (d->code == IX86_BUILTIN_PCMPISTRM128)
27696 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
27698 ftype = INT_FTYPE_V16QI_V16QI_INT;
27699 def_builtin_const (d->mask, d->name, ftype, d->code);
27702 /* comi/ucomi insns. */
27703 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
27705 if (d->mask == OPTION_MASK_ISA_SSE2)
27706 ftype = INT_FTYPE_V2DF_V2DF;
27708 ftype = INT_FTYPE_V4SF_V4SF;
27709 def_builtin_const (d->mask, d->name, ftype, d->code);
27713 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
27714 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
27715 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
27716 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
27718 /* SSE or 3DNow!A */
27719 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
27720 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
27721 IX86_BUILTIN_MASKMOVQ);
27724 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
27725 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
27727 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
27728 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
27729 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
27730 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
27733 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
27734 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
27735 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
27736 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
27739 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
27740 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
27741 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
27742 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
27743 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
27744 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
27745 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
27746 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
27747 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
27748 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
27749 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
27750 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
27753 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
27754 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
27757 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
27758 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
27759 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
27760 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
27761 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
27762 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
27763 IX86_BUILTIN_RDRAND64_STEP);
27766 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
27767 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
27768 IX86_BUILTIN_GATHERSIV2DF);
27770 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
27771 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
27772 IX86_BUILTIN_GATHERSIV4DF);
27774 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
27775 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
27776 IX86_BUILTIN_GATHERDIV2DF);
27778 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
27779 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
27780 IX86_BUILTIN_GATHERDIV4DF);
27782 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
27783 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
27784 IX86_BUILTIN_GATHERSIV4SF);
27786 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
27787 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
27788 IX86_BUILTIN_GATHERSIV8SF);
27790 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
27791 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
27792 IX86_BUILTIN_GATHERDIV4SF);
27794 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
27795 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
27796 IX86_BUILTIN_GATHERDIV8SF);
27798 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
27799 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
27800 IX86_BUILTIN_GATHERSIV2DI);
27802 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
27803 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
27804 IX86_BUILTIN_GATHERSIV4DI);
27806 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
27807 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
27808 IX86_BUILTIN_GATHERDIV2DI);
27810 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
27811 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
27812 IX86_BUILTIN_GATHERDIV4DI);
27814 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
27815 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
27816 IX86_BUILTIN_GATHERSIV4SI);
27818 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
27819 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
27820 IX86_BUILTIN_GATHERSIV8SI);
27822 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
27823 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
27824 IX86_BUILTIN_GATHERDIV4SI);
27826 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
27827 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
27828 IX86_BUILTIN_GATHERDIV8SI);
/* NOTE(review): the four "gatheralt" builtin names below each carry a
   trailing space INSIDE the string literal.  That looks like an
   upstream defect (user code would have to spell the trailing space to
   call them); flagged only -- changing a runtime string is out of
   scope for a comment-only update.  */
27830 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df ",
27831 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
27832 IX86_BUILTIN_GATHERALTSIV4DF);
27834 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256 ",
27835 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
27836 IX86_BUILTIN_GATHERALTDIV8SF);
27838 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di ",
27839 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
27840 IX86_BUILTIN_GATHERALTSIV4DI);
27842 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256 ",
27843 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
27844 IX86_BUILTIN_GATHERALTDIV8SI);
27846 /* MMX access to the vec_init patterns. */
27847 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
27848 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
27850 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
27851 V4HI_FTYPE_HI_HI_HI_HI,
27852 IX86_BUILTIN_VEC_INIT_V4HI);
27854 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
27855 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
27856 IX86_BUILTIN_VEC_INIT_V8QI);
27858 /* Access to the vec_extract patterns. */
27859 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
27860 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
27861 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
27862 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
27863 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
27864 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
27865 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
27866 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
27867 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
27868 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
27870 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
27871 "__builtin_ia32_vec_ext_v4hi",
27872 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
27874 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
27875 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
27877 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
27878 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
27880 /* Access to the vec_set patterns. */
27881 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
27882 "__builtin_ia32_vec_set_v2di",
27883 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
27885 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
27886 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
27888 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
27889 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
27891 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
27892 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
27894 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
27895 "__builtin_ia32_vec_set_v4hi",
27896 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
27898 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
27899 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
27901 /* Add FMA4 multi-arg argument instructions */
27902 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
27907 ftype = (enum ix86_builtin_func_type) d->flag;
27908 def_builtin_const (d->mask, d->name, ftype, d->code);
27912 /* Internal method for ix86_init_builtins. */
/* Registers ABI-specific va_start/va_end/va_copy builtins.  Two
   families are created -- __builtin_ms_va_* (tagged "ms_abi") and
   __builtin_sysv_va_* (tagged "sysv_abi") -- so either calling
   convention's varargs can be used explicitly.  NOTE(review): an early
   guard and some lines (e.g. the sysv_va_ref assignment's left-hand
   side, fnvoid_va_end_ms's lhs) are elided in this extract.  */
27915 ix86_init_builtins_va_builtins_abi (void)
27917 tree ms_va_ref, sysv_va_ref;
27918 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
27919 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
27920 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
27921 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
27925 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
27926 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
27927 ms_va_ref = build_reference_type (ms_va_list_type_node);
27929 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
27932 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
27933 fnvoid_va_start_ms =
27934 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
27935 fnvoid_va_end_sysv =
27936 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
27937 fnvoid_va_start_sysv =
27938 build_varargs_function_type_list (void_type_node, sysv_va_ref,
27940 fnvoid_va_copy_ms =
27941 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
27943 fnvoid_va_copy_sysv =
27944 build_function_type_list (void_type_node, sysv_va_ref,
27945 sysv_va_ref, NULL_TREE);
27947 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
27948 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
27949 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
27950 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
27951 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
27952 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
27953 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
27954 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
27955 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
27956 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
27957 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
27958 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
/* Registers the x86-specific floating types with the front end:
   __float80 (reuses long double when it is already XFmode, otherwise
   builds a fresh 80-bit REAL_TYPE) and __float128 (128-bit REAL_TYPE),
   then instantiates the primitive builtin types via the awk-generated
   DEFINE_BUILTIN_PRIMITIVE_TYPES macro.  */
27962 ix86_init_builtin_types (void)
27964 tree float128_type_node, float80_type_node;
27966 /* The __float80 type. */
27967 float80_type_node = long_double_type_node;
27968 if (TYPE_MODE (float80_type_node) != XFmode)
27970 /* The __float80 type. */
27971 float80_type_node = make_node (REAL_TYPE);
27973 TYPE_PRECISION (float80_type_node) = 80;
27974 layout_type (float80_type_node);
27976 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
27978 /* The __float128 type. */
27979 float128_type_node = make_node (REAL_TYPE);
27980 TYPE_PRECISION (float128_type_node) = 128;
27981 layout_type (float128_type_node);
27982 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
27984 /* This macro is built by i386-builtin-types.awk. */
27985 DEFINE_BUILTIN_PRIMITIVE_TYPES;
/* Top-level TARGET_INIT_BUILTINS worker: sets up builtin types, the
   TFmode helpers (__builtin_infq/__builtin_huge_valq and the
   libgcc-backed __builtin_fabsq/__builtin_copysignq, marked
   TREE_READONLY), then delegates to the TM, MMX/SSE, and va_* ABI
   initializers.  NOTE(review): the declaration of `t', a guard before
   the va-builtins call, and the closing #endif are elided here.  */
27989 ix86_init_builtins (void)
27993 ix86_init_builtin_types ();
27995 /* TFmode support builtins. */
27996 def_builtin_const (0, "__builtin_infq",
27997 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
27998 def_builtin_const (0, "__builtin_huge_valq",
27999 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
28001 /* We will expand them to normal call if SSE2 isn't available since
28002 they are used by libgcc. */
28003 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
28004 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
28005 BUILT_IN_MD, "__fabstf2", NULL_TREE);
28006 TREE_READONLY (t) = 1;
28007 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
28009 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
28010 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
28011 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
28012 TREE_READONLY (t) = 1;
28013 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
28015 ix86_init_tm_builtins ();
28016 ix86_init_mmx_sse_builtins ();
28019 ix86_init_builtins_va_builtins_abi ();
28021 #ifdef SUBTARGET_INIT_BUILTINS
28022 SUBTARGET_INIT_BUILTINS;
28026 /* Return the ix86 builtin for CODE. */
/* TARGET_BUILTIN_DECL hook: bounds-checks CODE against
   IX86_BUILTIN_MAX (returning error_mark_node when out of range) and
   otherwise indexes the ix86_builtins decl table.  */
28029 ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
28031 if (code >= IX86_BUILTIN_MAX)
28032 return error_mark_node;
28034 return ix86_builtins[code];
28037 /* Errors in the source file can cause expand_expr to return const0_rtx
28038 where we expect a vector. To avoid crashing, use one of the vector
28039 clear instructions. */
/* Replaces a scalar const0_rtx with the all-zero vector constant of
   MODE; any other X is returned unchanged (the trailing "return x;"
   is elided from this extract).  */
28041 safe_vector_operand (rtx x, enum machine_mode mode)
28043 if (x == const0_rtx)
28044 x = CONST0_RTX (mode);
28048 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
/* Expands a two-operand builtin via insn pattern ICODE: evaluates both
   call arguments, zero-substitutes error placeholders for vector modes,
   allocates a fresh TARGET when the provided one does not match, widens
   an SImode shift count to TImode through an sse2_loadd when the
   pattern wants TImode, forces non-matching operands into registers,
   and emits the generated insn.  NOTE(review): the final emit/return
   tail after GEN_FCN is elided from this extract.  */
28051 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
28054 tree arg0 = CALL_EXPR_ARG (exp, 0);
28055 tree arg1 = CALL_EXPR_ARG (exp, 1);
28056 rtx op0 = expand_normal (arg0);
28057 rtx op1 = expand_normal (arg1);
28058 enum machine_mode tmode = insn_data[icode].operand[0].mode;
28059 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
28060 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
28062 if (VECTOR_MODE_P (mode0))
28063 op0 = safe_vector_operand (op0, mode0);
28064 if (VECTOR_MODE_P (mode1))
28065 op1 = safe_vector_operand (op1, mode1);
28067 if (optimize || !target
28068 || GET_MODE (target) != tmode
28069 || !insn_data[icode].operand[0].predicate (target, tmode))
28070 target = gen_reg_rtx (tmode);
28072 if (GET_MODE (op1) == SImode && mode1 == TImode)
28074 rtx x = gen_reg_rtx (V4SImode);
28075 emit_insn (gen_sse2_loadd (x, op1));
28076 op1 = gen_lowpart (TImode, x);
28079 if (!insn_data[icode].operand[1].predicate (op0, mode0))
28080 op0 = copy_to_mode_reg (mode0, op0);
28081 if (!insn_data[icode].operand[2].predicate (op1, mode1))
28082 op1 = copy_to_mode_reg (mode1, op1);
28084 pat = GEN_FCN (icode) (target, op0, op1);
28093 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
/* Expands FMA4/XOP-style multi-argument builtins.  The big switch over
   M_TYPE derives the argument count and three flags: whether the last
   argument must be a constant immediate, whether the insn encodes an
   extra comparison operand (comparison_p), and (elided here) whether a
   trailing condition-code operand is appended (tf_p).  Arguments are
   then expanded, validated against the pattern's predicates, and the
   insn is generated with the arity-appropriate GEN_FCN call.
   NOTE(review): many lines are elided from this extract -- `nargs'
   assignments after each case group, `break' statements, the args[]
   declaration, tf_p handling, and the final emit/return tail.  */
28096 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
28097 enum ix86_builtin_func_type m_type,
28098 enum rtx_code sub_code)
28103 bool comparison_p = false;
28105 bool last_arg_constant = false;
28106 int num_memory = 0;
28109 enum machine_mode mode;
28112 enum machine_mode tmode = insn_data[icode].operand[0].mode;
28116 case MULTI_ARG_4_DF2_DI_I:
28117 case MULTI_ARG_4_DF2_DI_I1:
28118 case MULTI_ARG_4_SF2_SI_I:
28119 case MULTI_ARG_4_SF2_SI_I1:
28121 last_arg_constant = true;
28124 case MULTI_ARG_3_SF:
28125 case MULTI_ARG_3_DF:
28126 case MULTI_ARG_3_SF2:
28127 case MULTI_ARG_3_DF2:
28128 case MULTI_ARG_3_DI:
28129 case MULTI_ARG_3_SI:
28130 case MULTI_ARG_3_SI_DI:
28131 case MULTI_ARG_3_HI:
28132 case MULTI_ARG_3_HI_SI:
28133 case MULTI_ARG_3_QI:
28134 case MULTI_ARG_3_DI2:
28135 case MULTI_ARG_3_SI2:
28136 case MULTI_ARG_3_HI2:
28137 case MULTI_ARG_3_QI2:
28141 case MULTI_ARG_2_SF:
28142 case MULTI_ARG_2_DF:
28143 case MULTI_ARG_2_DI:
28144 case MULTI_ARG_2_SI:
28145 case MULTI_ARG_2_HI:
28146 case MULTI_ARG_2_QI:
28150 case MULTI_ARG_2_DI_IMM:
28151 case MULTI_ARG_2_SI_IMM:
28152 case MULTI_ARG_2_HI_IMM:
28153 case MULTI_ARG_2_QI_IMM:
28155 last_arg_constant = true;
28158 case MULTI_ARG_1_SF:
28159 case MULTI_ARG_1_DF:
28160 case MULTI_ARG_1_SF2:
28161 case MULTI_ARG_1_DF2:
28162 case MULTI_ARG_1_DI:
28163 case MULTI_ARG_1_SI:
28164 case MULTI_ARG_1_HI:
28165 case MULTI_ARG_1_QI:
28166 case MULTI_ARG_1_SI_DI:
28167 case MULTI_ARG_1_HI_DI:
28168 case MULTI_ARG_1_HI_SI:
28169 case MULTI_ARG_1_QI_DI:
28170 case MULTI_ARG_1_QI_SI:
28171 case MULTI_ARG_1_QI_HI:
28175 case MULTI_ARG_2_DI_CMP:
28176 case MULTI_ARG_2_SI_CMP:
28177 case MULTI_ARG_2_HI_CMP:
28178 case MULTI_ARG_2_QI_CMP:
28180 comparison_p = true;
28183 case MULTI_ARG_2_SF_TF:
28184 case MULTI_ARG_2_DF_TF:
28185 case MULTI_ARG_2_DI_TF:
28186 case MULTI_ARG_2_SI_TF:
28187 case MULTI_ARG_2_HI_TF:
28188 case MULTI_ARG_2_QI_TF:
28194 gcc_unreachable ();
28197 if (optimize || !target
28198 || GET_MODE (target) != tmode
28199 || !insn_data[icode].operand[0].predicate (target, tmode))
28200 target = gen_reg_rtx (tmode);
28202 gcc_assert (nargs <= 4);
28204 for (i = 0; i < nargs; i++)
28206 tree arg = CALL_EXPR_ARG (exp, i);
28207 rtx op = expand_normal (arg);
28208 int adjust = (comparison_p) ? 1 : 0;
28209 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
28211 if (last_arg_constant && i == nargs - 1)
28213 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
28215 enum insn_code new_icode = icode;
28218 case CODE_FOR_xop_vpermil2v2df3:
28219 case CODE_FOR_xop_vpermil2v4sf3:
28220 case CODE_FOR_xop_vpermil2v4df3:
28221 case CODE_FOR_xop_vpermil2v8sf3:
28222 error ("the last argument must be a 2-bit immediate");
28223 return gen_reg_rtx (tmode);
28224 case CODE_FOR_xop_rotlv2di3:
28225 new_icode = CODE_FOR_rotlv2di3;
28227 case CODE_FOR_xop_rotlv4si3:
28228 new_icode = CODE_FOR_rotlv4si3;
28230 case CODE_FOR_xop_rotlv8hi3:
28231 new_icode = CODE_FOR_rotlv8hi3;
28233 case CODE_FOR_xop_rotlv16qi3:
28234 new_icode = CODE_FOR_rotlv16qi3;
28236 if (CONST_INT_P (op))
28238 int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
28239 op = GEN_INT (INTVAL (op) & mask);
28240 gcc_checking_assert
28241 (insn_data[icode].operand[i + 1].predicate (op, mode));
28245 gcc_checking_assert
28247 && insn_data[new_icode].operand[0].mode == tmode
28248 && insn_data[new_icode].operand[1].mode == tmode
28249 && insn_data[new_icode].operand[2].mode == mode
28250 && insn_data[new_icode].operand[0].predicate
28251 == insn_data[icode].operand[0].predicate
28252 && insn_data[new_icode].operand[1].predicate
28253 == insn_data[icode].operand[1].predicate);
28259 gcc_unreachable ();
28266 if (VECTOR_MODE_P (mode))
28267 op = safe_vector_operand (op, mode);
28269 /* If we aren't optimizing, only allow one memory operand to be
28271 if (memory_operand (op, mode))
28274 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
28277 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
28279 op = force_reg (mode, op);
28283 args[i].mode = mode;
28289 pat = GEN_FCN (icode) (target, args[0].op);
28294 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
28295 GEN_INT ((int)sub_code));
28296 else if (! comparison_p)
28297 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
28300 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
28304 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
28309 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
28313 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
28317 gcc_unreachable ();
28327 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
28328 insns with vec_merge. */
/* Expands a one-argument scalar builtin whose pattern carries a
   vec_merge: op0 is both the source and (as op1, assigned in an elided
   line) the merge operand, each forced into a register when the
   pattern's predicate rejects it.  NOTE(review): the emit/return tail
   after GEN_FCN is elided from this extract.  */
28331 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
28335 tree arg0 = CALL_EXPR_ARG (exp, 0);
28336 rtx op1, op0 = expand_normal (arg0);
28337 enum machine_mode tmode = insn_data[icode].operand[0].mode;
28338 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
28340 if (optimize || !target
28341 || GET_MODE (target) != tmode
28342 || !insn_data[icode].operand[0].predicate (target, tmode))
28343 target = gen_reg_rtx (tmode);
28345 if (VECTOR_MODE_P (mode0))
28346 op0 = safe_vector_operand (op0, mode0);
28348 if ((optimize && !register_operand (op0, mode0))
28349 || !insn_data[icode].operand[1].predicate (op0, mode0))
28350 op0 = copy_to_mode_reg (mode0, op0);
28353 if (!insn_data[icode].operand[2].predicate (op1, mode0))
28354 op1 = copy_to_mode_reg (mode0, op1);
28356 pat = GEN_FCN (icode) (target, op0, op1);
28363 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
/* Expands a vector comparison builtin described by D: evaluates both
   operands, optionally swaps them when SWAP is set (the hardware only
   provides one operand order for some predicates), builds the
   comparison rtx with d->comparison, and emits the pattern.
   NOTE(review): the swap's completion and the emit/return tail are
   elided from this extract.  */
28366 ix86_expand_sse_compare (const struct builtin_description *d,
28367 tree exp, rtx target, bool swap)
28370 tree arg0 = CALL_EXPR_ARG (exp, 0);
28371 tree arg1 = CALL_EXPR_ARG (exp, 1);
28372 rtx op0 = expand_normal (arg0);
28373 rtx op1 = expand_normal (arg1);
28375 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
28376 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
28377 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
28378 enum rtx_code comparison = d->comparison;
28380 if (VECTOR_MODE_P (mode0))
28381 op0 = safe_vector_operand (op0, mode0);
28382 if (VECTOR_MODE_P (mode1))
28383 op1 = safe_vector_operand (op1, mode1);
28385 /* Swap operands if we have a comparison that isn't available in
28389 rtx tmp = gen_reg_rtx (mode1);
28390 emit_move_insn (tmp, op1);
28395 if (optimize || !target
28396 || GET_MODE (target) != tmode
28397 || !insn_data[d->icode].operand[0].predicate (target, tmode))
28398 target = gen_reg_rtx (tmode);
28400 if ((optimize && !register_operand (op0, mode0))
28401 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
28402 op0 = copy_to_mode_reg (mode0, op0);
28403 if ((optimize && !register_operand (op1, mode1))
28404 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
28405 op1 = copy_to_mode_reg (mode1, op1);
28407 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
28408 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
28415 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
/* Expands a comi/ucomi scalar-compare builtin: runs the compare insn,
   then materializes the boolean result by SETting the QImode low part
   of a zeroed SImode register from the flags comparison and returning
   that SImode register (SUBREG_REG of the QImode subreg).
   NOTE(review): the operand-swap body and parts of the final
   gen_rtx_fmt_ee call are elided from this extract.  */
28418 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
28422 tree arg0 = CALL_EXPR_ARG (exp, 0);
28423 tree arg1 = CALL_EXPR_ARG (exp, 1);
28424 rtx op0 = expand_normal (arg0);
28425 rtx op1 = expand_normal (arg1);
28426 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
28427 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
28428 enum rtx_code comparison = d->comparison;
28430 if (VECTOR_MODE_P (mode0))
28431 op0 = safe_vector_operand (op0, mode0);
28432 if (VECTOR_MODE_P (mode1))
28433 op1 = safe_vector_operand (op1, mode1);
28435 /* Swap operands if we have a comparison that isn't available in
28437 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
28444 target = gen_reg_rtx (SImode);
28445 emit_move_insn (target, const0_rtx);
28446 target = gen_rtx_SUBREG (QImode, target, 0);
28448 if ((optimize && !register_operand (op0, mode0))
28449 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
28450 op0 = copy_to_mode_reg (mode0, op0);
28451 if ((optimize && !register_operand (op1, mode1))
28452 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
28453 op1 = copy_to_mode_reg (mode1, op1);
28455 pat = GEN_FCN (d->icode) (op0, op1);
28459 emit_insn (gen_rtx_SET (VOIDmode,
28460 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
28461 gen_rtx_fmt_ee (comparison, QImode,
28465 return SUBREG_REG (target);
28468 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
/* Expands a rounding builtin: the rounding-mode immediate comes from
   d->comparison (reusing that descriptor field as an integer) and is
   passed as the pattern's second operand.  NOTE(review): the
   emit/return tail after GEN_FCN is elided from this extract.  */
28471 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
28475 tree arg0 = CALL_EXPR_ARG (exp, 0);
28476 rtx op1, op0 = expand_normal (arg0);
28477 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
28478 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
28480 if (optimize || target == 0
28481 || GET_MODE (target) != tmode
28482 || !insn_data[d->icode].operand[0].predicate (target, tmode))
28483 target = gen_reg_rtx (tmode);
28485 if (VECTOR_MODE_P (mode0))
28486 op0 = safe_vector_operand (op0, mode0);
28488 if ((optimize && !register_operand (op0, mode0))
28489 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
28490 op0 = copy_to_mode_reg (mode0, op0);
28492 op1 = GEN_INT (d->comparison);
28494 pat = GEN_FCN (d->icode) (target, op0, op1);
/* Two-input variant of ix86_expand_sse_round for round-and-pack
   patterns: both vector operands are zero-substituted and
   register-forced as needed, and the rounding immediate (again taken
   from d->comparison) is appended as the third operand.
   NOTE(review): the emit/return tail after GEN_FCN is elided from this
   extract.  */
28502 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
28503 tree exp, rtx target)
28506 tree arg0 = CALL_EXPR_ARG (exp, 0);
28507 tree arg1 = CALL_EXPR_ARG (exp, 1);
28508 rtx op0 = expand_normal (arg0);
28509 rtx op1 = expand_normal (arg1);
28511 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
28512 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
28513 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
28515 if (optimize || target == 0
28516 || GET_MODE (target) != tmode
28517 || !insn_data[d->icode].operand[0].predicate (target, tmode))
28518 target = gen_reg_rtx (tmode);
28520 op0 = safe_vector_operand (op0, mode0);
28521 op1 = safe_vector_operand (op1, mode1);
28523 if ((optimize && !register_operand (op0, mode0))
28524 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
28525 op0 = copy_to_mode_reg (mode0, op0);
28526 if ((optimize && !register_operand (op1, mode1))
28527 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
28528 op1 = copy_to_mode_reg (mode1, op1);
28530 op2 = GEN_INT (d->comparison);
28532 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
28539 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
/* Expand a PTEST-style builtin: emit the compare insn (which sets the
   flags only — note the generated pattern takes no destination), then
   materialize the requested flag bit as a 0/1 SImode value by storing
   the d->comparison condition into the low QImode part of a zeroed
   SImode pseudo.  Returns that SImode pseudo.  */
28542 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
28546 tree arg0 = CALL_EXPR_ARG (exp, 0);
28547 tree arg1 = CALL_EXPR_ARG (exp, 1);
28548 rtx op0 = expand_normal (arg0);
28549 rtx op1 = expand_normal (arg1);
/* ptest has no output operand, so operands 0 and 1 are the two inputs.  */
28550 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
28551 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
28552 enum rtx_code comparison = d->comparison;
28554 if (VECTOR_MODE_P (mode0))
28555 op0 = safe_vector_operand (op0, mode0);
28556 if (VECTOR_MODE_P (mode1))
28557 op1 = safe_vector_operand (op1, mode1);
/* Zero the full SImode result first so the STRICT_LOW_PART store below
   yields a clean zero-extended 0/1 value.  */
28559 target = gen_reg_rtx (SImode);
28560 emit_move_insn (target, const0_rtx);
28561 target = gen_rtx_SUBREG (QImode, target, 0);
28563 if ((optimize && !register_operand (op0, mode0))
28564 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
28565 op0 = copy_to_mode_reg (mode0, op0);
28566 if ((optimize && !register_operand (op1, mode1))
28567 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
28568 op1 = copy_to_mode_reg (mode1, op1);
28570 pat = GEN_FCN (d->icode) (op0, op1);
/* Set the low byte of TARGET from the comparison on the flags register
   (the flags operand of the comparison is elided in this extract).  */
28574 emit_insn (gen_rtx_SET (VOIDmode,
28575 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
28576 gen_rtx_fmt_ee (comparison, QImode,
/* Return the full SImode pseudo underlying the QImode subreg.  */
28580 return SUBREG_REG (target);
28583 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
/* Expand a pcmpestri/pcmpestrm builtin.  EXP has five arguments:
   two vectors with their explicit lengths (v1, len1, v2, len2) and an
   8-bit immediate control byte.  Depending on d->code the user wants
   the index result (PCMPESTRI), the mask result (PCMPESTRM), or one of
   the resulting flag bits (d->flag non-zero); unused insn outputs go
   into scratch pseudos.  */
28586 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
28587 tree exp, rtx target)
28590 tree arg0 = CALL_EXPR_ARG (exp, 0);
28591 tree arg1 = CALL_EXPR_ARG (exp, 1);
28592 tree arg2 = CALL_EXPR_ARG (exp, 2);
28593 tree arg3 = CALL_EXPR_ARG (exp, 3);
28594 tree arg4 = CALL_EXPR_ARG (exp, 4);
28595 rtx scratch0, scratch1;
28596 rtx op0 = expand_normal (arg0);
28597 rtx op1 = expand_normal (arg1);
28598 rtx op2 = expand_normal (arg2);
28599 rtx op3 = expand_normal (arg3);
28600 rtx op4 = expand_normal (arg4);
28601 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
/* Insn operands: 0 = index output, 1 = mask output, 2/4 = vector inputs,
   3/5 = integer length inputs, 6 = immediate control byte.  */
28603 tmode0 = insn_data[d->icode].operand[0].mode;
28604 tmode1 = insn_data[d->icode].operand[1].mode;
28605 modev2 = insn_data[d->icode].operand[2].mode;
28606 modei3 = insn_data[d->icode].operand[3].mode;
28607 modev4 = insn_data[d->icode].operand[4].mode;
28608 modei5 = insn_data[d->icode].operand[5].mode;
28609 modeimm = insn_data[d->icode].operand[6].mode;
28611 if (VECTOR_MODE_P (modev2))
28612 op0 = safe_vector_operand (op0, modev2);
28613 if (VECTOR_MODE_P (modev4))
28614 op2 = safe_vector_operand (op2, modev4);
/* Force each operand to satisfy its insn predicate.  */
28616 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
28617 op0 = copy_to_mode_reg (modev2, op0);
28618 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
28619 op1 = copy_to_mode_reg (modei3, op1);
28620 if ((optimize && !register_operand (op2, modev4))
28621 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
28622 op2 = copy_to_mode_reg (modev4, op2);
28623 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
28624 op3 = copy_to_mode_reg (modei5, op3);
/* The control byte must be a compile-time 8-bit immediate.  */
28626 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
28628 error ("the fifth argument must be an 8-bit immediate");
28632 if (d->code == IX86_BUILTIN_PCMPESTRI128)
/* Index variant: the caller wants insn output 0 (ECX index);
   the mask output goes into a scratch.  */
28634 if (optimize || !target
28635 || GET_MODE (target) != tmode0
28636 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
28637 target = gen_reg_rtx (tmode0);
28639 scratch1 = gen_reg_rtx (tmode1);
28641 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
28643 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
/* Mask variant: the caller wants insn output 1 (XMM0 mask);
   the index output goes into a scratch.  */
28645 if (optimize || !target
28646 || GET_MODE (target) != tmode1
28647 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
28648 target = gen_reg_rtx (tmode1);
28650 scratch0 = gen_reg_rtx (tmode0);
28652 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
/* Otherwise the caller wants a flag bit: d->flag must identify it and
   both insn outputs are discarded into scratches.  */
28656 gcc_assert (d->flag);
28658 scratch0 = gen_reg_rtx (tmode0);
28659 scratch1 = gen_reg_rtx (tmode1);
28661 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
/* Flag case: materialize the bit as 0/1 by storing an EQ test on the
   flags register (whose mode is encoded in d->flag) into the low byte
   of a zeroed SImode pseudo; the register number operand is elided in
   this extract — presumably FLAGS_REG, confirm against full source.  */
28671 target = gen_reg_rtx (SImode);
28672 emit_move_insn (target, const0_rtx);
28673 target = gen_rtx_SUBREG (QImode, target, 0);
28676 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
28677 gen_rtx_fmt_ee (EQ, QImode,
28678 gen_rtx_REG ((enum machine_mode) d->flag,
28681 return SUBREG_REG (target);
28688 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
/* Expand a pcmpistri/pcmpistrm builtin.  Like ix86_expand_sse_pcmpestr
   above, but for the implicit-length forms: three arguments only
   (two vectors and the 8-bit immediate control byte).  */
28691 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
28692 tree exp, rtx target)
28695 tree arg0 = CALL_EXPR_ARG (exp, 0);
28696 tree arg1 = CALL_EXPR_ARG (exp, 1);
28697 tree arg2 = CALL_EXPR_ARG (exp, 2);
28698 rtx scratch0, scratch1;
28699 rtx op0 = expand_normal (arg0);
28700 rtx op1 = expand_normal (arg1);
28701 rtx op2 = expand_normal (arg2);
28702 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
/* Insn operands: 0 = index output, 1 = mask output, 2/3 = vector
   inputs, 4 = immediate control byte.  */
28704 tmode0 = insn_data[d->icode].operand[0].mode;
28705 tmode1 = insn_data[d->icode].operand[1].mode;
28706 modev2 = insn_data[d->icode].operand[2].mode;
28707 modev3 = insn_data[d->icode].operand[3].mode;
28708 modeimm = insn_data[d->icode].operand[4].mode;
28710 if (VECTOR_MODE_P (modev2))
28711 op0 = safe_vector_operand (op0, modev2);
28712 if (VECTOR_MODE_P (modev3))
28713 op1 = safe_vector_operand (op1, modev3);
/* Force the vector inputs to satisfy their insn predicates.  */
28715 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
28716 op0 = copy_to_mode_reg (modev2, op0);
28717 if ((optimize && !register_operand (op1, modev3))
28718 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
28719 op1 = copy_to_mode_reg (modev3, op1);
/* The control byte must be a compile-time 8-bit immediate.  */
28721 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
28723 error ("the third argument must be an 8-bit immediate");
28727 if (d->code == IX86_BUILTIN_PCMPISTRI128)
/* Index variant: caller wants output 0; mask output is scratch.  */
28729 if (optimize || !target
28730 || GET_MODE (target) != tmode0
28731 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
28732 target = gen_reg_rtx (tmode0);
28734 scratch1 = gen_reg_rtx (tmode1);
28736 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
28738 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
/* Mask variant: caller wants output 1; index output is scratch.  */
28740 if (optimize || !target
28741 || GET_MODE (target) != tmode1
28742 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
28743 target = gen_reg_rtx (tmode1);
28745 scratch0 = gen_reg_rtx (tmode0);
28747 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
/* Flag variant: d->flag identifies the flag bit; discard both
   insn outputs.  */
28751 gcc_assert (d->flag);
28753 scratch0 = gen_reg_rtx (tmode0);
28754 scratch1 = gen_reg_rtx (tmode1);
28756 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
/* Materialize the flag bit as 0/1 exactly as in the pcmpestr case:
   EQ test on the flags register (mode encoded in d->flag) stored into
   the low byte of a zeroed SImode pseudo; register number operand is
   elided in this extract.  */
28766 target = gen_reg_rtx (SImode);
28767 emit_move_insn (target, const0_rtx);
28768 target = gen_rtx_SUBREG (QImode, target, 0);
28771 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
28772 gen_rtx_fmt_ee (EQ, QImode,
28773 gen_rtx_REG ((enum machine_mode) d->flag,
28776 return SUBREG_REG (target);
28782 /* Subroutine of ix86_expand_builtin to take care of insns with
28783 variable number of operands. */
/* Generic expander driven by the builtin's function type (d->flag):
   the big switch below classifies each ix86_builtin_func_type into
   (a) a delegation to a specialized expander, or (b) a setting of
   nargs / nargs_constant / rmode / last_arg_count / swap that the
   common argument loop then acts on.  Several lines (including the
   nargs assignments inside the switch and the `args' array
   declaration) are elided in this extract.  */
28786 ix86_expand_args_builtin (const struct builtin_description *d,
28787 tree exp, rtx target)
28789 rtx pat, real_target;
28790 unsigned int i, nargs;
28791 unsigned int nargs_constant = 0;
28792 int num_memory = 0;
28796 enum machine_mode mode;
28798 bool last_arg_count = false;
28799 enum insn_code icode = d->icode;
28800 const struct insn_data_d *insn_p = &insn_data[icode];
28801 enum machine_mode tmode = insn_p->operand[0].mode;
/* rmode, when set by a *_CONVERT case, is the mode the result is
   produced in before being subreg'd back to tmode.  */
28802 enum machine_mode rmode = VOIDmode;
28804 enum rtx_code comparison = d->comparison;
28806 switch ((enum ix86_builtin_func_type) d->flag)
/* Rounding insns: handled entirely by the dedicated expanders.  */
28808 case V2DF_FTYPE_V2DF_ROUND:
28809 case V4DF_FTYPE_V4DF_ROUND:
28810 case V4SF_FTYPE_V4SF_ROUND:
28811 case V8SF_FTYPE_V8SF_ROUND:
28812 case V4SI_FTYPE_V4SF_ROUND:
28813 case V8SI_FTYPE_V8SF_ROUND:
28814 return ix86_expand_sse_round (d, exp, target);
28815 case V4SI_FTYPE_V2DF_V2DF_ROUND:
28816 case V8SI_FTYPE_V4DF_V4DF_ROUND:
28817 return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
/* PTEST-style flag-producing insns.  */
28818 case INT_FTYPE_V8SF_V8SF_PTEST:
28819 case INT_FTYPE_V4DI_V4DI_PTEST:
28820 case INT_FTYPE_V4DF_V4DF_PTEST:
28821 case INT_FTYPE_V4SF_V4SF_PTEST:
28822 case INT_FTYPE_V2DI_V2DI_PTEST:
28823 case INT_FTYPE_V2DF_V2DF_PTEST:
28824 return ix86_expand_sse_ptest (d, exp, target);
/* Plain unary builtins (nargs assignment elided in this extract).  */
28825 case FLOAT128_FTYPE_FLOAT128:
28826 case FLOAT_FTYPE_FLOAT:
28827 case INT_FTYPE_INT:
28828 case UINT64_FTYPE_INT:
28829 case UINT16_FTYPE_UINT16:
28830 case INT64_FTYPE_INT64:
28831 case INT64_FTYPE_V4SF:
28832 case INT64_FTYPE_V2DF:
28833 case INT_FTYPE_V16QI:
28834 case INT_FTYPE_V8QI:
28835 case INT_FTYPE_V8SF:
28836 case INT_FTYPE_V4DF:
28837 case INT_FTYPE_V4SF:
28838 case INT_FTYPE_V2DF:
28839 case INT_FTYPE_V32QI:
28840 case V16QI_FTYPE_V16QI:
28841 case V8SI_FTYPE_V8SF:
28842 case V8SI_FTYPE_V4SI:
28843 case V8HI_FTYPE_V8HI:
28844 case V8HI_FTYPE_V16QI:
28845 case V8QI_FTYPE_V8QI:
28846 case V8SF_FTYPE_V8SF:
28847 case V8SF_FTYPE_V8SI:
28848 case V8SF_FTYPE_V4SF:
28849 case V8SF_FTYPE_V8HI:
28850 case V4SI_FTYPE_V4SI:
28851 case V4SI_FTYPE_V16QI:
28852 case V4SI_FTYPE_V4SF:
28853 case V4SI_FTYPE_V8SI:
28854 case V4SI_FTYPE_V8HI:
28855 case V4SI_FTYPE_V4DF:
28856 case V4SI_FTYPE_V2DF:
28857 case V4HI_FTYPE_V4HI:
28858 case V4DF_FTYPE_V4DF:
28859 case V4DF_FTYPE_V4SI:
28860 case V4DF_FTYPE_V4SF:
28861 case V4DF_FTYPE_V2DF:
28862 case V4SF_FTYPE_V4SF:
28863 case V4SF_FTYPE_V4SI:
28864 case V4SF_FTYPE_V8SF:
28865 case V4SF_FTYPE_V4DF:
28866 case V4SF_FTYPE_V8HI:
28867 case V4SF_FTYPE_V2DF:
28868 case V2DI_FTYPE_V2DI:
28869 case V2DI_FTYPE_V16QI:
28870 case V2DI_FTYPE_V8HI:
28871 case V2DI_FTYPE_V4SI:
28872 case V2DF_FTYPE_V2DF:
28873 case V2DF_FTYPE_V4SI:
28874 case V2DF_FTYPE_V4DF:
28875 case V2DF_FTYPE_V4SF:
28876 case V2DF_FTYPE_V2SI:
28877 case V2SI_FTYPE_V2SI:
28878 case V2SI_FTYPE_V4SF:
28879 case V2SI_FTYPE_V2SF:
28880 case V2SI_FTYPE_V2DF:
28881 case V2SF_FTYPE_V2SF:
28882 case V2SF_FTYPE_V2SI:
28883 case V32QI_FTYPE_V32QI:
28884 case V32QI_FTYPE_V16QI:
28885 case V16HI_FTYPE_V16HI:
28886 case V16HI_FTYPE_V8HI:
28887 case V8SI_FTYPE_V8SI:
28888 case V16HI_FTYPE_V16QI:
28889 case V8SI_FTYPE_V16QI:
28890 case V4DI_FTYPE_V16QI:
28891 case V8SI_FTYPE_V8HI:
28892 case V4DI_FTYPE_V8HI:
28893 case V4DI_FTYPE_V4SI:
28894 case V4DI_FTYPE_V2DI:
/* Unary ops whose result merges into the low element of the source.  */
28897 case V4SF_FTYPE_V4SF_VEC_MERGE:
28898 case V2DF_FTYPE_V2DF_VEC_MERGE:
28899 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
/* Plain binary builtins; a comparison in D routes to the SSE-compare
   expander after the switch instead.  */
28900 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
28901 case V16QI_FTYPE_V16QI_V16QI:
28902 case V16QI_FTYPE_V8HI_V8HI:
28903 case V8QI_FTYPE_V8QI_V8QI:
28904 case V8QI_FTYPE_V4HI_V4HI:
28905 case V8HI_FTYPE_V8HI_V8HI:
28906 case V8HI_FTYPE_V16QI_V16QI:
28907 case V8HI_FTYPE_V4SI_V4SI:
28908 case V8SF_FTYPE_V8SF_V8SF:
28909 case V8SF_FTYPE_V8SF_V8SI:
28910 case V4SI_FTYPE_V4SI_V4SI:
28911 case V4SI_FTYPE_V8HI_V8HI:
28912 case V4SI_FTYPE_V4SF_V4SF:
28913 case V4SI_FTYPE_V2DF_V2DF:
28914 case V4HI_FTYPE_V4HI_V4HI:
28915 case V4HI_FTYPE_V8QI_V8QI:
28916 case V4HI_FTYPE_V2SI_V2SI:
28917 case V4DF_FTYPE_V4DF_V4DF:
28918 case V4DF_FTYPE_V4DF_V4DI:
28919 case V4SF_FTYPE_V4SF_V4SF:
28920 case V4SF_FTYPE_V4SF_V4SI:
28921 case V4SF_FTYPE_V4SF_V2SI:
28922 case V4SF_FTYPE_V4SF_V2DF:
28923 case V4SF_FTYPE_V4SF_DI:
28924 case V4SF_FTYPE_V4SF_SI:
28925 case V2DI_FTYPE_V2DI_V2DI:
28926 case V2DI_FTYPE_V16QI_V16QI:
28927 case V2DI_FTYPE_V4SI_V4SI:
28928 case V2DI_FTYPE_V2DI_V16QI:
28929 case V2DI_FTYPE_V2DF_V2DF:
28930 case V2SI_FTYPE_V2SI_V2SI:
28931 case V2SI_FTYPE_V4HI_V4HI:
28932 case V2SI_FTYPE_V2SF_V2SF:
28933 case V2DF_FTYPE_V2DF_V2DF:
28934 case V2DF_FTYPE_V2DF_V4SF:
28935 case V2DF_FTYPE_V2DF_V2DI:
28936 case V2DF_FTYPE_V2DF_DI:
28937 case V2DF_FTYPE_V2DF_SI:
28938 case V2SF_FTYPE_V2SF_V2SF:
28939 case V1DI_FTYPE_V1DI_V1DI:
28940 case V1DI_FTYPE_V8QI_V8QI:
28941 case V1DI_FTYPE_V2SI_V2SI:
28942 case V32QI_FTYPE_V16HI_V16HI:
28943 case V16HI_FTYPE_V8SI_V8SI:
28944 case V32QI_FTYPE_V32QI_V32QI:
28945 case V16HI_FTYPE_V32QI_V32QI:
28946 case V16HI_FTYPE_V16HI_V16HI:
28947 case V8SI_FTYPE_V4DF_V4DF:
28948 case V8SI_FTYPE_V8SI_V8SI:
28949 case V8SI_FTYPE_V16HI_V16HI:
28950 case V4DI_FTYPE_V4DI_V4DI:
28951 case V4DI_FTYPE_V8SI_V8SI:
28952 if (comparison == UNKNOWN)
28953 return ix86_expand_binop_builtin (icode, exp, target);
/* Comparisons that need their operands swapped (swap assignment is
   elided in this extract).  */
28956 case V4SF_FTYPE_V4SF_V4SF_SWAP:
28957 case V2DF_FTYPE_V2DF_V2DF_SWAP:
28958 gcc_assert (comparison != UNKNOWN);
/* SIMD shifts: the last argument is a count, handled specially in the
   argument loop below.  */
28962 case V16HI_FTYPE_V16HI_V8HI_COUNT:
28963 case V16HI_FTYPE_V16HI_SI_COUNT:
28964 case V8SI_FTYPE_V8SI_V4SI_COUNT:
28965 case V8SI_FTYPE_V8SI_SI_COUNT:
28966 case V4DI_FTYPE_V4DI_V2DI_COUNT:
28967 case V4DI_FTYPE_V4DI_INT_COUNT:
28968 case V8HI_FTYPE_V8HI_V8HI_COUNT:
28969 case V8HI_FTYPE_V8HI_SI_COUNT:
28970 case V4SI_FTYPE_V4SI_V4SI_COUNT:
28971 case V4SI_FTYPE_V4SI_SI_COUNT:
28972 case V4HI_FTYPE_V4HI_V4HI_COUNT:
28973 case V4HI_FTYPE_V4HI_SI_COUNT:
28974 case V2DI_FTYPE_V2DI_V2DI_COUNT:
28975 case V2DI_FTYPE_V2DI_SI_COUNT:
28976 case V2SI_FTYPE_V2SI_V2SI_COUNT:
28977 case V2SI_FTYPE_V2SI_SI_COUNT:
28978 case V1DI_FTYPE_V1DI_V1DI_COUNT:
28979 case V1DI_FTYPE_V1DI_SI_COUNT:
28981 last_arg_count = true;
28983 case UINT64_FTYPE_UINT64_UINT64:
28984 case UINT_FTYPE_UINT_UINT:
28985 case UINT_FTYPE_UINT_USHORT:
28986 case UINT_FTYPE_UINT_UCHAR:
28987 case UINT16_FTYPE_UINT16_INT:
28988 case UINT8_FTYPE_UINT8_INT:
/* *_CONVERT cases produce the result in a different mode (rmode
   assignment elided here) and subreg it back to tmode below.  */
28991 case V2DI_FTYPE_V2DI_INT_CONVERT:
28994 nargs_constant = 1;
28996 case V4DI_FTYPE_V4DI_INT_CONVERT:
28999 nargs_constant = 1;
/* One trailing immediate argument.  */
29001 case V8HI_FTYPE_V8HI_INT:
29002 case V8HI_FTYPE_V8SF_INT:
29003 case V8HI_FTYPE_V4SF_INT:
29004 case V8SF_FTYPE_V8SF_INT:
29005 case V4SI_FTYPE_V4SI_INT:
29006 case V4SI_FTYPE_V8SI_INT:
29007 case V4HI_FTYPE_V4HI_INT:
29008 case V4DF_FTYPE_V4DF_INT:
29009 case V4SF_FTYPE_V4SF_INT:
29010 case V4SF_FTYPE_V8SF_INT:
29011 case V2DI_FTYPE_V2DI_INT:
29012 case V2DF_FTYPE_V2DF_INT:
29013 case V2DF_FTYPE_V4DF_INT:
29014 case V16HI_FTYPE_V16HI_INT:
29015 case V8SI_FTYPE_V8SI_INT:
29016 case V4DI_FTYPE_V4DI_INT:
29017 case V2DI_FTYPE_V4DI_INT:
29019 nargs_constant = 1;
/* Ternary builtins.  */
29021 case V16QI_FTYPE_V16QI_V16QI_V16QI:
29022 case V8SF_FTYPE_V8SF_V8SF_V8SF:
29023 case V4DF_FTYPE_V4DF_V4DF_V4DF:
29024 case V4SF_FTYPE_V4SF_V4SF_V4SF:
29025 case V2DF_FTYPE_V2DF_V2DF_V2DF:
29026 case V32QI_FTYPE_V32QI_V32QI_V32QI:
/* Two operands plus a trailing immediate.  */
29029 case V32QI_FTYPE_V32QI_V32QI_INT:
29030 case V16HI_FTYPE_V16HI_V16HI_INT:
29031 case V16QI_FTYPE_V16QI_V16QI_INT:
29032 case V4DI_FTYPE_V4DI_V4DI_INT:
29033 case V8HI_FTYPE_V8HI_V8HI_INT:
29034 case V8SI_FTYPE_V8SI_V8SI_INT:
29035 case V8SI_FTYPE_V8SI_V4SI_INT:
29036 case V8SF_FTYPE_V8SF_V8SF_INT:
29037 case V8SF_FTYPE_V8SF_V4SF_INT:
29038 case V4SI_FTYPE_V4SI_V4SI_INT:
29039 case V4DF_FTYPE_V4DF_V4DF_INT:
29040 case V4DF_FTYPE_V4DF_V2DF_INT:
29041 case V4SF_FTYPE_V4SF_V4SF_INT:
29042 case V2DI_FTYPE_V2DI_V2DI_INT:
29043 case V4DI_FTYPE_V4DI_V2DI_INT:
29044 case V2DF_FTYPE_V2DF_V2DF_INT:
29046 nargs_constant = 1;
29048 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
29051 nargs_constant = 1;
29053 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
29056 nargs_constant = 1;
29058 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
29061 nargs_constant = 1;
/* Two trailing immediates.  */
29063 case V2DI_FTYPE_V2DI_UINT_UINT:
29065 nargs_constant = 2;
/* Four operands, last one an immediate (e.g. AVX blendv/cmp forms).  */
29067 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
29068 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
29069 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
29070 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
29072 nargs_constant = 1;
29074 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
29076 nargs_constant = 2;
29079 gcc_unreachable ();
29082 gcc_assert (nargs <= ARRAY_SIZE (args));
/* Comparison builtins go through the common SSE-compare path.  */
29084 if (comparison != UNKNOWN)
29086 gcc_assert (nargs == 2);
29087 return ix86_expand_sse_compare (d, exp, target, swap);
/* Set up TARGET; when rmode differs from tmode, generate into an
   rmode pseudo and expose it to the caller as a tmode subreg.  */
29090 if (rmode == VOIDmode || rmode == tmode)
29094 || GET_MODE (target) != tmode
29095 || !insn_p->operand[0].predicate (target, tmode))
29096 target = gen_reg_rtx (tmode);
29097 real_target = target;
29101 target = gen_reg_rtx (rmode)
29102 real_target = simplify_gen_subreg (tmode, target, rmode, 0);
/* Expand and legitimize each call argument against insn operand i+1.  */
29105 for (i = 0; i < nargs; i++)
29107 tree arg = CALL_EXPR_ARG (exp, i);
29108 rtx op = expand_normal (arg);
29109 enum machine_mode mode = insn_p->operand[i + 1].mode;
29110 bool match = insn_p->operand[i + 1].predicate (op, mode);
29112 if (last_arg_count && (i + 1) == nargs)
29114 /* SIMD shift insns take either an 8-bit immediate or
29115 register as count. But builtin functions take int as
29116 count. If count doesn't match, we put it in register. */
29119 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
29120 if (!insn_p->operand[i + 1].predicate (op, mode))
29121 op = copy_to_reg (op);
/* Trailing arguments that must be immediates: diagnose per-insn
   immediate-width requirements.  */
29124 else if ((nargs - i) <= nargs_constant)
29129 case CODE_FOR_avx2_inserti128:
29130 case CODE_FOR_avx2_extracti128:
/* NOTE(review): "an 1-bit" is ungrammatical and inconsistent with the
   "a 1-bit immediate" wording used for the vextractf128 cases below.  */
29131 error ("the last argument must be an 1-bit immediate");
29134 case CODE_FOR_sse4_1_roundsd:
29135 case CODE_FOR_sse4_1_roundss:
29137 case CODE_FOR_sse4_1_roundpd:
29138 case CODE_FOR_sse4_1_roundps:
29139 case CODE_FOR_avx_roundpd256:
29140 case CODE_FOR_avx_roundps256:
29142 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
29143 case CODE_FOR_sse4_1_roundps_sfix:
29144 case CODE_FOR_avx_roundpd_vec_pack_sfix256:
29145 case CODE_FOR_avx_roundps_sfix256:
29147 case CODE_FOR_sse4_1_blendps:
29148 case CODE_FOR_avx_blendpd256:
29149 case CODE_FOR_avx_vpermilv4df:
29150 error ("the last argument must be a 4-bit immediate")
29153 case CODE_FOR_sse4_1_blendpd:
29154 case CODE_FOR_avx_vpermilv2df:
29155 case CODE_FOR_xop_vpermil2v2df3:
29156 case CODE_FOR_xop_vpermil2v4sf3:
29157 case CODE_FOR_xop_vpermil2v4df3:
29158 case CODE_FOR_xop_vpermil2v8sf3:
29159 error ("the last argument must be a 2-bit immediate");
29162 case CODE_FOR_avx_vextractf128v4df:
29163 case CODE_FOR_avx_vextractf128v8sf:
29164 case CODE_FOR_avx_vextractf128v8si:
29165 case CODE_FOR_avx_vinsertf128v4df:
29166 case CODE_FOR_avx_vinsertf128v8sf:
29167 case CODE_FOR_avx_vinsertf128v8si:
29168 error ("the last argument must be a 1-bit immediate");
29171 case CODE_FOR_avx_vmcmpv2df3:
29172 case CODE_FOR_avx_vmcmpv4sf3:
29173 case CODE_FOR_avx_cmpv2df3:
29174 case CODE_FOR_avx_cmpv4sf3:
29175 case CODE_FOR_avx_cmpv4df3:
29176 case CODE_FOR_avx_cmpv8sf3:
29177 error ("the last argument must be a 5-bit immediate");
/* Default: generic 8-bit immediate requirement, worded by position.  */
29181 switch (nargs_constant)
29184 if ((nargs - i) == nargs_constant)
29186 error ("the next to last argument must be an 8-bit immediate");
29190 error ("the last argument must be an 8-bit immediate");
29193 gcc_unreachable ();
29200 if (VECTOR_MODE_P (mode))
29201 op = safe_vector_operand (op, mode);
29203 /* If we aren't optimizing, only allow one memory operand to
29205 if (memory_operand (op, mode))
29208 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
29210 if (optimize || !match || num_memory > 1)
29211 op = copy_to_mode_reg (mode, op);
/* Mode mismatch: force into a register, then view it in MODE.  */
29215 op = copy_to_reg (op);
29216 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
29221 args[i].mode = mode;
/* Emit the insn with the right arity (switch on nargs; 1-4 args).  */
29227 pat = GEN_FCN (icode) (real_target, args[0].op);
29230 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
29233 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
29237 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
29238 args[2].op, args[3].op);
29241 gcc_unreachable ();
29251 /* Subroutine of ix86_expand_builtin to take care of special insns
29252 with variable number of operands. */
/* Expander for builtins that involve memory operands or other special
   handling (loads, stores, gathers/masked forms, crc/rdrand-style ops).
   The switch classifies d->flag into load vs. store, the index of the
   memory operand, and whether the last argument must be a constant.
   Several lines (nargs/klass assignments, `args' declaration, epilogue)
   are elided in this extract.  */
29255 ix86_expand_special_args_builtin (const struct builtin_description *d,
29256 tree exp, rtx target)
29260 unsigned int i, nargs, arg_adjust, memory;
29264 enum machine_mode mode;
29266 enum insn_code icode = d->icode;
29267 bool last_arg_constant = false;
29268 const struct insn_data_d *insn_p = &insn_data[icode];
29269 enum machine_mode tmode = insn_p->operand[0].mode;
/* load: result comes from memory / no memory destination.
   store: the first call argument is the destination address.  */
29270 enum { load, store } klass;
29272 switch ((enum ix86_builtin_func_type) d->flag)
29274 case VOID_FTYPE_VOID:
/* vzeroupper gets an immediate operand distinguishing the intrinsic
   use from compiler-inserted ones.  */
29275 if (icode == CODE_FOR_avx_vzeroupper)
29276 target = GEN_INT (vzeroupper_intrinsic);
29277 emit_insn (GEN_FCN (icode) (target));
29279 case VOID_FTYPE_UINT64:
29280 case VOID_FTYPE_UNSIGNED:
29285 case UINT64_FTYPE_VOID:
29286 case UNSIGNED_FTYPE_VOID:
/* Unary loads through a pointer argument.  */
29291 case UINT64_FTYPE_PUNSIGNED:
29292 case V2DI_FTYPE_PV2DI:
29293 case V4DI_FTYPE_PV4DI:
29294 case V32QI_FTYPE_PCCHAR:
29295 case V16QI_FTYPE_PCCHAR:
29296 case V8SF_FTYPE_PCV4SF:
29297 case V8SF_FTYPE_PCFLOAT:
29298 case V4SF_FTYPE_PCFLOAT:
29299 case V4DF_FTYPE_PCV2DF:
29300 case V4DF_FTYPE_PCDOUBLE:
29301 case V2DF_FTYPE_PCDOUBLE:
29302 case VOID_FTYPE_PVOID:
/* Stores: pointer destination first, value second.  */
29307 case VOID_FTYPE_PV2SF_V4SF:
29308 case VOID_FTYPE_PV4DI_V4DI:
29309 case VOID_FTYPE_PV2DI_V2DI:
29310 case VOID_FTYPE_PCHAR_V32QI:
29311 case VOID_FTYPE_PCHAR_V16QI:
29312 case VOID_FTYPE_PFLOAT_V8SF:
29313 case VOID_FTYPE_PFLOAT_V4SF:
29314 case VOID_FTYPE_PDOUBLE_V4DF:
29315 case VOID_FTYPE_PDOUBLE_V2DF:
29316 case VOID_FTYPE_PULONGLONG_ULONGLONG:
29317 case VOID_FTYPE_PINT_INT:
29320 /* Reserve memory operand for target. */
29321 memory = ARRAY_SIZE (args);
/* Masked loads merging into an existing vector.  */
29323 case V4SF_FTYPE_V4SF_PCV2SF:
29324 case V2DF_FTYPE_V2DF_PCDOUBLE:
/* Mask-load forms: pointer plus mask vector.  */
29329 case V8SF_FTYPE_PCV8SF_V8SI:
29330 case V4DF_FTYPE_PCV4DF_V4DI:
29331 case V4SF_FTYPE_PCV4SF_V4SI:
29332 case V2DF_FTYPE_PCV2DF_V2DI:
29333 case V8SI_FTYPE_PCV8SI_V8SI:
29334 case V4DI_FTYPE_PCV4DI_V4DI:
29335 case V4SI_FTYPE_PCV4SI_V4SI:
29336 case V2DI_FTYPE_PCV2DI_V2DI:
/* Mask-store forms: pointer, mask, value.  */
29341 case VOID_FTYPE_PV8SF_V8SI_V8SF:
29342 case VOID_FTYPE_PV4DF_V4DI_V4DF:
29343 case VOID_FTYPE_PV4SF_V4SI_V4SF:
29344 case VOID_FTYPE_PV2DF_V2DI_V2DF:
29345 case VOID_FTYPE_PV8SI_V8SI_V8SI:
29346 case VOID_FTYPE_PV4DI_V4DI_V4DI:
29347 case VOID_FTYPE_PV4SI_V4SI_V4SI:
29348 case VOID_FTYPE_PV2DI_V2DI_V2DI:
29351 /* Reserve memory operand for target. */
29352 memory = ARRAY_SIZE (args);
/* lwp-style forms with a trailing constant argument.  */
29354 case VOID_FTYPE_UINT_UINT_UINT:
29355 case VOID_FTYPE_UINT64_UINT_UINT:
29356 case UCHAR_FTYPE_UINT_UINT_UINT:
29357 case UCHAR_FTYPE_UINT64_UINT_UINT:
29360 memory = ARRAY_SIZE (args);
29361 last_arg_constant = true;
29364 gcc_unreachable ();
29367 gcc_assert (nargs <= ARRAY_SIZE (args));
29369 if (klass == store)
/* For stores, argument 0 is the destination; it becomes TARGET
   (a MEM through the pointer, or a register for the scalar case).  */
29371 arg = CALL_EXPR_ARG (exp, 0);
29372 op = expand_normal (arg);
29373 gcc_assert (target == 0);
29376 if (GET_MODE (op) != Pmode)
29377 op = convert_to_mode (Pmode, op, 1);
29378 target = gen_rtx_MEM (tmode, force_reg (Pmode, op));
29381 target = force_reg (tmode, op);
/* Loads: reuse TARGET when it fits, else grab a fresh pseudo.  */
29389 || GET_MODE (target) != tmode
29390 || !insn_p->operand[0].predicate (target, tmode))
29391 target = gen_reg_rtx (tmode);
/* Legitimize each remaining argument; ARG_ADJUST skips the store
   destination already consumed above.  */
29394 for (i = 0; i < nargs; i++)
29396 enum machine_mode mode = insn_p->operand[i + 1].mode;
29399 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
29400 op = expand_normal (arg);
29401 match = insn_p->operand[i + 1].predicate (op, mode);
29403 if (last_arg_constant && (i + 1) == nargs)
/* Constant argument that failed the predicate: diagnose with the
   width the insn requires.  */
29407 if (icode == CODE_FOR_lwp_lwpvalsi3
29408 || icode == CODE_FOR_lwp_lwpinssi3
29409 || icode == CODE_FOR_lwp_lwpvaldi3
29410 || icode == CODE_FOR_lwp_lwpinsdi3)
29411 error ("the last argument must be a 32-bit immediate");
29413 error ("the last argument must be an 8-bit immediate");
29421 /* This must be the memory operand. */
29422 if (GET_MODE (op) != Pmode)
29423 op = convert_to_mode (Pmode, op, 1);
29424 op = gen_rtx_MEM (mode, force_reg (Pmode, op));
29425 gcc_assert (GET_MODE (op) == mode
29426 || GET_MODE (op) == VOIDmode);
29430 /* This must be register. */
29431 if (VECTOR_MODE_P (mode))
29432 op = safe_vector_operand (op, mode);
29434 gcc_assert (GET_MODE (op) == mode
29435 || GET_MODE (op) == VOIDmode);
29436 op = copy_to_mode_reg (mode, op);
29441 args[i].mode = mode;
/* Emit with the right arity (0-3 source operands).  */
29447 pat = GEN_FCN (icode) (target);
29450 pat = GEN_FCN (icode) (target, args[0].op);
29453 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
29456 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
29459 gcc_unreachable ();
/* Stores produce no value.  */
29465 return klass == store ? 0 : target;
29468 /* Return the integer constant in ARG. Constrain it to be in the range
29469 of the subparts of VEC_TYPE; issue an error if not. */
/* Helper for the vec_ext/vec_set builtins below.  MAX is the highest
   valid lane index; the comma expression both extracts ELT and range-
   checks it.  (The error-path return is elided in this extract.)  */
29472 get_element_number (tree vec_type, tree arg)
29474 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
29476 if (!host_integerp (arg, 1)
29477 || (elt = tree_low_cst (arg, 1), elt > max))
29479 error ("selector must be an integer constant in the range 0..%wi", max);
29486 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
29487 ix86_expand_vector_init. We DO have language-level syntax for this, in
29488 the form of (type){ init-list }. Except that since we can't place emms
29489 instructions from inside the compiler, we can't allow the use of MMX
29490 registers unless the user explicitly asks for it. So we do *not* define
29491 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
29492 we have builtins invoked by mmintrin.h that gives us license to emit
29493 these sorts of instructions. */
29496 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
29498 enum machine_mode tmode = TYPE_MODE (type);
29499 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
29500 int i, n_elt = GET_MODE_NUNITS (tmode);
29501 rtvec v = rtvec_alloc (n_elt);
/* The builtin must supply exactly one scalar argument per lane.  */
29503 gcc_assert (VECTOR_MODE_P (tmode));
29504 gcc_assert (call_expr_nargs (exp) == n_elt);
/* Expand each element and view it in the vector's element mode.  */
29506 for (i = 0; i < n_elt; ++i)
29508 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
29509 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
29512 if (!target || !register_operand (target, tmode))
29513 target = gen_reg_rtx (tmode);
29515 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
29519 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
29520 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
29521 had a language-level syntax for referencing vector elements. */
29524 ix86_expand_vec_ext_builtin (tree exp, rtx target)
29526 enum machine_mode tmode, mode0;
29531 arg0 = CALL_EXPR_ARG (exp, 0);
29532 arg1 = CALL_EXPR_ARG (exp, 1);
29534 op0 = expand_normal (arg0);
/* ARG1 must be a constant lane selector; get_element_number
   range-checks it against the vector type.  */
29535 elt = get_element_number (TREE_TYPE (arg0), arg1);
/* tmode = element mode of the result, mode0 = the vector's mode.  */
29537 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
29538 mode0 = TYPE_MODE (TREE_TYPE (arg0));
29539 gcc_assert (VECTOR_MODE_P (mode0));
29541 op0 = force_reg (mode0, op0);
29543 if (optimize || !target || !register_operand (target, tmode))
29544 target = gen_reg_rtx (tmode);
29546 ix86_expand_vector_extract (true, target, op0, elt);
29551 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
29552 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
29553 a language-level syntax for referencing vector elements. */
29556 ix86_expand_vec_set_builtin (tree exp)
29558 enum machine_mode tmode, mode1;
29559 tree arg0, arg1, arg2;
29561 rtx op0, op1, target;
29563 arg0 = CALL_EXPR_ARG (exp, 0);
29564 arg1 = CALL_EXPR_ARG (exp, 1);
29565 arg2 = CALL_EXPR_ARG (exp, 2);
/* tmode = vector mode, mode1 = element mode.  */
29567 tmode = TYPE_MODE (TREE_TYPE (arg0));
29568 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
29569 gcc_assert (VECTOR_MODE_P (tmode));
29571 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
29572 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
/* ARG2 must be a constant lane selector, range-checked by
   get_element_number.  */
29573 elt = get_element_number (TREE_TYPE (arg0), arg2);
29575 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
29576 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
29578 op0 = force_reg (tmode, op0);
29579 op1 = force_reg (mode1, op1);
29581 /* OP0 is the source of these builtin functions and shouldn't be
29582 modified. Create a copy, use it and return it as target. */
29583 target = gen_reg_rtx (tmode);
29584 emit_move_insn (target, op0);
29585 ix86_expand_vector_set (true, target, op1, elt);
29590 /* Expand an expression EXP that calls a built-in function,
29591 with result going to TARGET if that's convenient
29592 (and in mode MODE if that's convenient).
29593 SUBTARGET may be used as the target for computing one of EXP's operands.
29594 IGNORE is nonzero if the value is to be ignored. */
29597 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
29598 enum machine_mode mode ATTRIBUTE_UNUSED,
29599 int ignore ATTRIBUTE_UNUSED)
29601 const struct builtin_description *d;
29603 enum insn_code icode;
29604 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
29605 tree arg0, arg1, arg2, arg3, arg4;
29606 rtx op0, op1, op2, op3, op4, pat;
29607 enum machine_mode mode0, mode1, mode2, mode3, mode4;
29608 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
29610 /* Determine whether the builtin function is available under the current ISA.
29611 Originally the builtin was not created if it wasn't applicable to the
29612 current ISA based on the command line switches. With function specific
29613 options, we need to check in the context of the function making the call
29614 whether it is supported. */
29615 if (ix86_builtins_isa[fcode].isa
29616 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
29618 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
29619 NULL, (enum fpmath_unit) 0, false);
29622 error ("%qE needs unknown isa option", fndecl);
29625 gcc_assert (opts != NULL);
29626 error ("%qE needs isa option %s", fndecl, opts);
29634 case IX86_BUILTIN_MASKMOVQ:
29635 case IX86_BUILTIN_MASKMOVDQU:
29636 icode = (fcode == IX86_BUILTIN_MASKMOVQ
29637 ? CODE_FOR_mmx_maskmovq
29638 : CODE_FOR_sse2_maskmovdqu);
29639 /* Note the arg order is different from the operand order. */
29640 arg1 = CALL_EXPR_ARG (exp, 0);
29641 arg2 = CALL_EXPR_ARG (exp, 1);
29642 arg0 = CALL_EXPR_ARG (exp, 2);
29643 op0 = expand_normal (arg0);
29644 op1 = expand_normal (arg1);
29645 op2 = expand_normal (arg2);
29646 mode0 = insn_data[icode].operand[0].mode;
29647 mode1 = insn_data[icode].operand[1].mode;
29648 mode2 = insn_data[icode].operand[2].mode;
29650 if (GET_MODE (op0) != Pmode)
29651 op0 = convert_to_mode (Pmode, op0, 1);
29652 op0 = gen_rtx_MEM (mode1, force_reg (Pmode, op0));
29654 if (!insn_data[icode].operand[0].predicate (op0, mode0))
29655 op0 = copy_to_mode_reg (mode0, op0);
29656 if (!insn_data[icode].operand[1].predicate (op1, mode1))
29657 op1 = copy_to_mode_reg (mode1, op1);
29658 if (!insn_data[icode].operand[2].predicate (op2, mode2))
29659 op2 = copy_to_mode_reg (mode2, op2);
29660 pat = GEN_FCN (icode) (op0, op1, op2);
29666 case IX86_BUILTIN_LDMXCSR:
29667 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
29668 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
29669 emit_move_insn (target, op0);
29670 emit_insn (gen_sse_ldmxcsr (target));
29673 case IX86_BUILTIN_STMXCSR:
29674 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
29675 emit_insn (gen_sse_stmxcsr (target));
29676 return copy_to_mode_reg (SImode, target);
29678 case IX86_BUILTIN_CLFLUSH:
29679 arg0 = CALL_EXPR_ARG (exp, 0);
29680 op0 = expand_normal (arg0);
29681 icode = CODE_FOR_sse2_clflush;
29682 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
29684 if (GET_MODE (op0) != Pmode)
29685 op0 = convert_to_mode (Pmode, op0, 1);
29686 op0 = force_reg (Pmode, op0);
29689 emit_insn (gen_sse2_clflush (op0));
29692 case IX86_BUILTIN_MONITOR:
29693 arg0 = CALL_EXPR_ARG (exp, 0);
29694 arg1 = CALL_EXPR_ARG (exp, 1);
29695 arg2 = CALL_EXPR_ARG (exp, 2);
29696 op0 = expand_normal (arg0);
29697 op1 = expand_normal (arg1);
29698 op2 = expand_normal (arg2);
29701 if (GET_MODE (op0) != Pmode)
29702 op0 = convert_to_mode (Pmode, op0, 1);
29703 op0 = force_reg (Pmode, op0);
29706 op1 = copy_to_mode_reg (SImode, op1);
29708 op2 = copy_to_mode_reg (SImode, op2);
29709 emit_insn (ix86_gen_monitor (op0, op1, op2));
29712 case IX86_BUILTIN_MWAIT:
29713 arg0 = CALL_EXPR_ARG (exp, 0);
29714 arg1 = CALL_EXPR_ARG (exp, 1);
29715 op0 = expand_normal (arg0);
29716 op1 = expand_normal (arg1);
29718 op0 = copy_to_mode_reg (SImode, op0);
29720 op1 = copy_to_mode_reg (SImode, op1);
29721 emit_insn (gen_sse3_mwait (op0, op1));
29724 case IX86_BUILTIN_VEC_INIT_V2SI:
29725 case IX86_BUILTIN_VEC_INIT_V4HI:
29726 case IX86_BUILTIN_VEC_INIT_V8QI:
29727 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
29729 case IX86_BUILTIN_VEC_EXT_V2DF:
29730 case IX86_BUILTIN_VEC_EXT_V2DI:
29731 case IX86_BUILTIN_VEC_EXT_V4SF:
29732 case IX86_BUILTIN_VEC_EXT_V4SI:
29733 case IX86_BUILTIN_VEC_EXT_V8HI:
29734 case IX86_BUILTIN_VEC_EXT_V2SI:
29735 case IX86_BUILTIN_VEC_EXT_V4HI:
29736 case IX86_BUILTIN_VEC_EXT_V16QI:
29737 return ix86_expand_vec_ext_builtin (exp, target);
29739 case IX86_BUILTIN_VEC_SET_V2DI:
29740 case IX86_BUILTIN_VEC_SET_V4SF:
29741 case IX86_BUILTIN_VEC_SET_V4SI:
29742 case IX86_BUILTIN_VEC_SET_V8HI:
29743 case IX86_BUILTIN_VEC_SET_V4HI:
29744 case IX86_BUILTIN_VEC_SET_V16QI:
29745 return ix86_expand_vec_set_builtin (exp);
29747 case IX86_BUILTIN_INFQ:
29748 case IX86_BUILTIN_HUGE_VALQ:
29750 REAL_VALUE_TYPE inf;
29754 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
29756 tmp = validize_mem (force_const_mem (mode, tmp));
29759 target = gen_reg_rtx (mode);
29761 emit_move_insn (target, tmp);
29765 case IX86_BUILTIN_LLWPCB:
29766 arg0 = CALL_EXPR_ARG (exp, 0);
29767 op0 = expand_normal (arg0);
29768 icode = CODE_FOR_lwp_llwpcb;
29769 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
29771 if (GET_MODE (op0) != Pmode)
29772 op0 = convert_to_mode (Pmode, op0, 1);
29773 op0 = force_reg (Pmode, op0);
29775 emit_insn (gen_lwp_llwpcb (op0));
29778 case IX86_BUILTIN_SLWPCB:
29779 icode = CODE_FOR_lwp_slwpcb;
29781 || !insn_data[icode].operand[0].predicate (target, Pmode))
29782 target = gen_reg_rtx (Pmode);
29783 emit_insn (gen_lwp_slwpcb (target));
29786 case IX86_BUILTIN_BEXTRI32:
29787 case IX86_BUILTIN_BEXTRI64:
29788 arg0 = CALL_EXPR_ARG (exp, 0);
29789 arg1 = CALL_EXPR_ARG (exp, 1);
29790 op0 = expand_normal (arg0);
29791 op1 = expand_normal (arg1);
29792 icode = (fcode == IX86_BUILTIN_BEXTRI32
29793 ? CODE_FOR_tbm_bextri_si
29794 : CODE_FOR_tbm_bextri_di);
29795 if (!CONST_INT_P (op1))
29797 error ("last argument must be an immediate");
29802 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
29803 unsigned char lsb_index = INTVAL (op1) & 0xFF;
29804 op1 = GEN_INT (length);
29805 op2 = GEN_INT (lsb_index);
29806 pat = GEN_FCN (icode) (target, op0, op1, op2);
29812 case IX86_BUILTIN_RDRAND16_STEP:
29813 icode = CODE_FOR_rdrandhi_1;
29817 case IX86_BUILTIN_RDRAND32_STEP:
29818 icode = CODE_FOR_rdrandsi_1;
29822 case IX86_BUILTIN_RDRAND64_STEP:
29823 icode = CODE_FOR_rdranddi_1;
29827 op0 = gen_reg_rtx (mode0);
29828 emit_insn (GEN_FCN (icode) (op0));
29830 arg0 = CALL_EXPR_ARG (exp, 0);
29831 op1 = expand_normal (arg0);
29832 if (!address_operand (op1, VOIDmode))
29834 op1 = convert_memory_address (Pmode, op1);
29835 op1 = copy_addr_to_reg (op1);
29837 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
29839 op1 = gen_reg_rtx (SImode);
29840 emit_move_insn (op1, CONST1_RTX (SImode));
29842 /* Emit SImode conditional move. */
29843 if (mode0 == HImode)
29845 op2 = gen_reg_rtx (SImode);
29846 emit_insn (gen_zero_extendhisi2 (op2, op0));
29848 else if (mode0 == SImode)
29851 op2 = gen_rtx_SUBREG (SImode, op0, 0);
29854 target = gen_reg_rtx (SImode);
29856 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
29858 emit_insn (gen_rtx_SET (VOIDmode, target,
29859 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
/* Map each AVX2 gather builtin to the insn code whose operand modes the
   expansion code below reads (mode0 = operand[1], mode2 = operand[3], ...).
   The GATHERALT* variants deliberately reuse a related gather insn and fix
   up the index operand afterwards (vec_extract_lo of the V8SI/V8SF index).  */
29862 case IX86_BUILTIN_GATHERSIV2DF:
29863 icode = CODE_FOR_avx2_gathersiv2df;
29865 case IX86_BUILTIN_GATHERSIV4DF:
29866 icode = CODE_FOR_avx2_gathersiv4df;
29868 case IX86_BUILTIN_GATHERDIV2DF:
29869 icode = CODE_FOR_avx2_gatherdiv2df;
29871 case IX86_BUILTIN_GATHERDIV4DF:
29872 icode = CODE_FOR_avx2_gatherdiv4df;
29874 case IX86_BUILTIN_GATHERSIV4SF:
29875 icode = CODE_FOR_avx2_gathersiv4sf;
29877 case IX86_BUILTIN_GATHERSIV8SF:
29878 icode = CODE_FOR_avx2_gathersiv8sf;
29880 case IX86_BUILTIN_GATHERDIV4SF:
29881 icode = CODE_FOR_avx2_gatherdiv4sf;
29883 case IX86_BUILTIN_GATHERDIV8SF:
29884 icode = CODE_FOR_avx2_gatherdiv8sf;
29886 case IX86_BUILTIN_GATHERSIV2DI:
29887 icode = CODE_FOR_avx2_gathersiv2di;
29889 case IX86_BUILTIN_GATHERSIV4DI:
29890 icode = CODE_FOR_avx2_gathersiv4di;
29892 case IX86_BUILTIN_GATHERDIV2DI:
29893 icode = CODE_FOR_avx2_gatherdiv2di;
29895 case IX86_BUILTIN_GATHERDIV4DI:
29896 icode = CODE_FOR_avx2_gatherdiv4di;
29898 case IX86_BUILTIN_GATHERSIV4SI:
29899 icode = CODE_FOR_avx2_gathersiv4si;
29901 case IX86_BUILTIN_GATHERSIV8SI:
29902 icode = CODE_FOR_avx2_gathersiv8si;
29904 case IX86_BUILTIN_GATHERDIV4SI:
29905 icode = CODE_FOR_avx2_gatherdiv4si;
29907 case IX86_BUILTIN_GATHERDIV8SI:
29908 icode = CODE_FOR_avx2_gatherdiv8si;
29910 case IX86_BUILTIN_GATHERALTSIV4DF:
29911 icode = CODE_FOR_avx2_gathersiv4df;
29913 case IX86_BUILTIN_GATHERALTDIV8SF:
29914 icode = CODE_FOR_avx2_gatherdiv8sf;
29916 case IX86_BUILTIN_GATHERALTSIV4DI:
/* Fix: previously this used CODE_FOR_avx2_gathersiv4df, the V4DF
   floating-point gather, for the V4DI integer gather — a copy-paste
   slip from the GATHERALTSIV4DF case two entries above.  Since the
   expansion below derives mode0/mode2/mode3 from insn_data[icode],
   the wrong icode made the V4DI gather use V4DF operand modes and
   emit the wrong instruction.  Use the V4DI pattern, mirroring how
   GATHERALTDIV8SI uses gatherdiv8si.  */
29917 icode = CODE_FOR_avx2_gathersiv4di;
29919 case IX86_BUILTIN_GATHERALTDIV8SI:
29920 icode = CODE_FOR_avx2_gatherdiv8si;
29924 arg0 = CALL_EXPR_ARG (exp, 0);
29925 arg1 = CALL_EXPR_ARG (exp, 1);
29926 arg2 = CALL_EXPR_ARG (exp, 2);
29927 arg3 = CALL_EXPR_ARG (exp, 3);
29928 arg4 = CALL_EXPR_ARG (exp, 4);
29929 op0 = expand_normal (arg0);
29930 op1 = expand_normal (arg1);
29931 op2 = expand_normal (arg2);
29932 op3 = expand_normal (arg3);
29933 op4 = expand_normal (arg4);
29934 /* Note the arg order is different from the operand order. */
29935 mode0 = insn_data[icode].operand[1].mode;
29936 mode2 = insn_data[icode].operand[3].mode;
29937 mode3 = insn_data[icode].operand[4].mode;
29938 mode4 = insn_data[icode].operand[5].mode;
29940 if (target == NULL_RTX
29941 || GET_MODE (target) != insn_data[icode].operand[0].mode)
29942 subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
29944 subtarget = target;
29946 if (fcode == IX86_BUILTIN_GATHERALTSIV4DF
29947 || fcode == IX86_BUILTIN_GATHERALTSIV4DI)
29949 rtx half = gen_reg_rtx (V4SImode);
29950 if (!nonimmediate_operand (op2, V8SImode))
29951 op2 = copy_to_mode_reg (V8SImode, op2);
29952 emit_insn (gen_vec_extract_lo_v8si (half, op2));
29955 else if (fcode == IX86_BUILTIN_GATHERALTDIV8SF
29956 || fcode == IX86_BUILTIN_GATHERALTDIV8SI)
29958 rtx (*gen) (rtx, rtx);
29959 rtx half = gen_reg_rtx (mode0);
29960 if (mode0 == V4SFmode)
29961 gen = gen_vec_extract_lo_v8sf;
29963 gen = gen_vec_extract_lo_v8si;
29964 if (!nonimmediate_operand (op0, GET_MODE (op0)))
29965 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
29966 emit_insn (gen (half, op0));
29968 if (!nonimmediate_operand (op3, GET_MODE (op3)))
29969 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
29970 emit_insn (gen (half, op3));
29974 /* Force memory operand only with base register here. But we
29975 don't want to do it on memory operand for other builtin
29977 if (GET_MODE (op1) != Pmode)
29978 op1 = convert_to_mode (Pmode, op1, 1);
29979 op1 = force_reg (Pmode, op1);
29981 if (!insn_data[icode].operand[1].predicate (op0, mode0))
29982 op0 = copy_to_mode_reg (mode0, op0);
29983 if (!insn_data[icode].operand[2].predicate (op1, Pmode))
29984 op1 = copy_to_mode_reg (Pmode, op1);
29985 if (!insn_data[icode].operand[3].predicate (op2, mode2))
29986 op2 = copy_to_mode_reg (mode2, op2);
29987 if (!insn_data[icode].operand[4].predicate (op3, mode3))
29988 op3 = copy_to_mode_reg (mode3, op3);
29989 if (!insn_data[icode].operand[5].predicate (op4, mode4))
29991 error ("last argument must be scale 1, 2, 4, 8");
29995 /* Optimize. If mask is known to have all high bits set,
29996 replace op0 with pc_rtx to signal that the instruction
29997 overwrites the whole destination and doesn't use its
29998 previous contents. */
30001 if (TREE_CODE (arg3) == VECTOR_CST)
30004 unsigned int negative = 0;
30005 for (elt = TREE_VECTOR_CST_ELTS (arg3);
30006 elt; elt = TREE_CHAIN (elt))
30008 tree cst = TREE_VALUE (elt);
30009 if (TREE_CODE (cst) == INTEGER_CST
30010 && tree_int_cst_sign_bit (cst))
30012 else if (TREE_CODE (cst) == REAL_CST
30013 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
30016 if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
30019 else if (TREE_CODE (arg3) == SSA_NAME)
30021 /* Recognize also when mask is like:
30022 __v2df src = _mm_setzero_pd ();
30023 __v2df mask = _mm_cmpeq_pd (src, src);
30025 __v8sf src = _mm256_setzero_ps ();
30026 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
30027 as that is a cheaper way to load all ones into
30028 a register than having to load a constant from
30030 gimple def_stmt = SSA_NAME_DEF_STMT (arg3);
30031 if (is_gimple_call (def_stmt))
30033 tree fndecl = gimple_call_fndecl (def_stmt);
30035 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
30036 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
30038 case IX86_BUILTIN_CMPPD:
30039 case IX86_BUILTIN_CMPPS:
30040 case IX86_BUILTIN_CMPPD256:
30041 case IX86_BUILTIN_CMPPS256:
30042 if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
30045 case IX86_BUILTIN_CMPEQPD:
30046 case IX86_BUILTIN_CMPEQPS:
30047 if (initializer_zerop (gimple_call_arg (def_stmt, 0))
30048 && initializer_zerop (gimple_call_arg (def_stmt,
30059 pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
30064 if (fcode == IX86_BUILTIN_GATHERDIV8SF
30065 || fcode == IX86_BUILTIN_GATHERDIV8SI)
30067 enum machine_mode tmode = GET_MODE (subtarget) == V8SFmode
30068 ? V4SFmode : V4SImode;
30069 if (target == NULL_RTX)
30070 target = gen_reg_rtx (tmode);
30071 if (tmode == V4SFmode)
30072 emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
30074 emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
30077 target = subtarget;
30085 for (i = 0, d = bdesc_special_args;
30086 i < ARRAY_SIZE (bdesc_special_args);
30088 if (d->code == fcode)
30089 return ix86_expand_special_args_builtin (d, exp, target);
30091 for (i = 0, d = bdesc_args;
30092 i < ARRAY_SIZE (bdesc_args);
30094 if (d->code == fcode)
30097 case IX86_BUILTIN_FABSQ:
30098 case IX86_BUILTIN_COPYSIGNQ:
30100 /* Emit a normal call if SSE2 isn't available. */
30101 return expand_call (exp, target, ignore);
30103 return ix86_expand_args_builtin (d, exp, target);
30106 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
30107 if (d->code == fcode)
30108 return ix86_expand_sse_comi (d, exp, target);
30110 for (i = 0, d = bdesc_pcmpestr;
30111 i < ARRAY_SIZE (bdesc_pcmpestr);
30113 if (d->code == fcode)
30114 return ix86_expand_sse_pcmpestr (d, exp, target);
30116 for (i = 0, d = bdesc_pcmpistr;
30117 i < ARRAY_SIZE (bdesc_pcmpistr);
30119 if (d->code == fcode)
30120 return ix86_expand_sse_pcmpistr (d, exp, target);
30122 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
30123 if (d->code == fcode)
30124 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
30125 (enum ix86_builtin_func_type)
30126 d->flag, d->comparison);
30128 gcc_unreachable ();
30131 /* Returns a function decl for a vectorized version of the builtin function
30132 with builtin function code FN and the result vector type TYPE, or NULL_TREE
30133 if it is not available. */
30136 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
30139 enum machine_mode in_mode, out_mode;
30141 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
/* Only normal (non-MD) builtins with vector in/out types are vectorizable
   through this hook.  */
30143 if (TREE_CODE (type_out) != VECTOR_TYPE
30144 || TREE_CODE (type_in) != VECTOR_TYPE
30145 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
/* Element mode and lane count of the result and argument vector types;
   each case below matches these against the 128-bit and 256-bit widths
   the corresponding IX86 vector builtin supports.  */
30148 out_mode = TYPE_MODE (TREE_TYPE (type_out));
30149 out_n = TYPE_VECTOR_SUBPARTS (type_out);
30150 in_mode = TYPE_MODE (TREE_TYPE (type_in));
30151 in_n = TYPE_VECTOR_SUBPARTS (type_in);
30155 case BUILT_IN_SQRT:
30156 if (out_mode == DFmode && in_mode == DFmode)
30158 if (out_n == 2 && in_n == 2)
30159 return ix86_builtins[IX86_BUILTIN_SQRTPD];
30160 else if (out_n == 4 && in_n == 4)
30161 return ix86_builtins[IX86_BUILTIN_SQRTPD256];
30165 case BUILT_IN_SQRTF:
30166 if (out_mode == SFmode && in_mode == SFmode)
30168 if (out_n == 4 && in_n == 4)
30169 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
30170 else if (out_n == 8 && in_n == 8)
30171 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR256];
/* The [il]l?floor/ceil/round families convert FP to SImode lanes; the
   SSE4.1 ROUND insn is required and must be exception-free
   (!flag_trapping_math) since it does not trap on denormals.  */
30175 case BUILT_IN_IFLOOR:
30176 case BUILT_IN_LFLOOR:
30177 case BUILT_IN_LLFLOOR:
30178 /* The round insn does not trap on denormals. */
30179 if (flag_trapping_math || !TARGET_ROUND)
30182 if (out_mode == SImode && in_mode == DFmode)
30184 if (out_n == 4 && in_n == 2)
30185 return ix86_builtins[IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX];
30186 else if (out_n == 8 && in_n == 4)
30187 return ix86_builtins[IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256];
30191 case BUILT_IN_IFLOORF:
30192 case BUILT_IN_LFLOORF:
30193 case BUILT_IN_LLFLOORF:
30194 /* The round insn does not trap on denormals. */
30195 if (flag_trapping_math || !TARGET_ROUND)
30198 if (out_mode == SImode && in_mode == SFmode)
30200 if (out_n == 4 && in_n == 4)
30201 return ix86_builtins[IX86_BUILTIN_FLOORPS_SFIX];
30202 else if (out_n == 8 && in_n == 8)
30203 return ix86_builtins[IX86_BUILTIN_FLOORPS_SFIX256];
30207 case BUILT_IN_ICEIL:
30208 case BUILT_IN_LCEIL:
30209 case BUILT_IN_LLCEIL:
30210 /* The round insn does not trap on denormals. */
30211 if (flag_trapping_math || !TARGET_ROUND)
30214 if (out_mode == SImode && in_mode == DFmode)
30216 if (out_n == 4 && in_n == 2)
30217 return ix86_builtins[IX86_BUILTIN_CEILPD_VEC_PACK_SFIX];
30218 else if (out_n == 8 && in_n == 4)
30219 return ix86_builtins[IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256];
30223 case BUILT_IN_ICEILF:
30224 case BUILT_IN_LCEILF:
30225 case BUILT_IN_LLCEILF:
30226 /* The round insn does not trap on denormals. */
30227 if (flag_trapping_math || !TARGET_ROUND)
30230 if (out_mode == SImode && in_mode == SFmode)
30232 if (out_n == 4 && in_n == 4)
30233 return ix86_builtins[IX86_BUILTIN_CEILPS_SFIX];
30234 else if (out_n == 8 && in_n == 8)
30235 return ix86_builtins[IX86_BUILTIN_CEILPS_SFIX256];
/* [il]l?rint uses the current rounding mode, so needs no TARGET_ROUND
   gate: CVTPD2DQ/CVTPS2DQ already round per MXCSR.  */
30239 case BUILT_IN_IRINT:
30240 case BUILT_IN_LRINT:
30241 case BUILT_IN_LLRINT:
30242 if (out_mode == SImode && in_mode == DFmode)
30244 if (out_n == 4 && in_n == 2)
30245 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
30246 else if (out_n == 8 && in_n == 4)
30247 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX256];
30251 case BUILT_IN_IRINTF:
30252 case BUILT_IN_LRINTF:
30253 case BUILT_IN_LLRINTF:
30254 if (out_mode == SImode && in_mode == SFmode)
30256 if (out_n == 4 && in_n == 4)
30257 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
30258 else if (out_n == 8 && in_n == 8)
30259 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ256];
30263 case BUILT_IN_IROUND:
30264 case BUILT_IN_LROUND:
30265 case BUILT_IN_LLROUND:
30266 /* The round insn does not trap on denormals. */
30267 if (flag_trapping_math || !TARGET_ROUND)
30270 if (out_mode == SImode && in_mode == DFmode)
30272 if (out_n == 4 && in_n == 2)
30273 return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX];
30274 else if (out_n == 8 && in_n == 4)
30275 return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256];
30279 case BUILT_IN_IROUNDF:
30280 case BUILT_IN_LROUNDF:
30281 case BUILT_IN_LLROUNDF:
30282 /* The round insn does not trap on denormals. */
30283 if (flag_trapping_math || !TARGET_ROUND)
30286 if (out_mode == SImode && in_mode == SFmode)
30288 if (out_n == 4 && in_n == 4)
30289 return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ_SFIX];
30290 else if (out_n == 8 && in_n == 8)
30291 return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ_SFIX256];
30295 case BUILT_IN_COPYSIGN:
30296 if (out_mode == DFmode && in_mode == DFmode)
30298 if (out_n == 2 && in_n == 2)
30299 return ix86_builtins[IX86_BUILTIN_CPYSGNPD];
30300 else if (out_n == 4 && in_n == 4)
30301 return ix86_builtins[IX86_BUILTIN_CPYSGNPD256];
30305 case BUILT_IN_COPYSIGNF:
30306 if (out_mode == SFmode && in_mode == SFmode)
30308 if (out_n == 4 && in_n == 4)
30309 return ix86_builtins[IX86_BUILTIN_CPYSGNPS];
30310 else if (out_n == 8 && in_n == 8)
30311 return ix86_builtins[IX86_BUILTIN_CPYSGNPS256];
30315 case BUILT_IN_FLOOR:
30316 /* The round insn does not trap on denormals. */
30317 if (flag_trapping_math || !TARGET_ROUND)
30320 if (out_mode == DFmode && in_mode == DFmode)
30322 if (out_n == 2 && in_n == 2)
30323 return ix86_builtins[IX86_BUILTIN_FLOORPD];
30324 else if (out_n == 4 && in_n == 4)
30325 return ix86_builtins[IX86_BUILTIN_FLOORPD256];
30329 case BUILT_IN_FLOORF:
30330 /* The round insn does not trap on denormals. */
30331 if (flag_trapping_math || !TARGET_ROUND)
30334 if (out_mode == SFmode && in_mode == SFmode)
30336 if (out_n == 4 && in_n == 4)
30337 return ix86_builtins[IX86_BUILTIN_FLOORPS];
30338 else if (out_n == 8 && in_n == 8)
30339 return ix86_builtins[IX86_BUILTIN_FLOORPS256];
30343 case BUILT_IN_CEIL:
30344 /* The round insn does not trap on denormals. */
30345 if (flag_trapping_math || !TARGET_ROUND)
30348 if (out_mode == DFmode && in_mode == DFmode)
30350 if (out_n == 2 && in_n == 2)
30351 return ix86_builtins[IX86_BUILTIN_CEILPD];
30352 else if (out_n == 4 && in_n == 4)
30353 return ix86_builtins[IX86_BUILTIN_CEILPD256];
30357 case BUILT_IN_CEILF:
30358 /* The round insn does not trap on denormals. */
30359 if (flag_trapping_math || !TARGET_ROUND)
30362 if (out_mode == SFmode && in_mode == SFmode)
30364 if (out_n == 4 && in_n == 4)
30365 return ix86_builtins[IX86_BUILTIN_CEILPS];
30366 else if (out_n == 8 && in_n == 8)
30367 return ix86_builtins[IX86_BUILTIN_CEILPS256];
30371 case BUILT_IN_TRUNC:
30372 /* The round insn does not trap on denormals. */
30373 if (flag_trapping_math || !TARGET_ROUND)
30376 if (out_mode == DFmode && in_mode == DFmode)
30378 if (out_n == 2 && in_n == 2)
30379 return ix86_builtins[IX86_BUILTIN_TRUNCPD];
30380 else if (out_n == 4 && in_n == 4)
30381 return ix86_builtins[IX86_BUILTIN_TRUNCPD256];
30385 case BUILT_IN_TRUNCF:
30386 /* The round insn does not trap on denormals. */
30387 if (flag_trapping_math || !TARGET_ROUND)
30390 if (out_mode == SFmode && in_mode == SFmode)
30392 if (out_n == 4 && in_n == 4)
30393 return ix86_builtins[IX86_BUILTIN_TRUNCPS];
30394 else if (out_n == 8 && in_n == 8)
30395 return ix86_builtins[IX86_BUILTIN_TRUNCPS256];
30399 case BUILT_IN_RINT:
30400 /* The round insn does not trap on denormals. */
30401 if (flag_trapping_math || !TARGET_ROUND)
30404 if (out_mode == DFmode && in_mode == DFmode)
30406 if (out_n == 2 && in_n == 2)
30407 return ix86_builtins[IX86_BUILTIN_RINTPD];
30408 else if (out_n == 4 && in_n == 4)
30409 return ix86_builtins[IX86_BUILTIN_RINTPD256];
30413 case BUILT_IN_RINTF:
30414 /* The round insn does not trap on denormals. */
30415 if (flag_trapping_math || !TARGET_ROUND)
30418 if (out_mode == SFmode && in_mode == SFmode)
30420 if (out_n == 4 && in_n == 4)
30421 return ix86_builtins[IX86_BUILTIN_RINTPS];
30422 else if (out_n == 8 && in_n == 8)
30423 return ix86_builtins[IX86_BUILTIN_RINTPS256];
30427 case BUILT_IN_ROUND:
30428 /* The round insn does not trap on denormals. */
30429 if (flag_trapping_math || !TARGET_ROUND)
30432 if (out_mode == DFmode && in_mode == DFmode)
30434 if (out_n == 2 && in_n == 2)
30435 return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ];
30436 else if (out_n == 4 && in_n == 4)
30437 return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ256];
30441 case BUILT_IN_ROUNDF:
30442 /* The round insn does not trap on denormals. */
30443 if (flag_trapping_math || !TARGET_ROUND)
30446 if (out_mode == SFmode && in_mode == SFmode)
30448 if (out_n == 4 && in_n == 4)
30449 return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ];
30450 else if (out_n == 8 && in_n == 8)
30451 return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ256];
/* NOTE(review): the case label for this DFmode FMA block (presumably
   BUILT_IN_FMA, parallel to BUILT_IN_FMAF below) is not visible in this
   view — confirm against the full file.  */
30456 if (out_mode == DFmode && in_mode == DFmode)
30458 if (out_n == 2 && in_n == 2)
30459 return ix86_builtins[IX86_BUILTIN_VFMADDPD];
30460 if (out_n == 4 && in_n == 4)
30461 return ix86_builtins[IX86_BUILTIN_VFMADDPD256];
30465 case BUILT_IN_FMAF:
30466 if (out_mode == SFmode && in_mode == SFmode)
30468 if (out_n == 4 && in_n == 4)
30469 return ix86_builtins[IX86_BUILTIN_VFMADDPS];
30470 if (out_n == 8 && in_n == 8)
30471 return ix86_builtins[IX86_BUILTIN_VFMADDPS256];
/* No direct vector builtin matched; let an external vector-math library
   handler (SVML/ACML, installed via the -mveclibabi= option) try.  */
30479 /* Dispatch to a handler for a vectorization library. */
30480 if (ix86_veclib_handler)
30481 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
30487 /* Handler for an SVML-style interface to
30488 a library with vectorized intrinsics. */
30491 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
30494 tree fntype, new_fndecl, args;
30497 enum machine_mode el_mode, in_mode;
30500 /* The SVML is suitable for unsafe math only. */
30501 if (!flag_unsafe_math_optimizations)
30504 el_mode = TYPE_MODE (TREE_TYPE (type_out));
30505 n = TYPE_VECTOR_SUBPARTS (type_out);
30506 in_mode = TYPE_MODE (TREE_TYPE (type_in));
30507 in_n = TYPE_VECTOR_SUBPARTS (type_in);
30508 if (el_mode != in_mode
/* Double-precision SVML routines operate on 2 DFmode lanes...  */
30516 case BUILT_IN_LOG10:
30518 case BUILT_IN_TANH:
30520 case BUILT_IN_ATAN:
30521 case BUILT_IN_ATAN2:
30522 case BUILT_IN_ATANH:
30523 case BUILT_IN_CBRT:
30524 case BUILT_IN_SINH:
30526 case BUILT_IN_ASINH:
30527 case BUILT_IN_ASIN:
30528 case BUILT_IN_COSH:
30530 case BUILT_IN_ACOSH:
30531 case BUILT_IN_ACOS:
30532 if (el_mode != DFmode || n != 2)
/* ... and single-precision routines on 4 SFmode lanes.  */
30536 case BUILT_IN_EXPF:
30537 case BUILT_IN_LOGF:
30538 case BUILT_IN_LOG10F:
30539 case BUILT_IN_POWF:
30540 case BUILT_IN_TANHF:
30541 case BUILT_IN_TANF:
30542 case BUILT_IN_ATANF:
30543 case BUILT_IN_ATAN2F:
30544 case BUILT_IN_ATANHF:
30545 case BUILT_IN_CBRTF:
30546 case BUILT_IN_SINHF:
30547 case BUILT_IN_SINF:
30548 case BUILT_IN_ASINHF:
30549 case BUILT_IN_ASINF:
30550 case BUILT_IN_COSHF:
30551 case BUILT_IN_COSF:
30552 case BUILT_IN_ACOSHF:
30553 case BUILT_IN_ACOSF:
30554 if (el_mode != SFmode || n != 4)
/* Mangle the SVML routine name.  "bname+10" skips the 10-character
   "__builtin_" prefix; "log"/"logf" map to the special names Ln2/Ln4,
   float variants get "vmls<Name>4" (the trailing 'f' of the builtin
   name is overwritten by the lane count '4'), double variants get
   "vmld<Name>2".  */
30562 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
30564 if (fn == BUILT_IN_LOGF)
30565 strcpy (name, "vmlsLn4");
30566 else if (fn == BUILT_IN_LOG)
30567 strcpy (name, "vmldLn2");
30570 sprintf (name, "vmls%s", bname+10);
30571 name[strlen (name)-1] = '4';
30574 sprintf (name, "vmld%s2", bname+10);
30576 /* Convert to uppercase. */
/* Count the scalar builtin's arguments to decide between the unary and
   binary vector prototypes below.  */
30580 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
30582 args = TREE_CHAIN (args))
30586 fntype = build_function_type_list (type_out, type_in, NULL);
30588 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
30590 /* Build a function declaration for the vectorized function. */
30591 new_fndecl = build_decl (BUILTINS_LOCATION,
30592 FUNCTION_DECL, get_identifier (name), fntype);
30593 TREE_PUBLIC (new_fndecl) = 1;
30594 DECL_EXTERNAL (new_fndecl) = 1;
/* Pure math routine: no virtual operands, readonly — lets the optimizer
   move/CSE calls freely.  */
30595 DECL_IS_NOVOPS (new_fndecl) = 1;
30596 TREE_READONLY (new_fndecl) = 1;
30601 /* Handler for an ACML-style interface to
30602 a library with vectorized intrinsics. */
30605 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
/* Name template: the ".." is patched to the element kind/width and the
   routine name is appended at offset 7 below.  */
30607 char name[20] = "__vr.._";
30608 tree fntype, new_fndecl, args;
30611 enum machine_mode el_mode, in_mode;
30614 /* The ACML is 64bits only and suitable for unsafe math only as
30615 it does not correctly support parts of IEEE with the required
30616 precision such as denormals. */
30618 || !flag_unsafe_math_optimizations)
30621 el_mode = TYPE_MODE (TREE_TYPE (type_out));
30622 n = TYPE_VECTOR_SUBPARTS (type_out);
30623 in_mode = TYPE_MODE (TREE_TYPE (type_in));
30624 in_n = TYPE_VECTOR_SUBPARTS (type_in);
30625 if (el_mode != in_mode
30635 case BUILT_IN_LOG2:
30636 case BUILT_IN_LOG10:
30639 if (el_mode != DFmode
30644 case BUILT_IN_SINF:
30645 case BUILT_IN_COSF:
30646 case BUILT_IN_EXPF:
30647 case BUILT_IN_POWF:
30648 case BUILT_IN_LOGF:
30649 case BUILT_IN_LOG2F:
30650 case BUILT_IN_LOG10F:
30653 if (el_mode != SFmode
/* Append the scalar routine name after the "__vr.._" prefix;
   "bname+10" skips the 10-character "__builtin_" prefix.  */
30662 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
30663 sprintf (name + 7, "%s", bname+10);
/* Count arguments to pick the unary vs. binary vector prototype.  */
30666 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
30668 args = TREE_CHAIN (args))
30672 fntype = build_function_type_list (type_out, type_in, NULL);
30674 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
30676 /* Build a function declaration for the vectorized function. */
30677 new_fndecl = build_decl (BUILTINS_LOCATION,
30678 FUNCTION_DECL, get_identifier (name), fntype);
30679 TREE_PUBLIC (new_fndecl) = 1;
30680 DECL_EXTERNAL (new_fndecl) = 1;
/* Pure math routine: no virtual operands, readonly.  */
30681 DECL_IS_NOVOPS (new_fndecl) = 1;
30682 TREE_READONLY (new_fndecl) = 1;
30687 /* Returns a decl of a function that implements gather load with
30688 memory type MEM_VECTYPE and index type INDEX_VECTYPE and SCALE.
30689 Return NULL_TREE if it is not available. */
30692 ix86_vectorize_builtin_gather (const_tree mem_vectype,
30693 const_tree index_type, int scale)
30696 enum ix86_builtins code;
/* The hardware gather index must be a 32- or 64-bit integer (or
   pointer-sized) value.  */
30701 if ((TREE_CODE (index_type) != INTEGER_TYPE
30702 && !POINTER_TYPE_P (index_type))
30703 || (TYPE_MODE (index_type) != SImode
30704 && TYPE_MODE (index_type) != DImode))
30707 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
30710 /* v*gather* insn sign extends index to pointer mode. */
/* So a narrower *unsigned* index cannot be used: sign extension would
   misinterpret values with the top bit set.  */
30711 if (TYPE_PRECISION (index_type) < POINTER_SIZE
30712 && TYPE_UNSIGNED (index_type))
/* Scale must be a power of two (hardware supports 1, 2, 4, 8).  */
30717 || (scale & (scale - 1)) != 0)
/* Pick the SI- or DI-indexed builtin matching the memory vector mode;
   ALT variants handle the mismatched index-width combinations.  */
30720 si = TYPE_MODE (index_type) == SImode;
30721 switch (TYPE_MODE (mem_vectype))
30724 code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
30727 code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
30730 code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
30733 code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
30736 code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
30739 code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
30742 code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
30745 code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
30751 return ix86_builtins[code];
30754 /* Returns a code for a target-specific builtin that implements
30755 reciprocal of the function, or NULL_TREE if not available. */
30758 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
30759 bool sqrt ATTRIBUTE_UNUSED)
/* RSQRTPS is an approximation, so it is only usable under SSE math with
   -ffinite-math-only, -fno-trapping-math and -funsafe-math-optimizations,
   and never when optimizing for size (the Newton-Raphson refinement
   costs extra insns).  */
30761 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
30762 && flag_finite_math_only && !flag_trapping_math
30763 && flag_unsafe_math_optimizations))
30767 /* Machine dependent builtins. */
30770 /* Vectorized version of sqrt to rsqrt conversion. */
30771 case IX86_BUILTIN_SQRTPS_NR:
30772 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
30774 case IX86_BUILTIN_SQRTPS_NR256:
30775 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR256];
30781 /* Normal builtins. */
30784 /* Sqrt to rsqrt conversion. */
30785 case BUILT_IN_SQRTF:
30786 return ix86_builtins[IX86_BUILTIN_RSQRTF];
30793 /* Helper for avx_vpermilps256_operand et al. This is also used by
30794 the expansion functions to turn the parallel back into a mask.
30795 The return value is 0 for no match and the imm8+1 for a match. */
30798 avx_vpermilp_parallel (rtx par, enum machine_mode mode)
30800 unsigned i, nelt = GET_MODE_NUNITS (mode);
30802 unsigned char ipar[8];
/* The parallel must have exactly one selector per vector element.  */
30804 if (XVECLEN (par, 0) != (int) nelt)
30807 /* Validate that all of the elements are constants, and not totally
30808 out of range. Copy the data into an integral array to make the
30809 subsequent checks easier. */
30810 for (i = 0; i < nelt; ++i)
30812 rtx er = XVECEXP (par, 0, i);
30813 unsigned HOST_WIDE_INT ei;
30815 if (!CONST_INT_P (er))
30826 /* In the 256-bit DFmode case, we can only move elements within
/* ... a 128-bit lane: indices 0-1 select within the low lane (1 mask
   bit per element), indices 2-3 within the high lane (rebased by -2).  */
30828 for (i = 0; i < 2; ++i)
30832 mask |= ipar[i] << i;
30834 for (i = 2; i < 4; ++i)
30838 mask |= (ipar[i] - 2) << i;
30843 /* In the 256-bit SFmode case, we have full freedom of movement
30844 within the low 128-bit lane, but the high 128-bit lane must
30845 mirror the exact same pattern. */
30846 for (i = 0; i < 4; ++i)
30847 if (ipar[i] + 4 != ipar[i + 4])
30854 /* In the 128-bit case, we've full freedom in the placement of
30855 the elements from the source operand. */
/* nelt/2 bits per element: 2 bits each for V4SF, 1 bit each for V2DF.  */
30856 for (i = 0; i < nelt; ++i)
30857 mask |= ipar[i] << (i * (nelt / 2));
30861 gcc_unreachable ();
30864 /* Make sure success has a non-zero value by adding one. */
30868 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
30869 the expansion functions to turn the parallel back into a mask.
30870 The return value is 0 for no match and the imm8+1 for a match. */
30873 avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
30875 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
30877 unsigned char ipar[8];
30879 if (XVECLEN (par, 0) != (int) nelt)
30882 /* Validate that all of the elements are constants, and not totally
30883 out of range. Copy the data into an integral array to make the
30884 subsequent checks easier. */
30885 for (i = 0; i < nelt; ++i)
30887 rtx er = XVECEXP (par, 0, i);
30888 unsigned HOST_WIDE_INT ei;
30890 if (!CONST_INT_P (er))
/* Selectors index the concatenation of the two source operands, hence
   the 2*nelt bound.  */
30893 if (ei >= 2 * nelt)
30898 /* Validate that the halves of the permute are halves. */
/* Each half of the result must be a run of consecutive elements, i.e. a
   whole 128-bit lane of one of the sources.  */
30899 for (i = 0; i < nelt2 - 1; ++i)
30900 if (ipar[i] + 1 != ipar[i + 1])
30902 for (i = nelt2; i < nelt - 1; ++i)
30903 if (ipar[i] + 1 != ipar[i + 1])
30906 /* Reconstruct the mask. */
/* Encode the lane selector for each result half in a 4-bit nibble, as
   the VPERM2F128 immediate expects.  */
30907 for (i = 0; i < 2; ++i)
30909 unsigned e = ipar[i * nelt2];
30913 mask |= e << (i * 4);
30916 /* Make sure success has a non-zero value by adding one. */
30920 /* Store OPERAND to the memory after reload is completed. This means
30921 that we can't easily use assign_stack_local. */
30923 ix86_force_to_memory (enum machine_mode mode, rtx operand)
30927 gcc_assert (reload_completed);
/* With a red zone we can store below the stack pointer without
   adjusting it.  */
30928 if (ix86_using_red_zone ())
30930 result = gen_rtx_MEM (mode,
30931 gen_rtx_PLUS (Pmode,
30933 GEN_INT (-RED_ZONE_SIZE)));
30934 emit_move_insn (result, operand);
/* No red zone: push the value and return a MEM at the new stack top.
   64-bit path pushes one DImode word.  */
30936 else if (TARGET_64BIT)
30942 operand = gen_lowpart (DImode, operand);
30946 gen_rtx_SET (VOIDmode,
30947 gen_rtx_MEM (DImode,
30948 gen_rtx_PRE_DEC (DImode,
30949 stack_pointer_rtx)),
30953 gcc_unreachable ();
30955 result = gen_rtx_MEM (mode, stack_pointer_rtx);
/* 32-bit double-word path: split into two SImode pushes (high word
   first, so the value ends up little-endian contiguous at the new
   stack top).  */
30964 split_double_mode (mode, &operand, 1, operands, operands + 1);
30966 gen_rtx_SET (VOIDmode,
30967 gen_rtx_MEM (SImode,
30968 gen_rtx_PRE_DEC (Pmode,
30969 stack_pointer_rtx)),
30972 gen_rtx_SET (VOIDmode,
30973 gen_rtx_MEM (SImode,
30974 gen_rtx_PRE_DEC (Pmode,
30975 stack_pointer_rtx)),
30980 /* Store HImodes as SImodes. */
30981 operand = gen_lowpart (SImode, operand);
30985 gen_rtx_SET (VOIDmode,
30986 gen_rtx_MEM (GET_MODE (operand),
30987 gen_rtx_PRE_DEC (SImode,
30988 stack_pointer_rtx)),
30992 gcc_unreachable ();
30994 result = gen_rtx_MEM (mode, stack_pointer_rtx);
30999 /* Free operand from the memory. */
/* Counterpart of ix86_force_to_memory: deallocate the temporary slot.
   Nothing to do when the red zone was used (the stack pointer was never
   moved).  */
31001 ix86_free_from_memory (enum machine_mode mode)
31003 if (!ix86_using_red_zone ())
31007 if (mode == DImode || TARGET_64BIT)
31011 /* Use LEA to deallocate stack space. In peephole2 it will be converted
31012 to pop or add instruction if registers are available. */
31013 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
31014 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
31019 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
31021 Put float CONST_DOUBLE in the constant pool instead of fp regs.
31022 QImode must go into class Q_REGS.
31023 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
31024 movdf to do mem-to-mem moves through integer regs. */
31027 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
31029 enum machine_mode mode = GET_MODE (x);
31031 /* We're only allowed to return a subclass of CLASS. Many of the
31032 following checks fail for NO_REGS, so eliminate that early. */
31033 if (regclass == NO_REGS)
31036 /* All classes can load zeros. */
31037 if (x == CONST0_RTX (mode))
31040 /* Force constants into memory if we are loading a (nonzero) constant into
31041 an MMX or SSE register. This is because there are no MMX/SSE instructions
31042 to load from a constant. */
31044 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
31047 /* Prefer SSE regs only, if we can use them for math. */
31048 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
31049 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
31051 /* Floating-point constants need more complex checks. */
31052 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
31054 /* General regs can load everything. */
31055 if (reg_class_subset_p (regclass, GENERAL_REGS))
31058 /* Floats can load 0 and 1 plus some others. Note that we eliminated
31059 zero above. We only want to wind up preferring 80387 registers if
31060 we plan on doing computation with them. */
31062 && standard_80387_constant_p (x) > 0)
31064 /* Limit class to non-sse. */
31065 if (regclass == FLOAT_SSE_REGS)
31067 if (regclass == FP_TOP_SSE_REGS)
31069 if (regclass == FP_SECOND_SSE_REGS)
31070 return FP_SECOND_REG;
31071 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
31078 /* Generally when we see PLUS here, it's the function invariant
31079 (plus soft-fp const_int). Which can only be computed into general
/* ... registers, so narrow the class accordingly.  */
31081 if (GET_CODE (x) == PLUS)
31082 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
31084 /* QImode constants are easy to load, but non-constant QImode data
31085 must go into Q_REGS. */
31086 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
31088 if (reg_class_subset_p (regclass, Q_REGS))
31090 if (reg_class_subset_p (Q_REGS, regclass))
31098 /* Discourage putting floating-point values in SSE registers unless
31099 SSE math is being used, and likewise for the 387 registers. */
/* Output-reload analogue of ix86_preferred_reload_class: steer FP values
   toward the register bank actually used for FP math, returning NO_REGS
   to reject an alternative (reload then falls back to its own choice).  */
31101 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
31103 enum machine_mode mode = GET_MODE (x);
31105 /* Restrict the output reload class to the register bank that we are doing
31106 math on. If we would like not to return a subset of CLASS, reject this
31107 alternative: if reload cannot do this, it will still use its choice. */
/* NOTE(review): this re-read is redundant — the initializer above already
   set MODE from the same expression.  Harmless, but worth cleaning up.  */
31108 mode = GET_MODE (x);
31109 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
31110 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
31112 if (X87_FLOAT_MODE_P (mode))
31114 if (regclass == FP_TOP_SSE_REGS)
31116 else if (regclass == FP_SECOND_SSE_REGS)
31117 return FP_SECOND_REG;
31119 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
/* Implement TARGET_SECONDARY_RELOAD.  Tells reload when moving X in MODE
   into/out of class RCLASS needs an intermediate step: a special reload
   pattern (recorded in SRI->icode), an intermediate class, or nothing.
   NOTE(review): the excerpt elides the surrounding conditions, braces and
   several returns; the comments below track only the visible logic.  */
31126 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
31127 enum machine_mode mode, secondary_reload_info *sri)
31129 /* Double-word spills from general registers to non-offsettable memory
31130 references (zero-extended addresses) require special handling. */
31133 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
31134 && rclass == GENERAL_REGS
31135 && !offsettable_memref_p (x))
/* Select the load or store flavor of the special no-offset reload insn.  */
31138 ? CODE_FOR_reload_noff_load
31139 : CODE_FOR_reload_noff_store);
31140 /* Add the cost of moving address to a temporary. */
31141 sri->extra_cost = 1;
31146 /* QImode spills from non-QI registers require
31147 intermediate register on 32bit targets. */
31149 && !in_p && mode == QImode
31150 && (rclass == GENERAL_REGS
31151 || rclass == LEGACY_REGS
31152 || rclass == INDEX_REGS))
31161 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
31162 regno = true_regnum (x);
31164 /* Return Q_REGS if the operand is in memory. */
31169 /* This condition handles corner case where an expression involving
31170 pointers gets vectorized. We're trying to use the address of a
31171 stack slot as a vector initializer.
31173 (set (reg:V2DI 74 [ vect_cst_.2 ])
31174 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
31176 Eventually frame gets turned into sp+offset like this:
31178 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
31179 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
31180 (const_int 392 [0x188]))))
31182 That later gets turned into:
31184 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
31185 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
31186 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
31188 We'll have the following reload recorded:
31190 Reload 0: reload_in (DI) =
31191 (plus:DI (reg/f:DI 7 sp)
31192 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
31193 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
31194 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
31195 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
31196 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
31197 reload_reg_rtx: (reg:V2DI 22 xmm1)
31199 Which isn't going to work since SSE instructions can't handle scalar
31200 additions. Returning GENERAL_REGS forces the addition into integer
31201 register and reload can handle subsequent reloads without problems. */
31203 if (in_p && GET_CODE (x) == PLUS
31204 && SSE_CLASS_P (rclass)
31205 && SCALAR_INT_MODE_P (mode))
31206 return GENERAL_REGS;
31211 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
/* True for classes with so few registers that pseudos in them are likely
   to be spilled.  NOTE(review): only two switch cases survive in this
   excerpt; the full case list and both return paths are elided.  */
31214 ix86_class_likely_spilled_p (reg_class_t rclass)
31225 case SSE_FIRST_REG:
31227 case FP_SECOND_REG:
31237 /* If we are copying between general and FP registers, we need a memory
31238 location. The same is true for SSE and MMX registers.
31240 To optimize register_move_cost performance, allow inline variant.
31242 The macro can't work reliably when one of the CLASSES is class containing
31243 registers from multiple units (SSE, MMX, integer). We avoid this by never
31244 combining those units in single alternative in the machine description.
31245 Ensure that this constraint holds to avoid unexpected surprises.
31247 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
31248 enforce these sanity checks. */
/* NOTE(review): static inline qualifier and several return statements are
   elided in this excerpt; the visible text shows only the conditions.  */
31251 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
31252 enum machine_mode mode, int strict)
/* Sanity check: mixed-unit classes must never reach here (see above).  */
31254 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
31255 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
31256 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
31257 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
31258 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
31259 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
31261 gcc_assert (!strict);
31265 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
31268 /* ??? This is a lie. We do have moves between mmx/general, and for
31269 mmx/sse2. But by saying we need secondary memory we discourage the
31270 register allocator from using the mmx registers unless needed. */
31271 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
31274 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
31276 /* SSE1 doesn't have any direct moves from other classes. */
31280 /* If the target says that inter-unit moves are more expensive
31281 than moving through memory, then don't generate them. */
31282 if (!TARGET_INTER_UNIT_MOVES)
31285 /* Between SSE and general, we have moves no larger than word size. */
31286 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
/* Out-of-line wrapper around inline_secondary_memory_needed, used as the
   target's SECONDARY_MEMORY_NEEDED implementation.  NOTE(review): the
   return-type line and braces are elided in this excerpt.  */
31294 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
31295 enum machine_mode mode, int strict)
31297 return inline_secondary_memory_needed (class1, class2, mode, strict);
31300 /* Implement the TARGET_CLASS_MAX_NREGS hook.
31302 On the 80386, this is the size of MODE in words,
31303 except in the FP regs, where a single reg is always enough. */
/* NOTE(review): the non-integer branches (FP/SSE single-register cases
   and the final return) are elided from this excerpt.  */
31305 static unsigned char
31306 ix86_class_max_nregs (reg_class_t rclass, enum machine_mode mode)
31308 if (MAYBE_INTEGER_CLASS_P (rclass))
/* XFmode/XCmode occupy a fixed number of words that differs between
   64-bit (8-byte words) and 32-bit (4-byte words) targets.  */
31310 if (mode == XFmode)
31311 return (TARGET_64BIT ? 2 : 3);
31312 else if (mode == XCmode)
31313 return (TARGET_64BIT ? 4 : 6);
31315 return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
31319 if (COMPLEX_MODE_P (mode))
31326 /* Return true if the registers in CLASS cannot represent the change from
31327 modes FROM to TO. */
/* NOTE(review): return-type line, braces and the final return are elided
   from this excerpt.  */
31330 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
31331 enum reg_class regclass)
31336 /* x87 registers can't do subreg at all, as all values are reformatted
31337 to extended precision. */
31338 if (MAYBE_FLOAT_CLASS_P (regclass))
31341 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
31343 /* Vector registers do not support QI or HImode loads. If we don't
31344 disallow a change to these modes, reload will assume it's ok to
31345 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
31346 the vec_dupv4hi pattern. */
31347 if (GET_MODE_SIZE (from) < 4)
31350 /* Vector registers do not support subreg with nonzero offsets, which
31351 are otherwise valid for integer registers. Since we can't see
31352 whether we have a nonzero offset from here, prohibit all
31353 nonparadoxical subregs changing size. */
31354 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
31361 /* Return the cost of moving data of mode M between a
31362 register and memory. A value of 2 is the default; this cost is
31363 relative to those in `REGISTER_MOVE_COST'.
31365 This function is used extensively by register_move_cost that is used to
31366 build tables at startup. Make it inline in this case.
31367 When IN is 2, return maximum of in and out move cost.
31369 If moving between registers and memory is more expensive than
31370 between two registers, you should define this macro to express the
31373 Model also increased moving costs of QImode registers in non
/* NOTE(review): the index computations selecting entries of the per-class
   cost tables are elided from this excerpt, as are several braces.  */
31377 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
31381 if (FLOAT_CLASS_P (regclass))
/* IN == 2: symmetric cost, the max of load and store for this mode.  */
31399 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
31400 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
31402 if (SSE_CLASS_P (regclass))
31405 switch (GET_MODE_SIZE (mode))
31420 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
31421 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
31423 if (MMX_CLASS_P (regclass))
31426 switch (GET_MODE_SIZE (mode))
31438 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
31439 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
/* Integer classes: cost depends on operand size.  */
31441 switch (GET_MODE_SIZE (mode))
31444 if (Q_CLASS_P (regclass) || TARGET_64BIT)
31447 return ix86_cost->int_store[0];
/* Loading a byte into a non-Q register may use movzbl to avoid a
   partial-register stall on targets where that matters.  */
31448 if (TARGET_PARTIAL_REG_DEPENDENCY
31449 && optimize_function_for_speed_p (cfun))
31450 cost = ix86_cost->movzbl_load;
31452 cost = ix86_cost->int_load[0];
31454 return MAX (cost, ix86_cost->int_store[0]);
/* Non-Q-class byte store goes through a shuffle; +4 models that.  */
31460 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
31462 return ix86_cost->movzbl_load;
31464 return ix86_cost->int_store[0] + 4;
31469 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
31470 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
31472 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
31473 if (mode == TFmode)
31476 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
31478 cost = ix86_cost->int_load[2];
31480 cost = ix86_cost->int_store[2];
31481 return (cost * (((int) GET_MODE_SIZE (mode)
31482 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
/* Implement TARGET_MEMORY_MOVE_COST: thin wrapper that maps the bool-like
   IN argument onto inline_memory_move_cost's 0/1 convention (never 2 here).
   NOTE(review): return type, the IN parameter line and braces are elided.  */
31487 ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
31490 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
31494 /* Return the cost of moving data from a register in class CLASS1 to
31495 one in class CLASS2.
31497 It is not required that the cost always equal 2 when FROM is the same as TO;
31498 on some machines it is expensive to move between registers if they are not
31499 general registers. */
/* Implement TARGET_REGISTER_MOVE_COST.  NOTE(review): return type, braces
   and a few statements (the cost accumulator declaration, final return of
   the secondary-memory branch) are elided from this excerpt.  */
31502 ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
31503 reg_class_t class2_i)
31505 enum reg_class class1 = (enum reg_class) class1_i;
31506 enum reg_class class2 = (enum reg_class) class2_i;
31508 /* In case we require secondary memory, compute cost of the store followed
31509 by load. In order to avoid bad register allocation choices, we need
31510 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
31512 if (inline_secondary_memory_needed (class1, class2, mode, 0))
/* IN == 2 asks for the max of load and store cost on each side.  */
31516 cost += inline_memory_move_cost (mode, class1, 2);
31517 cost += inline_memory_move_cost (mode, class2, 2);
31519 /* In case of copying from general_purpose_register we may emit multiple
31520 stores followed by single load causing memory size mismatch stall.
31521 Count this as arbitrarily high cost of 20. */
31522 if (targetm.class_max_nregs (class1, mode)
31523 > targetm.class_max_nregs (class2, mode))
31526 /* In the case of FP/MMX moves, the registers actually overlap, and we
31527 have to switch modes in order to treat them differently. */
31528 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
31529 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
31535 /* Moves between SSE/MMX and integer unit are expensive. */
31536 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
31537 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
31539 /* ??? By keeping returned value relatively high, we limit the number
31540 of moves between integer and MMX/SSE registers for all targets.
31541 Additionally, high value prevents problem with x86_modes_tieable_p(),
31542 where integer modes in MMX/SSE registers are not tieable
31543 because of missing QImode and HImode moves to, from or between
31544 MMX/SSE registers. */
31545 return MAX (8, ix86_cost->mmxsse_to_integer);
31547 if (MAYBE_FLOAT_CLASS_P (class1))
31548 return ix86_cost->fp_move;
31549 if (MAYBE_SSE_CLASS_P (class1))
31550 return ix86_cost->sse_move;
31551 if (MAYBE_MMX_CLASS_P (class1))
31552 return ix86_cost->mmx_move;
31556 /* Return TRUE if hard register REGNO can hold a value of machine-mode
/* NOTE(review): the MODE line of this comment, return type, braces and
   several return statements are elided from this excerpt.  */
31560 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
31562 /* Flags and only flags can only hold CCmode values. */
31563 if (CC_REGNO_P (regno))
31564 return GET_MODE_CLASS (mode) == MODE_CC;
31565 if (GET_MODE_CLASS (mode) == MODE_CC
31566 || GET_MODE_CLASS (mode) == MODE_RANDOM
31567 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
31569 if (FP_REGNO_P (regno))
31570 return VALID_FP_MODE_P (mode);
31571 if (SSE_REGNO_P (regno))
31573 /* We implement the move patterns for all vector modes into and
31574 out of SSE registers, even when no operation instructions
31575 are available. OImode move is available only when AVX is
31577 return ((TARGET_AVX && mode == OImode)
31578 || VALID_AVX256_REG_MODE (mode)
31579 || VALID_SSE_REG_MODE (mode)
31580 || VALID_SSE2_REG_MODE (mode)
31581 || VALID_MMX_REG_MODE (mode)
31582 || VALID_MMX_REG_MODE_3DNOW (mode));
31584 if (MMX_REGNO_P (regno))
31586 /* We implement the move patterns for 3DNOW modes even in MMX mode,
31587 so if the register is available at all, then we can move data of
31588 the given mode into or out of it. */
31589 return (VALID_MMX_REG_MODE (mode)
31590 || VALID_MMX_REG_MODE_3DNOW (mode));
31593 if (mode == QImode)
31595 /* Take care for QImode values - they can be in non-QI regs,
31596 but then they do cause partial register stalls. */
31597 if (regno <= BX_REG || TARGET_64BIT)
31599 if (!TARGET_PARTIAL_REG_STALL)
31601 return !can_create_pseudo_p ();
31603 /* We handle both integer and floats in the general purpose registers. */
31604 else if (VALID_INT_MODE_P (mode))
31606 else if (VALID_FP_MODE_P (mode))
31608 else if (VALID_DFP_MODE_P (mode))
31610 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
31611 on to use that value in smaller contexts, this can easily force a
31612 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
31613 supporting DImode, allow it. */
31614 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
31620 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
31621 tieable integer mode. */
/* NOTE(review): the switch statement and its case labels are elided; only
   two of the per-mode returns survive in this excerpt.  */
31624 ix86_tieable_integer_mode_p (enum machine_mode mode)
31633 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
31636 return TARGET_64BIT;
31643 /* Return true if MODE1 is accessible in a register that can hold MODE2
31644 without copying. That is, all register classes that can hold MODE2
31645 can also hold MODE1. */
/* NOTE(review): return type, braces and a few returns are elided.  */
31648 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
31650 if (mode1 == mode2)
31653 if (ix86_tieable_integer_mode_p (mode1)
31654 && ix86_tieable_integer_mode_p (mode2))
31657 /* MODE2 being XFmode implies fp stack or general regs, which means we
31658 can tie any smaller floating point modes to it. Note that we do not
31659 tie this with TFmode. */
31660 if (mode2 == XFmode)
31661 return mode1 == SFmode || mode1 == DFmode;
31663 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
31664 that we can tie it with SFmode. */
31665 if (mode2 == DFmode)
31666 return mode1 == SFmode;
31668 /* If MODE2 is only appropriate for an SSE register, then tie with
31669 any other mode acceptable to SSE registers. */
31670 if (GET_MODE_SIZE (mode2) == 16
31671 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
31672 return (GET_MODE_SIZE (mode1) == 16
31673 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
31675 /* If MODE2 is appropriate for an MMX register, then tie
31676 with any other mode acceptable to MMX registers. */
31677 if (GET_MODE_SIZE (mode2) == 8
31678 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
31679 return (GET_MODE_SIZE (mode1) == 8
31680 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
31685 /* Compute a (partial) cost for rtx X. Return true if the complete
31686 cost has been computed, and false if subexpressions should be
31687 scanned. In either case, *TOTAL contains the cost result. */
/* Implement TARGET_RTX_COSTS.  One giant switch on the rtx code of X.
   NOTE(review): this excerpt elides the switch head, most case labels,
   many break/return statements and braces — only the per-case cost logic
   is visible.  */
31690 ix86_rtx_costs (rtx x, int code, int outer_code_i, int opno, int *total,
31693 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
31694 enum machine_mode mode = GET_MODE (x);
31695 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
/* Constant operands: immediates that don't fit the 64-bit encodings, and
   PIC symbolic constants, cost extra.  */
31703 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
31705 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
31707 else if (flag_pic && SYMBOLIC_CONST (x)
/* NOTE(review): the '!' before GET_CODE looks like a stray typo —
   '!GET_CODE (x)' is an int negation, so this subterm is almost always
   true regardless of LABEL_REF.  Upstream GCC later removed the '!'.  */
31709 || (!GET_CODE (x) != LABEL_REF
31710 && (GET_CODE (x) != SYMBOL_REF
31711 || !SYMBOL_REF_LOCAL_P (x)))))
31718 if (mode == VOIDmode)
31721 switch (standard_80387_constant_p (x))
31726 default: /* Other constants */
31731 /* Start with (MEM (SYMBOL_REF)), since that's where
31732 it'll probably end up. Add a penalty for size. */
31733 *total = (COSTS_N_INSNS (1)
31734 + (flag_pic != 0 && !TARGET_64BIT)
31735 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
31741 /* The zero extensions is often completely free on x86_64, so make
31742 it as cheap as possible. */
31743 if (TARGET_64BIT && mode == DImode
31744 && GET_MODE (XEXP (x, 0)) == SImode)
31746 else if (TARGET_ZERO_EXTEND_WITH_AND)
31747 *total = cost->add;
31749 *total = cost->movzx;
31753 *total = cost->movsx;
/* Shift by a constant: small left shifts may be done as LEA.  */
31757 if (CONST_INT_P (XEXP (x, 1))
31758 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
31760 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
31763 *total = cost->add;
31766 if ((value == 2 || value == 3)
31767 && cost->lea <= cost->shift_const)
31769 *total = cost->lea;
/* DImode shifts on 32-bit: a register pair is shifted, doubling cost.  */
31779 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
31781 if (CONST_INT_P (XEXP (x, 1)))
31783 if (INTVAL (XEXP (x, 1)) > 32)
31784 *total = cost->shift_const + COSTS_N_INSNS (2);
31786 *total = cost->shift_const * 2;
31790 if (GET_CODE (XEXP (x, 1)) == AND)
31791 *total = cost->shift_var * 2;
31793 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
31798 if (CONST_INT_P (XEXP (x, 1)))
31799 *total = cost->shift_const;
31801 *total = cost->shift_var;
/* FMA: costed like a multiply, with free negation of op0/op2.  */
31809 gcc_assert (FLOAT_MODE_P (mode));
31810 gcc_assert (TARGET_FMA || TARGET_FMA4);
31812 /* ??? SSE scalar/vector cost should be used here. */
31813 /* ??? Bald assumption that fma has the same cost as fmul. */
31814 *total = cost->fmul;
31815 *total += rtx_cost (XEXP (x, 1), FMA, 1, speed);
31817 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
31819 if (GET_CODE (sub) == NEG)
31820 sub = XEXP (sub, 0);
31821 *total += rtx_cost (sub, FMA, 0, speed);
31824 if (GET_CODE (sub) == NEG)
31825 sub = XEXP (sub, 0);
31826 *total += rtx_cost (sub, FMA, 2, speed);
/* MULT: FP multiplies use the fmul cost; integer multiplies below.  */
31831 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
31833 /* ??? SSE scalar cost should be used here. */
31834 *total = cost->fmul;
31837 else if (X87_FLOAT_MODE_P (mode))
31839 *total = cost->fmul;
31842 else if (FLOAT_MODE_P (mode))
31844 /* ??? SSE vector cost should be used here. */
31845 *total = cost->fmul;
31850 rtx op0 = XEXP (x, 0);
31851 rtx op1 = XEXP (x, 1);
/* Integer multiply by constant: cost scales with popcount of the
   multiplier (nbits set bits).  */
31853 if (CONST_INT_P (XEXP (x, 1)))
31855 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
31856 for (nbits = 0; value != 0; value &= value - 1)
31860 /* This is arbitrary. */
31863 /* Compute costs correctly for widening multiplication. */
31864 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
31865 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
31866 == GET_MODE_SIZE (mode))
31868 int is_mulwiden = 0;
31869 enum machine_mode inner_mode = GET_MODE (op0);
31871 if (GET_CODE (op0) == GET_CODE (op1))
31872 is_mulwiden = 1, op1 = XEXP (op1, 0);
31873 else if (CONST_INT_P (op1))
31875 if (GET_CODE (op0) == SIGN_EXTEND)
31876 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
31879 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
31883 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
31886 *total = (cost->mult_init[MODE_INDEX (mode)]
31887 + nbits * cost->mult_bit
31888 + rtx_cost (op0, outer_code, opno, speed)
31889 + rtx_cost (op1, outer_code, opno, speed));
/* DIV/MOD family.  */
31898 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
31899 /* ??? SSE cost should be used here. */
31900 *total = cost->fdiv;
31901 else if (X87_FLOAT_MODE_P (mode))
31902 *total = cost->fdiv;
31903 else if (FLOAT_MODE_P (mode))
31904 /* ??? SSE vector cost should be used here. */
31905 *total = cost->fdiv;
31907 *total = cost->divide[MODE_INDEX (mode)];
/* PLUS: recognize the address forms LEA can encode and cost them as LEA
   plus the costs of the component subexpressions.  */
31911 if (GET_MODE_CLASS (mode) == MODE_INT
31912 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
31914 if (GET_CODE (XEXP (x, 0)) == PLUS
31915 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
31916 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
31917 && CONSTANT_P (XEXP (x, 1)))
31919 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
31920 if (val == 2 || val == 4 || val == 8)
31922 *total = cost->lea;
31923 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
31924 outer_code, opno, speed);
31925 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
31926 outer_code, opno, speed);
31927 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
31931 else if (GET_CODE (XEXP (x, 0)) == MULT
31932 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
31934 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
31935 if (val == 2 || val == 4 || val == 8)
31937 *total = cost->lea;
31938 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
31939 outer_code, opno, speed);
31940 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
31944 else if (GET_CODE (XEXP (x, 0)) == PLUS)
31946 *total = cost->lea;
31947 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
31948 outer_code, opno, speed);
31949 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
31950 outer_code, opno, speed);
31951 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
/* PLUS/MINUS: FP adds use fadd cost.  */
31958 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
31960 /* ??? SSE cost should be used here. */
31961 *total = cost->fadd;
31964 else if (X87_FLOAT_MODE_P (mode))
31966 *total = cost->fadd;
31969 else if (FLOAT_MODE_P (mode))
31971 /* ??? SSE vector cost should be used here. */
31972 *total = cost->fadd;
/* AND/IOR/XOR on 32-bit DImode: two word-size ops; subexpressions not
   already DImode are counted twice (the << by a 0/1 boolean).  */
31980 if (!TARGET_64BIT && mode == DImode)
31982 *total = (cost->add * 2
31983 + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
31984 << (GET_MODE (XEXP (x, 0)) != DImode))
31985 + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
31986 << (GET_MODE (XEXP (x, 1)) != DImode)));
/* NEG: FP negation uses fchs cost.  */
31992 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
31994 /* ??? SSE cost should be used here. */
31995 *total = cost->fchs;
31998 else if (X87_FLOAT_MODE_P (mode))
32000 *total = cost->fchs;
32003 else if (FLOAT_MODE_P (mode))
32005 /* ??? SSE vector cost should be used here. */
32006 *total = cost->fchs;
32012 if (!TARGET_64BIT && mode == DImode)
32013 *total = cost->add * 2;
32015 *total = cost->add;
/* COMPARE of a single extracted bit against zero.  */
32019 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
32020 && XEXP (XEXP (x, 0), 1) == const1_rtx
32021 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
32022 && XEXP (x, 1) == const0_rtx)
32024 /* This kind of construct is implemented using test[bwl].
32025 Treat it as if we had an AND. */
32026 *total = (cost->add
32027 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
32028 + rtx_cost (const1_rtx, outer_code, opno, speed));
32034 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
/* ABS.  */
32039 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
32040 /* ??? SSE cost should be used here. */
32041 *total = cost->fabs;
32042 else if (X87_FLOAT_MODE_P (mode))
32043 *total = cost->fabs;
32044 else if (FLOAT_MODE_P (mode))
32045 /* ??? SSE vector cost should be used here. */
32046 *total = cost->fabs;
/* SQRT.  */
32050 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
32051 /* ??? SSE cost should be used here. */
32052 *total = cost->fsqrt;
32053 else if (X87_FLOAT_MODE_P (mode))
32054 *total = cost->fsqrt;
32055 else if (FLOAT_MODE_P (mode))
32056 /* ??? SSE vector cost should be used here. */
32057 *total = cost->fsqrt;
32061 if (XINT (x, 1) == UNSPEC_TP)
32068 case VEC_DUPLICATE:
32069 /* ??? Assume all of these vector manipulation patterns are
32070 recognizable. In which case they all pretty much have the
32072 *total = COSTS_N_INSNS (1);
/* Monotonically increasing counter used to give each Darwin stub a unique
   local label ("L<n>$lz" etc.).  */
32082 static int current_machopic_label_num;
32084 /* Given a symbol name and its associated stub, write out the
32085 definition of the stub. */
/* Darwin/Mach-O only (guarded by TARGET_MACHO, see the #endif below);
   emits the assembly for a lazy-binding symbol stub in one of three
   flavors: AT&T self-modifying, pure-PIC, or non-PIC.
   NOTE(review): braces and a handful of lines are elided in this excerpt.  */
32088 machopic_output_stub (FILE *file, const char *symb, const char *stub)
32090 unsigned int length;
32091 char *binder_name, *symbol_name, lazy_ptr_name[32];
32092 int label = ++current_machopic_label_num;
32094 /* For 64-bit we shouldn't get here. */
32095 gcc_assert (!TARGET_64BIT);
32097 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
32098 symb = targetm.strip_name_encoding (symb);
32100 length = strlen (stub);
32101 binder_name = XALLOCAVEC (char, length + 32);
32102 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
32104 length = strlen (symb);
32105 symbol_name = XALLOCAVEC (char, length + 32);
32106 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
32108 sprintf (lazy_ptr_name, "L%d$lz", label);
/* Pick the section matching the stub flavor.  */
32110 if (MACHOPIC_ATT_STUB)
32111 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
32112 else if (MACHOPIC_PURE)
32113 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
32115 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
32117 fprintf (file, "%s:\n", stub);
32118 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
32120 if (MACHOPIC_ATT_STUB)
/* AT&T stub body: dyld overwrites these hlt bytes at bind time.  */
32122 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
32124 else if (MACHOPIC_PURE)
32127 /* 25-byte PIC stub using "CALL get_pc_thunk". */
32128 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
32129 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
32130 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
32131 label, lazy_ptr_name, label);
32132 fprintf (file, "\tjmp\t*%%ecx\n");
32135 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
32137 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
32138 it needs no stub-binding-helper. */
32139 if (MACHOPIC_ATT_STUB)
32142 fprintf (file, "%s:\n", binder_name);
32146 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
32147 fprintf (file, "\tpushl\t%%ecx\n");
32150 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
32152 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
32154 /* N.B. Keep the correspondence of these
32155 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
32156 old-pic/new-pic/non-pic stubs; altering this will break
32157 compatibility with existing dylibs. */
32160 /* 25-byte PIC stub using "CALL get_pc_thunk". */
32161 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
32164 /* 16-byte -mdynamic-no-pic stub. */
32165 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
32167 fprintf (file, "%s:\n", lazy_ptr_name);
32168 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
32169 fprintf (file, ASM_LONG "%s\n", binder_name);
32171 #endif /* TARGET_MACHO */
32173 /* Order the registers for register allocator. */
/* Fills reg_alloc_order[]: call-clobbered GPRs first, then call-saved
   GPRs, then whichever FP bank (x87 vs SSE) is preferred for math, then
   the other bank, then MMX, then zero-padding for unallocated registers.
   NOTE(review): braces and the declarations of i/pos are elided.  */
32176 x86_order_regs_for_local_alloc (void)
32181 /* First allocate the local general purpose registers. */
32182 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
32183 if (GENERAL_REGNO_P (i) && call_used_regs[i])
32184 reg_alloc_order [pos++] = i;
32186 /* Global general purpose registers. */
32187 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
32188 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
32189 reg_alloc_order [pos++] = i;
32191 /* x87 registers come first in case we are doing FP math
32193 if (!TARGET_SSE_MATH)
32194 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
32195 reg_alloc_order [pos++] = i;
32197 /* SSE registers. */
32198 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
32199 reg_alloc_order [pos++] = i;
32200 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
32201 reg_alloc_order [pos++] = i;
32203 /* x87 registers. */
32204 if (TARGET_SSE_MATH)
32205 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
32206 reg_alloc_order [pos++] = i;
32208 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
32209 reg_alloc_order [pos++] = i;
32211 /* Initialize the rest of array as we do not allocate some registers
32213 while (pos < FIRST_PSEUDO_REGISTER)
32214 reg_alloc_order [pos++] = 0;
32217 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
32218 in struct attribute_spec handler. */
/* Validates that the attribute is on a function type, that the target is
   32-bit, and that the single argument is the integer constant 0 or 1;
   on any violation warns and sets *NO_ADD_ATTRS.
   NOTE(review): return type, braces and returns are elided here.  */
32220 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
32222 int flags ATTRIBUTE_UNUSED,
32223 bool *no_add_attrs)
32225 if (TREE_CODE (*node) != FUNCTION_TYPE
32226 && TREE_CODE (*node) != METHOD_TYPE
32227 && TREE_CODE (*node) != FIELD_DECL
32228 && TREE_CODE (*node) != TYPE_DECL)
32230 warning (OPT_Wattributes, "%qE attribute only applies to functions",
32232 *no_add_attrs = true;
32237 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
32239 *no_add_attrs = true;
32242 if (is_attribute_p ("callee_pop_aggregate_return", name))
32246 cst = TREE_VALUE (args);
32247 if (TREE_CODE (cst) != INTEGER_CST)
32249 warning (OPT_Wattributes,
32250 "%qE attribute requires an integer constant argument",
32252 *no_add_attrs = true;
32254 else if (compare_tree_int (cst, 0) != 0
32255 && compare_tree_int (cst, 1) != 0)
32257 warning (OPT_Wattributes,
32258 "argument to %qE attribute is neither zero, nor one",
32260 *no_add_attrs = true;
32269 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
32270 struct attribute_spec.handler. */
/* Rejects non-function targets and the mutually exclusive combination of
   ms_abi with sysv_abi (in either order).
   NOTE(review): return type, braces and returns are elided here.  */
32272 ix86_handle_abi_attribute (tree *node, tree name,
32273 tree args ATTRIBUTE_UNUSED,
32274 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
32276 if (TREE_CODE (*node) != FUNCTION_TYPE
32277 && TREE_CODE (*node) != METHOD_TYPE
32278 && TREE_CODE (*node) != FIELD_DECL
32279 && TREE_CODE (*node) != TYPE_DECL)
32281 warning (OPT_Wattributes, "%qE attribute only applies to functions",
32283 *no_add_attrs = true;
32287 /* Can combine regparm with all attributes but fastcall. */
32288 if (is_attribute_p ("ms_abi", name))
32290 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
32292 error ("ms_abi and sysv_abi attributes are not compatible");
32297 else if (is_attribute_p ("sysv_abi", name))
32299 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
32301 error ("ms_abi and sysv_abi attributes are not compatible");
32310 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
32311 struct attribute_spec.handler. */
/* Accepts the attribute only on RECORD/UNION types (resolving a TYPE_DECL
   to its type first) and rejects combining ms_struct with gcc_struct.
   NOTE(review): return type, braces and returns are elided here.  */
32313 ix86_handle_struct_attribute (tree *node, tree name,
32314 tree args ATTRIBUTE_UNUSED,
32315 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
32318 if (DECL_P (*node))
32320 if (TREE_CODE (*node) == TYPE_DECL)
32321 type = &TREE_TYPE (*node);
32326 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
32327 || TREE_CODE (*type) == UNION_TYPE)))
32329 warning (OPT_Wattributes, "%qE attribute ignored",
32331 *no_add_attrs = true;
32334 else if ((is_attribute_p ("ms_struct", name)
32335 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
32336 || ((is_attribute_p ("gcc_struct", name)
32337 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
32339 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
32341 *no_add_attrs = true;
/* Attribute handler for attributes valid only on function declarations;
   warns and drops the attribute otherwise.  (Some lines elided here.)  */
32348 ix86_handle_fndecl_attribute (tree *node, tree name,
32349 tree args ATTRIBUTE_UNUSED,
32350 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
32352 if (TREE_CODE (*node) != FUNCTION_DECL)
32354 warning (OPT_Wattributes, "%qE attribute only applies to functions",
32356 *no_add_attrs = true;
/* Return true if RECORD_TYPE should use MS bitfield layout rules: either
   the target default is MS layout and gcc_struct does not override it, or
   ms_struct explicitly requests it.  */
32362 ix86_ms_bitfield_layout_p (const_tree record_type)
32364 return ((TARGET_MS_BITFIELD_LAYOUT
32365 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
32366 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
32369 /* Returns an expression indicating where the this parameter is
32370 located on entry to the FUNCTION. */
/* NOTE(review): several lines (64-bit guard, regno assignments) are elided
   from this excerpt.  */
32373 x86_this_parameter (tree function)
32375 tree type = TREE_TYPE (function);
/* AGGR selects the second parameter register/slot when the function
   returns an aggregate via hidden pointer (that pointer comes first).  */
32376 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
32381 const int *parm_regs;
/* 64-bit path: 'this' is always in an integer parameter register; pick
   the register table matching the function's ABI.  */
32383 if (ix86_function_type_abi (type) == MS_ABI)
32384 parm_regs = x86_64_ms_abi_int_parameter_registers;
32386 parm_regs = x86_64_int_parameter_registers;
32387 return gen_rtx_REG (DImode, parm_regs[aggr]);
/* 32-bit path: only regparm/fastcall/thiscall put 'this' in a register.  */
32390 nregs = ix86_function_regparm (type, function);
32392 if (nregs > 0 && !stdarg_p (type))
32395 unsigned int ccvt = ix86_get_callcvt (type);
32397 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
32398 regno = aggr ? DX_REG : CX_REG;
32399 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
/* Fall back to the stack slot just above the return address.  */
32403 return gen_rtx_MEM (SImode,
32404 plus_constant (stack_pointer_rtx, 4));
32413 return gen_rtx_MEM (SImode,
32414 plus_constant (stack_pointer_rtx, 4));
32417 return gen_rtx_REG (SImode, regno);
/* Default: 'this' is on the stack, one slot later if a hidden aggregate
   return pointer precedes it.  */
32420 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
32423 /* Determine whether x86_output_mi_thunk can succeed. */
/* Returns true/false; some return statements are elided in this excerpt.  */
32426 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
32427 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
32428 HOST_WIDE_INT vcall_offset, const_tree function)
32430 /* 64-bit can handle anything. */
32434 /* For 32-bit, everything's fine if we have one free register. */
32435 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
32438 /* Need a free register for vcall_offset. */
32442 /* Need a free register for GOT references. */
32443 if (flag_pic && !targetm.binds_local_p (function))
32446 /* Otherwise ok. */
32450 /* Output the assembler code for a thunk function. THUNK_DECL is the
32451 declaration for the thunk function itself, FUNCTION is the decl for
32452 the target function. DELTA is an immediate constant offset to be
32453 added to THIS. If VCALL_OFFSET is nonzero, the word at
32454 *(*this + vcall_offset) should be added to THIS. */
/* NOTE(review): this excerpt elides a number of original lines (braces,
   some conditions and TARGET_64BIT guards).  */
32457 x86_output_mi_thunk (FILE *file,
32458 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
32459 HOST_WIDE_INT vcall_offset, tree function)
32461 rtx this_param = x86_this_parameter (function);
32462 rtx this_reg, tmp, fnaddr;
32464 emit_note (NOTE_INSN_PROLOGUE_END);
32466 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
32467 pull it in now and let DELTA benefit. */
32468 if (REG_P (this_param))
32469 this_reg = this_param;
32470 else if (vcall_offset)
32472 /* Put the this parameter into %eax. */
32473 this_reg = gen_rtx_REG (Pmode, AX_REG);
32474 emit_move_insn (this_reg, this_param);
32477 this_reg = NULL_RTX;
32479 /* Adjust the this parameter by a fixed constant. */
32482 rtx delta_rtx = GEN_INT (delta);
32483 rtx delta_dst = this_reg ? this_reg : this_param;
/* On x86_64 an immediate wider than 32 bits must be materialized in a
   scratch register (r10, which is call-clobbered and not a parameter reg
   in the SysV ABI) before it can be added.  */
32487 if (!x86_64_general_operand (delta_rtx, Pmode))
32489 tmp = gen_rtx_REG (Pmode, R10_REG);
32490 emit_move_insn (tmp, delta_rtx);
32495 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
32498 /* Adjust the this parameter by a value stored in the vtable. */
32501 rtx vcall_addr, vcall_mem, this_mem;
32502 unsigned int tmp_regno;
/* Choose a scratch register that does not clash with the registers the
   calling convention may already use for 'this'.  */
32505 tmp_regno = R10_REG;
32508 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
32509 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) != 0)
32510 tmp_regno = AX_REG;
32512 tmp_regno = CX_REG;
32514 tmp = gen_rtx_REG (Pmode, tmp_regno);
/* Load the vtable pointer: *this, zero-extended if pointers are narrower
   than Pmode (x32).  */
32516 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
32517 if (Pmode != ptr_mode)
32518 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
32519 emit_move_insn (tmp, this_mem);
32521 /* Adjust the this parameter. */
32522 vcall_addr = plus_constant (tmp, vcall_offset);
/* If vtable+offset is not a legitimate address, build it explicitly in
   r11.  */
32524 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
32526 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
32527 emit_move_insn (tmp2, GEN_INT (vcall_offset));
32528 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
32531 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
32532 if (Pmode != ptr_mode)
32533 emit_insn (gen_addsi_1_zext (this_reg,
32534 gen_rtx_REG (ptr_mode,
32538 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
32541 /* If necessary, drop THIS back to its stack slot. */
32542 if (this_reg && this_reg != this_param)
32543 emit_move_insn (this_param, this_reg);
/* Compute the address to tail-jump to.  Local/non-PIC targets can be
   reached directly; otherwise go through the GOT.  */
32545 fnaddr = XEXP (DECL_RTL (function), 0);
32548 if (!flag_pic || targetm.binds_local_p (function)
32549 || cfun->machine->call_abi == MS_ABI)
32553 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
32554 tmp = gen_rtx_CONST (Pmode, tmp);
32555 fnaddr = gen_rtx_MEM (Pmode, tmp);
32560 if (!flag_pic || targetm.binds_local_p (function))
32563 else if (TARGET_MACHO)
32565 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
32566 fnaddr = XEXP (fnaddr, 0);
32568 #endif /* TARGET_MACHO */
/* 32-bit PIC: set up the GOT pointer in %ecx (free here) and form a
   GOT-relative address.  */
32571 tmp = gen_rtx_REG (Pmode, CX_REG);
32572 output_set_got (tmp, NULL_RTX);
32574 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
32575 fnaddr = gen_rtx_PLUS (Pmode, fnaddr, tmp);
32576 fnaddr = gen_rtx_MEM (Pmode, fnaddr);
32580 /* Our sibling call patterns do not allow memories, because we have no
32581 predicate that can distinguish between frame and non-frame memory.
32582 For our purposes here, we can get away with (ab)using a jump pattern,
32583 because we're going to do no optimization. */
32584 if (MEM_P (fnaddr))
32585 emit_jump_insn (gen_indirect_jump (fnaddr));
32588 tmp = gen_rtx_MEM (QImode, fnaddr);
32589 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
32590 tmp = emit_call_insn (tmp);
32591 SIBLING_CALL_P (tmp) = 1;
32595 /* Emit just enough of rest_of_compilation to get the insns emitted.
32596 Note that use_thunk calls assemble_start_function et al. */
32597 tmp = get_insns ();
32598 insn_locators_alloc ();
32599 shorten_branches (tmp);
32600 final_start_function (tmp, file, 1);
32601 final (tmp, file, 1);
32602 final_end_function ();
/* Emit target-specific directives at the start of the assembly file.  */
32606 x86_file_start (void)
32608 default_file_start ();
32610 darwin_file_start ();
32612 if (X86_FILE_START_VERSION_DIRECTIVE)
32613 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
32614 if (X86_FILE_START_FLTUSED)
32615 fputs ("\t.global\t__fltused\n", asm_out_file);
/* Switch the whole file to Intel syntax if requested on the command
   line.  */
32616 if (ix86_asm_dialect == ASM_INTEL)
32617 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
/* Implement ADJUST_FIELD_ALIGN: cap the alignment of FIELD at 32 bits for
   double/complex/integer-class members on 32-bit targets without
   -malign-double (i386 psABI compatibility).  (Some lines elided.)  */
32621 x86_field_alignment (tree field, int computed)
32623 enum machine_mode mode;
32624 tree type = TREE_TYPE (field);
32626 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
/* Arrays align like their element type for this purpose.  */
32628 mode = TYPE_MODE (strip_array_types (type));
32629 if (mode == DFmode || mode == DCmode
32630 || GET_MODE_CLASS (mode) == MODE_INT
32631 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
32632 return MIN (32, computed);
32636 /* Output assembler code to FILE to increment profiler label # LABELNO
32637 for profiling a function entry. */
/* NOTE(review): the 64-bit/32-bit dispatch lines are elided here.  */
32639 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
/* -mfentry calls the profiler before the prologue, which needs a
   different mcount entry point.  */
32641 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
32646 #ifndef NO_PROFILE_COUNTERS
32647 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
32650 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
32651 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
32653 fprintf (file, "\tcall\t%s\n", mcount_name);
/* 32-bit PIC: go through the GOT via %ebx.  */
32657 #ifndef NO_PROFILE_COUNTERS
32658 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
32661 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
32665 #ifndef NO_PROFILE_COUNTERS
32666 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
32669 fprintf (file, "\tcall\t%s\n", mcount_name);
32673 /* We don't have exact information about the insn sizes, but we may assume
32674 quite safely that we are informed about all 1 byte insns and memory
32675 address sizes. This is enough to eliminate unnecessary padding in
/* Return a conservative lower bound (in bytes) for the encoded size of
   INSN.  (Several return statements are elided in this excerpt.)  */
32679 min_insn_size (rtx insn)
32683 if (!INSN_P (insn) || !active_insn_p (insn))
32686 /* Discard alignments we've emit and jump instructions. */
32687 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
32688 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
32690 if (JUMP_TABLE_DATA_P (insn))
32693 /* Important case - calls are always 5 bytes.
32694 It is common to have many calls in the row. */
32696 && symbolic_reference_mentioned_p (PATTERN (insn))
32697 && !SIBLING_CALL_P (insn))
32699 len = get_attr_length (insn);
32703 /* For normal instructions we rely on get_attr_length being exact,
32704 with a few exceptions. */
32705 if (!JUMP_P (insn))
32707 enum attr_type type = get_attr_type (insn);
/* Inline asm length is only an estimate, so it cannot be trusted as a
   lower bound.  */
32712 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
32713 || asm_noperands (PATTERN (insn)) >= 0)
32720 /* Otherwise trust get_attr_length. */
/* For jumps, use the address length plus the symbolic-operand minimum.  */
32724 l = get_attr_length_address (insn);
32725 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
32734 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
32736 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
/* Insert padding so that no 16-byte window contains 4 or more jumps.
   NOTE(review): a number of original lines are elided in this excerpt.  */
32740 ix86_avoid_jump_mispredicts (void)
32742 rtx insn, start = get_insns ();
32743 int nbytes = 0, njumps = 0;
32746 /* Look for all minimal intervals of instructions containing 4 jumps.
32747 The intervals are bounded by START and INSN. NBYTES is the total
32748 size of instructions in the interval including INSN and not including
32749 START. When the NBYTES is smaller than 16 bytes, it is possible
32750 that the end of START and INSN ends up in the same 16byte page.
32752 The smallest offset in the page INSN can start is the case where START
32753 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
32754 We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
32756 for (insn = start; insn; insn = NEXT_INSN (insn))
32760 if (LABEL_P (insn))
32762 int align = label_to_alignment (insn);
32763 int max_skip = label_to_max_skip (insn);
32767 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
32768 already in the current 16 byte page, because otherwise
32769 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
32770 bytes to reach 16 byte boundary. */
32772 || (align <= 3 && max_skip != (1 << align) - 1))
32775 fprintf (dump_file, "Label %i with max_skip %i\n",
32776 INSN_UID (insn), max_skip);
/* Shrink the window from the front until the alignment skip cannot put
   START and INSN into the same 16-byte page.  */
32779 while (nbytes + max_skip >= 16)
32781 start = NEXT_INSN (start);
32782 if ((JUMP_P (start)
32783 && GET_CODE (PATTERN (start)) != ADDR_VEC
32784 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
32786 njumps--, isjump = 1;
32789 nbytes -= min_insn_size (start);
32795 min_size = min_insn_size (insn);
32796 nbytes += min_size;
32798 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
32799 INSN_UID (insn), min_size);
/* Count real jumps only; jump tables (ADDR_VEC/ADDR_DIFF_VEC) are data.  */
32801 && GET_CODE (PATTERN (insn)) != ADDR_VEC
32802 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
/* Keep at most 4 jumps in the [START, INSN] interval by advancing
   START past the oldest jump.  */
32810 start = NEXT_INSN (start);
32811 if ((JUMP_P (start)
32812 && GET_CODE (PATTERN (start)) != ADDR_VEC
32813 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
32815 njumps--, isjump = 1;
32818 nbytes -= min_insn_size (start);
32820 gcc_assert (njumps >= 0);
32822 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
32823 INSN_UID (start), INSN_UID (insn), nbytes);
/* Four jumps would share one 16-byte page: pad before INSN so it starts
   in the next page.  */
32825 if (njumps == 3 && isjump && nbytes < 16)
32827 int padsize = 15 - nbytes + min_insn_size (insn);
32830 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
32831 INSN_UID (insn), padsize);
32832 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
32838 /* AMD Athlon works faster
32839 when RET is not destination of conditional jump or directly preceded
32840 by other jump instruction. We avoid the penalty by inserting NOP just
32841 before the RET instructions in such cases. */
/* NOTE(review): several original lines are elided in this excerpt.  */
32843 ix86_pad_returns (void)
/* Walk every predecessor of the exit block, i.e. every block ending in a
   return.  */
32848 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
32850 basic_block bb = e->src;
32851 rtx ret = BB_END (bb);
32853 bool replace = false;
32855 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
32856 || optimize_bb_for_size_p (bb))
/* Find the nearest active insn or label before the return.  */
32858 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
32859 if (active_insn_p (prev) || LABEL_P (prev))
/* A label directly before the return: pad if any non-fallthru edge can
   reach it (i.e. the return can be a jump target).  */
32861 if (prev && LABEL_P (prev))
32866 FOR_EACH_EDGE (e, ei, bb->preds)
32867 if (EDGE_FREQUENCY (e) && e->src->index >= 0
32868 && !(e->flags & EDGE_FALLTHRU))
32873 prev = prev_active_insn (ret);
32875 && ((JUMP_P (prev) && any_condjump_p (prev))
32878 /* Empty functions get branch mispredict even when
32879 the jump destination is not visible to us. */
32880 if (!prev && !optimize_function_for_size_p (cfun))
/* Replace the plain ret with the long-encoded return to break the
   misprediction pattern.  */
32885 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
32891 /* Count the minimum number of instructions in BB. Return 4 if the
32892 number of instructions >= 4. */
32895 ix86_count_insn_bb (basic_block bb)
32898 int insn_count = 0;
32900 /* Count number of instructions in this block. Return 4 if the number
32901 of instructions >= 4. */
32902 FOR_BB_INSNS (bb, insn)
32904 /* Only happen in exit blocks. */
/* A return insn terminates the count early (elided return here).  */
32906 && ANY_RETURN_P (PATTERN (insn)))
/* USE/CLOBBER markers generate no code, so skip them.  */
32909 if (NONDEBUG_INSN_P (insn)
32910 && GET_CODE (PATTERN (insn)) != USE
32911 && GET_CODE (PATTERN (insn)) != CLOBBER)
/* Saturate at 4; callers only care about "fewer than 4".  */
32914 if (insn_count >= 4)
32923 /* Count the minimum number of instructions in code path in BB.
32924 Return 4 if the number of instructions >= 4. */
32927 ix86_count_insn (basic_block bb)
32931 int min_prev_count;
32933 /* Only bother counting instructions along paths with no
32934 more than 2 basic blocks between entry and exit. Given
32935 that BB has an edge to exit, determine if a predecessor
32936 of BB has an edge from entry. If so, compute the number
32937 of instructions in the predecessor block. If there
32938 happen to be multiple such blocks, compute the minimum. */
32939 min_prev_count = 4;
32940 FOR_EACH_EDGE (e, ei, bb->preds)
32943 edge_iterator prev_ei;
/* BB is directly reachable from entry: no predecessor insns to count.  */
32945 if (e->src == ENTRY_BLOCK_PTR)
32947 min_prev_count = 0;
/* Otherwise look one level further back for a path entry -> pred -> BB.  */
32950 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
32952 if (prev_e->src == ENTRY_BLOCK_PTR)
32954 int count = ix86_count_insn_bb (e->src);
32955 if (count < min_prev_count)
32956 min_prev_count = count;
/* Add BB's own insns unless the path is already known to be long
   enough (>= 4).  */
32962 if (min_prev_count < 4)
32963 min_prev_count += ix86_count_insn_bb (bb);
32965 return min_prev_count;
32968 /* Pad short funtion to 4 instructions. */
/* Atom benefits from functions executing at least 4 instructions before
   returning; insert NOPs before the epilogue when shorter.  */
32971 ix86_pad_short_function (void)
32976 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
32978 rtx ret = BB_END (e->src);
32979 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
32981 int insn_count = ix86_count_insn (e->src);
32983 /* Pad short function. */
32984 if (insn_count < 4)
32988 /* Find epilogue. */
/* Walk back from the return to the NOTE_INSN_EPILOGUE_BEG marker so the
   NOPs land before the epilogue sequence.  */
32991 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
32992 insn = PREV_INSN (insn);
32997 /* Two NOPs count as one instruction. */
32998 insn_count = 2 * (4 - insn_count);
32999 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
33005 /* Implement machine specific optimizations. We implement padding of returns
33006 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
/* NOTE(review): the function definition line itself (presumably
   ix86_reorg) is elided from this excerpt.  */
33010 /* We are freeing block_for_insn in the toplev to keep compatibility
33011 with old MDEP_REORGS that are not CFG based. Recompute it now. */
33012 compute_bb_for_insn ();
33014 /* Run the vzeroupper optimization if needed. */
33015 if (TARGET_VZEROUPPER)
33016 move_or_delete_vzeroupper ();
/* The remaining passes only pay off when optimizing for speed.  */
33018 if (optimize && optimize_function_for_speed_p (cfun))
33020 if (TARGET_PAD_SHORT_FUNCTION)
33021 ix86_pad_short_function ();
33022 else if (TARGET_PAD_RETURNS)
33023 ix86_pad_returns ();
33024 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
33025 if (TARGET_FOUR_JUMP_LIMIT)
33026 ix86_avoid_jump_mispredicts ();
33031 /* Return nonzero when QImode register that must be represented via REX prefix
33034 x86_extended_QIreg_mentioned_p (rtx insn)
33037 extract_insn_cached (insn);
/* Any register operand above BX (i.e. SI/DI/BP/SP or r8-r15 as byte regs)
   needs a REX prefix in QImode.  */
33038 for (i = 0; i < recog_data.n_operands; i++)
33039 if (REG_P (recog_data.operand[i])
33040 && REGNO (recog_data.operand[i]) > BX_REG)
33045 /* Return nonzero when P points to register encoded via REX prefix.
33046 Called via for_each_rtx. */
33048 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
33050 unsigned int regno;
/* (REG_P guard elided in this excerpt.)  */
33053 regno = REGNO (*p);
33054 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
33057 /* Return true when INSN mentions register that must be encoded using REX
/* Walks the insn pattern (or the raw rtx, if INSN is not an insn) with
   extended_reg_mentioned_1 above.  */
33060 x86_extended_reg_mentioned_p (rtx insn)
33062 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
33063 extended_reg_mentioned_1, NULL);
33066 /* If profitable, negate (without causing overflow) integer constant
33067 of mode MODE at location LOC. Return true in this case. */
/* NOTE(review): the mode switch and some returns are elided here.  */
33069 x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
33073 if (!CONST_INT_P (*loc))
33079 /* DImode x86_64 constants must fit in 32 bits. */
33080 gcc_assert (x86_64_immediate_operand (*loc, mode));
33091 gcc_unreachable ();
33094 /* Avoid overflows. */
/* Negating the mode's most negative value would overflow; bail out.  */
33095 if (mode_signbit_p (mode, *loc))
33098 val = INTVAL (*loc);
33100 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
33101 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
33102 if ((val < 0 && val != -128)
33105 *loc = GEN_INT (-val);
33112 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
33113 optabs would emit if we didn't have TFmode patterns. */
33116 x86_emit_floatuns (rtx operands[2])
33118 rtx neglab, donelab, i0, i1, f0, in, out;
33119 enum machine_mode mode, inmode;
33121 inmode = GET_MODE (operands[1]);
33122 gcc_assert (inmode == SImode || inmode == DImode);
33125 in = force_reg (inmode, operands[1]);
33126 mode = GET_MODE (out);
33127 neglab = gen_label_rtx ();
33128 donelab = gen_label_rtx ();
33129 f0 = gen_reg_rtx (mode);
/* Fast path: value is non-negative, so a plain signed conversion is
   already correct.  */
33131 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
33133 expand_float (out, in, 0);
33135 emit_jump_insn (gen_jump (donelab));
33138 emit_label (neglab);
/* Slow path for values with the sign bit set: halve the value while
   preserving the rounding bit ((in >> 1) | (in & 1)), convert signed,
   then double the result.  */
33140 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
33142 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
33144 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
33146 expand_float (f0, i0, 0);
33148 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
33150 emit_label (donelab);
33153 /* AVX2 does support 32-byte integer vector operations,
33154 thus the longest vector we are faced with is V32QImode. */
33155 #define MAX_VECT_LEN 32
/* Descriptor for a vector-permutation expansion request: destination,
   the two source operands, the element permutation, and vector shape.  */
33157 struct expand_vec_perm_d
33159 rtx target, op0, op1;
33160 unsigned char perm[MAX_VECT_LEN];
33161 enum machine_mode vmode;
33162 unsigned char nelt;
/* Forward declarations for the permutation expanders used below.  */
33166 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
33167 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
33169 /* Get a vector mode of the same size as the original but with elements
33170 twice as wide. This is only guaranteed to apply to integral vectors. */
33172 static inline enum machine_mode
33173 get_mode_wider_vector (enum machine_mode o)
33175 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
33176 enum machine_mode n = GET_MODE_WIDER_MODE (o);
/* Sanity: half the element count, same total size.  */
33177 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
33178 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
33182 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
33183 with all elements equal to VAR. Return true if successful. */
/* NOTE(review): the mode switch and several case labels are elided in
   this excerpt.  */
33186 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
33187 rtx target, rtx val)
33210 /* First attempt to recognize VAL as-is. */
33211 dup = gen_rtx_VEC_DUPLICATE (mode, val);
33212 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
33213 if (recog_memoized (insn) < 0)
33216 /* If that fails, force VAL into a register. */
33219 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
33220 seq = get_insns ();
/* Splice the register-copy sequence ahead of the duplicate insn and try
   recognition again.  */
33223 emit_insn_before (seq, insn);
33225 ok = recog_memoized (insn) >= 0;
/* HImode-element case: broadcast via an SImode truncate duplicate.  */
33234 if (TARGET_SSE || TARGET_3DNOW_A)
33238 val = gen_lowpart (SImode, val);
33239 x = gen_rtx_TRUNCATE (HImode, val);
33240 x = gen_rtx_VEC_DUPLICATE (mode, x);
33241 emit_insn (gen_rtx_SET (VOIDmode, target, x));
/* QImode/HImode SSE case: insert the scalar as element 0 of a V4SI
   vector, then use the permutation machinery to broadcast it.  */
33254 struct expand_vec_perm_d dperm;
33258 memset (&dperm, 0, sizeof (dperm));
33259 dperm.target = target;
33260 dperm.vmode = mode;
33261 dperm.nelt = GET_MODE_NUNITS (mode);
/* perm[] stays all-zero from the memset: a broadcast of element 0.  */
33262 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
33264 /* Extend to SImode using a paradoxical SUBREG. */
33265 tmp1 = gen_reg_rtx (SImode);
33266 emit_move_insn (tmp1, gen_lowpart (SImode, val));
33268 /* Insert the SImode value as low element of a V4SImode vector. */
33269 tmp2 = gen_lowpart (V4SImode, dperm.op0);
33270 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
33272 ok = (expand_vec_perm_1 (&dperm)
33273 || expand_vec_perm_broadcast_1 (&dperm));
33285 /* Replicate the value once into the next wider mode and recurse. */
33287 enum machine_mode smode, wsmode, wvmode;
33290 smode = GET_MODE_INNER (mode);
33291 wvmode = get_mode_wider_vector (mode);
33292 wsmode = GET_MODE_INNER (wvmode);
/* Build the doubled scalar (val << bits(smode)) | val, then broadcast it
   as a vector with half as many, twice-as-wide elements.  */
33294 val = convert_modes (wsmode, smode, val, true);
33295 x = expand_simple_binop (wsmode, ASHIFT, val,
33296 GEN_INT (GET_MODE_BITSIZE (smode)),
33297 NULL_RTX, 1, OPTAB_LIB_WIDEN);
33298 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
33300 x = gen_lowpart (wvmode, target);
33301 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
/* 256-bit case: broadcast into a 128-bit half, then concatenate it with
   itself.  */
33309 enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
33310 rtx x = gen_reg_rtx (hvmode);
33312 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
33315 x = gen_rtx_VEC_CONCAT (mode, x, x);
33316 emit_insn (gen_rtx_SET (VOIDmode, target, x));
33325 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
33326 whose ONE_VAR element is VAR, and other elements are zero. Return true
/* NOTE(review): the mode switch labels and some branches are elided in
   this excerpt.  */
33330 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
33331 rtx target, rtx var, int one_var)
33333 enum machine_mode vsimode;
33336 bool use_vector_set = false;
/* Decide per-mode whether a direct vector-set instruction is available
   and profitable.  */
33341 /* For SSE4.1, we normally use vector set. But if the second
33342 element is zero and inter-unit moves are OK, we use movq
33344 use_vector_set = (TARGET_64BIT
33346 && !(TARGET_INTER_UNIT_MOVES
33352 use_vector_set = TARGET_SSE4_1;
33355 use_vector_set = TARGET_SSE2;
33358 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
33365 use_vector_set = TARGET_AVX;
33368 /* Use ix86_expand_vector_set in 64bit mode only. */
33369 use_vector_set = TARGET_AVX && TARGET_64BIT;
/* Easy path: zero the whole vector, then set the one element.  */
33375 if (use_vector_set)
33377 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
33378 var = force_reg (GET_MODE_INNER (mode), var);
33379 ix86_expand_vector_set (mmx_ok, target, var, one_var);
/* Two-element case: concatenate VAR with zero.  */
33395 var = force_reg (GET_MODE_INNER (mode), var);
33396 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
33397 emit_insn (gen_rtx_SET (VOIDmode, target, x));
/* Four-element case: build {var,0,0,0} in a pseudo, then shuffle VAR
   into position ONE_VAR.  */
33402 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
33403 new_target = gen_reg_rtx (mode);
33405 new_target = target;
33406 var = force_reg (GET_MODE_INNER (mode), var);
33407 x = gen_rtx_VEC_DUPLICATE (mode, var);
33408 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
33409 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
33412 /* We need to shuffle the value to the correct position, so
33413 create a new pseudo to store the intermediate result. */
33415 /* With SSE2, we can use the integer shuffle insns. */
33416 if (mode != V4SFmode && TARGET_SSE2)
/* pshufd selector: element ONE_VAR takes index 0 (VAR), the rest take
   index 1 (a zero lane).  */
33418 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
33420 GEN_INT (one_var == 1 ? 0 : 1),
33421 GEN_INT (one_var == 2 ? 0 : 1),
33422 GEN_INT (one_var == 3 ? 0 : 1)));
33423 if (target != new_target)
33424 emit_move_insn (target, new_target);
33428 /* Otherwise convert the intermediate result to V4SFmode and
33429 use the SSE1 shuffle instructions. */
33430 if (mode != V4SFmode)
33432 tmp = gen_reg_rtx (V4SFmode);
33433 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
/* shufps: high two selectors index the second operand, hence the +4.  */
33438 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
33440 GEN_INT (one_var == 1 ? 0 : 1),
33441 GEN_INT (one_var == 2 ? 0+4 : 1+4),
33442 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
33444 if (mode != V4SFmode)
33445 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
33446 else if (tmp != target)
33447 emit_move_insn (target, tmp);
33449 else if (target != new_target)
33450 emit_move_insn (target, new_target);
/* Narrow-element modes: zero-extend VAR to SImode, build the SImode
   vector, then view it in the original mode.  */
33455 vsimode = V4SImode;
33461 vsimode = V2SImode;
33467 /* Zero extend the variable element to SImode and recurse. */
33468 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
33470 x = gen_reg_rtx (vsimode);
33471 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
33473 gcc_unreachable ();
33475 emit_move_insn (target, gen_lowpart (mode, x));
33483 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
33484 consisting of the values in VALS. It is known that all elements
33485 except ONE_VAR are constants. Return true if successful. */
/* NOTE(review): the mode switch labels and some branches are elided in
   this excerpt.  */
33488 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
33489 rtx target, rtx vals, int one_var)
33491 rtx var = XVECEXP (vals, 0, one_var);
33492 enum machine_mode wmode;
/* Build the constant vector with the variable slot zeroed out.  */
33495 const_vec = copy_rtx (vals);
33496 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
33497 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
33505 /* For the two element vectors, it's just as easy to use
33506 the general case. */
33510 /* Use ix86_expand_vector_set in 64bit mode only. */
33533 /* There's no way to set one QImode entry easily. Combine
33534 the variable value with its adjacent constant value, and
33535 promote to an HImode set. */
/* one_var ^ 1 is the byte sharing the HImode slot with ONE_VAR.  */
33536 x = XVECEXP (vals, 0, one_var ^ 1);
/* ONE_VAR is the high byte of the pair: shift VAR up 8 bits.  */
33539 var = convert_modes (HImode, QImode, var, true);
33540 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
33541 NULL_RTX, 1, OPTAB_LIB_WIDEN);
33542 x = GEN_INT (INTVAL (x) & 0xff);
/* ONE_VAR is the low byte: shift the constant neighbor up instead.  */
33546 var = convert_modes (HImode, QImode, var, true);
33547 x = gen_int_mode (INTVAL (x) << 8, HImode);
33549 if (x != const0_rtx)
33550 var = expand_simple_binop (HImode, IOR, var, x, var,
33551 1, OPTAB_LIB_WIDEN);
33553 x = gen_reg_rtx (wmode);
33554 emit_move_insn (x, gen_lowpart (wmode, const_vec));
33555 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
33557 emit_move_insn (target, gen_lowpart (mode, x));
/* General case: load the constant vector, then overwrite slot ONE_VAR.  */
33564 emit_move_insn (target, const_vec);
33565 ix86_expand_vector_set (mmx_ok, target, var, one_var);
33569 /* A subroutine of ix86_expand_vector_init_general. Use vector
33570 concatenate to handle the most general case: all values variable,
33571 and none identical. */
/* NOTE(review): the switch over N and the mode tables are elided in this
   excerpt.  */
33574 ix86_expand_vector_init_concat (enum machine_mode mode,
33575 rtx target, rtx *ops, int n)
33577 enum machine_mode cmode, hmode = VOIDmode;
33578 rtx first[8], second[4];
33618 gcc_unreachable ();
/* N == 2: a single VEC_CONCAT of the two (register) operands.  */
33621 if (!register_operand (ops[1], cmode))
33622 ops[1] = force_reg (cmode, ops[1]);
33623 if (!register_operand (ops[0], cmode))
33624 ops[0] = force_reg (cmode, ops[0]);
33625 emit_insn (gen_rtx_SET (VOIDmode, target,
33626 gen_rtx_VEC_CONCAT (mode, ops[0],
33646 gcc_unreachable ();
33662 gcc_unreachable ();
33667 /* FIXME: We process inputs backward to help RA. PR 36222. */
/* Pairwise-combine the N scalars into N/2 vectors of mode CMODE...  */
33670 for (; i > 0; i -= 2, j--)
33672 first[j] = gen_reg_rtx (cmode);
33673 v = gen_rtvec (2, ops[i - 1], ops[i]);
33674 ix86_expand_vector_init (false, first[j],
33675 gen_rtx_PARALLEL (cmode, v));
/* ...then recurse, possibly via an intermediate HMODE level, until a
   single concat fills TARGET.  */
33681 gcc_assert (hmode != VOIDmode);
33682 for (i = j = 0; i < n; i += 2, j++)
33684 second[j] = gen_reg_rtx (hmode);
33685 ix86_expand_vector_init_concat (hmode, second [j],
33689 ix86_expand_vector_init_concat (mode, target, second, n);
33692 ix86_expand_vector_init_concat (mode, target, first, n);
33696 gcc_unreachable ();
33700 /* A subroutine of ix86_expand_vector_init_general. Use vector
33701 interleave to handle the most general case: all values variable,
33702 and none identical. */
/* NOTE(review): some switch labels and loop headers are elided in this
   excerpt.  */
33705 ix86_expand_vector_init_interleave (enum machine_mode mode,
33706 rtx target, rtx *ops, int n)
33708 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
/* Per-mode generator functions: element insert plus the two (or three)
   levels of low-interleave used to merge partial vectors.  */
33711 rtx (*gen_load_even) (rtx, rtx, rtx);
33712 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
33713 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
33718 gen_load_even = gen_vec_setv8hi;
33719 gen_interleave_first_low = gen_vec_interleave_lowv4si;
33720 gen_interleave_second_low = gen_vec_interleave_lowv2di;
33721 inner_mode = HImode;
33722 first_imode = V4SImode;
33723 second_imode = V2DImode;
33724 third_imode = VOIDmode;
33727 gen_load_even = gen_vec_setv16qi;
33728 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
33729 gen_interleave_second_low = gen_vec_interleave_lowv4si;
33730 inner_mode = QImode;
33731 first_imode = V8HImode;
33732 second_imode = V4SImode;
33733 third_imode = V2DImode;
33736 gcc_unreachable ();
/* Stage 1: pack each (odd, even) scalar pair into one vector.  */
33739 for (i = 0; i < n; i++)
33741 /* Extend the odd elment to SImode using a paradoxical SUBREG. */
33742 op0 = gen_reg_rtx (SImode);
33743 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
33745 /* Insert the SImode value as low element of V4SImode vector. */
33746 op1 = gen_reg_rtx (V4SImode);
33747 op0 = gen_rtx_VEC_MERGE (V4SImode,
33748 gen_rtx_VEC_DUPLICATE (V4SImode,
33750 CONST0_RTX (V4SImode),
33752 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
33754 /* Cast the V4SImode vector back to a vector in orignal mode. */
33755 op0 = gen_reg_rtx (mode);
33756 emit_move_insn (op0, gen_lowpart (mode, op1));
33758 /* Load even elements into the second positon. */
33759 emit_insn (gen_load_even (op0,
33760 force_reg (inner_mode,
33764 /* Cast vector to FIRST_IMODE vector. */
33765 ops[i] = gen_reg_rtx (first_imode);
33766 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
33769 /* Interleave low FIRST_IMODE vectors. */
/* Stage 2: merge adjacent partial vectors via punpckl*, writing results
   back into ops[] (count halves each pass).  */
33770 for (i = j = 0; i < n; i += 2, j++)
33772 op0 = gen_reg_rtx (first_imode);
33773 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
33775 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
33776 ops[j] = gen_reg_rtx (second_imode);
33777 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
33780 /* Interleave low SECOND_IMODE vectors. */
33781 switch (second_imode)
/* V4SI needs an extra interleave level before the final V2DI merge.  */
33784 for (i = j = 0; i < n / 2; i += 2, j++)
33786 op0 = gen_reg_rtx (second_imode);
33787 emit_insn (gen_interleave_second_low (op0, ops[i],
33790 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
33792 ops[j] = gen_reg_rtx (third_imode);
33793 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
/* Fall through to the V2DI stage for the final merge.  */
33795 second_imode = V2DImode;
33796 gen_interleave_second_low = gen_vec_interleave_lowv2di;
33800 op0 = gen_reg_rtx (second_imode);
33801 emit_insn (gen_interleave_second_low (op0, ops[0],
33804 /* Cast the SECOND_IMODE vector back to a vector on original
33806 emit_insn (gen_rtx_SET (VOIDmode, target,
33807 gen_lowpart (mode, op0)));
33811 gcc_unreachable ();
33815 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
33816 all values variable, and none identical. */
/* NOTE(review): the mode switch labels are elided in this excerpt.  */
33819 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
33820 rtx target, rtx vals)
33822 rtx ops[32], op0, op1;
33823 enum machine_mode half_mode = VOIDmode;
33830 if (!mmx_ok && !TARGET_SSE)
/* Wide-element modes: build by recursive concatenation.  */
33842 n = GET_MODE_NUNITS (mode);
33843 for (i = 0; i < n; i++)
33844 ops[i] = XVECEXP (vals, 0, i);
33845 ix86_expand_vector_init_concat (mode, target, ops, n);
/* 256-bit byte/word modes: interleave each 128-bit half separately,
   then concatenate the halves.  */
33849 half_mode = V16QImode;
33853 half_mode = V8HImode;
33857 n = GET_MODE_NUNITS (mode);
33858 for (i = 0; i < n; i++)
33859 ops[i] = XVECEXP (vals, 0, i);
33860 op0 = gen_reg_rtx (half_mode);
33861 op1 = gen_reg_rtx (half_mode);
33862 ix86_expand_vector_init_interleave (half_mode, op0, ops,
33864 ix86_expand_vector_init_interleave (half_mode, op1,
33865 &ops [n >> 1], n >> 2);
33866 emit_insn (gen_rtx_SET (VOIDmode, target,
33867 gen_rtx_VEC_CONCAT (mode, op0, op1)));
/* 128-bit byte/word modes: need SSE4.1 element inserts.  */
33871 if (!TARGET_SSE4_1)
33879 /* Don't use ix86_expand_vector_init_interleave if we can't
33880 move from GPR to SSE register directly. */
33881 if (!TARGET_INTER_UNIT_MOVES)
33884 n = GET_MODE_NUNITS (mode);
33885 for (i = 0; i < n; i++)
33886 ops[i] = XVECEXP (vals, 0, i);
33887 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
33895 gcc_unreachable ();
/* Fallback: assemble the vector through word-mode integer registers.  */
33899 int i, j, n_elts, n_words, n_elt_per_word;
33900 enum machine_mode inner_mode;
33901 rtx words[4], shift;
33903 inner_mode = GET_MODE_INNER (mode);
33904 n_elts = GET_MODE_NUNITS (mode);
33905 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
33906 n_elt_per_word = n_elts / n_words;
33907 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
/* Pack n_elt_per_word elements into each word by shift-and-or, walking
   elements high-to-low within the word.  */
33909 for (i = 0; i < n_words; ++i)
33911 rtx word = NULL_RTX;
33913 for (j = 0; j < n_elt_per_word; ++j)
33915 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
33916 elt = convert_modes (word_mode, inner_mode, elt, true);
33922 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
33923 word, 1, OPTAB_LIB_WIDEN);
33924 word = expand_simple_binop (word_mode, IOR, word, elt,
33925 word, 1, OPTAB_LIB_WIDEN);
33933 emit_move_insn (target, gen_lowpart (mode, words[0]));
33934 else if (n_words == 2)
/* Two words: clobber first so the half-register writes don't imply a
   read of the uninitialized whole.  */
33936 rtx tmp = gen_reg_rtx (mode);
33937 emit_clobber (tmp);
33938 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
33939 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
33940 emit_move_insn (target, tmp);
33942 else if (n_words == 4)
33944 rtx tmp = gen_reg_rtx (V4SImode);
33945 gcc_assert (word_mode == SImode);
33946 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
/* Recurse once with the four words as V4SI elements.  */
33947 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
33948 emit_move_insn (target, gen_lowpart (mode, tmp));
33951 gcc_unreachable ();
33955 /* Initialize vector TARGET via VALS. Suppress the use of MMX
33956 instructions unless MMX_OK is true. */
/* Dispatcher: classifies VALS (all constant? all identical? exactly one
   variable element?) and routes to the cheapest expansion strategy,
   falling back to ix86_expand_vector_init_general.  */
33959 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
33961 enum machine_mode mode = GET_MODE (target);
33962 enum machine_mode inner_mode = GET_MODE_INNER (mode);
33963 int n_elts = GET_MODE_NUNITS (mode);
33964 int n_var = 0, one_var = -1;
33965 bool all_same = true, all_const_zero = true;
/* Single scan of VALS to compute the classification flags.  */
33969 for (i = 0; i < n_elts; ++i)
33971 x = XVECEXP (vals, 0, i);
/* Anything that is not a literal constant counts as "variable";
   one_var remembers the index of the last such element.  */
33972 if (!(CONST_INT_P (x)
33973 || GET_CODE (x) == CONST_DOUBLE
33974 || GET_CODE (x) == CONST_FIXED))
33975 n_var++, one_var = i;
33976 else if (x != CONST0_RTX (inner_mode))
33977 all_const_zero = false;
33978 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
33982 /* Constants are best loaded from the constant pool. */
33985 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
33989 /* If all values are identical, broadcast the value. */
33991 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
33992 XVECEXP (vals, 0, 0)))
33995 /* Values where only one field is non-constant are best loaded from
33996 the pool and overwritten via move later. */
34000 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
34001 XVECEXP (vals, 0, one_var),
34005 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
/* Most general fallback: everything variable and distinct.  */
34009 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
/* Store scalar VAL into element ELT of vector TARGET.  Uses the best
   instruction available for the mode/ISA: VEC_MERGE (e.g. pinsr* via
   SSE4.1), shuffles, 256-bit half extract/insert, or as a last resort
   a spill through a stack temporary.  Suppresses MMX unless MMX_OK.
   NOTE(review): switch labels and some braces are elided in this view;
   comments annotate only the visible statements.  */
34013 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
34015 enum machine_mode mode = GET_MODE (target);
34016 enum machine_mode inner_mode = GET_MODE_INNER (mode);
34017 enum machine_mode half_mode;
34018 bool use_vec_merge = false;
/* Tables of 128-bit half extract/insert generators for the six
   256-bit AVX modes; first index selects the mode, second lo/hi.  */
34020 static rtx (*gen_extract[6][2]) (rtx, rtx)
34022 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
34023 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
34024 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
34025 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
34026 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
34027 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
34029 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
34031 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
34032 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
34033 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
34034 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
34035 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
34036 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
/* Two-element vector: extract the untouched element and rebuild the
   whole vector with VEC_CONCAT, ordering by ELT.  */
34046 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
34047 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
34049 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
34051 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
34052 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
34058 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
/* Same rebuild-via-concat approach for another two-element mode.  */
34062 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
34063 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
34065 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
34067 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
34068 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
34075 /* For the two element vectors, we implement a VEC_CONCAT with
34076 the extraction of the other element. */
34078 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
34079 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
34082 op0 = val, op1 = tmp;
34084 op0 = tmp, op1 = val;
34086 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
34087 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
34092 use_vec_merge = TARGET_SSE4_1;
34099 use_vec_merge = true;
/* V4SF cases without SSE4.1: combine an element-0 insert (recursive
   call) with shufps to place VAL at the wanted position.  */
34103 /* tmp = target = A B C D */
34104 tmp = copy_to_reg (target);
34105 /* target = A A B B */
34106 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
34107 /* target = X A B B */
34108 ix86_expand_vector_set (false, target, val, 0);
34109 /* target = A X C D */
34110 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
34111 const1_rtx, const0_rtx,
34112 GEN_INT (2+4), GEN_INT (3+4)));
34116 /* tmp = target = A B C D */
34117 tmp = copy_to_reg (target);
34118 /* tmp = X B C D */
34119 ix86_expand_vector_set (false, tmp, val, 0);
34120 /* target = A B X D */
34121 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
34122 const0_rtx, const1_rtx,
34123 GEN_INT (0+4), GEN_INT (3+4)));
34127 /* tmp = target = A B C D */
34128 tmp = copy_to_reg (target);
34129 /* tmp = X B C D */
34130 ix86_expand_vector_set (false, tmp, val, 0);
34131 /* target = A B X D */
34132 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
34133 const0_rtx, const1_rtx,
34134 GEN_INT (2+4), GEN_INT (0+4)));
34138 gcc_unreachable ();
34143 use_vec_merge = TARGET_SSE4_1;
34147 /* Element 0 handled by vec_merge below. */
34150 use_vec_merge = true;
34156 /* With SSE2, use integer shuffles to swap element 0 and ELT,
34157 store into element 0, then shuffle them back. */
34161 order[0] = GEN_INT (elt);
34162 order[1] = const1_rtx;
34163 order[2] = const2_rtx;
34164 order[3] = GEN_INT (3);
/* order now swaps lanes 0 and ELT; the same permutation applied
   twice restores the original order.  */
34165 order[elt] = const0_rtx;
34167 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
34168 order[1], order[2], order[3]));
34170 ix86_expand_vector_set (false, target, val, 0);
34172 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
34173 order[1], order[2], order[3]));
34177 /* For SSE1, we have to reuse the V4SF code. */
34178 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
34179 gen_lowpart (SFmode, val), elt);
34184 use_vec_merge = TARGET_SSE2;
34187 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
34191 use_vec_merge = TARGET_SSE4_1;
/* 256-bit modes: record the 128-bit half mode; j below presumably
   indexes the gen_extract/gen_insert tables — elided lines set it.  */
34198 half_mode = V16QImode;
34204 half_mode = V8HImode;
34210 half_mode = V4SImode;
34216 half_mode = V2DImode;
34222 half_mode = V4SFmode;
34228 half_mode = V2DFmode;
34234 /* Compute offset. */
34238 gcc_assert (i <= 1);
34240 /* Extract the half. */
34241 tmp = gen_reg_rtx (half_mode);
34242 emit_insn (gen_extract[j][i] (tmp, target));
34244 /* Put val in tmp at elt. */
34245 ix86_expand_vector_set (false, tmp, val, elt);
/* Reinsert the modified half into TARGET.  */
34248 emit_insn (gen_insert[j][i] (target, target, tmp));
/* VEC_MERGE path: duplicate VAL across all lanes and merge only the
   lane selected by the 1 << elt mask.  */
34257 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
34258 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
34259 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
/* Memory fallback: spill the vector, overwrite one element, reload.  */
34263 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
34265 emit_move_insn (mem, target);
34267 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
34268 emit_move_insn (tmp, val);
34270 emit_move_insn (target, mem);
/* Extract element ELT of vector VEC into scalar TARGET.  Uses vec_extract
   patterns when the ISA has them, shuffles to move the wanted element
   into lane 0 for other cases, splits 256-bit vectors into 128-bit
   halves and recurses, or falls back to a stack spill.  Suppresses MMX
   unless MMX_OK.  NOTE(review): switch labels are elided in this view.  */
34275 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
34277 enum machine_mode mode = GET_MODE (vec);
34278 enum machine_mode inner_mode = GET_MODE_INNER (mode);
34279 bool use_vec_extr = false;
34292 use_vec_extr = true;
34296 use_vec_extr = TARGET_SSE4_1;
/* V4SF: shuffle the wanted element into lane 0 of a temp, then the
   (elided) code extracts lane 0.  */
34308 tmp = gen_reg_rtx (mode);
34309 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
34310 GEN_INT (elt), GEN_INT (elt),
34311 GEN_INT (elt+4), GEN_INT (elt+4)));
34315 tmp = gen_reg_rtx (mode);
34316 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
34320 gcc_unreachable ();
34323 use_vec_extr = true;
34328 use_vec_extr = TARGET_SSE4_1;
/* V4SI without SSE4.1: pshufd broadcasts the wanted lane.  */
34342 tmp = gen_reg_rtx (mode);
34343 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
34344 GEN_INT (elt), GEN_INT (elt),
34345 GEN_INT (elt), GEN_INT (elt)));
34349 tmp = gen_reg_rtx (mode);
34350 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
34354 gcc_unreachable ();
34357 use_vec_extr = true;
34362 /* For SSE1, we have to reuse the V4SF code. */
34363 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
34364 gen_lowpart (V4SFmode, vec), elt);
34370 use_vec_extr = TARGET_SSE2;
34373 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
34377 use_vec_extr = TARGET_SSE4_1;
/* 256-bit modes: extract the lo/hi 128-bit half containing ELT,
   then recurse with ELT masked down to the half's lane count.  */
34383 tmp = gen_reg_rtx (V4SFmode);
34385 emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
34387 emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
34388 ix86_expand_vector_extract (false, target, tmp, elt & 3);
34396 tmp = gen_reg_rtx (V2DFmode);
34398 emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
34400 emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
34401 ix86_expand_vector_extract (false, target, tmp, elt & 1);
34409 tmp = gen_reg_rtx (V16QImode);
34411 emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
34413 emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
34414 ix86_expand_vector_extract (false, target, tmp, elt & 15);
34422 tmp = gen_reg_rtx (V8HImode);
34424 emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
34426 emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
34427 ix86_expand_vector_extract (false, target, tmp, elt & 7);
34435 tmp = gen_reg_rtx (V4SImode);
34437 emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
34439 emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
34440 ix86_expand_vector_extract (false, target, tmp, elt & 3);
34448 tmp = gen_reg_rtx (V2DImode);
34450 emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
34452 emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
34453 ix86_expand_vector_extract (false, target, tmp, elt & 1);
34459 /* ??? Could extract the appropriate HImode element and shift. */
/* Generic vec_extract path: VEC_SELECT of a single lane.  */
34466 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
34467 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
34469 /* Let the rtl optimizers know about the zero extension performed. */
34470 if (inner_mode == QImode || inner_mode == HImode)
34472 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
34473 target = gen_lowpart (SImode, target);
34476 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
/* Memory fallback: spill the vector and load one element.  */
34480 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
34482 emit_move_insn (mem, vec);
34484 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
34485 emit_move_insn (target, tmp);
34489 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
34490 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
34491 The upper bits of DEST are undefined, though they shouldn't cause
34492 exceptions (some bits from src or all zeros are ok). */
/* One "halving" step of a reduction: picks movhlps/shufps/unpckhpd,
   a 128-bit shift, or vperm2f128/vperm2i128 depending on mode and on
   I (the width in bits being halved).  NOTE(review): case labels and
   the final emit of TEM are elided in this view.  */
34495 emit_reduc_half (rtx dest, rtx src, int i)
34498 switch (GET_MODE (src))
34502 tem = gen_sse_movhlps (dest, src, src);
34504 tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
34505 GEN_INT (1 + 4), GEN_INT (1 + 4));
34508 tem = gen_vec_interleave_highv2df (dest, src, src);
/* Integer 128-bit vectors: shift the whole register right by I/2 bits
   through a V1TI view.  */
34514 tem = gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, dest),
34515 gen_lowpart (V1TImode, src),
/* 256-bit float modes: swap 128-bit lanes for the first step,
   in-lane shuffles for the later ones.  */
34520 tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
34522 tem = gen_avx_shufps256 (dest, src, src,
34523 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
34527 tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
34529 tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
/* 256-bit integer modes (AVX2): lane swap via vpermq/vperm2i128,
   then 128-bit shifts through a V2TI view.  */
34536 tem = gen_avx2_permv2ti (gen_lowpart (V4DImode, dest),
34537 gen_lowpart (V4DImode, src),
34538 gen_lowpart (V4DImode, src),
34541 tem = gen_avx2_lshrv2ti3 (gen_lowpart (V2TImode, dest),
34542 gen_lowpart (V2TImode, src),
34546 gcc_unreachable ();
34551 /* Expand a vector reduction. FN is the binary pattern to reduce;
34552 DEST is the destination; IN is the input vector. */
/* Repeatedly halves the vector with emit_reduc_half and combines the
   halves with FN until one element's worth of bits remains.  */
34555 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
34557 rtx half, dst, vec = in;
34558 enum machine_mode mode = GET_MODE (in);
34561 /* SSE4 has a special instruction for V8HImode UMIN reduction. */
34563 && mode == V8HImode
34564 && fn == gen_uminv8hi3)
34566 emit_insn (gen_sse4_1_phminposuw (dest, in));
/* Halve from full vector width down to the element width.  */
34570 for (i = GET_MODE_BITSIZE (mode);
34571 i > GET_MODE_BITSIZE (GET_MODE_INNER (mode));
34574 half = gen_reg_rtx (mode);
34575 emit_reduc_half (half, vec, i);
/* On the last iteration (elided code presumably targets DEST
   directly); otherwise combine into a fresh register.  */
34576 if (i == GET_MODE_BITSIZE (GET_MODE_INNER (mode)) * 2)
34579 dst = gen_reg_rtx (mode);
34580 emit_insn (fn (dst, half, vec));
34585 /* Target hook for scalar_mode_supported_p. */
/* i386 supports decimal float modes per the generic decimal-float test,
   TFmode specially (result line elided), and otherwise defers to the
   default hook.  */
34587 ix86_scalar_mode_supported_p (enum machine_mode mode)
34589 if (DECIMAL_FLOAT_MODE_P (mode))
34590 return default_decimal_float_supported_p ();
34591 else if (mode == TFmode)
34594 return default_scalar_mode_supported_p (mode);
34597 /* Implements target hook vector_mode_supported_p. */
/* A vector mode is supported when the matching ISA extension is enabled:
   SSE, SSE2, AVX (256-bit), MMX, or 3DNow!.  (The return statements
   between the tests are elided in this view.)  */
34599 ix86_vector_mode_supported_p (enum machine_mode mode)
34601 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
34603 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
34605 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
34607 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
34609 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
34614 /* Target hook for c_mode_for_suffix. */
/* Maps a literal-suffix character to a machine mode; the body (which
   suffixes map to which modes) is elided in this view.  */
34615 static enum machine_mode
34616 ix86_c_mode_for_suffix (char suffix)
34626 /* Worker function for TARGET_MD_ASM_CLOBBERS.
34628 We do this in the new i386 backend to maintain source compatibility
34629 with the old cc0-based compiler. */
/* Every asm implicitly clobbers the condition flags and the x87 status
   word; prepend "flags" and "fpsr" to the clobber list.  */
34632 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
34633 tree inputs ATTRIBUTE_UNUSED,
34636 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
34638 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
34643 /* Implements target vector targetm.asm.encode_section_info. */
/* After the default encoding, tag static/external variables that live in
   the large data section so references use far addressing.  */
34645 static void ATTRIBUTE_UNUSED
34646 ix86_encode_section_info (tree decl, rtx rtl, int first)
34648 default_encode_section_info (decl, rtl, first);
34650 if (TREE_CODE (decl) == VAR_DECL
34651 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
34652 && ix86_in_large_data_p (decl))
34653 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
34656 /* Worker function for REVERSE_CONDITION. */
/* FP compare modes must use the unordered-aware reversal so NaN
   comparisons keep their semantics; all other modes use the plain one.  */
34659 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
34661 return (mode != CCFPmode && mode != CCFPUmode
34662 ? reverse_condition (code)
34663 : reverse_condition_maybe_unordered (code));
34666 /* Output code to perform an x87 FP register move, from OPERANDS[1]
/* Returns the assembler template for an x87 reg/mem move.  Pops the
   source register (fstp/ffreep) when a REG_DEAD note shows OPERANDS[1]
   dies here, since x87 stack slots must be freed explicitly.  */
34670 output_387_reg_move (rtx insn, rtx *operands)
34672 if (REG_P (operands[0]))
34674 if (REG_P (operands[1])
34675 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
/* Source dies: pop it.  ffreep is the cheaper form when the
   destination is the stack top itself.  */
34677 if (REGNO (operands[0]) == FIRST_STACK_REG)
34678 return output_387_ffreep (operands, 0);
34679 return "fstp\t%y0";
34681 if (STACK_TOP_P (operands[0]))
34682 return "fld%Z1\t%y1";
34685 else if (MEM_P (operands[0]))
34687 gcc_assert (REG_P (operands[1]));
34688 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
34689 return "fstp%Z0\t%y0";
34692 /* There is no non-popping store to memory for XFmode.
34693 So if we need one, follow the store with a load. */
34694 if (GET_MODE (operands[0]) == XFmode)
34695 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
34697 return "fst%Z0\t%y0";
34704 /* Output code to perform a conditional jump to LABEL, if C2 flag in
34705 FP status register is set. */
/* Reads the FP status word with fnstsw, then either transfers it to
   EFLAGS via sahf (when available/profitable) or tests the C2 bit
   (0x04) directly, and emits the conditional branch.  */
34708 ix86_emit_fp_unordered_jump (rtx label)
34710 rtx reg = gen_reg_rtx (HImode);
34713 emit_insn (gen_x86_fnstsw_1 (reg));
34715 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
34717 emit_insn (gen_x86_sahf_1 (reg));
34719 temp = gen_rtx_REG (CCmode, FLAGS_REG);
34720 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
/* No sahf: test bit 2 (C2) of the status byte instead.  */
34724 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
34726 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
34727 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
34730 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
34731 gen_rtx_LABEL_REF (VOIDmode, label),
34733 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
34735 emit_jump_insn (temp);
/* Branch predicted mostly not taken (10%).  */
34736 predict_jump (REG_BR_PROB_BASE * 10 / 100);
34739 /* Output code to perform a log1p XFmode calculation. */
/* log1p(x): when |x| < 1 - sqrt(2)/2 use fyl2xp1 (accurate near 0),
   otherwise compute log(1 + x) via fyl2x.  Both multiply by ln(2)
   loaded with fldln2 to convert from log2.  */
34741 void ix86_emit_i387_log1p (rtx op0, rtx op1)
34743 rtx label1 = gen_label_rtx ();
34744 rtx label2 = gen_label_rtx ();
34746 rtx tmp = gen_reg_rtx (XFmode);
34747 rtx tmp2 = gen_reg_rtx (XFmode);
34750 emit_insn (gen_absxf2 (tmp, op1));
/* 0.2928... = 1 - sqrt(2)/2, the fyl2xp1 accuracy threshold.  */
34751 test = gen_rtx_GE (VOIDmode, tmp,
34752 CONST_DOUBLE_FROM_REAL_VALUE (
34753 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
34755 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
34757 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
34758 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
34759 emit_jump (label2);
/* Large-|x| path: op0 = ln2 * log2(1 + op1).  */
34761 emit_label (label1);
34762 emit_move_insn (tmp, CONST1_RTX (XFmode));
34763 emit_insn (gen_addxf3 (tmp, op1, tmp));
34764 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
34765 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
34767 emit_label (label2);
34770 /* Emit code for round calculation. */
/* Implements round-half-away-from-zero on the i387:
   round(a) = sgn(a) * floor(fabs(a) + 0.5).
   OP1's mode selects the abs pattern, OP0's mode the negate pattern
   (FP or integer).  The sign is read from the fxam result and applied
   by a conditional negate at the end.  NOTE(review): switch labels and
   some braces are elided in this view.  */
34771 void ix86_emit_i387_round (rtx op0, rtx op1)
34773 enum machine_mode inmode = GET_MODE (op1);
34774 enum machine_mode outmode = GET_MODE (op0);
34775 rtx e1, e2, res, tmp, tmp1, half;
34776 rtx scratch = gen_reg_rtx (HImode);
34777 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
34778 rtx jump_label = gen_label_rtx ();
34780 rtx (*gen_abs) (rtx, rtx);
34781 rtx (*gen_neg) (rtx, rtx);
/* Pick abs by input mode.  */
34786 gen_abs = gen_abssf2;
34789 gen_abs = gen_absdf2;
34792 gen_abs = gen_absxf2;
34795 gcc_unreachable ();
/* Pick negate by output mode (FP or integer result).  */
34801 gen_neg = gen_negsf2;
34804 gen_neg = gen_negdf2;
34807 gen_neg = gen_negxf2;
34810 gen_neg = gen_neghi2;
34813 gen_neg = gen_negsi2;
34816 gen_neg = gen_negdi2;
34819 gcc_unreachable ();
34822 e1 = gen_reg_rtx (inmode);
34823 e2 = gen_reg_rtx (inmode);
34824 res = gen_reg_rtx (outmode);
34826 half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);
34828 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
34830 /* scratch = fxam(op1) */
34831 emit_insn (gen_rtx_SET (VOIDmode, scratch,
34832 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
34834 /* e1 = fabs(op1) */
34835 emit_insn (gen_abs (e1, op1));
34837 /* e2 = e1 + 0.5 */
34838 half = force_reg (inmode, half);
34839 emit_insn (gen_rtx_SET (VOIDmode, e2,
34840 gen_rtx_PLUS (inmode, e1, half)));
34842 /* res = floor(e2) */
34843 if (inmode != XFmode)
/* Widen SF/DF to XFmode first; frndint/lfloor patterns work on XF.  */
34845 tmp1 = gen_reg_rtx (XFmode);
34847 emit_insn (gen_rtx_SET (VOIDmode, tmp1,
34848 gen_rtx_FLOAT_EXTEND (XFmode, e2)));
/* FP result narrower than XF: round in XF, then truncate via a
   no-op-truncation unspec.  */
34858 rtx tmp0 = gen_reg_rtx (XFmode);
34860 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
34862 emit_insn (gen_rtx_SET (VOIDmode, res,
34863 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
34864 UNSPEC_TRUNC_NOOP)));
34868 emit_insn (gen_frndintxf2_floor (res, tmp1));
/* Integer results: floor directly to HI/SI/DI.  */
34871 emit_insn (gen_lfloorxfhi2 (res, tmp1));
34874 emit_insn (gen_lfloorxfsi2 (res, tmp1));
34877 emit_insn (gen_lfloorxfdi2 (res, tmp1));
34880 gcc_unreachable ();
34883 /* flags = signbit(a) */
/* Bit 1 (0x02) of the fxam status byte is the sign (C1).  */
34884 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));
34886 /* if (flags) then res = -res */
34887 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
34888 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
34889 gen_rtx_LABEL_REF (VOIDmode, jump_label),
34891 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
34892 predict_jump (REG_BR_PROB_BASE * 50 / 100);
34893 JUMP_LABEL (insn) = jump_label;
34895 emit_insn (gen_neg (res, res));
34897 emit_label (jump_label);
34898 LABEL_NUSES (jump_label) = 1;
34900 emit_move_insn (op0, res);
34903 /* Output code to perform a Newton-Rhapson approximation of a single precision
34904 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
/* One Newton-Raphson refinement of the rcpss estimate:
   a/b ~= a * (2*rcp(b) - b*rcp(b)^2).  MODE may be scalar or vector.  */
34906 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
34908 rtx x0, x1, e0, e1;
34910 x0 = gen_reg_rtx (mode);
34911 e0 = gen_reg_rtx (mode);
34912 e1 = gen_reg_rtx (mode);
34913 x1 = gen_reg_rtx (mode);
34915 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
34917 b = force_reg (mode, b);
34919 /* x0 = rcp(b) estimate */
34920 emit_insn (gen_rtx_SET (VOIDmode, x0,
34921 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
/* e0 = x0 * b */
34924 emit_insn (gen_rtx_SET (VOIDmode, e0,
34925 gen_rtx_MULT (mode, x0, b)));
/* e0 = x0 * e0  (= b * rcp(b)^2) */
34928 emit_insn (gen_rtx_SET (VOIDmode, e0,
34929 gen_rtx_MULT (mode, x0, e0)));
/* e1 = x0 + x0  (= 2 * rcp(b)) */
34932 emit_insn (gen_rtx_SET (VOIDmode, e1,
34933 gen_rtx_PLUS (mode, x0, x0)));
/* x1 = e1 - e0  (refined reciprocal) */
34936 emit_insn (gen_rtx_SET (VOIDmode, x1,
34937 gen_rtx_MINUS (mode, e1, e0)));
/* res = a * x1 */
34940 emit_insn (gen_rtx_SET (VOIDmode, res,
34941 gen_rtx_MULT (mode, a, x1)));
34944 /* Output code to perform a Newton-Rhapson approximation of a
34945 single precision floating point [reciprocal] square root. */
/* One Newton-Raphson refinement of the rsqrtss estimate.  The elided
   boolean parameter apparently selects sqrt vs. rsqrt; the two differ
   only in whether the last factor is a*x0 (e0) or x0 — see the two
   "e3 = ..." alternatives below.  */
34947 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
34950 rtx x0, e0, e1, e2, e3, mthree, mhalf;
34953 x0 = gen_reg_rtx (mode);
34954 e0 = gen_reg_rtx (mode);
34955 e1 = gen_reg_rtx (mode);
34956 e2 = gen_reg_rtx (mode);
34957 e3 = gen_reg_rtx (mode);
/* Build the constants -3.0 and -0.5 used by the refinement formula.  */
34959 real_from_integer (&r, VOIDmode, -3, -1, 0);
34960 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
34962 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
34963 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
34965 if (VECTOR_MODE_P (mode))
/* Splat the scalar constants across all vector lanes.  */
34967 mthree = ix86_build_const_vector (mode, true, mthree);
34968 mhalf = ix86_build_const_vector (mode, true, mhalf);
34971 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
34972 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
34974 a = force_reg (mode, a);
34976 /* x0 = rsqrt(a) estimate */
34977 emit_insn (gen_rtx_SET (VOIDmode, x0,
34978 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
34981 /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). */
34986 zero = gen_reg_rtx (mode);
34987 mask = gen_reg_rtx (mode);
/* mask = (0 != a), i.e. all-ones lanes where a is nonzero; ANDing
   zeroes x0 where a == 0 so sqrt(0) yields 0 instead of NaN.  */
34989 zero = force_reg (mode, CONST0_RTX(mode));
34990 emit_insn (gen_rtx_SET (VOIDmode, mask,
34991 gen_rtx_NE (mode, zero, a)));
34993 emit_insn (gen_rtx_SET (VOIDmode, x0,
34994 gen_rtx_AND (mode, x0, mask)));
/* e0 = x0 * a */
34998 emit_insn (gen_rtx_SET (VOIDmode, e0,
34999 gen_rtx_MULT (mode, x0, a)));
/* e1 = e0 * x0  (= a * x0^2) */
35001 emit_insn (gen_rtx_SET (VOIDmode, e1,
35002 gen_rtx_MULT (mode, e0, x0)));
/* e2 = e1 - 3.0, computed as e1 + (-3.0).  */
35005 mthree = force_reg (mode, mthree);
35006 emit_insn (gen_rtx_SET (VOIDmode, e2,
35007 gen_rtx_PLUS (mode, e1, mthree)));
35009 mhalf = force_reg (mode, mhalf);
35011 /* e3 = -.5 * x0 */
35012 emit_insn (gen_rtx_SET (VOIDmode, e3,
35013 gen_rtx_MULT (mode, x0, mhalf)));
35015 /* e3 = -.5 * e0 */
35016 emit_insn (gen_rtx_SET (VOIDmode, e3,
35017 gen_rtx_MULT (mode, e0, mhalf)));
35018 /* ret = e2 * e3 */
35019 emit_insn (gen_rtx_SET (VOIDmode, res,
35020 gen_rtx_MULT (mode, e2, e3)));
35023 #ifdef TARGET_SOLARIS
35024 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
/* Special-cases .eh_frame (Binutils 2.15 needs @unwind on every
   occurrence) and COMDAT-group sections; otherwise defers to the
   default ELF section emitter.  */
35027 i386_solaris_elf_named_section (const char *name, unsigned int flags,
35030 /* With Binutils 2.15, the "@unwind" marker must be specified on
35031 every occurrence of the ".eh_frame" section, not just the first
35034 && strcmp (name, ".eh_frame") == 0)
35036 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
35037 flags & SECTION_WRITE ? "aw" : "a");
35042 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
35044 solaris_elf_asm_comdat_section (name, flags, decl);
35049 default_elf_asm_named_section (name, flags, decl);
35051 #endif /* TARGET_SOLARIS */
35053 /* Return the mangling of TYPE if it is an extended fundamental type. */
/* Only void/bool/integer/real main variants can be extended fundamental
   types; others get NULL (return elided) so default mangling applies.  */
35055 static const char *
35056 ix86_mangle_type (const_tree type)
35058 type = TYPE_MAIN_VARIANT (type);
35060 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
35061 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
35064 switch (TYPE_MODE (type))
35067 /* __float128 is "g". */
35070 /* "long double" or __float80 is "e". */
35077 /* For 32-bit code we can save PIC register setup by using
35078 __stack_chk_fail_local hidden function instead of calling
35079 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
35080 register, so it is better to call __stack_chk_fail directly. */
35082 static tree ATTRIBUTE_UNUSED
35083 ix86_stack_protect_fail (void)
35085 return TARGET_64BIT
35086 ? default_external_stack_protect_fail ()
35087 : default_hidden_stack_protect_fail ();
35090 /* Select a format to encode pointers in exception handling data. CODE
35091 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
35092 true if the symbol may be affected by dynamic relocations.
35094 ??? All x86 object file formats are capable of representing this.
35095 After all, the relocation needed is the same as for the call insn.
35096 Whether or not a particular assembler allows us to enter such, I
35097 guess we'll have to see. */
35099 asm_preferred_eh_data_format (int code, int global)
/* PIC path (guard elided): pc-relative sdata, 4-byte when the code
   model guarantees reachability, indirect for global symbols.  */
35103 int type = DW_EH_PE_sdata8;
35105 || ix86_cmodel == CM_SMALL_PIC
35106 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
35107 type = DW_EH_PE_sdata4;
35108 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
/* Non-PIC: absolute encodings, 4-byte when the code model allows.  */
35110 if (ix86_cmodel == CM_SMALL
35111 || (ix86_cmodel == CM_MEDIUM && code))
35112 return DW_EH_PE_udata4;
35113 return DW_EH_PE_absptr;
35116 /* Expand copysign from SIGN to the positive value ABS_VALUE
35117 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
/* RESULT = ABS_VALUE | (SIGN & signbit-mask).  When MASK is NULL a
   sign-bit mask is built here; note it is built INVERTED (false) and
   applied through a NOT, whereas a caller-supplied MASK is assumed to
   have the sign bit clear (it is NOTted before the AND).  */
35120 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
35122 enum machine_mode mode = GET_MODE (sign);
35123 rtx sgn = gen_reg_rtx (mode);
35124 if (mask == NULL_RTX)
35126 enum machine_mode vmode;
35128 if (mode == SFmode)
35130 else if (mode == DFmode)
35135 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
35136 if (!VECTOR_MODE_P (mode))
35138 /* We need to generate a scalar mode mask in this case. */
35139 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
35140 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
35141 mask = gen_reg_rtx (mode);
35142 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
/* sgn = ~mask & sign; result = abs_value | sgn.  */
35146 mask = gen_rtx_NOT (mode, mask);
35147 emit_insn (gen_rtx_SET (VOIDmode, sgn,
35148 gen_rtx_AND (mode, mask, sign)));
35149 emit_insn (gen_rtx_SET (VOIDmode, result,
35150 gen_rtx_IOR (mode, abs_value, sgn)));
35153 /* Expand fabs (OP0) and return a new rtx that holds the result. The
35154 mask for masking out the sign-bit is stored in *SMASK, if that is
/* fabs via AND with an all-bits-but-sign mask; the mask is handed back
   through *SMASK so callers (e.g. the rounding expanders) can reuse it
   for a later copysign.  */
35157 ix86_expand_sse_fabs (rtx op0, rtx *smask)
35159 enum machine_mode vmode, mode = GET_MODE (op0);
35162 xa = gen_reg_rtx (mode);
35163 if (mode == SFmode)
35165 else if (mode == DFmode)
/* true => mask has the sign bit CLEAR, so AND clears the sign.  */
35169 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
35170 if (!VECTOR_MODE_P (mode))
35172 /* We need to generate a scalar mode mask in this case. */
35173 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
35174 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
35175 mask = gen_reg_rtx (mode);
35176 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
35178 emit_insn (gen_rtx_SET (VOIDmode, xa,
35179 gen_rtx_AND (mode, op0, mask)));
35187 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
35188 swapping the operands if SWAP_OPERANDS is true. The expanded
35189 code is a forward jump to a newly created label in case the
35190 comparison is true. The generated label rtx is returned. */
/* Uses CCFPUmode so unordered comparisons (NaN operands) are honored.
   The caller emits the fall-through code and then the label.  */
35192 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
35193 bool swap_operands)
35204 label = gen_label_rtx ();
35205 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
35206 emit_insn (gen_rtx_SET (VOIDmode, tmp,
35207 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
35208 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
35209 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
35210 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
35211 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
35212 JUMP_LABEL (tmp) = label;
35217 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
35218 using comparison code CODE. Operands are swapped for the comparison if
35219 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
/* Emits cmpss/cmpsd (via the setcc_{sf,df}_sse patterns); the result is
   an all-ones / all-zeros mask usable with AND to select values.  */
35221 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
35222 bool swap_operands)
35224 rtx (*insn)(rtx, rtx, rtx, rtx);
35225 enum machine_mode mode = GET_MODE (op0);
35226 rtx mask = gen_reg_rtx (mode);
35235 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
35237 emit_insn (insn (mask, op0, op1,
35238 gen_rtx_fmt_ee (code, mode, op0, op1)));
35242 /* Generate and return a rtx of mode MODE for 2**n where n is the number
35243 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
/* 2^52 (DF) / 2^23 (SF): adding then subtracting this value rounds a
   finite magnitude-below-threshold value to an integer in FP.  */
35245 ix86_gen_TWO52 (enum machine_mode mode)
35247 REAL_VALUE_TYPE TWO52r;
35250 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
35251 TWO52 = const_double_from_real_value (TWO52r, mode);
35252 TWO52 = force_reg (mode, TWO52);
35257 /* Expand SSE sequence for computing lround from OP1 storing
/* lround(op1): add copysign(nextafter(0.5, 0.0), op1) and truncate.
   nextafter(0.5, 0) is used instead of 0.5 so values exactly half-way
   below an even integer do not round up twice under round-to-nearest.  */
35260 ix86_expand_lround (rtx op0, rtx op1)
35262 /* C code for the stuff we're doing below:
35263 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
35266 enum machine_mode mode = GET_MODE (op1);
35267 const struct real_format *fmt;
35268 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
35271 /* load nextafter (0.5, 0.0) */
35272 fmt = REAL_MODE_FORMAT (mode);
35273 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
35274 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
35276 /* adj = copysign (0.5, op1) */
35277 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
35278 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
35280 /* adj = op1 + adj */
35281 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
35283 /* op0 = (imode)adj */
35284 expand_fix (op0, adj, 0);
35287 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
/* lfloor/lceil: truncate to integer, then adjust by 1 when the
   truncation went the wrong way ((double)xi > op1 for floor,
   < for ceil — the latter obtained by swapping the compare).  */
35290 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
35292 /* C code for the stuff we're doing below (for do_floor):
35294 xi -= (double)xi > op1 ? 1 : 0;
35297 enum machine_mode fmode = GET_MODE (op1);
35298 enum machine_mode imode = GET_MODE (op0);
35299 rtx ireg, freg, label, tmp;
35301 /* reg = (long)op1 */
35302 ireg = gen_reg_rtx (imode);
35303 expand_fix (ireg, op1, 0);
35305 /* freg = (double)reg */
35306 freg = gen_reg_rtx (fmode);
35307 expand_float (freg, ireg, 0);
35309 /* ireg = (freg > op1) ? ireg - 1 : ireg */
/* UNLE jumps past the adjustment; unordered (NaN) also skips it.  */
35310 label = ix86_expand_sse_compare_and_jump (UNLE,
35311 freg, op1, !do_floor);
35312 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
35313 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
35314 emit_move_insn (ireg, tmp);
35316 emit_label (label);
35317 LABEL_NUSES (label) = 1;
35319 emit_move_insn (op0, ireg);
35322 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
35323 result in OPERAND0. */
/* Uses the 2^52 add/subtract trick: when |x| < 2^52 the addition forces
   rounding to integer in the current (round-to-nearest) mode; copysign
   restores the sign of the original so -0.0 and negative zeros survive.
   When |x| >= 2^52 the value is already integral and is returned as-is
   (the UNLE branch skips the adjustment).  */
35325 ix86_expand_rint (rtx operand0, rtx operand1)
35327 /* C code for the stuff we're doing below:
35328 xa = fabs (operand1);
35329 if (!isless (xa, 2**52))
35331 xa = xa + 2**52 - 2**52;
35332 return copysign (xa, operand1);
35334 enum machine_mode mode = GET_MODE (operand0);
35335 rtx res, xa, label, TWO52, mask;
35337 res = gen_reg_rtx (mode);
35338 emit_move_insn (res, operand1);
35340 /* xa = abs (operand1) */
35341 xa = ix86_expand_sse_fabs (res, &mask);
35343 /* if (!isless (xa, TWO52)) goto label; */
35344 TWO52 = ix86_gen_TWO52 (mode);
35345 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
35347 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
35348 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
/* Reuse the sign mask produced by ix86_expand_sse_fabs.  */
35350 ix86_sse_copysign_to_positive (res, xa, res, mask);
35352 emit_label (label);
35353 LABEL_NUSES (label) = 1;
35355 emit_move_insn (operand0, res);
/* Expand floor (DO_FLOOR true) or ceil (DO_FLOOR false) of OPERAND1
   into OPERAND0 without relying on 64-bit cvttsd2siq — suitable for
   32-bit targets.  Rounds via the 2**52 add/subtract trick, then
   compensates with +/-1.0 using a compare mask ANDed with 1.0.  */
35358 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
35361 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
35363 /* C code for the stuff we expand below.
35364 double xa = fabs (x), x2;
35365 if (!isless (xa, TWO52))
35367 xa = xa + TWO52 - TWO52;
35368 x2 = copysign (xa, x);
35377 enum machine_mode mode = GET_MODE (operand0);
35378 rtx xa, TWO52, tmp, label, one, res, mask;
35380 TWO52 = ix86_gen_TWO52 (mode);
35382 /* Temporary for holding the result, initialized to the input
35383 operand to ease control flow. */
35384 res = gen_reg_rtx (mode);
35385 emit_move_insn (res, operand1);
35387 /* xa = abs (operand1) */
35388 xa = ix86_expand_sse_fabs (res, &mask);
35390 /* if (!isless (xa, TWO52)) goto label; */
35391 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
35393 /* xa = xa + TWO52 - TWO52; */
35394 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
35395 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
35397 /* xa = copysign (xa, operand1) */
35398 ix86_sse_copysign_to_positive (xa, xa, res, mask);
/* The compensation constant is +1.0 for floor, -1.0 for ceil, so a
   single MINUS below handles both directions.  */
35400 /* generate 1.0 or -1.0 */
35401 one = force_reg (mode,
35402 const_double_from_real_value (do_floor
35403 ? dconst1 : dconstm1, mode));
35405 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
35406 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
35407 emit_insn (gen_rtx_SET (VOIDmode, tmp,
35408 gen_rtx_AND (mode, one, tmp)));
35409 /* We always need to subtract here to preserve signed zero. */
35410 tmp = expand_simple_binop (mode, MINUS,
35411 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
35412 emit_move_insn (res, tmp);
35414 emit_label (label);
35415 LABEL_NUSES (label) = 1;
35417 emit_move_insn (operand0, res);
/* Expand floor (DO_FLOOR true) or ceil (DO_FLOOR false) of OPERAND1
   into OPERAND0 using a fix/float round trip (requires DImode
   truncation for DFmode, i.e. 64-bit targets), then compensate by 1
   where truncation went the wrong way.  Sign of zero is restored only
   when HONOR_SIGNED_ZEROS requires it.  */
35420 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
35423 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
35425 /* C code for the stuff we expand below.
35426 double xa = fabs (x), x2;
35427 if (!isless (xa, TWO52))
35429 x2 = (double)(long)x;
35436 if (HONOR_SIGNED_ZEROS (mode))
35437 return copysign (x2, x);
35440 enum machine_mode mode = GET_MODE (operand0);
35441 rtx xa, xi, TWO52, tmp, label, one, res, mask;
35443 TWO52 = ix86_gen_TWO52 (mode);
35445 /* Temporary for holding the result, initialized to the input
35446 operand to ease control flow. */
35447 res = gen_reg_rtx (mode);
35448 emit_move_insn (res, operand1);
35450 /* xa = abs (operand1) */
35451 xa = ix86_expand_sse_fabs (res, &mask);
35453 /* if (!isless (xa, TWO52)) goto label; */
35454 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
35456 /* xa = (double)(long)x */
35457 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
35458 expand_fix (xi, res, 0);
35459 expand_float (xa, xi, 0);
35462 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
35464 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
35465 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
35466 emit_insn (gen_rtx_SET (VOIDmode, tmp,
35467 gen_rtx_AND (mode, one, tmp)));
35468 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
35469 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
35470 emit_move_insn (res, tmp);
35472 if (HONOR_SIGNED_ZEROS (mode))
35473 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
35475 emit_label (label);
35476 LABEL_NUSES (label) = 1;
35478 emit_move_insn (operand0, res);
/* Expand round-half-away-from-zero of OPERAND1 into OPERAND0 without
   64-bit cvttsd2siq (32-bit safe).  xa2 = round-to-nearest(|x|) via
   the TWO52 trick, dxa = the rounding error, then +/-1 corrections
   convert round-to-even halves into round-half-away-from-zero; the
   sign is restored at the end, keeping -0.0 correct.  */
35481 /* Expand SSE sequence for computing round from OPERAND1 storing
35482 into OPERAND0. Sequence that works without relying on DImode truncation
35483 via cvttsd2siq that is only available on 64bit targets. */
35485 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
35487 /* C code for the stuff we expand below.
35488 double xa = fabs (x), xa2, x2;
35489 if (!isless (xa, TWO52))
35491 Using the absolute value and copying back sign makes
35492 -0.0 -> -0.0 correct.
35493 xa2 = xa + TWO52 - TWO52;
35498 else if (dxa > 0.5)
35500 x2 = copysign (xa2, x);
35503 enum machine_mode mode = GET_MODE (operand0);
35504 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
35506 TWO52 = ix86_gen_TWO52 (mode);
35508 /* Temporary for holding the result, initialized to the input
35509 operand to ease control flow. */
35510 res = gen_reg_rtx (mode);
35511 emit_move_insn (res, operand1);
35513 /* xa = abs (operand1) */
35514 xa = ix86_expand_sse_fabs (res, &mask);
35516 /* if (!isless (xa, TWO52)) goto label; */
35517 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
35519 /* xa2 = xa + TWO52 - TWO52; */
35520 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
35521 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
35523 /* dxa = xa2 - xa; */
35524 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
/* Derive 1.0 and -0.5 from 0.5 so only one constant pool entry is
   needed.  */
35526 /* generate 0.5, 1.0 and -0.5 */
35527 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
35528 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
35529 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
35533 tmp = gen_reg_rtx (mode);
35534 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
35535 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
35536 emit_insn (gen_rtx_SET (VOIDmode, tmp,
35537 gen_rtx_AND (mode, one, tmp)));
35538 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
35539 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
35540 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
35541 emit_insn (gen_rtx_SET (VOIDmode, tmp,
35542 gen_rtx_AND (mode, one, tmp)));
35543 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
35545 /* res = copysign (xa2, operand1) */
35546 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
35548 emit_label (label);
35549 LABEL_NUSES (label) = 1;
35551 emit_move_insn (operand0, res);
/* Expand trunc of OPERAND1 into OPERAND0 via a fix/float round trip
   (truncating conversion to integer and back).  Needs DImode fix for
   DFmode, so this variant is for targets where that is available;
   sign of zero is restored only when HONOR_SIGNED_ZEROS requires it.  */
35554 /* Expand SSE sequence for computing trunc from OPERAND1 storing
35557 ix86_expand_trunc (rtx operand0, rtx operand1)
35559 /* C code for SSE variant we expand below.
35560 double xa = fabs (x), x2;
35561 if (!isless (xa, TWO52))
35563 x2 = (double)(long)x;
35564 if (HONOR_SIGNED_ZEROS (mode))
35565 return copysign (x2, x);
35568 enum machine_mode mode = GET_MODE (operand0);
35569 rtx xa, xi, TWO52, label, res, mask;
35571 TWO52 = ix86_gen_TWO52 (mode);
35573 /* Temporary for holding the result, initialized to the input
35574 operand to ease control flow. */
35575 res = gen_reg_rtx (mode);
35576 emit_move_insn (res, operand1);
35578 /* xa = abs (operand1) */
35579 xa = ix86_expand_sse_fabs (res, &mask);
/* Inputs >= 2**52 are already integral; skip the conversion.  */
35581 /* if (!isless (xa, TWO52)) goto label; */
35582 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
35584 /* x = (double)(long)x */
35585 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
35586 expand_fix (xi, res, 0);
35587 expand_float (res, xi, 0);
35589 if (HONOR_SIGNED_ZEROS (mode))
35590 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
35592 emit_label (label);
35593 LABEL_NUSES (label) = 1;
35595 emit_move_insn (operand0, res);
/* Expand trunc of OPERAND1 into OPERAND0 without 64-bit conversions
   (32-bit safe): round |x| to nearest with the TWO52 trick, subtract 1
   where rounding went up past |x|, then restore the sign — which also
   keeps -0.0 correct.  */
35598 /* Expand SSE sequence for computing trunc from OPERAND1 storing
35601 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
35603 enum machine_mode mode = GET_MODE (operand0);
35604 rtx xa, mask, TWO52, label, one, res, smask, tmp;
35606 /* C code for SSE variant we expand below.
35607 double xa = fabs (x), x2;
35608 if (!isless (xa, TWO52))
35610 xa2 = xa + TWO52 - TWO52;
35614 x2 = copysign (xa2, x);
35618 TWO52 = ix86_gen_TWO52 (mode);
35620 /* Temporary for holding the result, initialized to the input
35621 operand to ease control flow. */
35622 res = gen_reg_rtx (mode);
35623 emit_move_insn (res, operand1);
35625 /* xa = abs (operand1) */
35626 xa = ix86_expand_sse_fabs (res, &smask);
35628 /* if (!isless (xa, TWO52)) goto label; */
35629 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
35631 /* res = xa + TWO52 - TWO52; */
35632 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
35633 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
35634 emit_move_insn (res, tmp);
35637 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
/* The compare mask is all-ones where rounding overshot; ANDing with
   1.0 turns it into the exact correction to subtract.  */
35639 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
35640 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
35641 emit_insn (gen_rtx_SET (VOIDmode, mask,
35642 gen_rtx_AND (mode, mask, one)));
35643 tmp = expand_simple_binop (mode, MINUS,
35644 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
35645 emit_move_insn (res, tmp);
35647 /* res = copysign (res, operand1) */
35648 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
35650 emit_label (label);
35651 LABEL_NUSES (label) = 1;
35653 emit_move_insn (operand0, res);
/* Expand round-half-away-from-zero of OPERAND1 into OPERAND0 via
   trunc (|x| + nextafter (0.5, 0.0)).  Using the value just below 0.5
   instead of 0.5 itself avoids misrounding values whose fraction is
   exactly representable just under .5 (e.g. 0.49999999999999994).
   Uses a fix/float round trip, so DFmode needs DImode truncation.  */
35656 /* Expand SSE sequence for computing round from OPERAND1 storing
35659 ix86_expand_round (rtx operand0, rtx operand1)
35661 /* C code for the stuff we're doing below:
35662 double xa = fabs (x);
35663 if (!isless (xa, TWO52))
35665 xa = (double)(long)(xa + nextafter (0.5, 0.0));
35666 return copysign (xa, x);
35668 enum machine_mode mode = GET_MODE (operand0);
35669 rtx res, TWO52, xa, label, xi, half, mask;
35670 const struct real_format *fmt;
35671 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
35673 /* Temporary for holding the result, initialized to the input
35674 operand to ease control flow. */
35675 res = gen_reg_rtx (mode);
35676 emit_move_insn (res, operand1);
35678 TWO52 = ix86_gen_TWO52 (mode);
35679 xa = ix86_expand_sse_fabs (res, &mask);
35680 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
/* pred_half = 0.5 - 2**(-p-1), the largest value below 0.5 in this
   format (p = precision from the mode's real_format).  */
35682 /* load nextafter (0.5, 0.0) */
35683 fmt = REAL_MODE_FORMAT (mode);
35684 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
35685 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
35687 /* xa = xa + 0.5 */
35688 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
35689 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
35691 /* xa = (double)(int64_t)xa */
35692 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
35693 expand_fix (xi, xa, 0);
35694 expand_float (xa, xi, 0);
35696 /* res = copysign (xa, operand1) */
35697 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
35699 emit_label (label);
35700 LABEL_NUSES (label) = 1;
35702 emit_move_insn (operand0, res);
/* Expand round-half-away-from-zero of OP1 into OP0 using the SSE4.1
   ROUND instruction: round (a) = trunc (a + copysign (pred_half, a)),
   where pred_half = nextafter (0.5, 0.0) to avoid double-rounding of
   values just below .5.  Selects SF/DF insn generators by mode.  */
35705 /* Expand SSE sequence for computing round
35706 from OP1 storing into OP0 using sse4 round insn. */
35708 ix86_expand_round_sse4 (rtx op0, rtx op1)
35710 enum machine_mode mode = GET_MODE (op0);
35711 rtx e1, e2, res, half;
35712 const struct real_format *fmt;
35713 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
35714 rtx (*gen_copysign) (rtx, rtx, rtx);
35715 rtx (*gen_round) (rtx, rtx, rtx);
35720 gen_copysign = gen_copysignsf3;
35721 gen_round = gen_sse4_1_roundsf2;
35724 gen_copysign = gen_copysigndf3;
35725 gen_round = gen_sse4_1_rounddf2;
35728 gcc_unreachable ();
35731 /* round (a) = trunc (a + copysign (0.5, a)) */
35733 /* load nextafter (0.5, 0.0) */
35734 fmt = REAL_MODE_FORMAT (mode);
35735 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
35736 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
35737 half = const_double_from_real_value (pred_half, mode);
35739 /* e1 = copysign (0.5, op1) */
35740 e1 = gen_reg_rtx (mode);
35741 emit_insn (gen_copysign (e1, half, op1));
35743 /* e2 = op1 + e1 */
35744 e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
/* ROUND_TRUNC selects truncation in the roundsd/roundss immediate.  */
35746 /* res = trunc (e2) */
35747 res = gen_reg_rtx (mode);
35748 emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
35750 emit_move_insn (op0, res);
/* Attribute table wired into TARGET_ATTRIBUTE_TABLE: maps each i386
   machine attribute name to its arity, requirement flags, handler, and
   whether it affects type identity.  Terminated by the NULL entry.  */
35754 /* Table of valid machine attributes. */
35755 static const struct attribute_spec ix86_attribute_table[] =
35757 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
35758 affects_type_identity } */
35759 /* Stdcall attribute says callee is responsible for popping arguments
35760 if they are not variable. */
35761 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
35763 /* Fastcall attribute says callee is responsible for popping arguments
35764 if they are not variable. */
35765 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
35767 /* Thiscall attribute says callee is responsible for popping arguments
35768 if they are not variable. */
35769 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
35771 /* Cdecl attribute says the callee is a normal C declaration */
35772 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
35774 /* Regparm attribute specifies how many integer arguments are to be
35775 passed in registers. */
35776 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
35778 /* Sseregparm attribute says we are using x86_64 calling conventions
35779 for FP arguments. */
35780 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
35782 /* The transactional memory builtins are implicitly regparm or fastcall
35783 depending on the ABI. Override the generic do-nothing attribute that
35784 these builtins were declared with. */
35785 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
35787 /* force_align_arg_pointer says this function realigns the stack at entry. */
/* The name is read through a variable so it can match the spelling the
   user configured; hence the cast of its address.  */
35788 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
35789 false, true, true, ix86_handle_cconv_attribute, false },
35790 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
35791 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
35792 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
35793 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
35796 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
35798 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
35800 #ifdef SUBTARGET_ATTRIBUTE_TABLE
35801 SUBTARGET_ATTRIBUTE_TABLE,
35803 /* ms_abi and sysv_abi calling convention function attributes. */
35804 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
35805 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
35806 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
35808 { "callee_pop_aggregate_return", 1, 1, false, true, true,
35809 ix86_handle_callee_pop_aggregate_return, true },
/* Sentinel: end of table.  */
35811 { NULL, 0, 0, false, false, false, NULL, false }
/* Vectorizer cost hook: map each vect_cost_for_stmt kind to the
   corresponding field of the active processor cost table (ix86_cost).
   VECTYPE and MISALIGN are unused by this target.  */
35814 /* Implement targetm.vectorize.builtin_vectorization_cost. */
35816 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
35817 tree vectype ATTRIBUTE_UNUSED,
35818 int misalign ATTRIBUTE_UNUSED)
35820 switch (type_of_cost)
35823 return ix86_cost->scalar_stmt_cost;
35826 return ix86_cost->scalar_load_cost;
35829 return ix86_cost->scalar_store_cost;
35832 return ix86_cost->vec_stmt_cost;
35835 return ix86_cost->vec_align_load_cost;
35838 return ix86_cost->vec_store_cost;
35840 case vec_to_scalar:
35841 return ix86_cost->vec_to_scalar_cost;
35843 case scalar_to_vec:
35844 return ix86_cost->scalar_to_vec_cost;
/* Unaligned stores share the unaligned-load cost entry.  */
35846 case unaligned_load:
35847 case unaligned_store:
35848 return ix86_cost->vec_unalign_load_cost;
35850 case cond_branch_taken:
35851 return ix86_cost->cond_taken_branch_cost;
35853 case cond_branch_not_taken:
35854 return ix86_cost->cond_not_taken_branch_cost;
35860 gcc_unreachable ();
/* Map each supported vector mode to the mode with the same element
   type but twice the element count (used to build VEC_CONCAT modes for
   two-operand shuffles).  Aborts on unsupported modes.  */
35865 /* Return a vector mode with twice as many elements as VMODE. */
35866 /* ??? Consider moving this to a table generated by genmodes.c. */
35868 static enum machine_mode
35869 doublesize_vector_mode (enum machine_mode vmode)
/* 64-bit (MMX) modes -> 128-bit modes.  */
35873 case V2SFmode: return V4SFmode;
35874 case V1DImode: return V2DImode;
35875 case V2SImode: return V4SImode;
35876 case V4HImode: return V8HImode;
35877 case V8QImode: return V16QImode;
/* 128-bit (SSE) modes -> 256-bit modes.  */
35879 case V2DFmode: return V4DFmode;
35880 case V4SFmode: return V8SFmode;
35881 case V2DImode: return V4DImode;
35882 case V4SImode: return V8SImode;
35883 case V8HImode: return V16HImode;
35884 case V16QImode: return V32QImode;
/* 256-bit (AVX) modes -> 512-bit modes.  */
35886 case V4DFmode: return V8DFmode;
35887 case V8SFmode: return V16SFmode;
35888 case V4DImode: return V8DImode;
35889 case V8SImode: return V16SImode;
35890 case V16HImode: return V32HImode;
35891 case V32QImode: return V64QImode;
35894 gcc_unreachable ();
/* Build (set TARGET (vec_select OP0 PERM)) and test it against the
   insn recognizer; succeeds only if the active ISA has a matching
   pattern, otherwise the tentative insn is discarded.  */
35898 /* Construct (set target (vec_select op0 (parallel perm))) and
35899 return true if that's a valid instruction in the active ISA. */
35902 expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
35904 rtx rperm[MAX_VECT_LEN], x;
35907 for (i = 0; i < nelt; ++i)
35908 rperm[i] = GEN_INT (perm[i]);
35910 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
35911 x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
35912 x = gen_rtx_SET (VOIDmode, target, x);
/* recog_memoized < 0 means no insn pattern matches.  */
35915 if (recog_memoized (x) < 0)
/* Like expand_vselect, but select from the concatenation of OP0 and
   OP1 in the double-width vector mode, so PERM may index elements of
   either operand (0..2*nelt-1).  */
35923 /* Similar, but generate a vec_concat from op0 and op1 as well. */
35926 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
35927 const unsigned char *perm, unsigned nelt)
35929 enum machine_mode v2mode;
35932 v2mode = doublesize_vector_mode (GET_MODE (op0));
35933 x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
35934 return expand_vselect (target, x, perm, nelt);
/* Try to implement the two-operand permutation D as a blend: every
   result element must come from the same position in op0 or op1
   (perm[i] == i or i + nelt).  Selects among blendps/blendpd (FP),
   pblendw/vpblendw, pblendvb/vpblendvb (variable byte mask), and
   vpblendd depending on mode, ISA, and how elements move in groups.
   Returns false when the permutation is not a pure blend or no blend
   insn is available.  */
35937 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
35938 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
35941 expand_vec_perm_blend (struct expand_vec_perm_d *d)
35943 enum machine_mode vmode = d->vmode;
35944 unsigned i, mask, nelt = d->nelt;
35945 rtx target, op0, op1, x;
35946 rtx rperm[32], vperm;
/* A blend needs two distinct operands and a suitable ISA level.  */
35948 if (d->op0 == d->op1)
35950 if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
35952 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
35954 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
35959 /* This is a blend, not a permute. Elements must stay in their
35960 respective lanes. */
35961 for (i = 0; i < nelt; ++i)
35963 unsigned e = d->perm[i];
35964 if (!(e == i || e == i + nelt))
35971 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
35972 decision should be extracted elsewhere, so that we only try that
35973 sequence once all budget==3 options have been tried. */
35974 target = d->target;
/* Build the immediate blend mask: bit i set when element i comes from
   op1.  Wider elements replicate the bit per 32-bit chunk below.  */
35987 for (i = 0; i < nelt; ++i)
35988 mask |= (d->perm[i] >= nelt) << i;
35992 for (i = 0; i < 2; ++i)
35993 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
35998 for (i = 0; i < 4; ++i)
35999 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
36004 /* See if bytes move in pairs so we can use pblendw with
36005 an immediate argument, rather than pblendvb with a vector
36007 for (i = 0; i < 16; i += 2)
36008 if (d->perm[i] + 1 != d->perm[i + 1])
/* Variable-mask byte blend: all-zeros selects op0, all-ones op1.  */
36011 for (i = 0; i < nelt; ++i)
36012 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
36015 vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
36016 vperm = force_reg (vmode, vperm);
36018 if (GET_MODE_SIZE (vmode) == 16)
36019 emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
36021 emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
36025 for (i = 0; i < 8; ++i)
36026 mask |= (d->perm[i * 2] >= 16) << i;
36031 target = gen_lowpart (vmode, target);
36032 op0 = gen_lowpart (vmode, op0);
36033 op1 = gen_lowpart (vmode, op1);
36037 /* See if bytes move in pairs. If not, vpblendvb must be used. */
36038 for (i = 0; i < 32; i += 2)
36039 if (d->perm[i] + 1 != d->perm[i + 1])
36041 /* See if bytes move in quadruplets. If yes, vpblendd
36042 with immediate can be used. */
36043 for (i = 0; i < 32; i += 4)
36044 if (d->perm[i] + 2 != d->perm[i + 2])
36048 /* See if bytes move the same in both lanes. If yes,
36049 vpblendw with immediate can be used. */
36050 for (i = 0; i < 16; i += 2)
36051 if (d->perm[i] + 16 != d->perm[i + 16])
36054 /* Use vpblendw. */
36055 for (i = 0; i < 16; ++i)
36056 mask |= (d->perm[i * 2] >= 32) << i;
36061 /* Use vpblendd. */
36062 for (i = 0; i < 8; ++i)
36063 mask |= (d->perm[i * 4] >= 32) << i;
36068 /* See if words move in pairs. If yes, vpblendd can be used. */
36069 for (i = 0; i < 16; i += 2)
36070 if (d->perm[i] + 1 != d->perm[i + 1])
36074 /* See if words move the same in both lanes. If not,
36075 vpblendvb must be used. */
36076 for (i = 0; i < 8; i++)
36077 if (d->perm[i] + 8 != d->perm[i + 8])
36079 /* Use vpblendvb. */
36080 for (i = 0; i < 32; ++i)
36081 rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);
36085 target = gen_lowpart (vmode, target);
36086 op0 = gen_lowpart (vmode, op0);
36087 op1 = gen_lowpart (vmode, op1);
36088 goto finish_pblendvb;
36091 /* Use vpblendw. */
36092 for (i = 0; i < 16; ++i)
36093 mask |= (d->perm[i] >= 16) << i;
36097 /* Use vpblendd. */
36098 for (i = 0; i < 8; ++i)
36099 mask |= (d->perm[i * 2] >= 16) << i;
36104 /* Use vpblendd. */
36105 for (i = 0; i < 4; ++i)
36106 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
36111 gcc_unreachable ();
36114 /* This matches five different patterns with the different modes. */
36115 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
36116 x = gen_rtx_SET (VOIDmode, target, x);
/* Try the AVX variable-mask vpermilps: one V8SF operand, each element
   replaced by another element of the same 128-bit lane, with the
   selector taken from a V8SI register.  Fails if not AVX/V8SF, the
   operands differ, or any element crosses a lane boundary.  */
36122 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
36123 in terms of the variable form of vpermilps.
36125 Note that we will have already failed the immediate input vpermilps,
36126 which requires that the high and low part shuffle be identical; the
36127 variable form doesn't require that. */
36130 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
36132 rtx rperm[8], vperm;
36135 if (!TARGET_AVX || d->vmode != V8SFmode || d->op0 != d->op1)
36138 /* We can only permute within the 128-bit lane. */
36139 for (i = 0; i < 8; ++i)
36141 unsigned e = d->perm[i];
36142 if (i < 4 ? e >= 4 : e < 4)
36149 for (i = 0; i < 8; ++i)
36151 unsigned e = d->perm[i];
36153 /* Within each 128-bit lane, the elements of op0 are numbered
36154 from 0 and the elements of op1 are numbered from 4. */
36160 rperm[i] = GEN_INT (e);
/* The selector lives in an integer vector register.  */
36163 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
36164 vperm = force_reg (V8SImode, vperm);
36165 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
/* Return true if permutation D could instead be performed in the
   wider-element integer mode VMODE: both modes must be integer vectors
   of the same total size, and the elements of D must move in aligned,
   consecutive chunks matching VMODE's element size.  */
36170 /* Return true if permutation D can be performed as VMODE permutation
36174 valid_perm_using_mode_p (enum machine_mode vmode, struct expand_vec_perm_d *d)
36176 unsigned int i, j, chunk;
36178 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
36179 || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
36180 || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
36183 if (GET_MODE_NUNITS (vmode) >= d->nelt)
/* Each chunk must start on a chunk boundary and be contiguous.  */
36186 chunk = d->nelt / GET_MODE_NUNITS (vmode);
36187 for (i = 0; i < d->nelt; i += chunk)
36188 if (d->perm[i] & (chunk - 1))
36191 for (j = 1; j < chunk; ++j)
36192 if (d->perm[i] + j != d->perm[i + j])
/* Try to implement permutation D with one byte/dword-shuffle insn:
   pshufb (SSSE3, one operand), vpperm (XOP, two operands), or for
   256-bit AVX2 vectors vperm2i128 / vpermq / vpermd / vpshufb,
   depending on which width the permutation factors through (checked
   with valid_perm_using_mode_p).  The element-level permutation is
   expanded into a byte- or dword-granular selector vector.  */
36198 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
36199 in terms of pshufb, vpperm, vpermq, vpermd or vperm2i128. */
36202 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
36204 unsigned i, nelt, eltsz, mask;
36205 unsigned char perm[32];
36206 enum machine_mode vmode = V16QImode;
36207 rtx rperm[32], vperm, target, op0, op1;
36211 if (d->op0 != d->op1)
36213 if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
36216 && valid_perm_using_mode_p (V2TImode, d))
36221 /* Use vperm2i128 insn. The pattern uses
36222 V4DImode instead of V2TImode. */
36223 target = gen_lowpart (V4DImode, d->target);
36224 op0 = gen_lowpart (V4DImode, d->op0);
36225 op1 = gen_lowpart (V4DImode, d->op1);
/* Encode which 128-bit half of each source lands in each half of
   the result.  */
36227 = GEN_INT (((d->perm[0] & (nelt / 2)) ? 1 : 0)
36228 || ((d->perm[nelt / 2] & (nelt / 2)) ? 2 : 0));
36229 emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
36237 if (GET_MODE_SIZE (d->vmode) == 16)
36242 else if (GET_MODE_SIZE (d->vmode) == 32)
36247 /* V4DImode should be already handled through
36248 expand_vselect by vpermq instruction. */
36249 gcc_assert (d->vmode != V4DImode);
36252 if (d->vmode == V8SImode
36253 || d->vmode == V16HImode
36254 || d->vmode == V32QImode)
36256 /* First see if vpermq can be used for
36257 V8SImode/V16HImode/V32QImode. */
36258 if (valid_perm_using_mode_p (V4DImode, d))
36260 for (i = 0; i < 4; i++)
36261 perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
36264 return expand_vselect (gen_lowpart (V4DImode, d->target),
36265 gen_lowpart (V4DImode, d->op0),
36269 /* Next see if vpermd can be used. */
36270 if (valid_perm_using_mode_p (V8SImode, d))
36274 if (vmode == V32QImode)
36276 /* vpshufb only works intra lanes, it is not
36277 possible to shuffle bytes in between the lanes. */
36278 for (i = 0; i < nelt; ++i)
36279 if ((d->perm[i] ^ i) & (nelt / 2))
36290 if (vmode == V8SImode)
36291 for (i = 0; i < 8; ++i)
36292 rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
/* Expand the element-level selector to byte granularity; MASK wraps
   indices appropriately for the chosen instruction.  */
36295 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
36296 if (d->op0 != d->op1)
36297 mask = 2 * nelt - 1;
36298 else if (vmode == V16QImode)
36301 mask = nelt / 2 - 1;
36303 for (i = 0; i < nelt; ++i)
36305 unsigned j, e = d->perm[i] & mask;
36306 for (j = 0; j < eltsz; ++j)
36307 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
36311 vperm = gen_rtx_CONST_VECTOR (vmode,
36312 gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
36313 vperm = force_reg (vmode, vperm);
36315 target = gen_lowpart (vmode, d->target);
36316 op0 = gen_lowpart (vmode, d->op0);
36317 if (d->op0 == d->op1)
36319 if (vmode == V16QImode)
36320 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
36321 else if (vmode == V32QImode)
36322 emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
36324 emit_insn (gen_avx2_permvarv8si (target, vperm, op0));
36328 op1 = gen_lowpart (vmode, d->op1);
36329 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
/* Try to implement permutation D with a single instruction.  Order of
   attempts: identity move; AVX2 vpbroadcast for splat permutations;
   plain VEC_SELECT (allows a memory operand); SEL+CONCAT interleave
   and shufps shapes; the general two-operand SEL+CONCAT; reversed-
   operand interleave; then the blend, vpermil and pshufb helpers.
   Returns true on the first success.  */
36335 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
36336 in a single instruction. */
36339 expand_vec_perm_1 (struct expand_vec_perm_d *d)
36341 unsigned i, nelt = d->nelt;
36342 unsigned char perm2[MAX_VECT_LEN];
36344 /* Check plain VEC_SELECT first, because AVX has instructions that could
36345 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
36346 input where SEL+CONCAT may not. */
36347 if (d->op0 == d->op1)
36349 int mask = nelt - 1;
36350 bool identity_perm = true;
36351 bool broadcast_perm = true;
/* Classify the permutation while reducing indices mod nelt.  */
36353 for (i = 0; i < nelt; i++)
36355 perm2[i] = d->perm[i] & mask;
36357 identity_perm = false;
36359 broadcast_perm = false;
36365 emit_move_insn (d->target, d->op0);
36368 else if (broadcast_perm && TARGET_AVX2)
36370 /* Use vpbroadcast{b,w,d}. */
36371 rtx op = d->op0, (*gen) (rtx, rtx) = NULL;
36375 op = gen_lowpart (V16QImode, op);
36376 gen = gen_avx2_pbroadcastv32qi;
36379 op = gen_lowpart (V8HImode, op);
36380 gen = gen_avx2_pbroadcastv16hi;
36383 op = gen_lowpart (V4SImode, op);
36384 gen = gen_avx2_pbroadcastv8si;
36387 gen = gen_avx2_pbroadcastv16qi;
36390 gen = gen_avx2_pbroadcastv8hi;
36392 /* For other modes prefer other shuffles this function creates. */
36398 emit_insn (gen (d->target, op));
36403 if (expand_vselect (d->target, d->op0, perm2, nelt))
36406 /* There are plenty of patterns in sse.md that are written for
36407 SEL+CONCAT and are not replicated for a single op. Perhaps
36408 that should be changed, to avoid the nastiness here. */
36410 /* Recognize interleave style patterns, which means incrementing
36411 every other permutation operand. */
36412 for (i = 0; i < nelt; i += 2)
36414 perm2[i] = d->perm[i] & mask;
36415 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
36417 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
36420 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
36423 for (i = 0; i < nelt; i += 4)
36425 perm2[i + 0] = d->perm[i + 0] & mask;
36426 perm2[i + 1] = d->perm[i + 1] & mask;
36427 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
36428 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
36431 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
36436 /* Finally, try the fully general two operand permute. */
36437 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
36440 /* Recognize interleave style patterns with reversed operands. */
36441 if (d->op0 != d->op1)
36443 for (i = 0; i < nelt; ++i)
36445 unsigned e = d->perm[i];
36453 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
36457 /* Try the SSE4.1 blend variable merge instructions. */
36458 if (expand_vec_perm_blend (d))
36461 /* Try one of the AVX vpermil variable permutations. */
36462 if (expand_vec_perm_vpermil (d))
36465 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
36466 vpshufb, vpermd or vpermq variable permutation. */
36467 if (expand_vec_perm_pshufb (d))
/* Try to implement a one-operand V8HI permutation as pshuflw followed
   by pshufhw: each 64-bit half must permute only within itself.  The
   two insns are emitted via expand_vselect, with the untouched half
   passed through as identity each time.  */
36473 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
36474 in terms of a pair of pshuflw + pshufhw instructions. */
36477 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
36479 unsigned char perm2[MAX_VECT_LEN];
36483 if (d->vmode != V8HImode || d->op0 != d->op1)
36486 /* The two permutations only operate in 64-bit lanes. */
36487 for (i = 0; i < 4; ++i)
36488 if (d->perm[i] >= 4)
36490 for (i = 4; i < 8; ++i)
36491 if (d->perm[i] < 4)
36497 /* Emit the pshuflw. */
36498 memcpy (perm2, d->perm, 4);
36499 for (i = 4; i < 8; ++i)
36501 ok = expand_vselect (d->target, d->op0, perm2, 8);
/* Second insn reads the first insn's result from d->target.  */
36504 /* Emit the pshufhw. */
36505 memcpy (perm2 + 4, d->perm + 4, 4);
36506 for (i = 0; i < 4; ++i)
36508 ok = expand_vselect (d->target, d->target, perm2, 8);
/* Try to reduce a two-operand permutation to a one-operand one using
   SSSE3 palignr: if all selected elements fall inside one nelt-wide
   window of the op1:op0 concatenation, shift that window down with
   palignr, rebase the permutation, and retry with expand_vec_perm_1
   (pshufb guarantees the single-operand case then succeeds).  */
36514 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
36515 the permutation using the SSSE3 palignr instruction. This succeeds
36516 when all of the elements in PERM fit within one vector and we merely
36517 need to shift them down so that a single vector permutation has a
36518 chance to succeed. */
36521 expand_vec_perm_palignr (struct expand_vec_perm_d *d)
36523 unsigned i, nelt = d->nelt;
36528 /* Even with AVX, palignr only operates on 128-bit vectors. */
36529 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
/* Find the window [min, max] of selected source indices.  */
36532 min = nelt, max = 0;
36533 for (i = 0; i < nelt; ++i)
36535 unsigned e = d->perm[i];
36541 if (min == 0 || max - min >= nelt)
36544 /* Given that we have SSSE3, we know we'll be able to implement the
36545 single operand permutation after the palignr with pshufb. */
/* palignr shift amount is in bits; operands are swapped because the
   insn concatenates src2:src1.  */
36549 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
36550 emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode, d->target),
36551 gen_lowpart (TImode, d->op1),
36552 gen_lowpart (TImode, d->op0), shift));
36554 d->op0 = d->op1 = d->target;
/* Rebase the permutation onto the shifted vector.  */
36557 for (i = 0; i < nelt; ++i)
36559 unsigned e = d->perm[i] - min;
36565 /* Test for the degenerate case where the alignment by itself
36566 produces the desired permutation. */
36570 ok = expand_vec_perm_1 (d);
36576 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
36577 a two vector permutation into a single vector permutation by using
36578 an interleave operation to merge the vectors. */
/* NOTE(review): excerpt only -- the embedded original line numbers skip,
   so interior statements (braces, returns, declarations) are elided;
   not compilable as shown.  The visible structure: classify which
   halves/quarters of the two inputs are used (CONTENTS bitmask), pick a
   merging shuffle DREMAP, then perform the residual single-vector
   permutation DFINAL via expand_vec_perm_1.  */
36581 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
36583 struct expand_vec_perm_d dremap, dfinal;
36584 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
36585 unsigned HOST_WIDE_INT contents;
36586 unsigned char remap[2 * MAX_VECT_LEN];
36588 bool ok, same_halves = false;
36590 if (GET_MODE_SIZE (d->vmode) == 16)
36592 if (d->op0 == d->op1)
36595 else if (GET_MODE_SIZE (d->vmode) == 32)
36599 /* For 32-byte modes allow even d->op0 == d->op1.
36600 The lack of cross-lane shuffling in some instructions
36601 might prevent a single insn shuffle. */
36606 /* Examine from whence the elements come. */
36608 for (i = 0; i < nelt; ++i)
36609 contents |= ((unsigned HOST_WIDE_INT) 1) << d->perm[i];
/* remap[e] records where input element E lands after the merging
   shuffle; 0xff marks "not placed" (asserted < nelt when consumed).  */
36611 memset (remap, 0xff, sizeof (remap));
36614 if (GET_MODE_SIZE (d->vmode) == 16)
36616 unsigned HOST_WIDE_INT h1, h2, h3, h4;
36618 /* Split the two input vectors into 4 halves. */
36619 h1 = (((unsigned HOST_WIDE_INT) 1) << nelt2) - 1;
36624 /* If the elements from the low halves use interleave low, and similarly
36625 for interleave high. If the elements are from mis-matched halves, we
36626 can use shufps for V4SF/V4SI or do a DImode shuffle. */
36627 if ((contents & (h1 | h3)) == contents)
36630 for (i = 0; i < nelt2; ++i)
36633 remap[i + nelt] = i * 2 + 1;
36634 dremap.perm[i * 2] = i;
36635 dremap.perm[i * 2 + 1] = i + nelt;
36637 if (!TARGET_SSE2 && d->vmode == V4SImode)
36638 dremap.vmode = V4SFmode;
36640 else if ((contents & (h2 | h4)) == contents)
36643 for (i = 0; i < nelt2; ++i)
36645 remap[i + nelt2] = i * 2;
36646 remap[i + nelt + nelt2] = i * 2 + 1;
36647 dremap.perm[i * 2] = i + nelt2;
36648 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
36650 if (!TARGET_SSE2 && d->vmode == V4SImode)
36651 dremap.vmode = V4SFmode;
36653 else if ((contents & (h1 | h4)) == contents)
36656 for (i = 0; i < nelt2; ++i)
36659 remap[i + nelt + nelt2] = i + nelt2;
36660 dremap.perm[i] = i;
36661 dremap.perm[i + nelt2] = i + nelt + nelt2;
36666 dremap.vmode = V2DImode;
36668 dremap.perm[0] = 0;
36669 dremap.perm[1] = 3;
36672 else if ((contents & (h2 | h3)) == contents)
36675 for (i = 0; i < nelt2; ++i)
36677 remap[i + nelt2] = i;
36678 remap[i + nelt] = i + nelt2;
36679 dremap.perm[i] = i + nelt2;
36680 dremap.perm[i + nelt2] = i + nelt;
36685 dremap.vmode = V2DImode;
36687 dremap.perm[0] = 1;
36688 dremap.perm[1] = 2;
36696 unsigned int nelt4 = nelt / 4, nzcnt = 0;
36697 unsigned HOST_WIDE_INT q[8];
36698 unsigned int nonzero_halves[4];
36700 /* Split the two input vectors into 8 quarters. */
36701 q[0] = (((unsigned HOST_WIDE_INT) 1) << nelt4) - 1;
36702 for (i = 1; i < 8; ++i)
36703 q[i] = q[0] << (nelt4 * i);
36704 for (i = 0; i < 4; ++i)
36705 if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
36707 nonzero_halves[nzcnt] = i;
36713 gcc_assert (d->op0 == d->op1);
36714 nonzero_halves[1] = nonzero_halves[0];
36715 same_halves = true;
36717 else if (d->op0 == d->op1)
36719 gcc_assert (nonzero_halves[0] == 0);
36720 gcc_assert (nonzero_halves[1] == 1);
36725 if (d->perm[0] / nelt2 == nonzero_halves[1])
36727 /* Attempt to increase the likelyhood that dfinal
36728 shuffle will be intra-lane. */
36729 char tmph = nonzero_halves[0];
36730 nonzero_halves[0] = nonzero_halves[1];
36731 nonzero_halves[1] = tmph;
36734 /* vperm2f128 or vperm2i128. */
36735 for (i = 0; i < nelt2; ++i)
36737 remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
36738 remap[i + nonzero_halves[0] * nelt2] = i;
36739 dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
36740 dremap.perm[i] = i + nonzero_halves[0] * nelt2;
36743 if (d->vmode != V8SFmode
36744 && d->vmode != V4DFmode
36745 && d->vmode != V8SImode)
36747 dremap.vmode = V8SImode;
36749 for (i = 0; i < 4; ++i)
36751 dremap.perm[i] = i + nonzero_halves[0] * 4;
36752 dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
36756 else if (d->op0 == d->op1)
36758 else if (TARGET_AVX2
36759 && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
36762 for (i = 0; i < nelt4; ++i)
36765 remap[i + nelt] = i * 2 + 1;
36766 remap[i + nelt2] = i * 2 + nelt2;
36767 remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
36768 dremap.perm[i * 2] = i;
36769 dremap.perm[i * 2 + 1] = i + nelt;
36770 dremap.perm[i * 2 + nelt2] = i + nelt2;
36771 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
36774 else if (TARGET_AVX2
36775 && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
36778 for (i = 0; i < nelt4; ++i)
36780 remap[i + nelt4] = i * 2;
36781 remap[i + nelt + nelt4] = i * 2 + 1;
36782 remap[i + nelt2 + nelt4] = i * 2 + nelt2;
36783 remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
36784 dremap.perm[i * 2] = i + nelt4;
36785 dremap.perm[i * 2 + 1] = i + nelt + nelt4;
36786 dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
36787 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
36794 /* Use the remapping array set up above to move the elements from their
36795 swizzled locations into their final destinations. */
36797 for (i = 0; i < nelt; ++i)
36799 unsigned e = remap[d->perm[i]];
36800 gcc_assert (e < nelt);
36801 /* If same_halves is true, both halves of the remapped vector are the
36802 same. Avoid cross-lane accesses if possible. */
36803 if (same_halves && i >= nelt2)
36805 gcc_assert (e < nelt2);
36806 dfinal.perm[i] = e + nelt2;
36809 dfinal.perm[i] = e;
36811 dfinal.op0 = gen_reg_rtx (dfinal.vmode);
36812 dfinal.op1 = dfinal.op0;
36813 dremap.target = dfinal.op0;
36815 /* Test if the final remap can be done with a single insn. For V4SFmode or
36816 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
36818 ok = expand_vec_perm_1 (&dfinal);
36819 seq = get_insns ();
36828 if (dremap.vmode != dfinal.vmode)
36830 dremap.target = gen_lowpart (dremap.vmode, dremap.target);
36831 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
36832 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
36835 ok = expand_vec_perm_1 (&dremap);
36842 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
36843 a single vector cross-lane permutation into vpermq followed
36844 by any of the single insn permutations. */
/* NOTE(review): excerpt only -- the embedded original line numbers skip,
   so interior statements (braces, returns, declarations) are elided;
   not compilable as shown.  Visible flow: count which 64-bit quarters
   each output half uses (CONTENTS), build a V4DImode vpermq DREMAP to
   gather them, then do the intra-lane DFINAL permutation.  */
36847 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
36849 struct expand_vec_perm_d dremap, dfinal;
36850 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
36851 unsigned contents[2];
36855 && (d->vmode == V32QImode || d->vmode == V16HImode)
36856 && d->op0 == d->op1))
36861 for (i = 0; i < nelt2; ++i)
36863 contents[0] |= 1u << (d->perm[i] / nelt4);
36864 contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
36867 for (i = 0; i < 2; ++i)
36869 unsigned int cnt = 0;
36870 for (j = 0; j < 4; ++j)
36871 if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
36879 dremap.vmode = V4DImode;
36881 dremap.target = gen_reg_rtx (V4DImode);
36882 dremap.op0 = gen_lowpart (V4DImode, d->op0);
36883 dremap.op1 = dremap.op0;
36884 for (i = 0; i < 2; ++i)
36886 unsigned int cnt = 0;
36887 for (j = 0; j < 4; ++j)
36888 if ((contents[i] & (1u << j)) != 0)
36889 dremap.perm[2 * i + cnt++] = j;
36890 for (; cnt < 2; ++cnt)
36891 dremap.perm[2 * i + cnt] = 0;
36895 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
36896 dfinal.op1 = dfinal.op0;
36897 for (i = 0, j = 0; i < nelt; ++i)
36901 dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
36902 if ((d->perm[i] / nelt4) == dremap.perm[j])
36904 else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
36905 dfinal.perm[i] |= nelt4;
36907 gcc_unreachable ();
36910 ok = expand_vec_perm_1 (&dremap);
36913 ok = expand_vec_perm_1 (&dfinal);
36919 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
36920 a two vector permutation using 2 intra-lane interleave insns
36921 and cross-lane shuffle for 32-byte vectors. */
/* NOTE(review): excerpt only -- the embedded original line numbers skip,
   so interior statements (braces, returns, declarations) are elided;
   not compilable as shown.  Visible flow: validate the perm matches an
   interleave pattern (perm[0] selects low/high half), then dispatch on
   mode to the matching gen_vec_interleave_{low,high}* generator.  */
36924 expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
36927 rtx (*gen) (rtx, rtx, rtx);
36929 if (d->op0 == d->op1)
36931 if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
36933 else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
36939 if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
36941 for (i = 0; i < nelt; i += 2)
36942 if (d->perm[i] != d->perm[0] + i / 2
36943 || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
36953 gen = gen_vec_interleave_highv32qi;
36955 gen = gen_vec_interleave_lowv32qi;
36959 gen = gen_vec_interleave_highv16hi;
36961 gen = gen_vec_interleave_lowv16hi;
36965 gen = gen_vec_interleave_highv8si;
36967 gen = gen_vec_interleave_lowv8si;
36971 gen = gen_vec_interleave_highv4di;
36973 gen = gen_vec_interleave_lowv4di;
36977 gen = gen_vec_interleave_highv8sf;
36979 gen = gen_vec_interleave_lowv8sf;
36983 gen = gen_vec_interleave_highv4df;
36985 gen = gen_vec_interleave_lowv4df;
36988 gcc_unreachable ();
36991 emit_insn (gen (d->target, d->op0, d->op1));
36995 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
36996 permutation with two pshufb insns and an ior. We should have already
36997 failed all two instruction sequences. */
/* NOTE(review): excerpt only -- the embedded original line numbers skip,
   so interior statements (braces, returns, declarations) are elided;
   not compilable as shown.  Visible flow: build two V16QI byte-select
   masks (bit 7 = -128 zeroes a lane), pshufb each operand, then ior the
   two partial results into d->target.  */
37000 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
37002 rtx rperm[2][16], vperm, l, h, op, m128;
37003 unsigned int i, nelt, eltsz;
37005 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
37007 gcc_assert (d->op0 != d->op1);
37010 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
37012 /* Generate two permutation masks. If the required element is within
37013 the given vector it is shuffled into the proper lane. If the required
37014 element is in the other vector, force a zero into the lane by setting
37015 bit 7 in the permutation mask. */
37016 m128 = GEN_INT (-128);
37017 for (i = 0; i < nelt; ++i)
37019 unsigned j, e = d->perm[i];
37020 unsigned which = (e >= nelt);
37024 for (j = 0; j < eltsz; ++j)
37026 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
37027 rperm[1-which][i*eltsz + j] = m128;
37031 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
37032 vperm = force_reg (V16QImode, vperm);
37034 l = gen_reg_rtx (V16QImode);
37035 op = gen_lowpart (V16QImode, d->op0);
37036 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
37038 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
37039 vperm = force_reg (V16QImode, vperm);
37041 h = gen_reg_rtx (V16QImode);
37042 op = gen_lowpart (V16QImode, d->op1);
37043 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
37045 op = gen_lowpart (V16QImode, d->target);
37046 emit_insn (gen_iorv16qi3 (op, l, h));
37051 /* Implement arbitrary permutation of one V32QImode and V16QImode operand
37052 with two vpshufb insns, vpermq and vpor. We should have already failed
37053 all two or three instruction sequences. */
/* NOTE(review): excerpt only -- the embedded original line numbers skip,
   so interior statements (braces, returns, declarations) are elided;
   not compilable as shown.  Visible flow: two vpshufb selections of the
   same operand, a vpermq lane swap of one result, then vpor to combine
   into d->target.  */
37056 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
37058 rtx rperm[2][32], vperm, l, h, hp, op, m128;
37059 unsigned int i, nelt, eltsz;
37062 || d->op0 != d->op1
37063 || (d->vmode != V32QImode && d->vmode != V16HImode))
37070 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
37072 /* Generate two permutation masks. If the required element is within
37073 the same lane, it is shuffled in. If the required element from the
37074 other lane, force a zero by setting bit 7 in the permutation mask.
37075 In the other mask the mask has non-negative elements if element
37076 is requested from the other lane, but also moved to the other lane,
37077 so that the result of vpshufb can have the two V2TImode halves
37079 m128 = GEN_INT (-128);
37080 for (i = 0; i < nelt; ++i)
37082 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
37083 unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
37085 for (j = 0; j < eltsz; ++j)
37087 rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
37088 rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
37092 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
37093 vperm = force_reg (V32QImode, vperm);
37095 h = gen_reg_rtx (V32QImode);
37096 op = gen_lowpart (V32QImode, d->op0);
37097 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
37099 /* Swap the 128-byte lanes of h into hp. */
37100 hp = gen_reg_rtx (V4DImode);
37101 op = gen_lowpart (V4DImode, h);
37102 emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
37105 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
37106 vperm = force_reg (V32QImode, vperm);
37108 l = gen_reg_rtx (V32QImode);
37109 op = gen_lowpart (V32QImode, d->op0);
37110 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
37112 op = gen_lowpart (V32QImode, d->target);
37113 emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
37118 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
37119 and extract-odd permutations of two V32QImode and V16QImode operand
37120 with two vpshufb insns, vpor and vpermq. We should have already
37121 failed all two or three instruction sequences. */
/* NOTE(review): excerpt only -- the embedded original line numbers skip,
   so interior statements (braces, returns, declarations) are elided;
   not compilable as shown.  Visible flow: vpshufb each of the two
   distinct operands with complementary masks, vpor, then vpermq
   { 0, 2, 1, 3 } to put the quarters in order.  */
37124 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
37126 rtx rperm[2][32], vperm, l, h, ior, op, m128;
37127 unsigned int i, nelt, eltsz;
37130 || d->op0 == d->op1
37131 || (d->vmode != V32QImode && d->vmode != V16HImode))
37134 for (i = 0; i < d->nelt; ++i)
37135 if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
37142 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
37144 /* Generate two permutation masks. In the first permutation mask
37145 the first quarter will contain indexes for the first half
37146 of the op0, the second quarter will contain bit 7 set, third quarter
37147 will contain indexes for the second half of the op0 and the
37148 last quarter bit 7 set. In the second permutation mask
37149 the first quarter will contain bit 7 set, the second quarter
37150 indexes for the first half of the op1, the third quarter bit 7 set
37151 and last quarter indexes for the second half of the op1.
37152 I.e. the first mask e.g. for V32QImode extract even will be:
37153 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
37154 (all values masked with 0xf except for -128) and second mask
37155 for extract even will be
37156 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
37157 m128 = GEN_INT (-128);
37158 for (i = 0; i < nelt; ++i)
37160 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
37161 unsigned which = d->perm[i] >= nelt;
37162 unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;
37164 for (j = 0; j < eltsz; ++j)
37166 rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
37167 rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
37171 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
37172 vperm = force_reg (V32QImode, vperm);
37174 l = gen_reg_rtx (V32QImode);
37175 op = gen_lowpart (V32QImode, d->op0);
37176 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
37178 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
37179 vperm = force_reg (V32QImode, vperm);
37181 h = gen_reg_rtx (V32QImode);
37182 op = gen_lowpart (V32QImode, d->op1);
37183 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
37185 ior = gen_reg_rtx (V32QImode);
37186 emit_insn (gen_iorv32qi3 (ior, l, h));
37188 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
37189 op = gen_lowpart (V4DImode, d->target);
37190 ior = gen_lowpart (V4DImode, ior);
37191 emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
37192 const1_rtx, GEN_INT (3)));
37197 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
37198 and extract-odd permutations. */
/* NOTE(review): excerpt only -- the embedded original line numbers skip,
   so interior statements (the mode switch, case labels, braces) are
   elided; not compilable as shown.  Visible flow: per-mode expansion of
   extract-even (ODD == 0) / extract-odd (ODD != 0), using vperm2f128 +
   unpck for V4DF, shufps sequences for V8SF, interleave ladders for
   V8HI/V16QI, and vpermq/vpunpck for the AVX2 256-bit integer modes.  */
37201 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
37208 t1 = gen_reg_rtx (V4DFmode);
37209 t2 = gen_reg_rtx (V4DFmode);
37211 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
37212 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
37213 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
37215 /* Now an unpck[lh]pd will produce the result required. */
37217 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
37219 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
37225 int mask = odd ? 0xdd : 0x88;
37227 t1 = gen_reg_rtx (V8SFmode);
37228 t2 = gen_reg_rtx (V8SFmode);
37229 t3 = gen_reg_rtx (V8SFmode);
37231 /* Shuffle within the 128-bit lanes to produce:
37232 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
37233 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
37236 /* Shuffle the lanes around to produce:
37237 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
37238 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
37241 /* Shuffle within the 128-bit lanes to produce:
37242 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
37243 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
37245 /* Shuffle within the 128-bit lanes to produce:
37246 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
37247 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
37249 /* Shuffle the lanes around to produce:
37250 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
37251 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
37260 /* These are always directly implementable by expand_vec_perm_1. */
37261 gcc_unreachable ();
37265 return expand_vec_perm_pshufb2 (d);
37268 /* We need 2*log2(N)-1 operations to achieve odd/even
37269 with interleave. */
37270 t1 = gen_reg_rtx (V8HImode);
37271 t2 = gen_reg_rtx (V8HImode);
37272 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
37273 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
37274 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
37275 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
37277 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
37279 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
37286 return expand_vec_perm_pshufb2 (d);
37289 t1 = gen_reg_rtx (V16QImode);
37290 t2 = gen_reg_rtx (V16QImode);
37291 t3 = gen_reg_rtx (V16QImode);
37292 emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
37293 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
37294 emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
37295 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
37296 emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
37297 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
37299 t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
37301 t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
37308 return expand_vec_perm_vpshufb2_vpermq_even_odd (d);
37313 struct expand_vec_perm_d d_copy = *d;
37314 d_copy.vmode = V4DFmode;
37315 d_copy.target = gen_lowpart (V4DFmode, d->target);
37316 d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
37317 d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
37318 return expand_vec_perm_even_odd_1 (&d_copy, odd);
37321 t1 = gen_reg_rtx (V4DImode);
37322 t2 = gen_reg_rtx (V4DImode);
37324 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
37325 emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
37326 emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));
37328 /* Now an vpunpck[lh]qdq will produce the result required. */
37330 t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
37332 t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
37339 struct expand_vec_perm_d d_copy = *d;
37340 d_copy.vmode = V8SFmode;
37341 d_copy.target = gen_lowpart (V8SFmode, d->target);
37342 d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
37343 d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
37344 return expand_vec_perm_even_odd_1 (&d_copy, odd);
37347 t1 = gen_reg_rtx (V8SImode);
37348 t2 = gen_reg_rtx (V8SImode);
37350 /* Shuffle the lanes around into
37351 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
37352 emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, t1),
37353 gen_lowpart (V4DImode, d->op0),
37354 gen_lowpart (V4DImode, d->op1),
37356 emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, t2),
37357 gen_lowpart (V4DImode, d->op0),
37358 gen_lowpart (V4DImode, d->op1),
37361 /* Swap the 2nd and 3rd position in each lane into
37362 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
37363 emit_insn (gen_avx2_pshufdv3 (t1, t1,
37364 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
37365 emit_insn (gen_avx2_pshufdv3 (t2, t2,
37366 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
37368 /* Now an vpunpck[lh]qdq will produce
37369 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
37371 t3 = gen_avx2_interleave_highv4di (gen_lowpart (V4DImode, d->target),
37372 gen_lowpart (V4DImode, t1),
37373 gen_lowpart (V4DImode, t2));
37375 t3 = gen_avx2_interleave_lowv4di (gen_lowpart (V4DImode, d->target),
37376 gen_lowpart (V4DImode, t1),
37377 gen_lowpart (V4DImode, t2));
37382 gcc_unreachable ();
37388 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
37389 extract-even and extract-odd permutations. */
/* NOTE(review): excerpt only -- interior lines (braces, the assignment
   of ODD, returns) are elided; not compilable as shown.  Visible flow:
   require perm[i] == 2*i + odd for all I, then delegate to
   expand_vec_perm_even_odd_1.  */
37392 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
37394 unsigned i, odd, nelt = d->nelt;
37397 if (odd != 0 && odd != 1)
37400 for (i = 1; i < nelt; ++i)
37401 if (d->perm[i] != 2 * i + odd)
37404 return expand_vec_perm_even_odd_1 (d, odd);
37407 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
37408 permutations. We assume that expand_vec_perm_1 has already failed. */
/* NOTE(review): excerpt only -- the mode switch, case labels and braces
   are elided; not compilable as shown.  Visible flow: widen by repeated
   self-interleave until V4SImode, then finish the broadcast of element
   ELT with a pshufd-style vselect.  */
37411 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
37413 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
37414 enum machine_mode vmode = d->vmode;
37415 unsigned char perm2[4];
37423 /* These are special-cased in sse.md so that we can optionally
37424 use the vbroadcast instruction. They expand to two insns
37425 if the input happens to be in a register. */
37426 gcc_unreachable ();
37432 /* These are always implementable using standard shuffle patterns. */
37433 gcc_unreachable ();
37437 /* These can be implemented via interleave. We save one insn by
37438 stopping once we have promoted to V4SImode and then use pshufd. */
37441 optab otab = vec_interleave_low_optab;
37445 otab = vec_interleave_high_optab;
37450 op0 = expand_binop (vmode, otab, op0, op0, NULL, 0, OPTAB_DIRECT);
37451 vmode = get_mode_wider_vector (vmode);
37452 op0 = gen_lowpart (vmode, op0);
37454 while (vmode != V4SImode);
37456 memset (perm2, elt, 4);
37457 ok = expand_vselect (gen_lowpart (V4SImode, d->target), op0, perm2, 4);
37465 /* For AVX2 broadcasts of the first element vpbroadcast* or
37466 vpermq should be used by expand_vec_perm_1. */
37467 gcc_assert (!TARGET_AVX2 || d->perm[0]);
37471 gcc_unreachable ();
37475 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
37476 broadcast permutations. */
/* NOTE(review): excerpt only -- interior lines (braces, ELT assignment,
   returns) are elided; not compilable as shown.  Visible flow: require
   a single-operand perm with every index equal to ELT, then delegate to
   expand_vec_perm_broadcast_1.  */
37479 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
37481 unsigned i, elt, nelt = d->nelt;
37483 if (d->op0 != d->op1)
37487 for (i = 1; i < nelt; ++i)
37488 if (d->perm[i] != elt)
37491 return expand_vec_perm_broadcast_1 (d);
37494 /* Implement arbitrary permutation of two V32QImode and V16QImode operands
37495 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
37496 all the shorter instruction sequences. */
/* NOTE(review): excerpt only -- the embedded original line numbers skip,
   so interior statements (braces, declarations of used[]/l[]/h[],
   returns) are elided; not compilable as shown.  Visible flow: up to
   four vpshufb masks (two per operand: same-lane and cross-lane), lane
   swaps via vpermq for the cross-lane halves, then vpor reductions into
   d->target.  */
37499 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
37501 rtx rperm[4][32], vperm, l[2], h[2], op, m128;
37502 unsigned int i, nelt, eltsz;
37506 || d->op0 == d->op1
37507 || (d->vmode != V32QImode && d->vmode != V16HImode))
37514 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
37516 /* Generate 4 permutation masks. If the required element is within
37517 the same lane, it is shuffled in. If the required element from the
37518 other lane, force a zero by setting bit 7 in the permutation mask.
37519 In the other mask the mask has non-negative elements if element
37520 is requested from the other lane, but also moved to the other lane,
37521 so that the result of vpshufb can have the two V2TImode halves
37523 m128 = GEN_INT (-128);
37524 for (i = 0; i < 32; ++i)
37526 rperm[0][i] = m128;
37527 rperm[1][i] = m128;
37528 rperm[2][i] = m128;
37529 rperm[3][i] = m128;
37535 for (i = 0; i < nelt; ++i)
37537 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
37538 unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
37539 unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);
37541 for (j = 0; j < eltsz; ++j)
37542 rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
37543 used[which] = true;
37546 for (i = 0; i < 2; ++i)
37548 if (!used[2 * i + 1])
37553 vperm = gen_rtx_CONST_VECTOR (V32QImode,
37554 gen_rtvec_v (32, rperm[2 * i + 1]));
37555 vperm = force_reg (V32QImode, vperm);
37556 h[i] = gen_reg_rtx (V32QImode);
37557 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
37558 emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
37561 /* Swap the 128-byte lanes of h[X]. */
37562 for (i = 0; i < 2; ++i)
37564 if (h[i] == NULL_RTX)
37566 op = gen_reg_rtx (V4DImode);
37567 emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
37568 const2_rtx, GEN_INT (3), const0_rtx,
37570 h[i] = gen_lowpart (V32QImode, op);
37573 for (i = 0; i < 2; ++i)
37580 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
37581 vperm = force_reg (V32QImode, vperm);
37582 l[i] = gen_reg_rtx (V32QImode);
37583 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
37584 emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
37587 for (i = 0; i < 2; ++i)
37591 op = gen_reg_rtx (V32QImode);
37592 emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
37599 gcc_assert (l[0] && l[1]);
37600 op = gen_lowpart (V32QImode, d->target);
37601 emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
37605 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
37606 With all of the interface bits taken care of, perform the expansion
37607 in D and return true on success. */
/* NOTE(review): excerpt only -- interior lines (braces, intermediate
   returns) are elided; not compilable as shown.  Visible flow: a
   dispatcher trying successively longer expansion strategies, from a
   single insn up to the vpshufb4+vpermq2 sequence.  */
37610 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
37612 /* Try a single instruction expansion. */
37613 if (expand_vec_perm_1 (d))
37616 /* Try sequences of two instructions. */
37618 if (expand_vec_perm_pshuflw_pshufhw (d))
37621 if (expand_vec_perm_palignr (d))
37624 if (expand_vec_perm_interleave2 (d))
37627 if (expand_vec_perm_broadcast (d))
37630 if (expand_vec_perm_vpermq_perm_1 (d))
37633 /* Try sequences of three instructions. */
37635 if (expand_vec_perm_pshufb2 (d))
37638 if (expand_vec_perm_interleave3 (d))
37641 /* Try sequences of four instructions. */
37643 if (expand_vec_perm_vpshufb2_vpermq (d))
37646 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
37649 /* ??? Look for narrow permutations whose element orderings would
37650 allow the promotion to a wider mode. */
37652 /* ??? Look for sequences of interleave or a wider permute that place
37653 the data into the correct lanes for a half-vector shuffle like
37654 pshuf[lh]w or vpermilps. */
37656 /* ??? Look for sequences of interleave that produce the desired results.
37657 The combinatorics of punpck[lh] get pretty ugly... */
37659 if (expand_vec_perm_even_odd (d))
37662 /* Even longer sequences. */
37663 if (expand_vec_perm_vpshufb4_vpermq2 (d))
/* NOTE(review): excerpt only -- the embedded original line numbers skip,
   so interior statements (doc comment, braces, declarations of SEL,
   returns) are elided; not compilable as shown.  Visible flow: unpack
   the constant selector into d.perm, classify which operands are used
   (WHICH bitmask), fold identical operands, then try
   ix86_expand_vec_perm_const_1, retrying with distinct operands when
   both are needed but equal.  */
37670 ix86_expand_vec_perm_const (rtx operands[4])
37672 struct expand_vec_perm_d d;
37673 unsigned char perm[MAX_VECT_LEN];
37674 int i, nelt, which;
37677 d.target = operands[0];
37678 d.op0 = operands[1];
37679 d.op1 = operands[2];
37682 d.vmode = GET_MODE (d.target);
37683 gcc_assert (VECTOR_MODE_P (d.vmode));
37684 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
37685 d.testing_p = false;
37687 gcc_assert (GET_CODE (sel) == CONST_VECTOR);
37688 gcc_assert (XVECLEN (sel, 0) == nelt);
37689 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
37691 for (i = which = 0; i < nelt; ++i)
37693 rtx e = XVECEXP (sel, 0, i);
37694 int ei = INTVAL (e) & (2 * nelt - 1);
37696 which |= (ei < nelt ? 1 : 2);
37707 if (!rtx_equal_p (d.op0, d.op1))
37710 /* The elements of PERM do not suggest that only the first operand
37711 is used, but both operands are identical. Allow easier matching
37712 of the permutation by folding the permutation into the single
37714 for (i = 0; i < nelt; ++i)
37715 if (d.perm[i] >= nelt)
37724 for (i = 0; i < nelt; ++i)
37730 if (ix86_expand_vec_perm_const_1 (&d))
37733 /* If the mask says both arguments are needed, but they are the same,
37734 the above tried to expand with d.op0 == d.op1. If that didn't work,
37735 retry with d.op0 != d.op1 as that is what testing has been done with. */
37736 if (which == 3 && d.op0 == d.op1)
37741 memcpy (d.perm, perm, sizeof (perm));
37742 d.op1 = gen_reg_rtx (d.vmode);
37744 ok = ix86_expand_vec_perm_const_1 (&d);
37745 seq = get_insns ();
37749 emit_move_insn (d.op1, d.op0);
37758 /* Implement targetm.vectorize.vec_perm_const_ok. */
/* NOTE(review): excerpt only -- interior lines (braces, the ISA
   fast-path returns, RET handling) are elided; not compilable as
   shown.  Visible flow: cheap ISA-based early accepts for 16-byte
   modes, then a dry-run (d.testing_p = true, raw pseudo registers)
   through ix86_expand_vec_perm_const_1 to decide expandability.  */
37761 ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode,
37762 const unsigned char *sel)
37764 struct expand_vec_perm_d d;
37765 unsigned int i, nelt, which;
37769 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
37770 d.testing_p = true;
37772 /* Given sufficient ISA support we can just return true here
37773 for selected vector modes. */
37774 if (GET_MODE_SIZE (d.vmode) == 16)
37776 /* All implementable with a single vpperm insn. */
37779 /* All implementable with 2 pshufb + 1 ior. */
37782 /* All implementable with shufpd or unpck[lh]pd. */
37787 /* Extract the values from the vector CST into the permutation
37789 memcpy (d.perm, sel, nelt);
37790 for (i = which = 0; i < nelt; ++i)
37792 unsigned char e = d.perm[i];
37793 gcc_assert (e < 2 * nelt);
37794 which |= (e < nelt ? 1 : 2);
37797 /* For all elements from second vector, fold the elements to first. */
37799 for (i = 0; i < nelt; ++i)
37802 /* Check whether the mask can be applied to the vector type. */
37803 one_vec = (which != 3);
37805 /* Implementable with shufps or pshufd. */
37806 if (one_vec && (d.vmode == V4SFmode || d.vmode == V4SImode))
37809 /* Otherwise we have to go through the motions and see if we can
37810 figure out how to generate the requested permutation. */
37811 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
37812 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
37814 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
37817 ret = ix86_expand_vec_perm_const_1 (&d);
/* NOTE(review): excerpt only -- the doc comment, braces, and the
   op0/op1/target assignments are elided; not compilable as shown.
   Visible flow: build an extract-even/odd perm (perm[i] = 2*i + ODD)
   and expand it directly or via the special-case patterns.  */
37824 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
37826 struct expand_vec_perm_d d;
37832 d.vmode = GET_MODE (targ);
37833 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
37834 d.testing_p = false;
37836 for (i = 0; i < nelt; ++i)
37837 d.perm[i] = i * 2 + odd;
37839 /* We'll either be able to implement the permutation directly... */
37840 if (expand_vec_perm_1 (&d))
37843 /* ... or we use the special-case patterns. */
37844 expand_vec_perm_even_odd_1 (&d, odd);
37847 /* Expand an insert into a vector register through pinsr insn.
37848 Return true if successful. */
/* NOTE(review): excerpt only -- interior lines (braces, case labels,
   size/pos validation, returns) are elided; not compilable as shown.
   Visible flow: normalize SUBREG operands, pick the pinsr variant and
   vector mode from the destination mode/size, then emit the insert at
   bit position POS.  */
37851 ix86_expand_pinsr (rtx *operands)
37853 rtx dst = operands[0];
37854 rtx src = operands[3];
37856 unsigned int size = INTVAL (operands[1]);
37857 unsigned int pos = INTVAL (operands[2]);
37859 if (GET_CODE (dst) == SUBREG)
37861 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
37862 dst = SUBREG_REG (dst);
37865 if (GET_CODE (src) == SUBREG)
37866 src = SUBREG_REG (src);
37868 switch (GET_MODE (dst))
37875 enum machine_mode srcmode, dstmode;
37876 rtx (*pinsr)(rtx, rtx, rtx, rtx);
37878 srcmode = mode_for_size (size, MODE_INT, 0);
37883 if (!TARGET_SSE4_1)
37885 dstmode = V16QImode;
37886 pinsr = gen_sse4_1_pinsrb;
37892 dstmode = V8HImode;
37893 pinsr = gen_sse2_pinsrw;
37897 if (!TARGET_SSE4_1)
37899 dstmode = V4SImode;
37900 pinsr = gen_sse4_1_pinsrd;
37904 gcc_assert (TARGET_64BIT);
37905 if (!TARGET_SSE4_1)
37907 dstmode = V2DImode;
37908 pinsr = gen_sse4_1_pinsrq;
37915 dst = gen_lowpart (dstmode, dst);
37916 src = gen_lowpart (srcmode, src);
37920 emit_insn (pinsr (dst, dst, src, GEN_INT (1 << pos)));
37929 /* This function returns the calling abi specific va_list type node.
37930 It returns the FNDECL specific va_list type. */
/* NOTE(review): excerpt only -- interior lines (the guard condition
   before the first return) are elided; not compilable as shown.
   Visible behavior: pick ms_va_list_type_node for MS-ABI functions,
   sysv_va_list_type_node otherwise.  */
37933 ix86_fn_abi_va_list (tree fndecl)
37936 return va_list_type_node;
37937 gcc_assert (fndecl != NULL_TREE);
37939 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
37940 return ms_va_list_type_node;
37942 return sysv_va_list_type_node;
37945 /* Returns the canonical va_list type specified by TYPE. If there
37946 is no valid TYPE provided, it return NULL_TREE. */
/* NOTE(review): excerpt only -- interior lines (braces, HTYPE
   initialization, early exits) are elided; not compilable as shown.
   Visible flow: strip pointer/reference wrappers from TYPE, then run
   three structurally identical unwrap-and-compare stanzas against
   va_list_type_node, sysv_va_list_type_node and ms_va_list_type_node,
   falling back to std_canonical_va_list_type.  */
37949 ix86_canonical_va_list_type (tree type)
37953 /* Resolve references and pointers to va_list type. */
37954 if (TREE_CODE (type) == MEM_REF)
37955 type = TREE_TYPE (type);
37956 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
37957 type = TREE_TYPE (type);
37958 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
37959 type = TREE_TYPE (type);
37961 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
37963 wtype = va_list_type_node;
37964 gcc_assert (wtype != NULL_TREE);
37966 if (TREE_CODE (wtype) == ARRAY_TYPE)
37968 /* If va_list is an array type, the argument may have decayed
37969 to a pointer type, e.g. by being passed to another function.
37970 In that case, unwrap both types so that we can compare the
37971 underlying records. */
37972 if (TREE_CODE (htype) == ARRAY_TYPE
37973 || POINTER_TYPE_P (htype))
37975 wtype = TREE_TYPE (wtype);
37976 htype = TREE_TYPE (htype);
37979 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
37980 return va_list_type_node;
37981 wtype = sysv_va_list_type_node;
37982 gcc_assert (wtype != NULL_TREE);
37984 if (TREE_CODE (wtype) == ARRAY_TYPE)
37986 /* If va_list is an array type, the argument may have decayed
37987 to a pointer type, e.g. by being passed to another function.
37988 In that case, unwrap both types so that we can compare the
37989 underlying records. */
37990 if (TREE_CODE (htype) == ARRAY_TYPE
37991 || POINTER_TYPE_P (htype))
37993 wtype = TREE_TYPE (wtype);
37994 htype = TREE_TYPE (htype);
37997 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
37998 return sysv_va_list_type_node;
37999 wtype = ms_va_list_type_node;
38000 gcc_assert (wtype != NULL_TREE);
38002 if (TREE_CODE (wtype) == ARRAY_TYPE)
38004 /* If va_list is an array type, the argument may have decayed
38005 to a pointer type, e.g. by being passed to another function.
38006 In that case, unwrap both types so that we can compare the
38007 underlying records. */
38008 if (TREE_CODE (htype) == ARRAY_TYPE
38009 || POINTER_TYPE_P (htype))
38011 wtype = TREE_TYPE (wtype);
38012 htype = TREE_TYPE (htype);
38015 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
38016 return ms_va_list_type_node;
38019 return std_canonical_va_list_type (type);
38022 /* Iterate through the target-specific builtin types for va_list.
38023 IDX denotes the iterator, *PTREE is set to the result type of
38024 the va_list builtin, and *PNAME to its internal type.
38025 Returns zero if there is no element for this index, otherwise
38026 IDX should be increased upon the next call.
38027 Note, do not iterate a base builtin's name like __builtin_va_list.
38028 Used from c_common_nodes_and_builtins. */
38031 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
38041 *ptree = ms_va_list_type_node;
38042 *pname = "__builtin_ms_va_list";
38046 *ptree = sysv_va_list_type_node;
38047 *pname = "__builtin_sysv_va_list";
38055 #undef TARGET_SCHED_DISPATCH
38056 #define TARGET_SCHED_DISPATCH has_dispatch
38057 #undef TARGET_SCHED_DISPATCH_DO
38058 #define TARGET_SCHED_DISPATCH_DO do_dispatch
38059 #undef TARGET_SCHED_REASSOCIATION_WIDTH
38060 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
38062 /* The size of the dispatch window is the total number of bytes of
38063 object code allowed in a window. */
38064 #define DISPATCH_WINDOW_SIZE 16
38066 /* Number of dispatch windows considered for scheduling. */
38067 #define MAX_DISPATCH_WINDOWS 3
38069 /* Maximum number of instructions in a window. */
38072 /* Maximum number of immediate operands in a window. */
38075 /* Maximum number of immediate bits allowed in a window. */
38076 #define MAX_IMM_SIZE 128
38078 /* Maximum number of 32 bit immediates allowed in a window. */
38079 #define MAX_IMM_32 4
38081 /* Maximum number of 64 bit immediates allowed in a window. */
38082 #define MAX_IMM_64 2
38084 /* Maximum total of loads or prefetches allowed in a window. */
38087 /* Maximum total of stores allowed in a window. */
38088 #define MAX_STORE 1
38094 /* Dispatch groups. Istructions that affect the mix in a dispatch window. */
38095 enum dispatch_group {
38110 /* Number of allowable groups in a dispatch window. It is an array
38111 indexed by dispatch_group enum. 100 is used as a big number,
38112 because the number of these kind of operations does not have any
38113 effect in dispatch window, but we need them for other reasons in
38115 static unsigned int num_allowable_groups[disp_last] = {
38116 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
38119 char group_name[disp_last + 1][16] = {
38120 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
38121 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
38122 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
38125 /* Instruction path. */
38128 path_single, /* Single micro op. */
38129 path_double, /* Double micro op. */
38130 path_multi, /* Instructions with more than 2 micro op.. */
38134 /* sched_insn_info defines a window to the instructions scheduled in
38135 the basic block. It contains a pointer to the insn_info table and
38136 the instruction scheduled.
38138 Windows are allocated for each basic block and are linked
38140 typedef struct sched_insn_info_s {
38142 enum dispatch_group group;
38143 enum insn_path path;
38148 /* Linked list of dispatch windows. This is a two way list of
38149 dispatch windows of a basic block. It contains information about
38150 the number of uops in the window and the total number of
38151 instructions and of bytes in the object code for this dispatch
38153 typedef struct dispatch_windows_s {
38154 int num_insn; /* Number of insn in the window. */
38155 int num_uops; /* Number of uops in the window. */
38156 int window_size; /* Number of bytes in the window. */
38157 int window_num; /* Window number between 0 or 1. */
38158 int num_imm; /* Number of immediates in an insn. */
38159 int num_imm_32; /* Number of 32 bit immediates in an insn. */
38160 int num_imm_64; /* Number of 64 bit immediates in an insn. */
38161 int imm_size; /* Total immediates in the window. */
38162 int num_loads; /* Total memory loads in the window. */
38163 int num_stores; /* Total memory stores in the window. */
38164 int violation; /* Violation exists in window. */
38165 sched_insn_info *window; /* Pointer to the window. */
38166 struct dispatch_windows_s *next;
38167 struct dispatch_windows_s *prev;
38168 } dispatch_windows;
38170 /* Immediate valuse used in an insn. */
38171 typedef struct imm_info_s
38178 static dispatch_windows *dispatch_window_list;
38179 static dispatch_windows *dispatch_window_list1;
38181 /* Get dispatch group of insn. */
38183 static enum dispatch_group
38184 get_mem_group (rtx insn)
38186 enum attr_memory memory;
38188 if (INSN_CODE (insn) < 0)
38189 return disp_no_group;
38190 memory = get_attr_memory (insn);
38191 if (memory == MEMORY_STORE)
38194 if (memory == MEMORY_LOAD)
38197 if (memory == MEMORY_BOTH)
38198 return disp_load_store;
38200 return disp_no_group;
38203 /* Return true if insn is a compare instruction. */
38208 enum attr_type type;
38210 type = get_attr_type (insn);
38211 return (type == TYPE_TEST
38212 || type == TYPE_ICMP
38213 || type == TYPE_FCMP
38214 || GET_CODE (PATTERN (insn)) == COMPARE);
38217 /* Return true if a dispatch violation encountered. */
38220 dispatch_violation (void)
38222 if (dispatch_window_list->next)
38223 return dispatch_window_list->next->violation;
38224 return dispatch_window_list->violation;
38227 /* Return true if insn is a branch instruction. */
38230 is_branch (rtx insn)
38232 return (CALL_P (insn) || JUMP_P (insn));
38235 /* Return true if insn is a prefetch instruction. */
38238 is_prefetch (rtx insn)
38240 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
38243 /* This function initializes a dispatch window and the list container holding a
38244 pointer to the window. */
38247 init_window (int window_num)
38250 dispatch_windows *new_list;
38252 if (window_num == 0)
38253 new_list = dispatch_window_list;
38255 new_list = dispatch_window_list1;
38257 new_list->num_insn = 0;
38258 new_list->num_uops = 0;
38259 new_list->window_size = 0;
38260 new_list->next = NULL;
38261 new_list->prev = NULL;
38262 new_list->window_num = window_num;
38263 new_list->num_imm = 0;
38264 new_list->num_imm_32 = 0;
38265 new_list->num_imm_64 = 0;
38266 new_list->imm_size = 0;
38267 new_list->num_loads = 0;
38268 new_list->num_stores = 0;
38269 new_list->violation = false;
38271 for (i = 0; i < MAX_INSN; i++)
38273 new_list->window[i].insn = NULL;
38274 new_list->window[i].group = disp_no_group;
38275 new_list->window[i].path = no_path;
38276 new_list->window[i].byte_len = 0;
38277 new_list->window[i].imm_bytes = 0;
38282 /* This function allocates and initializes a dispatch window and the
38283 list container holding a pointer to the window. */
38285 static dispatch_windows *
38286 allocate_window (void)
38288 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
38289 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
38294 /* This routine initializes the dispatch scheduling information. It
38295 initiates building dispatch scheduler tables and constructs the
38296 first dispatch window. */
38299 init_dispatch_sched (void)
38301 /* Allocate a dispatch list and a window. */
38302 dispatch_window_list = allocate_window ();
38303 dispatch_window_list1 = allocate_window ();
38308 /* This function returns true if a branch is detected. End of a basic block
38309 does not have to be a branch, but here we assume only branches end a
38313 is_end_basic_block (enum dispatch_group group)
38315 return group == disp_branch;
38318 /* This function is called when the end of a window processing is reached. */
38321 process_end_window (void)
38323 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
38324 if (dispatch_window_list->next)
38326 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
38327 gcc_assert (dispatch_window_list->window_size
38328 + dispatch_window_list1->window_size <= 48);
38334 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
38335 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
38336 for 48 bytes of instructions. Note that these windows are not dispatch
38337 windows that their sizes are DISPATCH_WINDOW_SIZE. */
38339 static dispatch_windows *
38340 allocate_next_window (int window_num)
38342 if (window_num == 0)
38344 if (dispatch_window_list->next)
38347 return dispatch_window_list;
38350 dispatch_window_list->next = dispatch_window_list1;
38351 dispatch_window_list1->prev = dispatch_window_list;
38353 return dispatch_window_list1;
38356 /* Increment the number of immediate operands of an instruction. */
38359 find_constant_1 (rtx *in_rtx, imm_info *imm_values)
38364 switch ( GET_CODE (*in_rtx))
38369 (imm_values->imm)++;
38370 if (x86_64_immediate_operand (*in_rtx, SImode))
38371 (imm_values->imm32)++;
38373 (imm_values->imm64)++;
38377 (imm_values->imm)++;
38378 (imm_values->imm64)++;
38382 if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
38384 (imm_values->imm)++;
38385 (imm_values->imm32)++;
38396 /* Compute number of immediate operands of an instruction. */
38399 find_constant (rtx in_rtx, imm_info *imm_values)
38401 for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
38402 (rtx_function) find_constant_1, (void *) imm_values);
38405 /* Return total size of immediate operands of an instruction along with number
38406 of corresponding immediate-operands. It initializes its parameters to zero
38407 befor calling FIND_CONSTANT.
38408 INSN is the input instruction. IMM is the total of immediates.
38409 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
38413 get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
38415 imm_info imm_values = {0, 0, 0};
38417 find_constant (insn, &imm_values);
38418 *imm = imm_values.imm;
38419 *imm32 = imm_values.imm32;
38420 *imm64 = imm_values.imm64;
38421 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
38424 /* This function indicates if an operand of an instruction is an
38428 has_immediate (rtx insn)
38430 int num_imm_operand;
38431 int num_imm32_operand;
38432 int num_imm64_operand;
38435 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
38436 &num_imm64_operand);
38440 /* Return single or double path for instructions. */
38442 static enum insn_path
38443 get_insn_path (rtx insn)
38445 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
38447 if ((int)path == 0)
38448 return path_single;
38450 if ((int)path == 1)
38451 return path_double;
38456 /* Return insn dispatch group. */
38458 static enum dispatch_group
38459 get_insn_group (rtx insn)
38461 enum dispatch_group group = get_mem_group (insn);
38465 if (is_branch (insn))
38466 return disp_branch;
38471 if (has_immediate (insn))
38474 if (is_prefetch (insn))
38475 return disp_prefetch;
38477 return disp_no_group;
38480 /* Count number of GROUP restricted instructions in a dispatch
38481 window WINDOW_LIST. */
38484 count_num_restricted (rtx insn, dispatch_windows *window_list)
38486 enum dispatch_group group = get_insn_group (insn);
38488 int num_imm_operand;
38489 int num_imm32_operand;
38490 int num_imm64_operand;
38492 if (group == disp_no_group)
38495 if (group == disp_imm)
38497 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
38498 &num_imm64_operand);
38499 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
38500 || num_imm_operand + window_list->num_imm > MAX_IMM
38501 || (num_imm32_operand > 0
38502 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
38503 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
38504 || (num_imm64_operand > 0
38505 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
38506 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
38507 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
38508 && num_imm64_operand > 0
38509 && ((window_list->num_imm_64 > 0
38510 && window_list->num_insn >= 2)
38511 || window_list->num_insn >= 3)))
38517 if ((group == disp_load_store
38518 && (window_list->num_loads >= MAX_LOAD
38519 || window_list->num_stores >= MAX_STORE))
38520 || ((group == disp_load
38521 || group == disp_prefetch)
38522 && window_list->num_loads >= MAX_LOAD)
38523 || (group == disp_store
38524 && window_list->num_stores >= MAX_STORE))
38530 /* This function returns true if insn satisfies dispatch rules on the
38531 last window scheduled. */
38534 fits_dispatch_window (rtx insn)
38536 dispatch_windows *window_list = dispatch_window_list;
38537 dispatch_windows *window_list_next = dispatch_window_list->next;
38538 unsigned int num_restrict;
38539 enum dispatch_group group = get_insn_group (insn);
38540 enum insn_path path = get_insn_path (insn);
38543 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
38544 instructions should be given the lowest priority in the
38545 scheduling process in Haifa scheduler to make sure they will be
38546 scheduled in the same dispatch window as the refrence to them. */
38547 if (group == disp_jcc || group == disp_cmp)
38550 /* Check nonrestricted. */
38551 if (group == disp_no_group || group == disp_branch)
38554 /* Get last dispatch window. */
38555 if (window_list_next)
38556 window_list = window_list_next;
38558 if (window_list->window_num == 1)
38560 sum = window_list->prev->window_size + window_list->window_size;
38563 || (min_insn_size (insn) + sum) >= 48)
38564 /* Window 1 is full. Go for next window. */
38568 num_restrict = count_num_restricted (insn, window_list);
38570 if (num_restrict > num_allowable_groups[group])
38573 /* See if it fits in the first window. */
38574 if (window_list->window_num == 0)
38576 /* The first widow should have only single and double path
38578 if (path == path_double
38579 && (window_list->num_uops + 2) > MAX_INSN)
38581 else if (path != path_single)
38587 /* Add an instruction INSN with NUM_UOPS micro-operations to the
38588 dispatch window WINDOW_LIST. */
38591 add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
38593 int byte_len = min_insn_size (insn);
38594 int num_insn = window_list->num_insn;
38596 sched_insn_info *window = window_list->window;
38597 enum dispatch_group group = get_insn_group (insn);
38598 enum insn_path path = get_insn_path (insn);
38599 int num_imm_operand;
38600 int num_imm32_operand;
38601 int num_imm64_operand;
38603 if (!window_list->violation && group != disp_cmp
38604 && !fits_dispatch_window (insn))
38605 window_list->violation = true;
38607 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
38608 &num_imm64_operand);
38610 /* Initialize window with new instruction. */
38611 window[num_insn].insn = insn;
38612 window[num_insn].byte_len = byte_len;
38613 window[num_insn].group = group;
38614 window[num_insn].path = path;
38615 window[num_insn].imm_bytes = imm_size;
38617 window_list->window_size += byte_len;
38618 window_list->num_insn = num_insn + 1;
38619 window_list->num_uops = window_list->num_uops + num_uops;
38620 window_list->imm_size += imm_size;
38621 window_list->num_imm += num_imm_operand;
38622 window_list->num_imm_32 += num_imm32_operand;
38623 window_list->num_imm_64 += num_imm64_operand;
38625 if (group == disp_store)
38626 window_list->num_stores += 1;
38627 else if (group == disp_load
38628 || group == disp_prefetch)
38629 window_list->num_loads += 1;
38630 else if (group == disp_load_store)
38632 window_list->num_stores += 1;
38633 window_list->num_loads += 1;
38637 /* Adds a scheduled instruction, INSN, to the current dispatch window.
38638 If the total bytes of instructions or the number of instructions in
38639 the window exceed allowable, it allocates a new window. */
38642 add_to_dispatch_window (rtx insn)
38645 dispatch_windows *window_list;
38646 dispatch_windows *next_list;
38647 dispatch_windows *window0_list;
38648 enum insn_path path;
38649 enum dispatch_group insn_group;
38657 if (INSN_CODE (insn) < 0)
38660 byte_len = min_insn_size (insn);
38661 window_list = dispatch_window_list;
38662 next_list = window_list->next;
38663 path = get_insn_path (insn);
38664 insn_group = get_insn_group (insn);
38666 /* Get the last dispatch window. */
38668 window_list = dispatch_window_list->next;
38670 if (path == path_single)
38672 else if (path == path_double)
38675 insn_num_uops = (int) path;
38677 /* If current window is full, get a new window.
38678 Window number zero is full, if MAX_INSN uops are scheduled in it.
38679 Window number one is full, if window zero's bytes plus window
38680 one's bytes is 32, or if the bytes of the new instruction added
38681 to the total makes it greater than 48, or it has already MAX_INSN
38682 instructions in it. */
38683 num_insn = window_list->num_insn;
38684 num_uops = window_list->num_uops;
38685 window_num = window_list->window_num;
38686 insn_fits = fits_dispatch_window (insn);
38688 if (num_insn >= MAX_INSN
38689 || num_uops + insn_num_uops > MAX_INSN
38692 window_num = ~window_num & 1;
38693 window_list = allocate_next_window (window_num);
38696 if (window_num == 0)
38698 add_insn_window (insn, window_list, insn_num_uops);
38699 if (window_list->num_insn >= MAX_INSN
38700 && insn_group == disp_branch)
38702 process_end_window ();
38706 else if (window_num == 1)
38708 window0_list = window_list->prev;
38709 sum = window0_list->window_size + window_list->window_size;
38711 || (byte_len + sum) >= 48)
38713 process_end_window ();
38714 window_list = dispatch_window_list;
38717 add_insn_window (insn, window_list, insn_num_uops);
38720 gcc_unreachable ();
38722 if (is_end_basic_block (insn_group))
38724 /* End of basic block is reached do end-basic-block process. */
38725 process_end_window ();
38730 /* Print the dispatch window, WINDOW_NUM, to FILE. */
38732 DEBUG_FUNCTION static void
38733 debug_dispatch_window_file (FILE *file, int window_num)
38735 dispatch_windows *list;
38738 if (window_num == 0)
38739 list = dispatch_window_list;
38741 list = dispatch_window_list1;
38743 fprintf (file, "Window #%d:\n", list->window_num);
38744 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
38745 list->num_insn, list->num_uops, list->window_size);
38746 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
38747 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
38749 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
38751 fprintf (file, " insn info:\n");
38753 for (i = 0; i < MAX_INSN; i++)
38755 if (!list->window[i].insn)
38757 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
38758 i, group_name[list->window[i].group],
38759 i, (void *)list->window[i].insn,
38760 i, list->window[i].path,
38761 i, list->window[i].byte_len,
38762 i, list->window[i].imm_bytes);
38766 /* Print to stdout a dispatch window. */
38768 DEBUG_FUNCTION void
38769 debug_dispatch_window (int window_num)
38771 debug_dispatch_window_file (stdout, window_num);
38774 /* Print INSN dispatch information to FILE. */
38776 DEBUG_FUNCTION static void
38777 debug_insn_dispatch_info_file (FILE *file, rtx insn)
38780 enum insn_path path;
38781 enum dispatch_group group;
38783 int num_imm_operand;
38784 int num_imm32_operand;
38785 int num_imm64_operand;
38787 if (INSN_CODE (insn) < 0)
38790 byte_len = min_insn_size (insn);
38791 path = get_insn_path (insn);
38792 group = get_insn_group (insn);
38793 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
38794 &num_imm64_operand);
38796 fprintf (file, " insn info:\n");
38797 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
38798 group_name[group], path, byte_len);
38799 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
38800 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
38803 /* Print to STDERR the status of the ready list with respect to
38804 dispatch windows. */
38806 DEBUG_FUNCTION void
38807 debug_ready_dispatch (void)
38810 int no_ready = number_in_ready ();
38812 fprintf (stdout, "Number of ready: %d\n", no_ready);
38814 for (i = 0; i < no_ready; i++)
38815 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
38818 /* This routine is the driver of the dispatch scheduler. */
38821 do_dispatch (rtx insn, int mode)
38823 if (mode == DISPATCH_INIT)
38824 init_dispatch_sched ();
38825 else if (mode == ADD_TO_DISPATCH_WINDOW)
38826 add_to_dispatch_window (insn);
38829 /* Return TRUE if Dispatch Scheduling is supported. */
38832 has_dispatch (rtx insn, int action)
38834 if ((ix86_tune == PROCESSOR_BDVER1 || ix86_tune == PROCESSOR_BDVER2)
38835 && flag_dispatch_scheduler)
38841 case IS_DISPATCH_ON:
38846 return is_cmp (insn);
38848 case DISPATCH_VIOLATION:
38849 return dispatch_violation ();
38851 case FITS_DISPATCH_WINDOW:
38852 return fits_dispatch_window (insn);
38858 /* Implementation of reassociation_width target hook used by
38859 reassoc phase to identify parallelism level in reassociated
38860 tree. Statements tree_code is passed in OPC. Arguments type
38863 Currently parallel reassociation is enabled for Atom
38864 processors only and we set reassociation width to be 2
38865 because Atom may issue up to 2 instructions per cycle.
38867 Return value should be fixed if parallel reassociation is
38868 enabled for other processors. */
38871 ix86_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
38872 enum machine_mode mode)
38876 if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
38878 else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
38884 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
38885 place emms and femms instructions. */
38887 static enum machine_mode
38888 ix86_preferred_simd_mode (enum machine_mode mode)
38896 return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
38898 return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
38900 return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
38902 return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;
38905 if (TARGET_AVX && !TARGET_PREFER_AVX128)
38911 if (!TARGET_VECTORIZE_DOUBLE)
38913 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
38915 else if (TARGET_SSE2)
38924 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
38927 static unsigned int
38928 ix86_autovectorize_vector_sizes (void)
38930 return (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
38933 /* Initialize the GCC target structure. */
38934 #undef TARGET_RETURN_IN_MEMORY
38935 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
38937 #undef TARGET_LEGITIMIZE_ADDRESS
38938 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
38940 #undef TARGET_ATTRIBUTE_TABLE
38941 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
38942 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
38943 # undef TARGET_MERGE_DECL_ATTRIBUTES
38944 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
38947 #undef TARGET_COMP_TYPE_ATTRIBUTES
38948 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
38950 #undef TARGET_INIT_BUILTINS
38951 #define TARGET_INIT_BUILTINS ix86_init_builtins
38952 #undef TARGET_BUILTIN_DECL
38953 #define TARGET_BUILTIN_DECL ix86_builtin_decl
38954 #undef TARGET_EXPAND_BUILTIN
38955 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
38957 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
38958 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
38959 ix86_builtin_vectorized_function
38961 #undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
38962 #define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
38964 #undef TARGET_VECTORIZE_BUILTIN_TM_STORE
38965 #define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
38967 #undef TARGET_VECTORIZE_BUILTIN_GATHER
38968 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
38970 #undef TARGET_BUILTIN_RECIPROCAL
38971 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
38973 #undef TARGET_ASM_FUNCTION_EPILOGUE
38974 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
38976 #undef TARGET_ENCODE_SECTION_INFO
38977 #ifndef SUBTARGET_ENCODE_SECTION_INFO
38978 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
38980 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
38983 #undef TARGET_ASM_OPEN_PAREN
38984 #define TARGET_ASM_OPEN_PAREN ""
38985 #undef TARGET_ASM_CLOSE_PAREN
38986 #define TARGET_ASM_CLOSE_PAREN ""
38988 #undef TARGET_ASM_BYTE_OP
38989 #define TARGET_ASM_BYTE_OP ASM_BYTE
38991 #undef TARGET_ASM_ALIGNED_HI_OP
38992 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
38993 #undef TARGET_ASM_ALIGNED_SI_OP
38994 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
38996 #undef TARGET_ASM_ALIGNED_DI_OP
38997 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
39000 #undef TARGET_PROFILE_BEFORE_PROLOGUE
39001 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
39003 #undef TARGET_ASM_UNALIGNED_HI_OP
39004 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
39005 #undef TARGET_ASM_UNALIGNED_SI_OP
39006 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
39007 #undef TARGET_ASM_UNALIGNED_DI_OP
39008 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
39010 #undef TARGET_PRINT_OPERAND
39011 #define TARGET_PRINT_OPERAND ix86_print_operand
39012 #undef TARGET_PRINT_OPERAND_ADDRESS
39013 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
39014 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
39015 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
39016 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
39017 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
39019 #undef TARGET_SCHED_INIT_GLOBAL
39020 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
39021 #undef TARGET_SCHED_ADJUST_COST
39022 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
39023 #undef TARGET_SCHED_ISSUE_RATE
39024 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
39025 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
39026 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
39027 ia32_multipass_dfa_lookahead
39029 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
39030 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
39033 #undef TARGET_HAVE_TLS
39034 #define TARGET_HAVE_TLS true
39036 #undef TARGET_CANNOT_FORCE_CONST_MEM
39037 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
39038 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
39039 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
39041 #undef TARGET_DELEGITIMIZE_ADDRESS
39042 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
39044 #undef TARGET_MS_BITFIELD_LAYOUT_P
39045 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
39048 #undef TARGET_BINDS_LOCAL_P
39049 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
39051 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
39052 #undef TARGET_BINDS_LOCAL_P
39053 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
39056 #undef TARGET_ASM_OUTPUT_MI_THUNK
39057 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
39058 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
39059 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
39061 #undef TARGET_ASM_FILE_START
39062 #define TARGET_ASM_FILE_START x86_file_start
39064 #undef TARGET_OPTION_OVERRIDE
39065 #define TARGET_OPTION_OVERRIDE ix86_option_override
39067 #undef TARGET_REGISTER_MOVE_COST
39068 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
39069 #undef TARGET_MEMORY_MOVE_COST
39070 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
39071 #undef TARGET_RTX_COSTS
39072 #define TARGET_RTX_COSTS ix86_rtx_costs
39073 #undef TARGET_ADDRESS_COST
39074 #define TARGET_ADDRESS_COST ix86_address_cost
39076 #undef TARGET_FIXED_CONDITION_CODE_REGS
39077 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
39078 #undef TARGET_CC_MODES_COMPATIBLE
39079 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
39081 #undef TARGET_MACHINE_DEPENDENT_REORG
39082 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
39084 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
39085 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
39087 #undef TARGET_BUILD_BUILTIN_VA_LIST
39088 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
39090 #undef TARGET_ENUM_VA_LIST_P
39091 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
39093 #undef TARGET_FN_ABI_VA_LIST
39094 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
39096 #undef TARGET_CANONICAL_VA_LIST_TYPE
39097 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
39099 #undef TARGET_EXPAND_BUILTIN_VA_START
39100 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
39102 #undef TARGET_MD_ASM_CLOBBERS
/* Target hook vector overrides for IA-32/x86-64.  Each #undef/#define
   pair below replaces the default from target-def.h with the
   ix86-specific implementation; TARGET_INITIALIZER then collects them
   into the `targetm' structure at the bottom of the file.  */
39103 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers

/* Argument passing and calling-convention hooks.  */
39105 #undef TARGET_PROMOTE_PROTOTYPES
39106 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
39107 #undef TARGET_STRUCT_VALUE_RTX
39108 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
39109 #undef TARGET_SETUP_INCOMING_VARARGS
39110 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
39111 #undef TARGET_MUST_PASS_IN_STACK
39112 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
39113 #undef TARGET_FUNCTION_ARG_ADVANCE
39114 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
39115 #undef TARGET_FUNCTION_ARG
39116 #define TARGET_FUNCTION_ARG ix86_function_arg
39117 #undef TARGET_FUNCTION_ARG_BOUNDARY
39118 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
39119 #undef TARGET_PASS_BY_REFERENCE
39120 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference

/* Stack, argument pointer, and dynamic realign (DRAP) hooks.  */
39121 #undef TARGET_INTERNAL_ARG_POINTER
39122 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
39123 #undef TARGET_UPDATE_STACK_BOUNDARY
39124 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
39125 #undef TARGET_GET_DRAP_RTX
39126 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
39127 #undef TARGET_STRICT_ARGUMENT_NAMING
39128 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true

/* Nested-function static chain and trampoline setup.  */
39129 #undef TARGET_STATIC_CHAIN
39130 #define TARGET_STATIC_CHAIN ix86_static_chain
39131 #undef TARGET_TRAMPOLINE_INIT
39132 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
39133 #undef TARGET_RETURN_POPS_ARGS
39134 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args

39136 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
39137 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

/* Which scalar/vector machine modes this target supports.  */
39139 #undef TARGET_SCALAR_MODE_SUPPORTED_P
39140 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
39142 #undef TARGET_VECTOR_MODE_SUPPORTED_P
39143 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
39145 #undef TARGET_C_MODE_FOR_SUFFIX
39146 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix

/* DWARF thread-local debug relocation output.  */
39149 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
39150 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel

/* Let a subtarget supply its own attribute-insertion hook when it
   defines one.  (NOTE(review): the matching #endif is not visible in
   this chunk — the embedded numbering jumps 39155 -> 39158, so it
   presumably sits on an elided line; confirm in the full file.)  */
39153 #ifdef SUBTARGET_INSERT_ATTRIBUTES
39154 #undef TARGET_INSERT_ATTRIBUTES
39155 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES

39158 #undef TARGET_MANGLE_TYPE
39159 #define TARGET_MANGLE_TYPE ix86_mangle_type

/* Use the ix86 stack-protector failure expander only when the target
   is not Mach-O.  (NOTE(review): matching #endif falls on an elided
   line — numbering jumps 39163 -> 39166; confirm in the full file.)  */
39161 #ifndef TARGET_MACHO
39162 #undef TARGET_STACK_PROTECT_FAIL
39163 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail

/* Function return values and promotion.  */
39166 #undef TARGET_FUNCTION_VALUE
39167 #define TARGET_FUNCTION_VALUE ix86_function_value
39169 #undef TARGET_FUNCTION_VALUE_REGNO_P
39170 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
39172 #undef TARGET_PROMOTE_FUNCTION_MODE
39173 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode

/* Reload and register-class hooks.  */
39175 #undef TARGET_SECONDARY_RELOAD
39176 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
39178 #undef TARGET_CLASS_MAX_NREGS
39179 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
39181 #undef TARGET_PREFERRED_RELOAD_CLASS
39182 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
39183 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
39184 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
39185 #undef TARGET_CLASS_LIKELY_SPILLED_P
39186 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p

/* Auto-vectorizer cost model and SIMD mode selection.  */
39188 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
39189 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
39190 ix86_builtin_vectorization_cost
39191 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
39192 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
39193 ix86_vectorize_vec_perm_const_ok
39194 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
39195 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
39196 ix86_preferred_simd_mode
39197 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
39198 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
39199 ix86_autovectorize_vector_sizes

/* Per-function target option handling (attribute target, option
   save/restore) and inlining compatibility checks.  */
39201 #undef TARGET_SET_CURRENT_FUNCTION
39202 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
39204 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
39205 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
39207 #undef TARGET_OPTION_SAVE
39208 #define TARGET_OPTION_SAVE ix86_function_specific_save
39210 #undef TARGET_OPTION_RESTORE
39211 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
39213 #undef TARGET_OPTION_PRINT
39214 #define TARGET_OPTION_PRINT ix86_function_specific_print
39216 #undef TARGET_CAN_INLINE_P
39217 #define TARGET_CAN_INLINE_P ix86_can_inline_p

39219 #undef TARGET_EXPAND_TO_RTL_HOOK
39220 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi

/* Address and constant legitimacy.  */
39222 #undef TARGET_LEGITIMATE_ADDRESS_P
39223 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
39225 #undef TARGET_LEGITIMATE_CONSTANT_P
39226 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p

/* Frame pointer and register elimination.  */
39228 #undef TARGET_FRAME_POINTER_REQUIRED
39229 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
39231 #undef TARGET_CAN_ELIMINATE
39232 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
39234 #undef TARGET_EXTRA_LIVE_ON_ENTRY
39235 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry

39237 #undef TARGET_ASM_CODE_END
39238 #define TARGET_ASM_CODE_END ix86_code_end

39240 #undef TARGET_CONDITIONAL_REGISTER_USAGE
39241 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage

/* NOTE(review): darwin_rename_builtins is Darwin-specific, so this
   pair is presumably inside a Mach-O conditional whose #if line is
   elided here (numbering jumps 39241 -> 39244) — verify against the
   full file before assuming it applies unconditionally.  */
39244 #undef TARGET_INIT_LIBFUNCS
39245 #define TARGET_INIT_LIBFUNCS darwin_rename_builtins
/* The global target hook vector.  TARGET_INITIALIZER expands to an
   aggregate initializer built from all the TARGET_* macros defined
   above (and the defaults from target-def.h for any hook not
   overridden).  The rest of the compiler dispatches target-specific
   behavior through this structure.  */
39248 struct gcc_target targetm = TARGET_INITIALIZER;
39250 #include "gt-i386.h"