/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "diagnostic-core.h"
#include "toplev.h"
#include "basic-block.h"
#include "ggc.h"
#include "target.h"
#include "target-def.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "cgraph.h"
#include "gimple.h"
#include "dwarf2.h"
#include "df.h"
#include "tm-constrs.h"
#include "params.h"
#include "cselib.h"
#include "debug.h"
#include "sched-int.h"
#include "sbitmap.h"
#include "fibheap.h"
#include "opts.h"
#include "diagnostic.h"
enum upper_128bits_state
{
  unknown = 0,
  unused,
  used
};
typedef struct block_info_def
{
  /* State of the upper 128bits of AVX registers at exit.  */
  enum upper_128bits_state state;
  /* TRUE if state of the upper 128bits of AVX registers is unchanged
     in this block.  */
  bool unchanged;
  /* TRUE if block has been processed.  */
  bool processed;
  /* TRUE if block has been scanned.  */
  bool scanned;
  /* Previous state of the upper 128bits of AVX registers at entry.  */
  enum upper_128bits_state prev;
} *block_info;

#define BLOCK_INFO(B)	((block_info) (B)->aux)
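
/* During the vzeroupper pass each basic block's AUX field points at
   one of these records; they are set up by alloc_aux_for_blocks and
   released by free_aux_for_blocks in move_or_delete_vzeroupper.  */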
enum call_avx256_state
{
  /* Callee returns 256bit AVX register.  */
  callee_return_avx256 = -1,
  /* Callee returns and passes 256bit AVX register.  */
  callee_return_pass_avx256,
  /* Callee passes 256bit AVX register.  */
  callee_pass_avx256,
  /* Callee doesn't return nor pass 256bit AVX register, or no
     256bit AVX register in function return.  */
  call_no_avx256,
  /* vzeroupper intrinsic.  */
  vzeroupper_intrinsic
};
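
/* A vzeroupper UNSPEC_VOLATILE pattern carries one of the values
   above as its operand; move_or_delete_vzeroupper_2 reads it back
   with INTVAL (XVECEXP (pat, 0, 0)).  */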
/* Check if a 256bit AVX register is referenced in stores.  */

static void
check_avx256_stores (rtx dest, const_rtx set, void *data)
{
  if ((REG_P (dest)
       && VALID_AVX256_REG_MODE (GET_MODE (dest)))
      || (GET_CODE (set) == SET
	  && REG_P (SET_SRC (set))
	  && VALID_AVX256_REG_MODE (GET_MODE (SET_SRC (set)))))
    {
      enum upper_128bits_state *state
	= (enum upper_128bits_state *) data;
      *state = used;
    }
}
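
/* check_avx256_stores is only ever invoked through note_stores below,
   which calls it once per store in an insn pattern; it flips the
   state passed via DATA to USED as soon as a store sets or copies a
   256bit AVX register.  */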
/* Helper function for move_or_delete_vzeroupper_1.  Look for vzeroupper
   in basic block BB.  Delete it if upper 128bit AVX registers are
   unused.  If it isn't deleted, move it to just before a jump insn.

   STATE is state of the upper 128bits of AVX registers at entry.  */

static void
move_or_delete_vzeroupper_2 (basic_block bb,
			     enum upper_128bits_state state)
{
  rtx insn, bb_end;
  rtx vzeroupper_insn = NULL_RTX;
  rtx pat;
  int avx256;
  bool unchanged;

  if (BLOCK_INFO (bb)->unchanged)
    {
      if (dump_file)
	fprintf (dump_file, " [bb %i] unchanged: upper 128bits: %d\n",
		 bb->index, state);

      BLOCK_INFO (bb)->state = state;
      return;
    }

  if (BLOCK_INFO (bb)->scanned && BLOCK_INFO (bb)->prev == state)
    {
      if (dump_file)
	fprintf (dump_file, " [bb %i] scanned: upper 128bits: %d\n",
		 bb->index, BLOCK_INFO (bb)->state);
      return;
    }

  BLOCK_INFO (bb)->prev = state;

  if (dump_file)
    fprintf (dump_file, " [bb %i] entry: upper 128bits: %d\n",
	     bb->index, state);

  unchanged = true;

  /* BB_END changes when it is deleted.  */
  bb_end = BB_END (bb);
  insn = BB_HEAD (bb);
  while (insn != bb_end)
    {
      insn = NEXT_INSN (insn);

      if (!NONDEBUG_INSN_P (insn))
	continue;

      /* Move vzeroupper before jump/call.  */
      if (JUMP_P (insn) || CALL_P (insn))
	{
	  if (!vzeroupper_insn)
	    continue;

	  if (PREV_INSN (insn) != vzeroupper_insn)
	    {
	      if (dump_file)
		{
		  fprintf (dump_file, "Move vzeroupper after:\n");
		  print_rtl_single (dump_file, PREV_INSN (insn));
		  fprintf (dump_file, "before:\n");
		  print_rtl_single (dump_file, insn);
		}
	      reorder_insns_nobb (vzeroupper_insn, vzeroupper_insn,
				  PREV_INSN (insn));
	    }
	  vzeroupper_insn = NULL_RTX;
	  continue;
	}

      pat = PATTERN (insn);

      /* Check insn for vzeroupper intrinsic.  */
      if (GET_CODE (pat) == UNSPEC_VOLATILE
	  && XINT (pat, 1) == UNSPECV_VZEROUPPER)
	{
	  if (dump_file)
	    {
	      /* Found vzeroupper intrinsic.  */
	      fprintf (dump_file, "Found vzeroupper:\n");
	      print_rtl_single (dump_file, insn);
	    }
	}
      else
	{
	  /* Check insn for vzeroall intrinsic.  */
	  if (GET_CODE (pat) == PARALLEL
	      && GET_CODE (XVECEXP (pat, 0, 0)) == UNSPEC_VOLATILE
	      && XINT (XVECEXP (pat, 0, 0), 1) == UNSPECV_VZEROALL)
	    {
	      state = unused;
	      unchanged = false;

	      /* Delete pending vzeroupper insertion.  */
	      if (vzeroupper_insn)
		{
		  delete_insn (vzeroupper_insn);
		  vzeroupper_insn = NULL_RTX;
		}
	    }
	  else if (state != used)
	    {
	      note_stores (pat, check_avx256_stores, &state);
	      if (state == used)
		unchanged = false;
	    }
	  continue;
	}

      /* Process vzeroupper intrinsic.  */
      avx256 = INTVAL (XVECEXP (pat, 0, 0));

      if (state == unused)
	{
	  /* Since the upper 128bits are cleared, callee must not pass
	     256bit AVX register.  We only need to check if callee
	     returns 256bit AVX register.  */
	  if (avx256 == callee_return_avx256)
	    {
	      state = used;
	      unchanged = false;
	    }

	  /* Remove unnecessary vzeroupper since upper 128bits are
	     cleared.  */
	  if (dump_file)
	    {
	      fprintf (dump_file, "Delete redundant vzeroupper:\n");
	      print_rtl_single (dump_file, insn);
	    }
	  delete_insn (insn);
	}
      else
	{
	  /* Set state to UNUSED if callee doesn't return 256bit AVX
	     register.  */
	  if (avx256 != callee_return_pass_avx256)
	    state = unused;

	  if (avx256 == callee_return_pass_avx256
	      || avx256 == callee_pass_avx256)
	    {
	      /* Must remove vzeroupper since callee passes in 256bit
		 AVX register.  */
	      if (dump_file)
		{
		  fprintf (dump_file, "Delete callee pass vzeroupper:\n");
		  print_rtl_single (dump_file, insn);
		}
	      delete_insn (insn);
	    }
	  else
	    {
	      vzeroupper_insn = insn;
	      unchanged = false;
	    }
	}
    }

  BLOCK_INFO (bb)->state = state;
  BLOCK_INFO (bb)->unchanged = unchanged;
  BLOCK_INFO (bb)->scanned = true;

  if (dump_file)
    fprintf (dump_file, " [bb %i] exit: %s: upper 128bits: %d\n",
	     bb->index, unchanged ? "unchanged" : "changed",
	     state);
}
/* Helper function for move_or_delete_vzeroupper.  Process vzeroupper
   in BLOCK and check its predecessor blocks.  Treat UNKNOWN state
   as UNUSED if UNKNOWN_IS_UNUSED is true.  Return TRUE if the exit
   state is changed.  */

static bool
move_or_delete_vzeroupper_1 (basic_block block, bool unknown_is_unused)
{
  edge e;
  edge_iterator ei;
  enum upper_128bits_state state, old_state, new_state;
  bool seen_unknown;

  if (dump_file)
    fprintf (dump_file, " Process [bb %i]: status: %d\n",
	     block->index, BLOCK_INFO (block)->processed);

  if (BLOCK_INFO (block)->processed)
    return false;

  state = unused;

  /* Check all predecessor edges of this block.  */
  seen_unknown = false;
  FOR_EACH_EDGE (e, ei, block->preds)
    {
      if (e->src == block)
	continue;
      switch (BLOCK_INFO (e->src)->state)
	{
	case unknown:
	  if (!unknown_is_unused)
	    seen_unknown = true;
	  break;
	case unused:
	  break;
	case used:
	  state = used;
	  break;
	}
    }

  if (seen_unknown)
    state = unknown;

  old_state = BLOCK_INFO (block)->state;
  move_or_delete_vzeroupper_2 (block, state);
  new_state = BLOCK_INFO (block)->state;

  if (state != unknown || new_state == used)
    BLOCK_INFO (block)->processed = true;

  /* Need to rescan if the upper 128bits of AVX registers are changed
     to USED at exit.  */
  if (new_state != old_state)
    {
      if (new_state == used)
	cfun->machine->rescan_vzeroupper_p = 1;
      return true;
    }
  else
    return false;
}
/* Go through the instruction stream looking for vzeroupper.  Delete
   it if upper 128bit AVX registers are unused.  If it isn't deleted,
   move it to just before a jump insn.  */

static void
move_or_delete_vzeroupper (void)
{
  edge e;
  edge_iterator ei;
  basic_block bb;
  fibheap_t worklist, pending, fibheap_swap;
  sbitmap visited, in_worklist, in_pending, sbitmap_swap;
  int *bb_order;
  int *rc_order;
  int i;

  /* Set up block info for each basic block.  */
  alloc_aux_for_blocks (sizeof (struct block_info_def));

  /* Process outgoing edges of entry point.  */
  if (dump_file)
    fprintf (dump_file, "Process outgoing edges of entry point\n");

  FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR->succs)
    {
      move_or_delete_vzeroupper_2 (e->dest,
				   cfun->machine->caller_pass_avx256_p
				   ? used : unused);
      BLOCK_INFO (e->dest)->processed = true;
    }

  /* Compute reverse completion order of depth first search of the CFG
     so that the data-flow runs faster.  */
  rc_order = XNEWVEC (int, n_basic_blocks - NUM_FIXED_BLOCKS);
  bb_order = XNEWVEC (int, last_basic_block);
  pre_and_rev_post_order_compute (NULL, rc_order, false);
  for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
    bb_order[rc_order[i]] = i;
  free (rc_order);

  worklist = fibheap_new ();
  pending = fibheap_new ();
  visited = sbitmap_alloc (last_basic_block);
  in_worklist = sbitmap_alloc (last_basic_block);
  in_pending = sbitmap_alloc (last_basic_block);
  sbitmap_zero (in_worklist);

  /* Don't check outgoing edges of entry point.  */
  sbitmap_ones (in_pending);
  FOR_EACH_BB (bb)
    if (BLOCK_INFO (bb)->processed)
      RESET_BIT (in_pending, bb->index);
    else
      {
	move_or_delete_vzeroupper_1 (bb, false);
	fibheap_insert (pending, bb_order[bb->index], bb);
      }

  if (dump_file)
    fprintf (dump_file, "Check remaining basic blocks\n");

  while (!fibheap_empty (pending))
    {
      fibheap_swap = pending;
      pending = worklist;
      worklist = fibheap_swap;
      sbitmap_swap = in_pending;
      in_pending = in_worklist;
      in_worklist = sbitmap_swap;

      sbitmap_zero (visited);

      cfun->machine->rescan_vzeroupper_p = 0;

      while (!fibheap_empty (worklist))
	{
	  bb = (basic_block) fibheap_extract_min (worklist);
	  RESET_BIT (in_worklist, bb->index);
	  gcc_assert (!TEST_BIT (visited, bb->index));
	  if (!TEST_BIT (visited, bb->index))
	    {
	      edge_iterator ei;

	      SET_BIT (visited, bb->index);

	      if (move_or_delete_vzeroupper_1 (bb, false))
		FOR_EACH_EDGE (e, ei, bb->succs)
		  {
		    if (e->dest == EXIT_BLOCK_PTR
			|| BLOCK_INFO (e->dest)->processed)
		      continue;

		    if (TEST_BIT (visited, e->dest->index))
		      {
			if (!TEST_BIT (in_pending, e->dest->index))
			  {
			    /* Send E->DEST to next round.  */
			    SET_BIT (in_pending, e->dest->index);
			    fibheap_insert (pending,
					    bb_order[e->dest->index],
					    e->dest);
			  }
		      }
		    else if (!TEST_BIT (in_worklist, e->dest->index))
		      {
			/* Add E->DEST to current round.  */
			SET_BIT (in_worklist, e->dest->index);
			fibheap_insert (worklist, bb_order[e->dest->index],
					e->dest);
		      }
		  }
	    }
	}

      if (!cfun->machine->rescan_vzeroupper_p)
	break;
    }

  free (bb_order);
  fibheap_delete (worklist);
  fibheap_delete (pending);
  sbitmap_free (visited);
  sbitmap_free (in_worklist);
  sbitmap_free (in_pending);

  if (dump_file)
    fprintf (dump_file, "Process remaining basic blocks\n");

  FOR_EACH_BB (bb)
    move_or_delete_vzeroupper_1 (bb, true);

  free_aux_for_blocks ();
}
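
/* Note: the PENDING/WORKLIST heap pair above is a standard iterative
   data-flow scheme.  Blocks are drained in reverse completion order,
   a block whose exit state flips to USED requests another round via
   rescan_vzeroupper_p, and iteration stops at a fixed point before
   the final pass that treats remaining UNKNOWN states as unused.  */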

static rtx legitimize_dllimport_symbol (rtx, bool);

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode)				\
  ((mode) == QImode ? 0					\
   : (mode) == HImode ? 1				\
   : (mode) == SImode ? 2				\
   : (mode) == DImode ? 3				\
   : 4)
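
/* For example, MODE_INDEX (SImode) == 2 selects the SI entry of the
   per-mode mult and divide arrays in struct processor_costs below;
   the trailing index 4 ("other") catches everything wider than
   DImode.  */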

/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)
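
/* Under the assumption above a 2-byte add costs COSTS_N_BYTES (2)
   == 4 == COSTS_N_INSNS (1), so COSTS_N_BYTES (N) prices an N-byte
   instruction on the same scale as the cycle-based tables below.  */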

#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}

const
struct processor_costs ix86_size_cost = {/* costs for tuning for size */
  COSTS_N_BYTES (2),		/* cost of an add instruction */
  COSTS_N_BYTES (3),		/* cost of a lea instruction */
  COSTS_N_BYTES (2),		/* variable shift costs */
  COSTS_N_BYTES (3),		/* constant shift costs */
  {COSTS_N_BYTES (3),		/* cost of starting multiply for QI */
   COSTS_N_BYTES (3),		/*                               HI */
   COSTS_N_BYTES (3),		/*                               SI */
   COSTS_N_BYTES (3),		/*                               DI */
   COSTS_N_BYTES (5)},		/*                            other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_BYTES (3),		/* cost of a divide/mod for QI */
   COSTS_N_BYTES (3),		/*                          HI */
   COSTS_N_BYTES (3),		/*                          SI */
   COSTS_N_BYTES (3),		/*                          DI */
   COSTS_N_BYTES (5)},		/*                       other */
  COSTS_N_BYTES (3),		/* cost of movsx */
  COSTS_N_BYTES (3),		/* cost of movzx */
  0,				/* "large" insn */
  2,				/* MOVE_RATIO */
  2,				/* cost for loading QImode using movzbl */
  {2, 2, 2},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {2, 2, 2},			/* cost of storing integer registers */
  2,				/* cost of reg,reg fld/fst */
  {2, 2, 2},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {2, 2, 2},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  3,				/* cost of moving MMX register */
  {3, 3},			/* cost of loading MMX registers
				   in SImode and DImode */
  {3, 3},			/* cost of storing MMX registers
				   in SImode and DImode */
  3,				/* cost of moving SSE register */
  {3, 3, 3},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {3, 3, 3},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  3,				/* MMX or SSE register to integer */
  0,				/* size of l1 cache */
  0,				/* size of l2 cache */
  0,				/* size of prefetch block */
  0,				/* number of parallel prefetches */
  2,				/* Branch cost */
  COSTS_N_BYTES (2),		/* cost of FADD and FSUB insns.  */
  COSTS_N_BYTES (2),		/* cost of FMUL instruction.  */
  COSTS_N_BYTES (2),		/* cost of FDIV instruction.  */
  COSTS_N_BYTES (2),		/* cost of FABS instruction.  */
  COSTS_N_BYTES (2),		/* cost of FCHS instruction.  */
  COSTS_N_BYTES (2),		/* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  1,				/* scalar_stmt_cost.  */
  1,				/* scalar_load_cost.  */
  1,				/* scalar_store_cost.  */
  1,				/* vec_stmt_cost.  */
  1,				/* vec_to_scalar_cost.  */
  1,				/* scalar_to_vec_cost.  */
  1,				/* vec_align_load_cost.  */
  1,				/* vec_unalign_load_cost.  */
  1,				/* vec_store_cost.  */
  1,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};

/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {	/* 386 specific costs */
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (1),		/* cost of a lea instruction */
  COSTS_N_INSNS (3),		/* variable shift costs */
  COSTS_N_INSNS (2),		/* constant shift costs */
  {COSTS_N_INSNS (6),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (6),		/*                               HI */
   COSTS_N_INSNS (6),		/*                               SI */
   COSTS_N_INSNS (6),		/*                               DI */
   COSTS_N_INSNS (6)},		/*                            other */
  COSTS_N_INSNS (1),		/* cost of multiply per each bit set */
  {COSTS_N_INSNS (23),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),		/*                          HI */
   COSTS_N_INSNS (23),		/*                          SI */
   COSTS_N_INSNS (23),		/*                          DI */
   COSTS_N_INSNS (23)},		/*                       other */
  COSTS_N_INSNS (3),		/* cost of movsx */
  COSTS_N_INSNS (2),		/* cost of movzx */
  15,				/* "large" insn */
  3,				/* MOVE_RATIO */
  4,				/* cost for loading QImode using movzbl */
  {2, 4, 2},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {2, 4, 2},			/* cost of storing integer registers */
  2,				/* cost of reg,reg fld/fst */
  {8, 8, 8},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {8, 8, 8},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {4, 8},			/* cost of loading MMX registers
				   in SImode and DImode */
  {4, 8},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {4, 8, 16},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {4, 8, 16},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  3,				/* MMX or SSE register to integer */
  0,				/* size of l1 cache */
  0,				/* size of l2 cache */
  0,				/* size of prefetch block */
  0,				/* number of parallel prefetches */
  1,				/* Branch cost */
  COSTS_N_INSNS (23),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (27),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (88),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (22),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (24),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (122),		/* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,				/* scalar_stmt_cost.  */
  1,				/* scalar_load_cost.  */
  1,				/* scalar_store_cost.  */
  1,				/* vec_stmt_cost.  */
  1,				/* vec_to_scalar_cost.  */
  1,				/* scalar_to_vec_cost.  */
  1,				/* vec_align_load_cost.  */
  2,				/* vec_unalign_load_cost.  */
  1,				/* vec_store_cost.  */
  3,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs i486_cost = {	/* 486 specific costs */
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (1),		/* cost of a lea instruction */
  COSTS_N_INSNS (3),		/* variable shift costs */
  COSTS_N_INSNS (2),		/* constant shift costs */
  {COSTS_N_INSNS (12),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (12),		/*                               HI */
   COSTS_N_INSNS (12),		/*                               SI */
   COSTS_N_INSNS (12),		/*                               DI */
   COSTS_N_INSNS (12)},		/*                            other */
  1,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (40),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (40),		/*                          HI */
   COSTS_N_INSNS (40),		/*                          SI */
   COSTS_N_INSNS (40),		/*                          DI */
   COSTS_N_INSNS (40)},		/*                       other */
  COSTS_N_INSNS (3),		/* cost of movsx */
  COSTS_N_INSNS (2),		/* cost of movzx */
  15,				/* "large" insn */
  3,				/* MOVE_RATIO */
  4,				/* cost for loading QImode using movzbl */
  {2, 4, 2},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {2, 4, 2},			/* cost of storing integer registers */
  2,				/* cost of reg,reg fld/fst */
  {8, 8, 8},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {8, 8, 8},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {4, 8},			/* cost of loading MMX registers
				   in SImode and DImode */
  {4, 8},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {4, 8, 16},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {4, 8, 16},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  3,				/* MMX or SSE register to integer */
  4,				/* size of l1 cache.  486 has 8kB cache
				   shared for code and data, so 4kB is
				   not really precise.  */
  4,				/* size of l2 cache */
  0,				/* size of prefetch block */
  0,				/* number of parallel prefetches */
  1,				/* Branch cost */
  COSTS_N_INSNS (8),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (16),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (73),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (3),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (83),		/* cost of FSQRT instruction.  */
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,				/* scalar_stmt_cost.  */
  1,				/* scalar_load_cost.  */
  1,				/* scalar_store_cost.  */
  1,				/* vec_stmt_cost.  */
  1,				/* vec_to_scalar_cost.  */
  1,				/* scalar_to_vec_cost.  */
  1,				/* vec_align_load_cost.  */
  2,				/* vec_unalign_load_cost.  */
  1,				/* vec_store_cost.  */
  3,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (1),		/* cost of a lea instruction */
  COSTS_N_INSNS (4),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (11),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (11),		/*                               HI */
   COSTS_N_INSNS (11),		/*                               SI */
   COSTS_N_INSNS (11),		/*                               DI */
   COSTS_N_INSNS (11)},		/*                            other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (25),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (25),		/*                          HI */
   COSTS_N_INSNS (25),		/*                          SI */
   COSTS_N_INSNS (25),		/*                          DI */
   COSTS_N_INSNS (25)},		/*                       other */
  COSTS_N_INSNS (3),		/* cost of movsx */
  COSTS_N_INSNS (2),		/* cost of movzx */
  8,				/* "large" insn */
  6,				/* MOVE_RATIO */
  6,				/* cost for loading QImode using movzbl */
  {2, 4, 2},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {2, 4, 2},			/* cost of storing integer registers */
  2,				/* cost of reg,reg fld/fst */
  {2, 2, 6},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {4, 4, 6},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  8,				/* cost of moving MMX register */
  {8, 8},			/* cost of loading MMX registers
				   in SImode and DImode */
  {8, 8},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {4, 8, 16},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {4, 8, 16},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  3,				/* MMX or SSE register to integer */
  8,				/* size of l1 cache.  */
  8,				/* size of l2 cache */
  0,				/* size of prefetch block */
  0,				/* number of parallel prefetches */
  2,				/* Branch cost */
  COSTS_N_INSNS (3),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (3),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (39),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (1),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (70),		/* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,				/* scalar_stmt_cost.  */
  1,				/* scalar_load_cost.  */
  1,				/* scalar_store_cost.  */
  1,				/* vec_stmt_cost.  */
  1,				/* vec_to_scalar_cost.  */
  1,				/* scalar_to_vec_cost.  */
  1,				/* vec_align_load_cost.  */
  2,				/* vec_unalign_load_cost.  */
  1,				/* vec_store_cost.  */
  3,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (1),		/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (4),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),		/*                               HI */
   COSTS_N_INSNS (4),		/*                               SI */
   COSTS_N_INSNS (4),		/*                               DI */
   COSTS_N_INSNS (4)},		/*                            other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (17),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (17),		/*                          HI */
   COSTS_N_INSNS (17),		/*                          SI */
   COSTS_N_INSNS (17),		/*                          DI */
   COSTS_N_INSNS (17)},		/*                       other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  8,				/* "large" insn */
  6,				/* MOVE_RATIO */
  2,				/* cost for loading QImode using movzbl */
  {4, 4, 4},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {2, 2, 2},			/* cost of storing integer registers */
  2,				/* cost of reg,reg fld/fst */
  {2, 2, 6},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {4, 4, 6},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {2, 2},			/* cost of loading MMX registers
				   in SImode and DImode */
  {2, 2},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {2, 2, 8},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {2, 2, 8},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  3,				/* MMX or SSE register to integer */
  8,				/* size of l1 cache.  */
  256,				/* size of l2 cache */
  32,				/* size of prefetch block */
  6,				/* number of parallel prefetches */
  2,				/* Branch cost */
  COSTS_N_INSNS (3),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),		/* cost of FSQRT instruction.  */
  /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
     (we ensure the alignment).  For small blocks inline loop is still a
     noticeable win, for bigger blocks either rep movsl or rep movsb is
     the way to go.  Rep movsb has apparently more expensive startup time
     in CPU, but after 4K the difference is down in the noise.  */
  {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
			{8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{1024, unrolled_loop},
			{8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,				/* scalar_stmt_cost.  */
  1,				/* scalar_load_cost.  */
  1,				/* scalar_store_cost.  */
  1,				/* vec_stmt_cost.  */
  1,				/* vec_to_scalar_cost.  */
  1,				/* scalar_to_vec_cost.  */
  1,				/* vec_align_load_cost.  */
  2,				/* vec_unalign_load_cost.  */
  1,				/* vec_store_cost.  */
  3,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs geode_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (1),		/* cost of a lea instruction */
  COSTS_N_INSNS (2),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (3),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),		/*                               HI */
   COSTS_N_INSNS (7),		/*                               SI */
   COSTS_N_INSNS (7),		/*                               DI */
   COSTS_N_INSNS (7)},		/*                            other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (15),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),		/*                          HI */
   COSTS_N_INSNS (39),		/*                          SI */
   COSTS_N_INSNS (39),		/*                          DI */
   COSTS_N_INSNS (39)},		/*                       other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  8,				/* "large" insn */
  4,				/* MOVE_RATIO */
  1,				/* cost for loading QImode using movzbl */
  {1, 1, 1},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {1, 1, 1},			/* cost of storing integer registers */
  1,				/* cost of reg,reg fld/fst */
  {1, 1, 1},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {4, 6, 6},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */

  1,				/* cost of moving MMX register */
  {1, 1},			/* cost of loading MMX registers
				   in SImode and DImode */
  {1, 1},			/* cost of storing MMX registers
				   in SImode and DImode */
  1,				/* cost of moving SSE register */
  {1, 1, 1},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {1, 1, 1},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  1,				/* MMX or SSE register to integer */
  64,				/* size of l1 cache.  */
  128,				/* size of l2 cache.  */
  32,				/* size of prefetch block */
  1,				/* number of parallel prefetches */
  1,				/* Branch cost */
  COSTS_N_INSNS (6),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (11),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (47),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (1),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (54),		/* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,				/* scalar_stmt_cost.  */
  1,				/* scalar_load_cost.  */
  1,				/* scalar_store_cost.  */
  1,				/* vec_stmt_cost.  */
  1,				/* vec_to_scalar_cost.  */
  1,				/* scalar_to_vec_cost.  */
  1,				/* vec_align_load_cost.  */
  2,				/* vec_unalign_load_cost.  */
  1,				/* vec_store_cost.  */
  3,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs k6_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (2),		/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (3),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (3),		/*                               HI */
   COSTS_N_INSNS (3),		/*                               SI */
   COSTS_N_INSNS (3),		/*                               DI */
   COSTS_N_INSNS (3)},		/*                            other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (18),		/*                          HI */
   COSTS_N_INSNS (18),		/*                          SI */
   COSTS_N_INSNS (18),		/*                          DI */
   COSTS_N_INSNS (18)},		/*                       other */
  COSTS_N_INSNS (2),		/* cost of movsx */
  COSTS_N_INSNS (2),		/* cost of movzx */
  8,				/* "large" insn */
  4,				/* MOVE_RATIO */
  3,				/* cost for loading QImode using movzbl */
  {4, 5, 4},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {2, 3, 2},			/* cost of storing integer registers */
  4,				/* cost of reg,reg fld/fst */
  {6, 6, 6},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {4, 4, 4},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {2, 2},			/* cost of loading MMX registers
				   in SImode and DImode */
  {2, 2},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {2, 2, 8},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {2, 2, 8},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  6,				/* MMX or SSE register to integer */
  32,				/* size of l1 cache.  */
  32,				/* size of l2 cache.  Some models
				   have integrated l2 cache, but
				   optimizing for k6 is not important
				   enough to worry about that.  */
  32,				/* size of prefetch block */
  1,				/* number of parallel prefetches */
  1,				/* Branch cost */
  COSTS_N_INSNS (2),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (2),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),		/* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,				/* scalar_stmt_cost.  */
  1,				/* scalar_load_cost.  */
  1,				/* scalar_store_cost.  */
  1,				/* vec_stmt_cost.  */
  1,				/* vec_to_scalar_cost.  */
  1,				/* scalar_to_vec_cost.  */
  1,				/* vec_align_load_cost.  */
  2,				/* vec_unalign_load_cost.  */
  1,				/* vec_store_cost.  */
  3,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (2),		/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (5),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (5),		/*                               HI */
   COSTS_N_INSNS (5),		/*                               SI */
   COSTS_N_INSNS (5),		/*                               DI */
   COSTS_N_INSNS (5)},		/*                            other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),		/*                          HI */
   COSTS_N_INSNS (42),		/*                          SI */
   COSTS_N_INSNS (74),		/*                          DI */
   COSTS_N_INSNS (74)},		/*                       other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  8,				/* "large" insn */
  9,				/* MOVE_RATIO */
  4,				/* cost for loading QImode using movzbl */
  {3, 4, 3},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {3, 4, 3},			/* cost of storing integer registers */
  4,				/* cost of reg,reg fld/fst */
  {4, 4, 12},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {6, 6, 8},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {4, 4},			/* cost of loading MMX registers
				   in SImode and DImode */
  {4, 4},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {4, 4, 6},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {4, 4, 5},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  5,				/* MMX or SSE register to integer */
  64,				/* size of l1 cache.  */
  256,				/* size of l2 cache.  */
  64,				/* size of prefetch block */
  6,				/* number of parallel prefetches */
  5,				/* Branch cost */
  COSTS_N_INSNS (4),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (24),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),		/* cost of FSQRT instruction.  */
  /* For some reason, Athlon deals better with REP prefix (relative to loops)
     compared to K8. Alignment becomes important after 8 bytes for memcpy and
     128 bytes for memset.  */
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,				/* scalar_stmt_cost.  */
  1,				/* scalar_load_cost.  */
  1,				/* scalar_store_cost.  */
  1,				/* vec_stmt_cost.  */
  1,				/* vec_to_scalar_cost.  */
  1,				/* scalar_to_vec_cost.  */
  1,				/* vec_align_load_cost.  */
  2,				/* vec_unalign_load_cost.  */
  1,				/* vec_store_cost.  */
  3,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs k8_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (2),		/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (3),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),		/*                               HI */
   COSTS_N_INSNS (3),		/*                               SI */
   COSTS_N_INSNS (4),		/*                               DI */
   COSTS_N_INSNS (5)},		/*                            other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),		/*                          HI */
   COSTS_N_INSNS (42),		/*                          SI */
   COSTS_N_INSNS (74),		/*                          DI */
   COSTS_N_INSNS (74)},		/*                       other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  8,				/* "large" insn */
  9,				/* MOVE_RATIO */
  4,				/* cost for loading QImode using movzbl */
  {3, 4, 3},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {3, 4, 3},			/* cost of storing integer registers */
  4,				/* cost of reg,reg fld/fst */
  {4, 4, 12},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {6, 6, 8},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {3, 3},			/* cost of loading MMX registers
				   in SImode and DImode */
  {4, 4},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {4, 3, 6},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {4, 4, 5},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  5,				/* MMX or SSE register to integer */
  64,				/* size of l1 cache.  */
  512,				/* size of l2 cache.  */
  64,				/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,				/* number of parallel prefetches */
  3,				/* Branch cost */
  COSTS_N_INSNS (4),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),		/* cost of FSQRT instruction.  */
  /* K8 has optimized REP instruction for medium sized blocks, but for very
     small blocks it is better to use loop.  For large blocks, libcall can
     do nontemporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,				/* scalar_stmt_cost.  */
  2,				/* scalar_load_cost.  */
  2,				/* scalar_store_cost.  */
  5,				/* vec_stmt_cost.  */
  0,				/* vec_to_scalar_cost.  */
  2,				/* scalar_to_vec_cost.  */
  2,				/* vec_align_load_cost.  */
  3,				/* vec_unalign_load_cost.  */
  3,				/* vec_store_cost.  */
  3,				/* cond_taken_branch_cost.  */
  2,				/* cond_not_taken_branch_cost.  */
};

struct processor_costs amdfam10_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (2),		/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (3),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),		/*                               HI */
   COSTS_N_INSNS (3),		/*                               SI */
   COSTS_N_INSNS (4),		/*                               DI */
   COSTS_N_INSNS (5)},		/*                            other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),		/*                          HI */
   COSTS_N_INSNS (51),		/*                          SI */
   COSTS_N_INSNS (83),		/*                          DI */
   COSTS_N_INSNS (83)},		/*                       other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  8,				/* "large" insn */
  9,				/* MOVE_RATIO */
  4,				/* cost for loading QImode using movzbl */
  {3, 4, 3},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {3, 4, 3},			/* cost of storing integer registers */
  4,				/* cost of reg,reg fld/fst */
  {4, 4, 12},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {6, 6, 8},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {3, 3},			/* cost of loading MMX registers
				   in SImode and DImode */
  {4, 4},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {4, 4, 3},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {4, 4, 5},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  3,				/* MMX or SSE register to integer */
				/* On K8:
				    MOVD reg64, xmmreg Double FSTORE 4
				    MOVD reg32, xmmreg Double FSTORE 4
				   On AMDFAM10:
				    MOVD reg64, xmmreg Double FADD 3
						       1/1  1/1
				    MOVD reg32, xmmreg Double FADD 3
						       1/1  1/1 */
  64,				/* size of l1 cache.  */
  512,				/* size of l2 cache.  */
  64,				/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,				/* number of parallel prefetches */
  2,				/* Branch cost */
  COSTS_N_INSNS (4),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),		/* cost of FSQRT instruction.  */

  /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall
     can do nontemporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,				/* scalar_stmt_cost.  */
  2,				/* scalar_load_cost.  */
  2,				/* scalar_store_cost.  */
  6,				/* vec_stmt_cost.  */
  0,				/* vec_to_scalar_cost.  */
  2,				/* scalar_to_vec_cost.  */
  2,				/* vec_align_load_cost.  */
  2,				/* vec_unalign_load_cost.  */
  2,				/* vec_store_cost.  */
  2,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};

struct processor_costs bdver1_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (1),		/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (4),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),		/*                               HI */
   COSTS_N_INSNS (4),		/*                               SI */
   COSTS_N_INSNS (6),		/*                               DI */
   COSTS_N_INSNS (6)},		/*                            other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),		/*                          HI */
   COSTS_N_INSNS (51),		/*                          SI */
   COSTS_N_INSNS (83),		/*                          DI */
   COSTS_N_INSNS (83)},		/*                       other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  8,				/* "large" insn */
  9,				/* MOVE_RATIO */
  4,				/* cost for loading QImode using movzbl */
  {5, 5, 4},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {4, 4, 4},			/* cost of storing integer registers */
  2,				/* cost of reg,reg fld/fst */
  {5, 5, 12},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {4, 4, 8},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {4, 4},			/* cost of loading MMX registers
				   in SImode and DImode */
  {4, 4},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {4, 4, 4},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {4, 4, 4},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  2,				/* MMX or SSE register to integer */
				/* On K8:
				    MOVD reg64, xmmreg Double FSTORE 4
				    MOVD reg32, xmmreg Double FSTORE 4
				   On AMDFAM10:
				    MOVD reg64, xmmreg Double FADD 3
						       1/1  1/1
				    MOVD reg32, xmmreg Double FADD 3
						       1/1  1/1 */
  16,				/* size of l1 cache.  */
  2048,				/* size of l2 cache.  */
  64,				/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,				/* number of parallel prefetches */
  2,				/* Branch cost */
  COSTS_N_INSNS (6),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),		/* cost of FSQRT instruction.  */

  /* BDVER1 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall
     can do nontemporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  6,				/* scalar_stmt_cost.  */
  4,				/* scalar_load_cost.  */
  4,				/* scalar_store_cost.  */
  6,				/* vec_stmt_cost.  */
  0,				/* vec_to_scalar_cost.  */
  2,				/* scalar_to_vec_cost.  */
  4,				/* vec_align_load_cost.  */
  4,				/* vec_unalign_load_cost.  */
  4,				/* vec_store_cost.  */
  2,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};

struct processor_costs bdver2_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (1),		/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (4),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),		/*                               HI */
   COSTS_N_INSNS (4),		/*                               SI */
   COSTS_N_INSNS (6),		/*                               DI */
   COSTS_N_INSNS (6)},		/*                            other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),		/*                          HI */
   COSTS_N_INSNS (51),		/*                          SI */
   COSTS_N_INSNS (83),		/*                          DI */
   COSTS_N_INSNS (83)},		/*                       other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  8,				/* "large" insn */
  9,				/* MOVE_RATIO */
  4,				/* cost for loading QImode using movzbl */
  {5, 5, 4},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {4, 4, 4},			/* cost of storing integer registers */
  2,				/* cost of reg,reg fld/fst */
  {5, 5, 12},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {4, 4, 8},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {4, 4},			/* cost of loading MMX registers
				   in SImode and DImode */
  {4, 4},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {4, 4, 4},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {4, 4, 4},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  2,				/* MMX or SSE register to integer */
				/* On K8:
				    MOVD reg64, xmmreg Double FSTORE 4
				    MOVD reg32, xmmreg Double FSTORE 4
				   On AMDFAM10:
				    MOVD reg64, xmmreg Double FADD 3
						       1/1  1/1
				    MOVD reg32, xmmreg Double FADD 3
						       1/1  1/1 */
  16,				/* size of l1 cache.  */
  2048,				/* size of l2 cache.  */
  64,				/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,				/* number of parallel prefetches */
  2,				/* Branch cost */
  COSTS_N_INSNS (6),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),		/* cost of FSQRT instruction.  */

  /* BDVER2 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall
     can do nontemporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  6,				/* scalar_stmt_cost.  */
  4,				/* scalar_load_cost.  */
  4,				/* scalar_store_cost.  */
  6,				/* vec_stmt_cost.  */
  0,				/* vec_to_scalar_cost.  */
  2,				/* scalar_to_vec_cost.  */
  4,				/* vec_align_load_cost.  */
  4,				/* vec_unalign_load_cost.  */
  4,				/* vec_store_cost.  */
  2,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};

struct processor_costs btver1_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (2),		/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (3),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),		/*                               HI */
   COSTS_N_INSNS (3),		/*                               SI */
   COSTS_N_INSNS (4),		/*                               DI */
   COSTS_N_INSNS (5)},		/*                            other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),		/*                          HI */
   COSTS_N_INSNS (51),		/*                          SI */
   COSTS_N_INSNS (83),		/*                          DI */
   COSTS_N_INSNS (83)},		/*                       other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  8,				/* "large" insn */
  9,				/* MOVE_RATIO */
  4,				/* cost for loading QImode using movzbl */
  {3, 4, 3},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {3, 4, 3},			/* cost of storing integer registers */
  4,				/* cost of reg,reg fld/fst */
  {4, 4, 12},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {6, 6, 8},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {3, 3},			/* cost of loading MMX registers
				   in SImode and DImode */
  {4, 4},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {4, 4, 3},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {4, 4, 5},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  3,				/* MMX or SSE register to integer */
				/* On K8:
				    MOVD reg64, xmmreg Double FSTORE 4
				    MOVD reg32, xmmreg Double FSTORE 4
				   On AMDFAM10:
				    MOVD reg64, xmmreg Double FADD 3
						       1/1  1/1
				    MOVD reg32, xmmreg Double FADD 3
						       1/1  1/1 */
  32,				/* size of l1 cache.  */
  512,				/* size of l2 cache.  */
  64,				/* size of prefetch block */
  100,				/* number of parallel prefetches */
  2,				/* Branch cost */
  COSTS_N_INSNS (4),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),		/* cost of FSQRT instruction.  */

  /* BTVER1 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall
     can do nontemporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,				/* scalar_stmt_cost.  */
  2,				/* scalar_load_cost.  */
  2,				/* scalar_store_cost.  */
  6,				/* vec_stmt_cost.  */
  0,				/* vec_to_scalar_cost.  */
  2,				/* scalar_to_vec_cost.  */
  2,				/* vec_align_load_cost.  */
  2,				/* vec_unalign_load_cost.  */
  2,				/* vec_store_cost.  */
  2,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs pentium4_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (3),		/* cost of a lea instruction */
  COSTS_N_INSNS (4),		/* variable shift costs */
  COSTS_N_INSNS (4),		/* constant shift costs */
  {COSTS_N_INSNS (15),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (15),		/*                               HI */
   COSTS_N_INSNS (15),		/*                               SI */
   COSTS_N_INSNS (15),		/*                               DI */
   COSTS_N_INSNS (15)},		/*                            other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (56),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (56),		/*                          HI */
   COSTS_N_INSNS (56),		/*                          SI */
   COSTS_N_INSNS (56),		/*                          DI */
   COSTS_N_INSNS (56)},		/*                       other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  16,				/* "large" insn */
  6,				/* MOVE_RATIO */
  2,				/* cost for loading QImode using movzbl */
  {4, 5, 4},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {2, 3, 2},			/* cost of storing integer registers */
  2,				/* cost of reg,reg fld/fst */
  {2, 2, 6},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {4, 4, 6},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {2, 2},			/* cost of loading MMX registers
				   in SImode and DImode */
  {2, 2},			/* cost of storing MMX registers
				   in SImode and DImode */
  12,				/* cost of moving SSE register */
  {12, 12, 12},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {2, 2, 8},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  10,				/* MMX or SSE register to integer */
  8,				/* size of l1 cache.  */
  256,				/* size of l2 cache.  */
  64,				/* size of prefetch block */
  6,				/* number of parallel prefetches */
  2,				/* Branch cost */
  COSTS_N_INSNS (5),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (7),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (43),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (43),		/* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
	      {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,				/* scalar_stmt_cost.  */
  1,				/* scalar_load_cost.  */
  1,				/* scalar_store_cost.  */
  1,				/* vec_stmt_cost.  */
  1,				/* vec_to_scalar_cost.  */
  1,				/* scalar_to_vec_cost.  */
  1,				/* vec_align_load_cost.  */
  2,				/* vec_unalign_load_cost.  */
  1,				/* vec_store_cost.  */
  3,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs nocona_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (1),		/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (10),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (10),		/*                               HI */
   COSTS_N_INSNS (10),		/*                               SI */
   COSTS_N_INSNS (10),		/*                               DI */
   COSTS_N_INSNS (10)},		/*                            other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (66),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (66),		/*                          HI */
   COSTS_N_INSNS (66),		/*                          SI */
   COSTS_N_INSNS (66),		/*                          DI */
   COSTS_N_INSNS (66)},		/*                       other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  16,				/* "large" insn */
  17,				/* MOVE_RATIO */
  4,				/* cost for loading QImode using movzbl */
  {4, 4, 4},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {4, 4, 4},			/* cost of storing integer registers */
  3,				/* cost of reg,reg fld/fst */
  {12, 12, 12},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {4, 4, 4},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  6,				/* cost of moving MMX register */
  {12, 12},			/* cost of loading MMX registers
				   in SImode and DImode */
  {12, 12},			/* cost of storing MMX registers
				   in SImode and DImode */
  6,				/* cost of moving SSE register */
  {12, 12, 12},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {12, 12, 12},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  8,				/* MMX or SSE register to integer */
  8,				/* size of l1 cache.  */
  1024,				/* size of l2 cache.  */
  128,				/* size of prefetch block */
  8,				/* number of parallel prefetches */
  1,				/* Branch cost */
  COSTS_N_INSNS (6),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (40),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (3),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (44),		/* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
	      {100000, unrolled_loop}, {-1, libcall}}}},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
	      {-1, libcall}}},
   {libcall, {{24, loop}, {64, unrolled_loop},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,				/* scalar_stmt_cost.  */
  1,				/* scalar_load_cost.  */
  1,				/* scalar_store_cost.  */
  1,				/* vec_stmt_cost.  */
  1,				/* vec_to_scalar_cost.  */
  1,				/* scalar_to_vec_cost.  */
  1,				/* vec_align_load_cost.  */
  2,				/* vec_unalign_load_cost.  */
  1,				/* vec_store_cost.  */
  3,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs atom_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,	/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (3),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),		/*                               HI */
   COSTS_N_INSNS (3),		/*                               SI */
   COSTS_N_INSNS (4),		/*                               DI */
   COSTS_N_INSNS (2)},		/*                            other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),		/*                          HI */
   COSTS_N_INSNS (42),		/*                          SI */
   COSTS_N_INSNS (74),		/*                          DI */
   COSTS_N_INSNS (74)},		/*                       other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  8,				/* "large" insn */
  17,				/* MOVE_RATIO */
  2,				/* cost for loading QImode using movzbl */
  {4, 4, 4},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {4, 4, 4},			/* cost of storing integer registers */
  4,				/* cost of reg,reg fld/fst */
  {12, 12, 12},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {6, 6, 8},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {8, 8},			/* cost of loading MMX registers
				   in SImode and DImode */
  {8, 8},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {8, 8, 8},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {8, 8, 8},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  5,				/* MMX or SSE register to integer */
  32,				/* size of l1 cache.  */
  256,				/* size of l2 cache.  */
  64,				/* size of prefetch block */
  6,				/* number of parallel prefetches */
  3,				/* Branch cost */
  COSTS_N_INSNS (8),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (8),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),		/* cost of FSQRT instruction.  */
  {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {64, rep_prefix_4_byte},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {15, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{24, loop}, {32, unrolled_loop},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,				/* scalar_stmt_cost.  */
  1,				/* scalar_load_cost.  */
  1,				/* scalar_store_cost.  */
  1,				/* vec_stmt_cost.  */
  1,				/* vec_to_scalar_cost.  */
  1,				/* scalar_to_vec_cost.  */
  1,				/* vec_align_load_cost.  */
  2,				/* vec_unalign_load_cost.  */
  1,				/* vec_store_cost.  */
  3,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};

1727 /* Generic64 should produce code tuned for Nocona and K8. */
1729 struct processor_costs generic64_cost = {
1730 COSTS_N_INSNS (1), /* cost of an add instruction */
1731 /* On all chips taken into consideration, lea is 2 cycles or more. With
1732 this cost, however, our current implementation of synth_mult results in
1733 the use of unnecessary temporary registers, causing regressions on several
1734 SPECfp benchmarks. */
1735 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1736 COSTS_N_INSNS (1), /* variable shift costs */
1737 COSTS_N_INSNS (1), /* constant shift costs */
1738 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1739 COSTS_N_INSNS (4), /* HI */
1740 COSTS_N_INSNS (3), /* SI */
1741 COSTS_N_INSNS (4), /* DI */
1742 COSTS_N_INSNS (2)}, /* other */
1743 0, /* cost of multiply per each bit set */
1744 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1745 COSTS_N_INSNS (26), /* HI */
1746 COSTS_N_INSNS (42), /* SI */
1747 COSTS_N_INSNS (74), /* DI */
1748 COSTS_N_INSNS (74)}, /* other */
1749 COSTS_N_INSNS (1), /* cost of movsx */
1750 COSTS_N_INSNS (1), /* cost of movzx */
1751 8, /* "large" insn */
1752 17, /* MOVE_RATIO */
1753 4, /* cost for loading QImode using movzbl */
1754 {4, 4, 4}, /* cost of loading integer registers
1755 in QImode, HImode and SImode.
1756 Relative to reg-reg move (2). */
1757 {4, 4, 4}, /* cost of storing integer registers */
1758 4, /* cost of reg,reg fld/fst */
1759 {12, 12, 12}, /* cost of loading fp registers
1760 in SFmode, DFmode and XFmode */
1761 {6, 6, 8}, /* cost of storing fp registers
1762 in SFmode, DFmode and XFmode */
1763 2, /* cost of moving MMX register */
1764 {8, 8}, /* cost of loading MMX registers
1765 in SImode and DImode */
1766 {8, 8}, /* cost of storing MMX registers
1767 in SImode and DImode */
1768 2, /* cost of moving SSE register */
1769 {8, 8, 8}, /* cost of loading SSE registers
1770 in SImode, DImode and TImode */
1771 {8, 8, 8}, /* cost of storing SSE registers
1772 in SImode, DImode and TImode */
1773 5, /* MMX or SSE register to integer */
1774 32, /* size of l1 cache. */
1775 512, /* size of l2 cache. */
1776 64, /* size of prefetch block */
1777 6, /* number of parallel prefetches */
1778 /* Benchmark results show large regressions on the K8 sixtrack benchmark
1779 when this value is increased to the perhaps more appropriate value of 5. */
1780 3, /* Branch cost */
1781 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1782 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1783 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1784 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1785 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1786 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1787 {DUMMY_STRINGOP_ALGS,
1788 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1789 {DUMMY_STRINGOP_ALGS,
1790 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1791 1, /* scalar_stmt_cost. */
1792 1, /* scalar load_cost. */
1793 1, /* scalar_store_cost. */
1794 1, /* vec_stmt_cost. */
1795 1, /* vec_to_scalar_cost. */
1796 1, /* scalar_to_vec_cost. */
1797 1, /* vec_align_load_cost. */
1798 2, /* vec_unalign_load_cost. */
1799 1, /* vec_store_cost. */
1800 3, /* cond_taken_branch_cost. */
1801 1, /* cond_not_taken_branch_cost. */
1804 /* Generic32 should produce code tuned for PPro, Pentium4, Nocona,
1807 struct processor_costs generic32_cost = {
1808 COSTS_N_INSNS (1), /* cost of an add instruction */
1809 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1810 COSTS_N_INSNS (1), /* variable shift costs */
1811 COSTS_N_INSNS (1), /* constant shift costs */
1812 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1813 COSTS_N_INSNS (4), /* HI */
1814 COSTS_N_INSNS (3), /* SI */
1815 COSTS_N_INSNS (4), /* DI */
1816 COSTS_N_INSNS (2)}, /* other */
1817 0, /* cost of multiply per each bit set */
1818 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1819 COSTS_N_INSNS (26), /* HI */
1820 COSTS_N_INSNS (42), /* SI */
1821 COSTS_N_INSNS (74), /* DI */
1822 COSTS_N_INSNS (74)}, /* other */
1823 COSTS_N_INSNS (1), /* cost of movsx */
1824 COSTS_N_INSNS (1), /* cost of movzx */
1825 8, /* "large" insn */
1826 17, /* MOVE_RATIO */
1827 4, /* cost for loading QImode using movzbl */
1828 {4, 4, 4}, /* cost of loading integer registers
1829 in QImode, HImode and SImode.
1830 Relative to reg-reg move (2). */
1831 {4, 4, 4}, /* cost of storing integer registers */
1832 4, /* cost of reg,reg fld/fst */
1833 {12, 12, 12}, /* cost of loading fp registers
1834 in SFmode, DFmode and XFmode */
1835 {6, 6, 8}, /* cost of storing fp registers
1836 in SFmode, DFmode and XFmode */
1837 2, /* cost of moving MMX register */
1838 {8, 8}, /* cost of loading MMX registers
1839 in SImode and DImode */
1840 {8, 8}, /* cost of storing MMX registers
1841 in SImode and DImode */
1842 2, /* cost of moving SSE register */
1843 {8, 8, 8}, /* cost of loading SSE registers
1844 in SImode, DImode and TImode */
1845 {8, 8, 8}, /* cost of storing SSE registers
1846 in SImode, DImode and TImode */
1847 5, /* MMX or SSE register to integer */
1848 32, /* size of l1 cache. */
1849 256, /* size of l2 cache. */
1850 64, /* size of prefetch block */
1851 6, /* number of parallel prefetches */
1852 3, /* Branch cost */
1853 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1854 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1855 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1856 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1857 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1858 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1859 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1860 DUMMY_STRINGOP_ALGS},
1861 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1862 DUMMY_STRINGOP_ALGS},
1863 1, /* scalar_stmt_cost. */
1864 1, /* scalar load_cost. */
1865 1, /* scalar_store_cost. */
1866 1, /* vec_stmt_cost. */
1867 1, /* vec_to_scalar_cost. */
1868 1, /* scalar_to_vec_cost. */
1869 1, /* vec_align_load_cost. */
1870 2, /* vec_unalign_load_cost. */
1871 1, /* vec_store_cost. */
1872 3, /* cond_taken_branch_cost. */
1873 1, /* cond_not_taken_branch_cost. */
1876 const struct processor_costs *ix86_cost = &pentium_cost;
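/* A minimal sketch (illustration only, not compiled) of how the cost
   tables above are consumed: the rtx cost hooks read fields off
   ix86_cost, the table selected for the current -mtune target, and
   COSTS_N_INSNS (from rtl.h) scales an instruction count into the
   units used in these tables.

     static int
     add_insn_cost_sketch (void)
     {
       return ix86_cost->add;  /* e.g. COSTS_N_INSNS (1) above.  */
     }
*/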
1878 /* Processor feature/optimization bitmasks. */
1879 #define m_386 (1<<PROCESSOR_I386)
1880 #define m_486 (1<<PROCESSOR_I486)
1881 #define m_PENT (1<<PROCESSOR_PENTIUM)
1882 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1883 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1884 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1885 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
1886 #define m_CORE2_32 (1<<PROCESSOR_CORE2_32)
1887 #define m_CORE2_64 (1<<PROCESSOR_CORE2_64)
1888 #define m_COREI7_32 (1<<PROCESSOR_COREI7_32)
1889 #define m_COREI7_64 (1<<PROCESSOR_COREI7_64)
1890 #define m_COREI7 (m_COREI7_32 | m_COREI7_64)
1891 #define m_CORE2I7_32 (m_CORE2_32 | m_COREI7_32)
1892 #define m_CORE2I7_64 (m_CORE2_64 | m_COREI7_64)
1893 #define m_CORE2I7 (m_CORE2I7_32 | m_CORE2I7_64)
1894 #define m_ATOM (1<<PROCESSOR_ATOM)
1896 #define m_GEODE (1<<PROCESSOR_GEODE)
1897 #define m_K6 (1<<PROCESSOR_K6)
1898 #define m_K6_GEODE (m_K6 | m_GEODE)
1899 #define m_K8 (1<<PROCESSOR_K8)
1900 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1901 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1902 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1903 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
1904 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
1905 #define m_BDVER (m_BDVER1 | m_BDVER2)
1906 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
1907 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1)
1909 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1910 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1912 /* Generic instruction choice should be a common subset of supported CPUs
1913 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1914 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1916 /* Feature tests against the various tunings. */
1917 unsigned char ix86_tune_features[X86_TUNE_LAST];
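/* Sketch of how individual entries are consumed (illustration only;
   the real definitions live in i386.h): each index is wrapped in a
   TARGET_* macro, along the lines of

     #define TARGET_USE_LEAVE ix86_tune_features[X86_TUNE_USE_LEAVE]
*/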
1919 /* Feature tests against the various tunings used to create ix86_tune_features
1920 based on the processor mask. */
1921 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
1922 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1923 negatively, so enabling it for Generic64 seems like a good code-size
1924 tradeoff. We can't enable it for 32bit generic because it does not
1925 work well with PPro-based chips. */
1926 m_386 | m_CORE2I7_64 | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC64,
1928 /* X86_TUNE_PUSH_MEMORY */
1929 m_386 | m_P4_NOCONA | m_CORE2I7 | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
1931 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1934 /* X86_TUNE_UNROLL_STRLEN */
1935 m_486 | m_PENT | m_PPRO | m_ATOM | m_CORE2I7 | m_K6 | m_AMD_MULTIPLE | m_GENERIC,
1937 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1938 on simulation results. But after P4 was made, no performance benefit
1939 was observed with branch hints; they also increase code size.
1940 As a result, icc never generates branch hints. */
1943 /* X86_TUNE_DOUBLE_WITH_ADD */
1946 /* X86_TUNE_USE_SAHF */
1947 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC,
1949 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1950 partial dependencies. */
1951 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GEODE | m_AMD_MULTIPLE | m_GENERIC,
1953 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1954 register stalls on the Generic32 compilation setting as well. However,
1955 in the current implementation partial register stalls are not eliminated
1956 very well: they can be introduced via subregs synthesized by combine
1957 and can happen in caller/callee saving sequences. Because this option
1958 pays back little on PPro-based chips and conflicts with the partial reg
1959 dependencies used by Athlon/P4-based chips, it is better to leave it off
1960 for generic32 for now. */
1963 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1964 m_CORE2I7 | m_GENERIC,
1966 /* X86_TUNE_USE_HIMODE_FIOP */
1967 m_386 | m_486 | m_K6_GEODE,
1969 /* X86_TUNE_USE_SIMODE_FIOP */
1970 ~(m_PENT | m_PPRO | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC),
1972 /* X86_TUNE_USE_MOV0 */
1975 /* X86_TUNE_USE_CLTD */
1976 ~(m_PENT | m_CORE2I7 | m_ATOM | m_K6 | m_GENERIC),
1978 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1981 /* X86_TUNE_SPLIT_LONG_MOVES */
1984 /* X86_TUNE_READ_MODIFY_WRITE */
1987 /* X86_TUNE_READ_MODIFY */
1990 /* X86_TUNE_PROMOTE_QIMODE */
1991 m_386 | m_486 | m_PENT | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
1993 /* X86_TUNE_FAST_PREFIX */
1994 ~(m_386 | m_486 | m_PENT),
1996 /* X86_TUNE_SINGLE_STRINGOP */
1997 m_386 | m_P4_NOCONA,
1999 /* X86_TUNE_QIMODE_MATH */
2002 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
2003 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
2004 might be considered for Generic32 if our scheme for avoiding partial
2005 stalls was more effective. */
2008 /* X86_TUNE_PROMOTE_QI_REGS */
2011 /* X86_TUNE_PROMOTE_HI_REGS */
2014 /* X86_TUNE_SINGLE_POP: Enable if single pop insn is preferred
2015 over esp addition. */
2016 m_386 | m_486 | m_PENT | m_PPRO,
2018 /* X86_TUNE_DOUBLE_POP: Enable if double pop insn is preferred
2019 over esp addition. */
2022 /* X86_TUNE_SINGLE_PUSH: Enable if single push insn is preferred
2023 over esp subtraction. */
2024 m_386 | m_486 | m_PENT | m_K6_GEODE,
2026 /* X86_TUNE_DOUBLE_PUSH. Enable if double push insn is preferred
2027 over esp subtraction. */
2028 m_PENT | m_K6_GEODE,
2030 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
2031 for DFmode copies */
2032 ~(m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GEODE | m_AMD_MULTIPLE | m_GENERIC),
2034 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
2035 m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
2037 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
2038 conflict here between PPro/Pentium4-based chips that treat 128bit
2039 SSE registers as single units and K8-based chips that divide SSE
2040 registers into two 64bit halves. This knob promotes all store destinations
2041 to be 128bit to allow register renaming on 128bit SSE units, but usually
2042 results in one extra microop on 64bit SSE units. Experimental results
2043 show that disabling this option on P4 brings over a 20% SPECfp regression,
2044 while enabling it on K8 brings roughly a 2.4% regression that can be partly
2045 masked by careful scheduling of moves. */
2046 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMDFAM10 | m_BDVER | m_GENERIC,
2048 /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
2049 m_COREI7 | m_AMDFAM10 | m_BDVER | m_BTVER1,
2051 /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
2054 /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
2057 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
2058 are resolved on SSE register parts instead of whole registers, so we may
2059 maintain just the lower part of scalar values in the proper format, leaving
2060 the upper part undefined. */
2063 /* X86_TUNE_SSE_TYPELESS_STORES */
2066 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
2067 m_PPRO | m_P4_NOCONA,
2069 /* X86_TUNE_MEMORY_MISMATCH_STALL */
2070 m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
2072 /* X86_TUNE_PROLOGUE_USING_MOVE */
2073 m_PPRO | m_CORE2I7 | m_ATOM | m_ATHLON_K8 | m_GENERIC,
2075 /* X86_TUNE_EPILOGUE_USING_MOVE */
2076 m_PPRO | m_CORE2I7 | m_ATOM | m_ATHLON_K8 | m_GENERIC,
2078 /* X86_TUNE_SHIFT1 */
2081 /* X86_TUNE_USE_FFREEP */
2084 /* X86_TUNE_INTER_UNIT_MOVES */
2085 ~(m_AMD_MULTIPLE | m_GENERIC),
2087 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
2088 ~(m_AMDFAM10 | m_BDVER),
2090 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
2091 than 4 branch instructions in the 16 byte window. */
2092 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
2094 /* X86_TUNE_SCHEDULE */
2095 m_PENT | m_PPRO | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
2097 /* X86_TUNE_USE_BT */
2098 m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
2100 /* X86_TUNE_USE_INCDEC */
2101 ~(m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GENERIC),
2103 /* X86_TUNE_PAD_RETURNS */
2104 m_CORE2I7 | m_AMD_MULTIPLE | m_GENERIC,
2106 /* X86_TUNE_PAD_SHORT_FUNCTION: Pad short functions. */
2109 /* X86_TUNE_EXT_80387_CONSTANTS */
2110 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_ATHLON_K8 | m_GENERIC,
2112 /* X86_TUNE_SHORTEN_X87_SSE */
2115 /* X86_TUNE_AVOID_VECTOR_DECODE */
2116 m_CORE2I7_64 | m_K8 | m_GENERIC64,
2118 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for HImode
2119 and SImode multiply, but the 386 and 486 do HImode multiply faster. */
2122 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
2123 vector path on AMD machines. */
2124 m_CORE2I7_64 | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC64,
2126 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
2128 m_CORE2I7_64 | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC64,
2130 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
2134 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
2135 but one byte longer. */
2138 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with a memory
2139 operand that cannot be represented using a modRM byte. The XOR
2140 replacement is long decoded, so this split helps here as well. */
2143 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
2145 m_CORE2I7 | m_AMDFAM10 | m_GENERIC,
2147 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
2148 from integer to FP. */
2151 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
2152 with a subsequent conditional jump instruction into a single
2153 compare-and-branch uop. */
2156 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
2157 will impact LEA instruction selection. */
2160 /* X86_TUNE_VECTORIZE_DOUBLE: Enable double precision vector
2164 /* X86_TUNE_SOFTWARE_PREFETCHING_BENEFICIAL: Enable software prefetching
2165 at -O3. For the moment, the prefetching seems badly tuned for Intel
2167 m_K6_GEODE | m_AMD_MULTIPLE,
2169 /* X86_TUNE_AVX128_OPTIMAL: Enable 128-bit AVX instruction generation for
2170 the auto-vectorizer. */
2174 /* Feature tests against the various architecture variations. */
2175 unsigned char ix86_arch_features[X86_ARCH_LAST];
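/* As with the tuning flags above, i386.h exposes these entries via
   TARGET_* macros (illustration only), e.g.

     #define TARGET_CMOVE ix86_arch_features[X86_ARCH_CMOVE]
*/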
2177 /* Feature tests against the various architecture variations, used to create
2178 ix86_arch_features based on the processor mask. */
2179 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2180 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
2181 ~(m_386 | m_486 | m_PENT | m_K6),
2183 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2186 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2189 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2192 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2196 static const unsigned int x86_accumulate_outgoing_args
2197 = m_PPRO | m_P4_NOCONA | m_ATOM | m_CORE2I7 | m_AMD_MULTIPLE | m_GENERIC;
2199 static const unsigned int x86_arch_always_fancy_math_387
2200 = m_PENT | m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC;
2202 static const unsigned int x86_avx256_split_unaligned_load
2203 = m_COREI7 | m_GENERIC;
2205 static const unsigned int x86_avx256_split_unaligned_store
2206 = m_COREI7 | m_BDVER | m_GENERIC;
2208 /* In case the average insn count for a single function invocation is
2209 lower than this constant, emit a fast (but longer) prologue and
2210 epilogue. */
2211 #define FAST_PROLOGUE_INSN_COUNT 20
2213 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
2214 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2215 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2216 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2218 /* Array of the smallest class containing reg number REGNO, indexed by
2219 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2221 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2223 /* ax, dx, cx, bx */
2224 AREG, DREG, CREG, BREG,
2225 /* si, di, bp, sp */
2226 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2228 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2229 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2232 /* flags, fpsr, fpcr, frame */
2233 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2235 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2238 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2241 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2242 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2243 /* SSE REX registers */
2244 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
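/* Sketch of the consumer mentioned above (illustration only; the real
   definition lives in i386.h):

     #define REGNO_REG_CLASS(REGNO) (regclass_map[(REGNO)])
*/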
2248 /* The "default" register map used in 32bit mode. */
2250 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2252 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2253 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2254 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2255 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2256 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2257 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2258 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2261 /* The "default" register map used in 64bit mode. */
2263 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2265 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2266 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2267 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2268 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2269 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2270 8, 9, 10, 11, 12, 13, 14, 15, /* extended integer registers */
2271 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2274 /* Define the register numbers to be used in Dwarf debugging information.
2275 The SVR4 reference port C compiler uses the following register numbers
2276 in its Dwarf output code:
2277 0 for %eax (gcc regno = 0)
2278 1 for %ecx (gcc regno = 2)
2279 2 for %edx (gcc regno = 1)
2280 3 for %ebx (gcc regno = 3)
2281 4 for %esp (gcc regno = 7)
2282 5 for %ebp (gcc regno = 6)
2283 6 for %esi (gcc regno = 4)
2284 7 for %edi (gcc regno = 5)
2285 The following three DWARF register numbers are never generated by
2286 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2287 believes these numbers have these meanings.
2288 8 for %eip (no gcc equivalent)
2289 9 for %eflags (gcc regno = 17)
2290 10 for %trapno (no gcc equivalent)
2291 It is not at all clear how we should number the FP stack registers
2292 for the x86 architecture. If the version of SDB on x86/svr4 were
2293 a bit less brain dead with respect to floating-point then we would
2294 have a precedent to follow with respect to DWARF register numbers
2295 for x86 FP registers, but the SDB on x86/svr4 is so completely
2296 broken with respect to FP registers that it is hardly worth thinking
2297 of it as something to strive for compatibility with.
2298 The version of x86/svr4 SDB I have at the moment does (partially)
2299 seem to believe that DWARF register number 11 is associated with
2300 the x86 register %st(0), but that's about all. Higher DWARF
2301 register numbers don't seem to be associated with anything in
2302 particular, and even for DWARF regno 11, SDB only seems to under-
2303 stand that it should say that a variable lives in %st(0) (when
2304 asked via an `=' command) if we said it was in DWARF regno 11,
2305 but SDB still prints garbage when asked for the value of the
2306 variable in question (via a `/' command).
2307 (Also note that the labels SDB prints for various FP stack regs
2308 when doing an `x' command are all wrong.)
2309 Note that these problems generally don't affect the native SVR4
2310 C compiler because it doesn't allow the use of -O with -g and
2311 because when it is *not* optimizing, it allocates a memory
2312 location for each floating-point variable, and the memory
2313 location is what gets described in the DWARF AT_location
2314 attribute for the variable in question.
2315 Regardless of the severe mental illness of the x86/svr4 SDB, we
2316 do something sensible here and we use the following DWARF
2317 register numbers. Note that these are all stack-top-relative
2319 11 for %st(0) (gcc regno = 8)
2320 12 for %st(1) (gcc regno = 9)
2321 13 for %st(2) (gcc regno = 10)
2322 14 for %st(3) (gcc regno = 11)
2323 15 for %st(4) (gcc regno = 12)
2324 16 for %st(5) (gcc regno = 13)
2325 17 for %st(6) (gcc regno = 14)
2326 18 for %st(7) (gcc regno = 15)
2328 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2330 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2331 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2332 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2333 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2334 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2335 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2336 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
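/* Sketch of how the maps above are selected (illustration only; the
   real DBX_REGISTER_NUMBER definition lives in the target headers):

     #define DBX_REGISTER_NUMBER(N) \
       (TARGET_64BIT ? dbx64_register_map[(N)] : dbx_register_map[(N)])

   SVR4-derived configurations substitute svr4_dbx_register_map in the
   32-bit case.  */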
2339 /* Define parameter passing and return registers. */
2341 static int const x86_64_int_parameter_registers[6] =
2343 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2346 static int const x86_64_ms_abi_int_parameter_registers[4] =
2348 CX_REG, DX_REG, R8_REG, R9_REG
2351 static int const x86_64_int_return_registers[4] =
2353 AX_REG, DX_REG, DI_REG, SI_REG
2356 /* Define the structure for the machine field in struct function. */
2358 struct GTY(()) stack_local_entry {
2359 unsigned short mode;
2362 struct stack_local_entry *next;
2365 /* Structure describing stack frame layout.
2366 Stack grows downward:
2372 saved static chain if ix86_static_chain_on_stack
2374 saved frame pointer if frame_pointer_needed
2375 <- HARD_FRAME_POINTER
2381 <- sse_regs_save_offset
2384 [va_arg registers] |
2388 [padding2] | = to_allocate
2397 int outgoing_arguments_size;
2398 HOST_WIDE_INT frame;
2400 /* The offsets relative to ARG_POINTER. */
2401 HOST_WIDE_INT frame_pointer_offset;
2402 HOST_WIDE_INT hard_frame_pointer_offset;
2403 HOST_WIDE_INT stack_pointer_offset;
2404 HOST_WIDE_INT hfp_save_offset;
2405 HOST_WIDE_INT reg_save_offset;
2406 HOST_WIDE_INT sse_reg_save_offset;
2408 /* When save_regs_using_mov is set, emit prologue using
2409 move instead of push instructions. */
2410 bool save_regs_using_mov;
2413 /* Which cpu are we scheduling for. */
2414 enum attr_cpu ix86_schedule;
2416 /* Which cpu are we optimizing for. */
2417 enum processor_type ix86_tune;
2419 /* Which instruction set architecture to use. */
2420 enum processor_type ix86_arch;
2423 /* True if the SSE prefetch instruction is not a NOP. */
2423 int x86_prefetch_sse;
2425 /* -mstackrealign option */
2426 static const char ix86_force_align_arg_pointer_string[]
2427 = "force_align_arg_pointer";
2429 static rtx (*ix86_gen_leave) (void);
2430 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2431 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2432 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2433 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2434 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2435 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2436 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2437 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2438 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
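/* These pointers are assigned once the word size is known; a sketch of
   the pattern used in ix86_option_override_internal (illustration only):

     if (TARGET_64BIT)
       ix86_gen_add3 = gen_adddi3;
     else
       ix86_gen_add3 = gen_addsi3;
*/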
2440 /* Preferred alignment for stack boundary in bits. */
2441 unsigned int ix86_preferred_stack_boundary;
2443 /* Alignment for incoming stack boundary in bits specified at
2445 static unsigned int ix86_user_incoming_stack_boundary;
2447 /* Default alignment for incoming stack boundary in bits. */
2448 static unsigned int ix86_default_incoming_stack_boundary;
2450 /* Alignment for incoming stack boundary in bits. */
2451 unsigned int ix86_incoming_stack_boundary;
2453 /* Calling-ABI-specific va_list type nodes. */
2454 static GTY(()) tree sysv_va_list_type_node;
2455 static GTY(()) tree ms_va_list_type_node;
2457 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2458 char internal_label_prefix[16];
2459 int internal_label_prefix_len;
2461 /* Fence to use after loop using movnt. */
2464 /* Register class used for passing a given 64bit part of the argument.
2465 These represent classes as documented by the psABI, with the exception
2466 of the SSESF and SSEDF classes, which are basically the SSE class except
2467 that gcc will use SFmode or DFmode moves instead of DImode to avoid
2469 reformatting penalties. Similarly we play games with INTEGERSI_CLASS
2470 to use cheaper SImode moves whenever possible (the upper half is padding). */
2471 enum x86_64_reg_class
2474 X86_64_INTEGER_CLASS,
2475 X86_64_INTEGERSI_CLASS,
2482 X86_64_COMPLEX_X87_CLASS,
2486 #define MAX_CLASSES 4
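/* Sketch of the intended use (illustration only; the real classifier,
   classify_argument, appears later in this file): each eightbyte of an
   argument gets one class, so a classification needs at most
   MAX_CLASSES entries:

     enum x86_64_reg_class classes[MAX_CLASSES];
     int n = classify_argument (mode, type, classes, 0);
*/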
2488 /* Table of constants used by fldpi, fldln2, etc. */
2489 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2490 static bool ext_80387_constants_init = 0;
2493 static struct machine_function * ix86_init_machine_status (void);
2494 static rtx ix86_function_value (const_tree, const_tree, bool);
2495 static bool ix86_function_value_regno_p (const unsigned int);
2496 static unsigned int ix86_function_arg_boundary (enum machine_mode,
2498 static rtx ix86_static_chain (const_tree, bool);
2499 static int ix86_function_regparm (const_tree, const_tree);
2500 static void ix86_compute_frame_layout (struct ix86_frame *);
2501 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
2503 static void ix86_add_new_builtins (int);
2504 static rtx ix86_expand_vec_perm_builtin (tree);
2505 static tree ix86_canonical_va_list_type (tree);
2506 static void predict_jump (int);
2507 static unsigned int split_stack_prologue_scratch_regno (void);
2508 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2510 enum ix86_function_specific_strings
2512 IX86_FUNCTION_SPECIFIC_ARCH,
2513 IX86_FUNCTION_SPECIFIC_TUNE,
2514 IX86_FUNCTION_SPECIFIC_MAX
2517 static char *ix86_target_string (int, int, const char *, const char *,
2518 enum fpmath_unit, bool);
2519 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
2520 static void ix86_function_specific_save (struct cl_target_option *);
2521 static void ix86_function_specific_restore (struct cl_target_option *);
2522 static void ix86_function_specific_print (FILE *, int,
2523 struct cl_target_option *);
2524 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2525 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2526 struct gcc_options *);
2527 static bool ix86_can_inline_p (tree, tree);
2528 static void ix86_set_current_function (tree);
2529 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2531 static enum calling_abi ix86_function_abi (const_tree);
2534 #ifndef SUBTARGET32_DEFAULT_CPU
2535 #define SUBTARGET32_DEFAULT_CPU "i386"
2538 /* The svr4 ABI for the i386 says that records and unions are returned
2540 #ifndef DEFAULT_PCC_STRUCT_RETURN
2541 #define DEFAULT_PCC_STRUCT_RETURN 1
2544 /* Whether -mtune= or -march= were specified */
2545 static int ix86_tune_defaulted;
2546 static int ix86_arch_specified;
2548 /* Vectorization library interface and handlers. */
2549 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2551 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2552 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2554 /* Processor target table, indexed by processor number */
2557 const struct processor_costs *cost; /* Processor costs */
2558 const int align_loop; /* Default alignments. */
2559 const int align_loop_max_skip;
2560 const int align_jump;
2561 const int align_jump_max_skip;
2562 const int align_func;
2565 static const struct ptt processor_target_table[PROCESSOR_max] =
2567 {&i386_cost, 4, 3, 4, 3, 4},
2568 {&i486_cost, 16, 15, 16, 15, 16},
2569 {&pentium_cost, 16, 7, 16, 7, 16},
2570 {&pentiumpro_cost, 16, 15, 16, 10, 16},
2571 {&geode_cost, 0, 0, 0, 0, 0},
2572 {&k6_cost, 32, 7, 32, 7, 32},
2573 {&athlon_cost, 16, 7, 16, 7, 16},
2574 {&pentium4_cost, 0, 0, 0, 0, 0},
2575 {&k8_cost, 16, 7, 16, 7, 16},
2576 {&nocona_cost, 0, 0, 0, 0, 0},
2577 /* Core 2 32-bit. */
2578 {&generic32_cost, 16, 10, 16, 10, 16},
2579 /* Core 2 64-bit. */
2580 {&generic64_cost, 16, 10, 16, 10, 16},
2581 /* Core i7 32-bit. */
2582 {&generic32_cost, 16, 10, 16, 10, 16},
2583 /* Core i7 64-bit. */
2584 {&generic64_cost, 16, 10, 16, 10, 16},
2585 {&generic32_cost, 16, 7, 16, 7, 16},
2586 {&generic64_cost, 16, 10, 16, 10, 16},
2587 {&amdfam10_cost, 32, 24, 32, 7, 32},
2588 {&bdver1_cost, 32, 24, 32, 7, 32},
2589 {&bdver2_cost, 32, 24, 32, 7, 32},
2590 {&btver1_cost, 32, 24, 32, 7, 32},
2591 {&atom_cost, 16, 7, 16, 7, 16}
2594 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
2624 /* Return true if a red-zone is in use. */
2627 ix86_using_red_zone (void)
2629 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2632 /* Return a string that documents the current -m options. The caller is
2633 responsible for freeing the string. */
2636 ix86_target_string (int isa, int flags, const char *arch, const char *tune,
2637 enum fpmath_unit fpmath, bool add_nl_p)
2639 struct ix86_target_opts
2641 const char *option; /* option string */
2642 int mask; /* isa mask options */
2645 /* This table is ordered so that options like -msse4.2, which imply
2646 preceding options, are matched first. */
2647 static struct ix86_target_opts isa_opts[] =
2649 { "-m64", OPTION_MASK_ISA_64BIT },
2650 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2651 { "-mfma", OPTION_MASK_ISA_FMA },
2652 { "-mxop", OPTION_MASK_ISA_XOP },
2653 { "-mlwp", OPTION_MASK_ISA_LWP },
2654 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2655 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2656 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2657 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2658 { "-msse3", OPTION_MASK_ISA_SSE3 },
2659 { "-msse2", OPTION_MASK_ISA_SSE2 },
2660 { "-msse", OPTION_MASK_ISA_SSE },
2661 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2662 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2663 { "-mmmx", OPTION_MASK_ISA_MMX },
2664 { "-mabm", OPTION_MASK_ISA_ABM },
2665 { "-mbmi", OPTION_MASK_ISA_BMI },
2666 { "-mtbm", OPTION_MASK_ISA_TBM },
2667 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2668 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2669 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2670 { "-maes", OPTION_MASK_ISA_AES },
2671 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2672 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2673 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2674 { "-mf16c", OPTION_MASK_ISA_F16C },
2678 static struct ix86_target_opts flag_opts[] =
2680 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2681 { "-m80387", MASK_80387 },
2682 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2683 { "-malign-double", MASK_ALIGN_DOUBLE },
2684 { "-mcld", MASK_CLD },
2685 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2686 { "-mieee-fp", MASK_IEEE_FP },
2687 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2688 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2689 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2690 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2691 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2692 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2693 { "-mno-red-zone", MASK_NO_RED_ZONE },
2694 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2695 { "-mrecip", MASK_RECIP },
2696 { "-mrtd", MASK_RTD },
2697 { "-msseregparm", MASK_SSEREGPARM },
2698 { "-mstack-arg-probe", MASK_STACK_PROBE },
2699 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2700 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2701 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2702 { "-mvzeroupper", MASK_VZEROUPPER },
2703 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
2704 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
2705 { "-mprefer-avx128", MASK_PREFER_AVX128},
2708 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2711 char target_other[40];
2720 memset (opts, '\0', sizeof (opts));
2722 /* Add -march= option. */
2725 opts[num][0] = "-march=";
2726 opts[num++][1] = arch;
2729 /* Add -mtune= option. */
2732 opts[num][0] = "-mtune=";
2733 opts[num++][1] = tune;
2736 /* Pick out the ISA options. */
2737 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2739 if ((isa & isa_opts[i].mask) != 0)
2741 opts[num++][0] = isa_opts[i].option;
2742 isa &= ~ isa_opts[i].mask;
2746 if (isa && add_nl_p)
2748 opts[num++][0] = isa_other;
2749 sprintf (isa_other, "(other isa: %#x)", isa);
2752 /* Add flag options. */
2753 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2755 if ((flags & flag_opts[i].mask) != 0)
2757 opts[num++][0] = flag_opts[i].option;
2758 flags &= ~ flag_opts[i].mask;
2762 if (flags && add_nl_p)
2764 opts[num++][0] = target_other;
2765 sprintf (target_other, "(other flags: %#x)", flags);
2768 /* Add -fpmath= option. */
2771 opts[num][0] = "-mfpmath=";
2772 switch ((int) fpmath)
2775 opts[num++][1] = "387";
2779 opts[num++][1] = "sse";
2782 case FPMATH_387 | FPMATH_SSE:
2783 opts[num++][1] = "sse+387";
2795 gcc_assert (num < ARRAY_SIZE (opts));
2797 /* Size the string. */
2799 sep_len = (add_nl_p) ? 3 : 1;
2800 for (i = 0; i < num; i++)
2803 for (j = 0; j < 2; j++)
2805 len += strlen (opts[i][j]);
2808 /* Build the string. */
2809 ret = ptr = (char *) xmalloc (len);
2812 for (i = 0; i < num; i++)
2816 for (j = 0; j < 2; j++)
2817 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2824 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2832 for (j = 0; j < 2; j++)
2835 memcpy (ptr, opts[i][j], len2[j]);
2837 line_len += len2[j];
2842 gcc_assert (ret + len >= ptr);
2847 /* Return true if profiling code should be emitted before the
2848 prologue, and false otherwise.
2849 Note: for x86 with "hotfix" this is sorried, i.e. reported as unsupported. */
2851 ix86_profile_before_prologue (void)
2853 return flag_fentry != 0;
2856 /* Function that is callable from the debugger to print the current
2859 ix86_debug_options (void)
2861 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2862 ix86_arch_string, ix86_tune_string,
2867 fprintf (stderr, "%s\n\n", opts);
2871 fputs ("<no options>\n\n", stderr);
2876 /* Override various settings based on options. If MAIN_ARGS_P, the
2877 options are from the command line, otherwise they are from
2881 ix86_option_override_internal (bool main_args_p)
2884 unsigned int ix86_arch_mask, ix86_tune_mask;
2885 const bool ix86_tune_specified = (ix86_tune_string != NULL);
2896 PTA_PREFETCH_SSE = 1 << 4,
2898 PTA_3DNOW_A = 1 << 6,
2902 PTA_POPCNT = 1 << 10,
2904 PTA_SSE4A = 1 << 12,
2905 PTA_NO_SAHF = 1 << 13,
2906 PTA_SSE4_1 = 1 << 14,
2907 PTA_SSE4_2 = 1 << 15,
2909 PTA_PCLMUL = 1 << 17,
2912 PTA_MOVBE = 1 << 20,
2916 PTA_FSGSBASE = 1 << 24,
2917 PTA_RDRND = 1 << 25,
2921 /* If this reaches 32, we need to widen the struct pta flags below. */
2926 const char *const name; /* processor name or nickname. */
2927 const enum processor_type processor;
2928 const enum attr_cpu schedule;
2929 const unsigned /*enum pta_flags*/ flags;
2931 const processor_alias_table[] =
2933 {"i386", PROCESSOR_I386, CPU_NONE, 0},
2934 {"i486", PROCESSOR_I486, CPU_NONE, 0},
2935 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2936 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2937 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
2938 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
2939 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2940 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2941 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
2942 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2943 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2944 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
2945 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2947 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2949 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2950 PTA_MMX | PTA_SSE | PTA_SSE2},
2951 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
2952 PTA_MMX | PTA_SSE | PTA_SSE2},
2953 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
2954 PTA_MMX | PTA_SSE | PTA_SSE2},
2955 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
2956 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2957 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
2958 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2959 | PTA_CX16 | PTA_NO_SAHF},
2960 {"core2", PROCESSOR_CORE2_64, CPU_CORE2,
2961 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2962 | PTA_SSSE3 | PTA_CX16},
2963 {"corei7", PROCESSOR_COREI7_64, CPU_COREI7,
2964 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2965 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_CX16},
2966 {"corei7-avx", PROCESSOR_COREI7_64, CPU_COREI7,
2967 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2968 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX
2969 | PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL},
2970 {"atom", PROCESSOR_ATOM, CPU_ATOM,
2971 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2972 | PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
2973 {"geode", PROCESSOR_GEODE, CPU_GEODE,
2974 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2975 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
2976 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2977 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2978 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
2979 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2980 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
2981 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2982 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
2983 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2984 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
2985 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2986 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
2987 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2988 {"x86-64", PROCESSOR_K8, CPU_K8,
2989 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
2990 {"k8", PROCESSOR_K8, CPU_K8,
2991 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2992 | PTA_SSE2 | PTA_NO_SAHF},
2993 {"k8-sse3", PROCESSOR_K8, CPU_K8,
2994 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2995 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2996 {"opteron", PROCESSOR_K8, CPU_K8,
2997 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2998 | PTA_SSE2 | PTA_NO_SAHF},
2999 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3000 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3001 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3002 {"athlon64", PROCESSOR_K8, CPU_K8,
3003 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3004 | PTA_SSE2 | PTA_NO_SAHF},
3005 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3006 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3007 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3008 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3009 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3010 | PTA_SSE2 | PTA_NO_SAHF},
3011 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3012 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3013 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
3014 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3015 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3016 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
3017 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3018 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3019 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3020 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3021 | PTA_XOP | PTA_LWP},
3022 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3023 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3024 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3025 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3026 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3028 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC64,
3029 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3030 | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16},
3031 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
3032 0 /* flags are only used for -march switch. */ },
3033 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
3034 PTA_64BIT /* flags are only used for -march switch. */ },
3037 int const pta_size = ARRAY_SIZE (processor_alias_table);
3039 /* Set up prefix/suffix so the error messages refer to either the command
3040 line argument or the attribute(target). */
3049 prefix = "option(\"";
3054 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3055 SUBTARGET_OVERRIDE_OPTIONS;
3058 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3059 SUBSUBTARGET_OVERRIDE_OPTIONS;
3063 ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3065 /* -fPIC is the default for x86_64. */
3066 if (TARGET_MACHO && TARGET_64BIT)
3069 /* Need to check -mtune=generic first. */
3070 if (ix86_tune_string)
3072 if (!strcmp (ix86_tune_string, "generic")
3073 || !strcmp (ix86_tune_string, "i686")
3074 /* As special support for cross compilers, we read -mtune=native
3075 as -mtune=generic. With native compilers we won't see
3076 -mtune=native, as the driver will have rewritten it. */
3077 || !strcmp (ix86_tune_string, "native"))
3080 ix86_tune_string = "generic64";
3082 ix86_tune_string = "generic32";
3084 /* If this call is for setting the option attribute, allow the
3085 generic32/generic64 that was previously set. */
3086 else if (!main_args_p
3087 && (!strcmp (ix86_tune_string, "generic32")
3088 || !strcmp (ix86_tune_string, "generic64")))
3090 else if (!strncmp (ix86_tune_string, "generic", 7))
3091 error ("bad value (%s) for %stune=%s %s",
3092 ix86_tune_string, prefix, suffix, sw);
3093 else if (!strcmp (ix86_tune_string, "x86-64"))
3094 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3095 "%stune=k8%s or %stune=generic%s instead as appropriate",
3096 prefix, suffix, prefix, suffix, prefix, suffix);
3100 if (ix86_arch_string)
3101 ix86_tune_string = ix86_arch_string;
3102 if (!ix86_tune_string)
3104 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
3105 ix86_tune_defaulted = 1;
3108 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
3109 need to use a sensible tune option. */
3110 if (!strcmp (ix86_tune_string, "generic")
3111 || !strcmp (ix86_tune_string, "x86-64")
3112 || !strcmp (ix86_tune_string, "i686"))
3115 ix86_tune_string = "generic64";
3117 ix86_tune_string = "generic32";
3121 if (ix86_stringop_alg == rep_prefix_8_byte && !TARGET_64BIT)
3123 /* rep; movq isn't available in 32-bit code. */
3124 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3125 ix86_stringop_alg = no_stringop;
3128 if (!ix86_arch_string)
3129 ix86_arch_string = TARGET_64BIT ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3131 ix86_arch_specified = 1;
3133 if (!global_options_set.x_ix86_abi)
3134 ix86_abi = DEFAULT_ABI;
3136 if (ix86_abi == MS_ABI && TARGET_X32)
3137 error ("MS ABI not supported in x32 mode");
3139 if (global_options_set.x_ix86_cmodel)
3141 switch (ix86_cmodel)
3146 ix86_cmodel = CM_SMALL_PIC;
3148 error ("code model %qs not supported in the %s bit mode",
3155 ix86_cmodel = CM_MEDIUM_PIC;
3157 error ("code model %qs not supported in the %s bit mode",
3159 else if (TARGET_X32)
3160 error ("code model %qs not supported in x32 mode",
3167 ix86_cmodel = CM_LARGE_PIC;
3169 error ("code model %qs not supported in the %s bit mode",
3171 else if (TARGET_X32)
3172 error ("code model %qs not supported in x32 mode",
3178 error ("code model %s does not support PIC mode", "32");
3180 error ("code model %qs not supported in the %s bit mode",
3187 error ("code model %s does not support PIC mode", "kernel");
3188 ix86_cmodel = CM_32;
3191 error ("code model %qs not supported in the %s bit mode",
3201 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3202 use of rip-relative addressing. This eliminates fixups that
3203 would otherwise be needed if this object is to be placed in a
3204 DLL, and is essentially just as efficient as direct addressing. */
3205 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
3206 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
3207 else if (TARGET_64BIT)
3208 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
3210 ix86_cmodel = CM_32;
3212 if (TARGET_MACHO && ix86_asm_dialect == ASM_INTEL)
3214 error ("-masm=intel not supported in this configuration");
3215 ix86_asm_dialect = ASM_ATT;
3217 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3218 sorry ("%i-bit mode not compiled in",
3219 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3221 for (i = 0; i < pta_size; i++)
3222 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
3224 ix86_schedule = processor_alias_table[i].schedule;
3225 ix86_arch = processor_alias_table[i].processor;
3226 /* Default cpu tuning to the architecture. */
3227 ix86_tune = ix86_arch;
3229 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3230 error ("CPU you selected does not support x86-64 "
3233 if (processor_alias_table[i].flags & PTA_MMX
3234 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3235 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3236 if (processor_alias_table[i].flags & PTA_3DNOW
3237 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3238 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3239 if (processor_alias_table[i].flags & PTA_3DNOW_A
3240 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3241 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3242 if (processor_alias_table[i].flags & PTA_SSE
3243 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3244 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3245 if (processor_alias_table[i].flags & PTA_SSE2
3246 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3247 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3248 if (processor_alias_table[i].flags & PTA_SSE3
3249 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3250 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3251 if (processor_alias_table[i].flags & PTA_SSSE3
3252 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3253 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3254 if (processor_alias_table[i].flags & PTA_SSE4_1
3255 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3256 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3257 if (processor_alias_table[i].flags & PTA_SSE4_2
3258 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3259 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3260 if (processor_alias_table[i].flags & PTA_AVX
3261 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3262 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3263 if (processor_alias_table[i].flags & PTA_FMA
3264 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3265 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3266 if (processor_alias_table[i].flags & PTA_SSE4A
3267 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3268 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3269 if (processor_alias_table[i].flags & PTA_FMA4
3270 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3271 ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3272 if (processor_alias_table[i].flags & PTA_XOP
3273 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3274 ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3275 if (processor_alias_table[i].flags & PTA_LWP
3276 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3277 ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3278 if (processor_alias_table[i].flags & PTA_ABM
3279 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3280 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3281 if (processor_alias_table[i].flags & PTA_BMI
3282 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3283 ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3284 if (processor_alias_table[i].flags & PTA_TBM
3285 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3286 ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3287 if (processor_alias_table[i].flags & PTA_CX16
3288 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3289 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3290 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3291 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3292 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3293 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
3294 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3295 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3296 if (processor_alias_table[i].flags & PTA_MOVBE
3297 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3298 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3299 if (processor_alias_table[i].flags & PTA_AES
3300 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3301 ix86_isa_flags |= OPTION_MASK_ISA_AES;
3302 if (processor_alias_table[i].flags & PTA_PCLMUL
3303 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3304 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3305 if (processor_alias_table[i].flags & PTA_FSGSBASE
3306 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3307 ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3308 if (processor_alias_table[i].flags & PTA_RDRND
3309 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3310 ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3311 if (processor_alias_table[i].flags & PTA_F16C
3312 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3313 ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3314 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3315 x86_prefetch_sse = true;
3320 if (!strcmp (ix86_arch_string, "generic"))
3321 error ("generic CPU can be used only for %stune=%s %s",
3322 prefix, suffix, sw);
3323 else if (!strncmp (ix86_arch_string, "generic", 7) || i == pta_size)
3324 error ("bad value (%s) for %sarch=%s %s",
3325 ix86_arch_string, prefix, suffix, sw);
3327 ix86_arch_mask = 1u << ix86_arch;
3328 for (i = 0; i < X86_ARCH_LAST; ++i)
3329 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3331 for (i = 0; i < pta_size; i++)
3332 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
3334 ix86_schedule = processor_alias_table[i].schedule;
3335 ix86_tune = processor_alias_table[i].processor;
3338 if (!(processor_alias_table[i].flags & PTA_64BIT))
3340 if (ix86_tune_defaulted)
3342 ix86_tune_string = "x86-64";
3343 for (i = 0; i < pta_size; i++)
3344 if (! strcmp (ix86_tune_string,
3345 processor_alias_table[i].name))
3347 ix86_schedule = processor_alias_table[i].schedule;
3348 ix86_tune = processor_alias_table[i].processor;
3351 error ("CPU you selected does not support x86-64 "
3357 /* Adjust tuning when compiling for the 32-bit ABI. */
3360 case PROCESSOR_GENERIC64:
3361 ix86_tune = PROCESSOR_GENERIC32;
3362 ix86_schedule = CPU_PENTIUMPRO;
3365 case PROCESSOR_CORE2_64:
3366 ix86_tune = PROCESSOR_CORE2_32;
3369 case PROCESSOR_COREI7_64:
3370 ix86_tune = PROCESSOR_COREI7_32;
3377 /* Intel CPUs have always interpreted SSE prefetch instructions as
3378 NOPs, so we can enable SSE prefetch instructions even when
3379 -mtune (rather than -march) points us to a processor that has them.
3380 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3381 higher processors. */
3383 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3384 x86_prefetch_sse = true;
3388 if (ix86_tune_specified && i == pta_size)
3389 error ("bad value (%s) for %stune=%s %s",
3390 ix86_tune_string, prefix, suffix, sw);
3392 ix86_tune_mask = 1u << ix86_tune;
3393 for (i = 0; i < X86_TUNE_LAST; ++i)
3394 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3396 #ifndef USE_IX86_FRAME_POINTER
3397 #define USE_IX86_FRAME_POINTER 0
3400 #ifndef USE_X86_64_FRAME_POINTER
3401 #define USE_X86_64_FRAME_POINTER 0
3404 /* Set the default values for switches whose default depends on TARGET_64BIT
3405 in case they weren't overridden by command-line options. */
3408 if (optimize > 1 && !global_options_set.x_flag_zee)
3410 if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
3411 flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3412 if (flag_asynchronous_unwind_tables == 2)
3413 flag_unwind_tables = flag_asynchronous_unwind_tables = 1;
3414 if (flag_pcc_struct_return == 2)
3415 flag_pcc_struct_return = 0;
3419 if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
3420 flag_omit_frame_pointer = !(USE_IX86_FRAME_POINTER || optimize_size);
3421 if (flag_asynchronous_unwind_tables == 2)
3422 flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3423 if (flag_pcc_struct_return == 2)
3424 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3428 ix86_cost = &ix86_size_cost;
3430 ix86_cost = processor_target_table[ix86_tune].cost;
3432 /* Arrange to set up i386_stack_locals for all functions. */
3433 init_machine_status = ix86_init_machine_status;
3435 /* Validate -mregparm= value. */
3436 if (global_options_set.x_ix86_regparm)
3439 warning (0, "-mregparm is ignored in 64-bit mode");
3440 if (ix86_regparm > REGPARM_MAX)
3442 error ("-mregparm=%d is not between 0 and %d",
3443 ix86_regparm, REGPARM_MAX);
3448 ix86_regparm = REGPARM_MAX;
3450 /* Default align_* from the processor table. */
3451 if (align_loops == 0)
3453 align_loops = processor_target_table[ix86_tune].align_loop;
3454 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3456 if (align_jumps == 0)
3458 align_jumps = processor_target_table[ix86_tune].align_jump;
3459 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3461 if (align_functions == 0)
3463 align_functions = processor_target_table[ix86_tune].align_func;
3466 /* Provide default for -mbranch-cost= value. */
3467 if (!global_options_set.x_ix86_branch_cost)
3468 ix86_branch_cost = ix86_cost->branch_cost;
3472 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3474 /* Enable by default the SSE and MMX builtins. Do allow the user to
3475 explicitly disable any of these. In particular, disabling SSE and
3476 MMX for kernel code is extremely useful. */
3477 if (!ix86_arch_specified)
3479 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3480 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3483 warning (0, "%srtd%s is ignored in 64-bit mode", prefix, suffix);
3487 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3489 if (!ix86_arch_specified)
3491 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3493 /* The i386 ABI does not specify a red zone. It still makes sense to use it
3494 when the programmer takes care to keep the stack from being destroyed. */
3495 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3496 target_flags |= MASK_NO_RED_ZONE;
3499 /* Keep nonleaf frame pointers. */
3500 if (flag_omit_frame_pointer)
3501 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3502 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3503 flag_omit_frame_pointer = 1;
3505 /* If we're doing fast math, we don't care about comparison order
3506 wrt NaNs. This lets us use a shorter comparison sequence. */
3507 if (flag_finite_math_only)
3508 target_flags &= ~MASK_IEEE_FP;
3510 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3511 since the insns won't need emulation. */
3512 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3513 target_flags &= ~MASK_NO_FANCY_MATH_387;
3515 /* Likewise, if the target doesn't have a 387, or we've specified
3516 software floating point, don't use 387 inline intrinsics. */
3518 target_flags |= MASK_NO_FANCY_MATH_387;
3520 /* Turn on MMX builtins for -msse. */
3523 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3524 x86_prefetch_sse = true;
3527 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3528 if (TARGET_SSE4_2 || TARGET_ABM)
3529 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3531 /* Validate -mpreferred-stack-boundary= value or default it to
3532 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3533 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3534 if (global_options_set.x_ix86_preferred_stack_boundary_arg)
3536 int min = (TARGET_64BIT ? 4 : 2);
3537 int max = (TARGET_SEH ? 4 : 12);
3539 if (ix86_preferred_stack_boundary_arg < min
3540 || ix86_preferred_stack_boundary_arg > max)
3543 error ("-mpreferred-stack-boundary is not supported "
3546 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
3547 ix86_preferred_stack_boundary_arg, min, max);
3550 ix86_preferred_stack_boundary
3551 = (1 << ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
3554 /* Set the default value for -mstackrealign. */
3555 if (ix86_force_align_arg_pointer == -1)
3556 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3558 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3560 /* Validate -mincoming-stack-boundary= value or default it to
3561 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3562 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3563 if (global_options_set.x_ix86_incoming_stack_boundary_arg)
3565 if (ix86_incoming_stack_boundary_arg < (TARGET_64BIT ? 4 : 2)
3566 || ix86_incoming_stack_boundary_arg > 12)
3567 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3568 ix86_incoming_stack_boundary_arg, TARGET_64BIT ? 4 : 2);
3571 ix86_user_incoming_stack_boundary
3572 = (1 << ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
3573 ix86_incoming_stack_boundary
3574 = ix86_user_incoming_stack_boundary;
3578 /* Accept -msseregparm only if at least SSE support is enabled. */
3579 if (TARGET_SSEREGPARM
3581 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3583 if (global_options_set.x_ix86_fpmath)
3585 if (ix86_fpmath & FPMATH_SSE)
3589 warning (0, "SSE instruction set disabled, using 387 arithmetic");
3590 ix86_fpmath = FPMATH_387;
3592 else if ((ix86_fpmath & FPMATH_387) && !TARGET_80387)
3594 warning (0, "387 instruction set disabled, using SSE arithmetic");
3595 ix86_fpmath = FPMATH_SSE;
3600 ix86_fpmath = TARGET_FPMATH_DEFAULT;
3602 /* If the i387 is disabled, then do not return values in it. */
3604 target_flags &= ~MASK_FLOAT_RETURNS;
3606 /* Use an external vectorized library for vectorizing intrinsics. */
3607 if (global_options_set.x_ix86_veclibabi_type)
3608 switch (ix86_veclibabi_type)
3610 case ix86_veclibabi_type_svml:
3611 ix86_veclib_handler = ix86_veclibabi_svml;
3614 case ix86_veclibabi_type_acml:
3615 ix86_veclib_handler = ix86_veclibabi_acml;
3622 if ((!USE_IX86_FRAME_POINTER
3623 || (x86_accumulate_outgoing_args & ix86_tune_mask))
3624 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3626 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3628 /* ??? Unwind info is not correct around the CFG unless either a frame
3629 pointer is present or M_A_O_A is set. Fixing this requires rewriting
3630 unwind info generation to be aware of the CFG and propagating states
3632 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
3633 || flag_exceptions || flag_non_call_exceptions)
3634 && flag_omit_frame_pointer
3635 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3637 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3638 warning (0, "unwind tables currently require either a frame pointer "
3639 "or %saccumulate-outgoing-args%s for correctness",
3641 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3644 /* If stack probes are required, the space used for large function
3645 arguments on the stack must also be probed, so enable
3646 -maccumulate-outgoing-args so this happens in the prologue. */
3647 if (TARGET_STACK_PROBE
3648 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3650 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3651 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3652 "for correctness", prefix, suffix);
3653 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3656 /* For sane SSE instruction set generation we need the fcomi instruction.
3657 It is safe to enable all CMOV instructions. Also, the RDRAND intrinsic
3658 expands to a sequence that includes a conditional move. */
3659 if (TARGET_SSE || TARGET_RDRND)
3662 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3665 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
3666 p = strchr (internal_label_prefix, 'X');
3667 internal_label_prefix_len = p - internal_label_prefix;
3671 /* When the scheduling description is not available, disable the scheduler
3672 pass so it won't slow down compilation and make x87 code slower. */
3673 if (!TARGET_SCHEDULE)
3674 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
3676 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3677 ix86_cost->simultaneous_prefetches,
3678 global_options.x_param_values,
3679 global_options_set.x_param_values);
3680 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, ix86_cost->prefetch_block,
3681 global_options.x_param_values,
3682 global_options_set.x_param_values);
3683 maybe_set_param_value (PARAM_L1_CACHE_SIZE, ix86_cost->l1_cache_size,
3684 global_options.x_param_values,
3685 global_options_set.x_param_values);
3686 maybe_set_param_value (PARAM_L2_CACHE_SIZE, ix86_cost->l2_cache_size,
3687 global_options.x_param_values,
3688 global_options_set.x_param_values);
3690 /* Enable software prefetching at -O3 for CPUs where prefetching is helpful. */
3691 if (flag_prefetch_loop_arrays < 0
3694 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
3695 flag_prefetch_loop_arrays = 1;
3697 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3698 can be optimized to ap = __builtin_next_arg (0). */
3699 if (!TARGET_64BIT && !flag_split_stack)
3700 targetm.expand_builtin_va_start = NULL;
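/* The folding referred to above rewrites, for a plain char *va_list,

     __builtin_va_start (&ap, 0);

   into the equivalent

     ap = __builtin_next_arg (0);

   so the target hook is kept only where that rewrite is invalid
   (64-bit, or -fsplit-stack).  */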
3704 ix86_gen_leave = gen_leave_rex64;
3705 ix86_gen_add3 = gen_adddi3;
3706 ix86_gen_sub3 = gen_subdi3;
3707 ix86_gen_sub3_carry = gen_subdi3_carry;
3708 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
3709 ix86_gen_monitor = gen_sse3_monitor64;
3710 ix86_gen_andsp = gen_anddi3;
3711 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
3712 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
3713 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
3717 ix86_gen_leave = gen_leave;
3718 ix86_gen_add3 = gen_addsi3;
3719 ix86_gen_sub3 = gen_subsi3;
3720 ix86_gen_sub3_carry = gen_subsi3_carry;
3721 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
3722 ix86_gen_monitor = gen_sse3_monitor;
3723 ix86_gen_andsp = gen_andsi3;
3724 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
3725 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
3726 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
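/* These hooks let word-size-independent code emit the right pattern
   without testing TARGET_64BIT at each use; a typical (sketched)
   call site is

     emit_insn (ix86_gen_add3 (dest, src, offset));

   which expands adddi3 in 64-bit mode and addsi3 otherwise.  */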
3730 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
3732 target_flags |= MASK_CLD & ~target_flags_explicit;
3735 if (!TARGET_64BIT && flag_pic)
3737 if (flag_fentry > 0)
3738 sorry ("-mfentry isn%'t supported for 32-bit in combination "
3742 else if (TARGET_SEH)
3744 if (flag_fentry == 0)
3745 sorry ("-mno-fentry isn%'t compatible with SEH");
3748 else if (flag_fentry < 0)
3750 #if defined(PROFILE_BEFORE_PROLOGUE)
3759 /* When not optimizing for size, enable the vzeroupper optimization for
3760 TARGET_AVX with -fexpensive-optimizations and split 32-byte
3761 AVX unaligned loads/stores. */
3764 if (flag_expensive_optimizations
3765 && !(target_flags_explicit & MASK_VZEROUPPER))
3766 target_flags |= MASK_VZEROUPPER;
3767 if ((x86_avx256_split_unaligned_load & ix86_tune_mask)
3768 && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
3769 target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
3770 if ((x86_avx256_split_unaligned_store & ix86_tune_mask)
3771 && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_STORE))
3772 target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
3773 /* Enable 128-bit AVX instruction generation for the auto-vectorizer. */
3774 if (TARGET_AVX128_OPTIMAL && !(target_flags_explicit & MASK_PREFER_AVX128))
3775 target_flags |= MASK_PREFER_AVX128;
3780 /* Disable vzeroupper pass if TARGET_AVX is disabled. */
3781 target_flags &= ~MASK_VZEROUPPER;
3784 /* Save the initial options in case the user does function specific
3787 target_option_default_node = target_option_current_node
3788 = build_target_option_node ();
3791 /* Return TRUE if VAL is passed in register with 256bit AVX modes. */
3794 function_pass_avx256_p (const_rtx val)
3799 if (REG_P (val) && VALID_AVX256_REG_MODE (GET_MODE (val)))
3802 if (GET_CODE (val) == PARALLEL)
3807 for (i = XVECLEN (val, 0) - 1; i >= 0; i--)
3809 r = XVECEXP (val, 0, i);
3810 if (GET_CODE (r) == EXPR_LIST
3812 && REG_P (XEXP (r, 0))
3813 && (GET_MODE (XEXP (r, 0)) == OImode
3814 || VALID_AVX256_REG_MODE (GET_MODE (XEXP (r, 0)))))
3822 /* Implement the TARGET_OPTION_OVERRIDE hook. */
3825 ix86_option_override (void)
3827 ix86_option_override_internal (true);
3830 /* Update register usage after having seen the compiler flags. */
3833 ix86_conditional_register_usage (void)
3838 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3840 if (fixed_regs[i] > 1)
3841 fixed_regs[i] = (fixed_regs[i] == (TARGET_64BIT ? 3 : 2));
3842 if (call_used_regs[i] > 1)
3843 call_used_regs[i] = (call_used_regs[i] == (TARGET_64BIT ? 3 : 2));
3846 /* The PIC register, if it exists, is fixed. */
3847 j = PIC_OFFSET_TABLE_REGNUM;
3848 if (j != INVALID_REGNUM)
3849 fixed_regs[j] = call_used_regs[j] = 1;
3851 /* The 64-bit MS_ABI changes the set of call-used registers. */
3852 if (TARGET_64BIT_MS_ABI)
3854 call_used_regs[SI_REG] = 0;
3855 call_used_regs[DI_REG] = 0;
3856 call_used_regs[XMM6_REG] = 0;
3857 call_used_regs[XMM7_REG] = 0;
3858 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
3859 call_used_regs[i] = 0;
3862 /* The default setting of CLOBBERED_REGS is for 32-bit; add in the
3863 other call-clobbered regs for 64-bit. */
3866 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
3868 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3869 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
3870 && call_used_regs[i])
3871 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
3874 /* If MMX is disabled, squash the registers. */
3876 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3877 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
3878 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3880 /* If SSE is disabled, squash the registers. */
3882 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3883 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
3884 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3886 /* If the FPU is disabled, squash the registers. */
3887 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
3888 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3889 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
3890 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3892 /* If 32-bit, squash the 64-bit registers. */
3895 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
3897 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
3903 /* Save the current options */
3906 ix86_function_specific_save (struct cl_target_option *ptr)
3908 ptr->arch = ix86_arch;
3909 ptr->schedule = ix86_schedule;
3910 ptr->tune = ix86_tune;
3911 ptr->branch_cost = ix86_branch_cost;
3912 ptr->tune_defaulted = ix86_tune_defaulted;
3913 ptr->arch_specified = ix86_arch_specified;
3914 ptr->x_ix86_isa_flags_explicit = ix86_isa_flags_explicit;
3915 ptr->ix86_target_flags_explicit = target_flags_explicit;
3917 /* The fields are char but the variables are not; make sure the
3918 values fit in the fields. */
3919 gcc_assert (ptr->arch == ix86_arch);
3920 gcc_assert (ptr->schedule == ix86_schedule);
3921 gcc_assert (ptr->tune == ix86_tune);
3922 gcc_assert (ptr->branch_cost == ix86_branch_cost);
3925 /* Restore the current options */
3928 ix86_function_specific_restore (struct cl_target_option *ptr)
3930 enum processor_type old_tune = ix86_tune;
3931 enum processor_type old_arch = ix86_arch;
3932 unsigned int ix86_arch_mask, ix86_tune_mask;
3935 ix86_arch = (enum processor_type) ptr->arch;
3936 ix86_schedule = (enum attr_cpu) ptr->schedule;
3937 ix86_tune = (enum processor_type) ptr->tune;
3938 ix86_branch_cost = ptr->branch_cost;
3939 ix86_tune_defaulted = ptr->tune_defaulted;
3940 ix86_arch_specified = ptr->arch_specified;
3941 ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
3942 target_flags_explicit = ptr->ix86_target_flags_explicit;
3944 /* Recreate the arch feature tests if the arch changed */
3945 if (old_arch != ix86_arch)
3947 ix86_arch_mask = 1u << ix86_arch;
3948 for (i = 0; i < X86_ARCH_LAST; ++i)
3949 ix86_arch_features[i]
3950 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3953 /* Recreate the tune optimization tests */
3954 if (old_tune != ix86_tune)
3956 ix86_tune_mask = 1u << ix86_tune;
3957 for (i = 0; i < X86_TUNE_LAST; ++i)
3958 ix86_tune_features[i]
3959 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3963 /* Print the current options */
3966 ix86_function_specific_print (FILE *file, int indent,
3967 struct cl_target_option *ptr)
3970 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
3971 NULL, NULL, ptr->x_ix86_fpmath, false);
3973 fprintf (file, "%*sarch = %d (%s)\n",
3976 ((ptr->arch < TARGET_CPU_DEFAULT_max)
3977 ? cpu_names[ptr->arch]
3980 fprintf (file, "%*stune = %d (%s)\n",
3983 ((ptr->tune < TARGET_CPU_DEFAULT_max)
3984 ? cpu_names[ptr->tune]
3987 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
3991 fprintf (file, "%*s%s\n", indent, "", target_string);
3992 free (target_string);
3997 /* Inner function to process the attribute((target(...))): take an argument and
3998 set the current options from the argument. If we have a list, recursively go over the list. */
4002 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
4003 struct gcc_options *enum_opts_set)
4008 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4009 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4010 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4011 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4012 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
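/* As an illustration of the macros above,

     IX86_ATTR_ISA ("avx", OPT_mavx)

   expands to { "avx", 3, ix86_opt_isa, OPT_mavx, 0 }: the option name,
   its length, the handler kind, the option enum, and a target_flags
   mask (unused for ISA options).  */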
4028 enum ix86_opt_type type;
4033 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4034 IX86_ATTR_ISA ("abm", OPT_mabm),
4035 IX86_ATTR_ISA ("bmi", OPT_mbmi),
4036 IX86_ATTR_ISA ("tbm", OPT_mtbm),
4037 IX86_ATTR_ISA ("aes", OPT_maes),
4038 IX86_ATTR_ISA ("avx", OPT_mavx),
4039 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4040 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4041 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4042 IX86_ATTR_ISA ("sse", OPT_msse),
4043 IX86_ATTR_ISA ("sse2", OPT_msse2),
4044 IX86_ATTR_ISA ("sse3", OPT_msse3),
4045 IX86_ATTR_ISA ("sse4", OPT_msse4),
4046 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4047 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4048 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4049 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4050 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4051 IX86_ATTR_ISA ("xop", OPT_mxop),
4052 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4053 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4054 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4055 IX86_ATTR_ISA ("f16c", OPT_mf16c),
4058 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
4060 /* string options */
4061 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4062 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
4065 IX86_ATTR_YES ("cld",
4069 IX86_ATTR_NO ("fancy-math-387",
4070 OPT_mfancy_math_387,
4071 MASK_NO_FANCY_MATH_387),
4073 IX86_ATTR_YES ("ieee-fp",
4077 IX86_ATTR_YES ("inline-all-stringops",
4078 OPT_minline_all_stringops,
4079 MASK_INLINE_ALL_STRINGOPS),
4081 IX86_ATTR_YES ("inline-stringops-dynamically",
4082 OPT_minline_stringops_dynamically,
4083 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4085 IX86_ATTR_NO ("align-stringops",
4086 OPT_mno_align_stringops,
4087 MASK_NO_ALIGN_STRINGOPS),
4089 IX86_ATTR_YES ("recip",
4095 /* If this is a list, recurse to get the options. */
4096 if (TREE_CODE (args) == TREE_LIST)
4100 for (; args; args = TREE_CHAIN (args))
4101 if (TREE_VALUE (args)
4102 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
4103 p_strings, enum_opts_set))
4109 else if (TREE_CODE (args) != STRING_CST)
4112 /* Handle multiple arguments separated by commas. */
4113 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4115 while (next_optstr && *next_optstr != '\0')
4117 char *p = next_optstr;
4119 char *comma = strchr (next_optstr, ',');
4120 const char *opt_string;
4121 size_t len, opt_len;
4126 enum ix86_opt_type type = ix86_opt_unknown;
4132 len = comma - next_optstr;
4133 next_optstr = comma + 1;
4141 /* Recognize no-xxx. */
4142 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4151 /* Find the option. */
4154 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4156 type = attrs[i].type;
4157 opt_len = attrs[i].len;
4158 if (ch == attrs[i].string[0]
4159 && ((type != ix86_opt_str && type != ix86_opt_enum)
4162 && memcmp (p, attrs[i].string, opt_len) == 0)
4165 mask = attrs[i].mask;
4166 opt_string = attrs[i].string;
4171 /* Process the option. */
4174 error ("attribute(target(\"%s\")) is unknown", orig_p);
4178 else if (type == ix86_opt_isa)
4180 struct cl_decoded_option decoded;
4182 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
4183 ix86_handle_option (&global_options, &global_options_set,
4184 &decoded, input_location);
4187 else if (type == ix86_opt_yes || type == ix86_opt_no)
4189 if (type == ix86_opt_no)
4190 opt_set_p = !opt_set_p;
4193 target_flags |= mask;
4195 target_flags &= ~mask;
4198 else if (type == ix86_opt_str)
4202 error ("option(\"%s\") was already specified", opt_string);
4206 p_strings[opt] = xstrdup (p + opt_len);
4209 else if (type == ix86_opt_enum)
4214 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
4216 set_option (&global_options, enum_opts_set, opt, value,
4217 p + opt_len, DK_UNSPECIFIED, input_location,
4221 error ("attribute(target(\"%s\")) is unknown", orig_p);
4233 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4236 ix86_valid_target_attribute_tree (tree args)
4238 const char *orig_arch_string = ix86_arch_string;
4239 const char *orig_tune_string = ix86_tune_string;
4240 enum fpmath_unit orig_fpmath_set = global_options_set.x_ix86_fpmath;
4241 int orig_tune_defaulted = ix86_tune_defaulted;
4242 int orig_arch_specified = ix86_arch_specified;
4243 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
4246 struct cl_target_option *def
4247 = TREE_TARGET_OPTION (target_option_default_node);
4248 struct gcc_options enum_opts_set;
4250 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
4252 /* Process each of the options on the chain. */
4253 if (! ix86_valid_target_attribute_inner_p (args, option_strings,
4257 /* If the changed options are different from the default, rerun
4258 ix86_option_override_internal, and then save the options away.
4259 The string options are attribute options, and will be undone
4260 when we copy the save structure. */
4261 if (ix86_isa_flags != def->x_ix86_isa_flags
4262 || target_flags != def->x_target_flags
4263 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4264 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4265 || enum_opts_set.x_ix86_fpmath)
4267 /* If we are using the default tune= or arch=, undo the string assigned,
4268 and use the default. */
4269 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4270 ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4271 else if (!orig_arch_specified)
4272 ix86_arch_string = NULL;
4274 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4275 ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4276 else if (orig_tune_defaulted)
4277 ix86_tune_string = NULL;
4279 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4280 if (enum_opts_set.x_ix86_fpmath)
4281 global_options_set.x_ix86_fpmath = (enum fpmath_unit) 1;
4282 else if (!TARGET_64BIT && TARGET_SSE)
4284 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
4285 global_options_set.x_ix86_fpmath = (enum fpmath_unit) 1;
4288 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4289 ix86_option_override_internal (false);
4291 /* Add any builtin functions with the new isa if any. */
4292 ix86_add_new_builtins (ix86_isa_flags);
4294 /* Save the current options unless we are validating options for
4296 t = build_target_option_node ();
4298 ix86_arch_string = orig_arch_string;
4299 ix86_tune_string = orig_tune_string;
4300 global_options_set.x_ix86_fpmath = orig_fpmath_set;
4302 /* Free up memory allocated to hold the strings */
4303 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4304 free (option_strings[i]);
4310 /* Hook to validate attribute((target("string"))). */
4313 ix86_valid_target_attribute_p (tree fndecl,
4314 tree ARG_UNUSED (name),
4316 int ARG_UNUSED (flags))
4318 struct cl_target_option cur_target;
4320 tree old_optimize = build_optimization_node ();
4321 tree new_target, new_optimize;
4322 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4324 /* If the function changed the optimization levels as well as setting target
4325 options, start with the optimizations specified. */
4326 if (func_optimize && func_optimize != old_optimize)
4327 cl_optimization_restore (&global_options,
4328 TREE_OPTIMIZATION (func_optimize));
4330 /* The target attributes may also change some optimization flags, so update
4331 the optimization options if necessary. */
4332 cl_target_option_save (&cur_target, &global_options);
4333 new_target = ix86_valid_target_attribute_tree (args);
4334 new_optimize = build_optimization_node ();
4341 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4343 if (old_optimize != new_optimize)
4344 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4347 cl_target_option_restore (&global_options, &cur_target);
4349 if (old_optimize != new_optimize)
4350 cl_optimization_restore (&global_options,
4351 TREE_OPTIMIZATION (old_optimize));
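/* For example, this hook accepts declarations such as

     int foo (int) __attribute__ ((target ("sse4.2,no-fancy-math-387")));

   which compiles foo with SSE4.2 enabled and fancy 387 math disabled,
   leaving the options for the rest of the translation unit untouched.  */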
4357 /* Hook to determine if one function can safely inline another. */
4360 ix86_can_inline_p (tree caller, tree callee)
4363 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
4364 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
4366 /* If the callee has no option attributes, then it is ok to inline. */
4370 /* If the caller has no option attributes but the callee does, then it is not ok to
4372 else if (!caller_tree)
4377 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
4378 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
4380 /* The callee's ISA options should be a subset of the caller's, i.e. an SSE4
4381 function can inline an SSE2 function but an SSE2 function can't inline an SSE4
4383 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
4384 != callee_opts->x_ix86_isa_flags)
4387 /* See if we have the same non-isa options. */
4388 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
4391 /* See if arch, tune, etc. are the same. */
4392 else if (caller_opts->arch != callee_opts->arch)
4395 else if (caller_opts->tune != callee_opts->tune)
4398 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
4401 else if (caller_opts->branch_cost != callee_opts->branch_cost)
4412 /* Remember the last target of ix86_set_current_function. */
4413 static GTY(()) tree ix86_previous_fndecl;
4415 /* Establish appropriate back-end context for processing the function
4416 FNDECL. The argument might be NULL to indicate processing at top
4417 level, outside of any function scope. */
4419 ix86_set_current_function (tree fndecl)
4421 /* Only change the context if the function changes. This hook is called
4422 several times in the course of compiling a function, and we don't want to
4423 slow things down too much or call target_reinit when it isn't safe. */
4424 if (fndecl && fndecl != ix86_previous_fndecl)
4426 tree old_tree = (ix86_previous_fndecl
4427 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
4430 tree new_tree = (fndecl
4431 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
4434 ix86_previous_fndecl = fndecl;
4435 if (old_tree == new_tree)
4440 cl_target_option_restore (&global_options,
4441 TREE_TARGET_OPTION (new_tree));
4447 struct cl_target_option *def
4448 = TREE_TARGET_OPTION (target_option_current_node);
4450 cl_target_option_restore (&global_options, def);
4457 /* Return true if this goes in large data/bss. */
4460 ix86_in_large_data_p (tree exp)
4462 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
4465 /* Functions are never large data. */
4466 if (TREE_CODE (exp) == FUNCTION_DECL)
4469 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
4471 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
4472 if (strcmp (section, ".ldata") == 0
4473 || strcmp (section, ".lbss") == 0)
4479 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
4481 /* If this is an incomplete type with size 0, then we can't put it
4482 in data because it might be too big when completed. */
4483 if (!size || size > ix86_section_threshold)
4490 /* Switch to the appropriate section for output of DECL.
4491 DECL is either a `VAR_DECL' node or a constant of some sort.
4492 RELOC indicates whether forming the initial value of DECL requires
4493 link-time relocations. */
4495 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
4499 x86_64_elf_select_section (tree decl, int reloc,
4500 unsigned HOST_WIDE_INT align)
4502 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4503 && ix86_in_large_data_p (decl))
4505 const char *sname = NULL;
4506 unsigned int flags = SECTION_WRITE;
4507 switch (categorize_decl_for_section (decl, reloc))
4512 case SECCAT_DATA_REL:
4513 sname = ".ldata.rel";
4515 case SECCAT_DATA_REL_LOCAL:
4516 sname = ".ldata.rel.local";
4518 case SECCAT_DATA_REL_RO:
4519 sname = ".ldata.rel.ro";
4521 case SECCAT_DATA_REL_RO_LOCAL:
4522 sname = ".ldata.rel.ro.local";
4526 flags |= SECTION_BSS;
4529 case SECCAT_RODATA_MERGE_STR:
4530 case SECCAT_RODATA_MERGE_STR_INIT:
4531 case SECCAT_RODATA_MERGE_CONST:
4535 case SECCAT_SRODATA:
4542 /* We don't split these for the medium model. Place them into
4543 default sections and hope for the best. */
4548 /* We might get called with string constants, but get_named_section
4549 doesn't like them as they are not DECLs. Also, we need to set
4550 flags in that case. */
4552 return get_section (sname, flags, NULL);
4553 return get_named_section (decl, sname, reloc);
4556 return default_elf_select_section (decl, reloc, align);
4559 /* Build up a unique section name, expressed as a
4560 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
4561 RELOC indicates whether the initial value of DECL requires
4562 link-time relocations. */
4564 static void ATTRIBUTE_UNUSED
4565 x86_64_elf_unique_section (tree decl, int reloc)
4567 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4568 && ix86_in_large_data_p (decl))
4570 const char *prefix = NULL;
4571 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
4572 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
4574 switch (categorize_decl_for_section (decl, reloc))
4577 case SECCAT_DATA_REL:
4578 case SECCAT_DATA_REL_LOCAL:
4579 case SECCAT_DATA_REL_RO:
4580 case SECCAT_DATA_REL_RO_LOCAL:
4581 prefix = one_only ? ".ld" : ".ldata";
4584 prefix = one_only ? ".lb" : ".lbss";
4587 case SECCAT_RODATA_MERGE_STR:
4588 case SECCAT_RODATA_MERGE_STR_INIT:
4589 case SECCAT_RODATA_MERGE_CONST:
4590 prefix = one_only ? ".lr" : ".lrodata";
4592 case SECCAT_SRODATA:
4599 /* We don't split these for the medium model. Place them into
4600 default sections and hope for the best. */
4605 const char *name, *linkonce;
4608 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
4609 name = targetm.strip_name_encoding (name);
4611 /* If we're using one_only, then there needs to be a .gnu.linkonce
4612 prefix to the section name. */
4613 linkonce = one_only ? ".gnu.linkonce" : "";
4615 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
4617 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
4621 default_unique_section (decl, reloc);
4624 #ifdef COMMON_ASM_OP
4625 /* This says how to output assembler code to declare an
4626 uninitialized external linkage data object.
4628 For medium-model x86-64 we need to use the .largecomm directive for
4631 x86_elf_aligned_common (FILE *file,
4632 const char *name, unsigned HOST_WIDE_INT size,
4635 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4636 && size > (unsigned int)ix86_section_threshold)
4637 fputs (".largecomm\t", file);
4639 fputs (COMMON_ASM_OP, file);
4640 assemble_name (file, name);
4641 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
4642 size, align / BITS_PER_UNIT);
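/* For instance, with -mcmodel=medium a tentative definition larger
   than the section threshold, say

     char big_buffer[1 << 20];

   is announced as ".largecomm big_buffer,1048576,<align>" instead of
   the COMMON_ASM_OP form used for small objects, so the linker can
   place it in the large data area.  */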
4646 /* Utility function for targets to use in implementing
4647 ASM_OUTPUT_ALIGNED_BSS. */
4650 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
4651 const char *name, unsigned HOST_WIDE_INT size,
4654 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4655 && size > (unsigned int)ix86_section_threshold)
4656 switch_to_section (get_named_section (decl, ".lbss", 0));
4658 switch_to_section (bss_section);
4659 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
4660 #ifdef ASM_DECLARE_OBJECT_NAME
4661 last_assemble_variable_decl = decl;
4662 ASM_DECLARE_OBJECT_NAME (file, name, decl);
4664 /* The standard thing is just to output a label for the object. */
4665 ASM_OUTPUT_LABEL (file, name);
4666 #endif /* ASM_DECLARE_OBJECT_NAME */
4667 ASM_OUTPUT_SKIP (file, size ? size : 1);
4670 /* Decide whether we must probe the stack before any space allocation
4671 on this target. It's essentially TARGET_STACK_PROBE except when
4672 -fstack-check causes the stack to be already probed differently. */
4675 ix86_target_stack_probe (void)
4677 /* Do not probe the stack twice if static stack checking is enabled. */
4678 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
4681 return TARGET_STACK_PROBE;
4684 /* Decide whether we can make a sibling call to a function. DECL is the
4685 declaration of the function being targeted by the call and EXP is the
4686 CALL_EXPR representing the call. */
4689 ix86_function_ok_for_sibcall (tree decl, tree exp)
4691 tree type, decl_or_type;
4694 /* If we are generating position-independent code, we cannot sibcall
4695 optimize any indirect call, or a direct call to a global function,
4696 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
4700 && (!decl || !targetm.binds_local_p (decl)))
4703 /* If we need to align the outgoing stack, then sibcalling would
4704 unalign the stack, which may break the called function. */
4705 if (ix86_minimum_incoming_stack_boundary (true)
4706 < PREFERRED_STACK_BOUNDARY)
4711 decl_or_type = decl;
4712 type = TREE_TYPE (decl);
4716 /* We're looking at the CALL_EXPR, we need the type of the function. */
4717 type = CALL_EXPR_FN (exp); /* pointer expression */
4718 type = TREE_TYPE (type); /* pointer type */
4719 type = TREE_TYPE (type); /* function type */
4720 decl_or_type = type;
4723 /* Check that the return value locations are the same. Like
4724 if we are returning floats on the 80387 register stack, we cannot
4725 make a sibcall from a function that doesn't return a float to a
4726 function that does or, conversely, from a function that does return
4727 a float to a function that doesn't; the necessary stack adjustment
4728 would not be executed. This is also the place we notice
4729 differences in the return value ABI. Note that it is ok for one
4730 of the functions to have void return type as long as the return
4731 value of the other is passed in a register. */
4732 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
4733 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4735 if (STACK_REG_P (a) || STACK_REG_P (b))
4737 if (!rtx_equal_p (a, b))
4740 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4742 /* Disable sibcall if we need to generate vzeroupper after
4744 if (TARGET_VZEROUPPER
4745 && cfun->machine->callee_return_avx256_p
4746 && !cfun->machine->caller_return_avx256_p)
4749 else if (!rtx_equal_p (a, b))
4754 /* The SYSV ABI has more call-clobbered registers;
4755 disallow sibcalls from MS to SYSV. */
4756 if (cfun->machine->call_abi == MS_ABI
4757 && ix86_function_type_abi (type) == SYSV_ABI)
4762 /* If this call is indirect, we'll need to be able to use a
4763 call-clobbered register for the address of the target function.
4764 Make sure that all such registers are not used for passing
4765 parameters. Note that DLLIMPORT functions are indirect. */
4767 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
4769 if (ix86_function_regparm (type, NULL) >= 3)
4771 /* ??? Need to count the actual number of registers to be used,
4772 not the possible number of registers. Fix later. */
4778 /* Otherwise okay. That also includes certain types of indirect calls. */
4782 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
4783 and "sseregparm" calling convention attributes;
4784 arguments as in struct attribute_spec.handler. */
4787 ix86_handle_cconv_attribute (tree *node, tree name,
4789 int flags ATTRIBUTE_UNUSED,
4792 if (TREE_CODE (*node) != FUNCTION_TYPE
4793 && TREE_CODE (*node) != METHOD_TYPE
4794 && TREE_CODE (*node) != FIELD_DECL
4795 && TREE_CODE (*node) != TYPE_DECL)
4797 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4799 *no_add_attrs = true;
4803 /* Can combine regparm with all attributes but fastcall and thiscall. */
4804 if (is_attribute_p ("regparm", name))
4808 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4810 error ("fastcall and regparm attributes are not compatible");
4813 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4815 error ("regparam and thiscall attributes are not compatible");
4818 cst = TREE_VALUE (args);
4819 if (TREE_CODE (cst) != INTEGER_CST)
4821 warning (OPT_Wattributes,
4822 "%qE attribute requires an integer constant argument",
4824 *no_add_attrs = true;
4826 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
4828 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
4830 *no_add_attrs = true;
4838 /* Do not warn when emulating the MS ABI. */
4839 if ((TREE_CODE (*node) != FUNCTION_TYPE
4840 && TREE_CODE (*node) != METHOD_TYPE)
4841 || ix86_function_type_abi (*node) != MS_ABI)
4842 warning (OPT_Wattributes, "%qE attribute ignored",
4844 *no_add_attrs = true;
4848 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
4849 if (is_attribute_p ("fastcall", name))
4851 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4853 error ("fastcall and cdecl attributes are not compatible");
4855 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4857 error ("fastcall and stdcall attributes are not compatible");
4859 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
4861 error ("fastcall and regparm attributes are not compatible");
4863 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4865 error ("fastcall and thiscall attributes are not compatible");
4869 /* Can combine stdcall with fastcall (redundant), regparm and
4871 else if (is_attribute_p ("stdcall", name))
4873 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4875 error ("stdcall and cdecl attributes are not compatible");
4877 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4879 error ("stdcall and fastcall attributes are not compatible");
4881 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4883 error ("stdcall and thiscall attributes are not compatible");
4887 /* Can combine cdecl with regparm and sseregparm. */
4888 else if (is_attribute_p ("cdecl", name))
4890 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4892 error ("stdcall and cdecl attributes are not compatible");
4894 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4896 error ("fastcall and cdecl attributes are not compatible");
4898 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4900 error ("cdecl and thiscall attributes are not compatible");
4903 else if (is_attribute_p ("thiscall", name))
4905 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
4906 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
4908 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4910 error ("stdcall and thiscall attributes are not compatible");
4912 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4914 error ("fastcall and thiscall attributes are not compatible");
4916 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4918 error ("cdecl and thiscall attributes are not compatible");
4922 /* Can combine sseregparm with all attributes. */
4927 /* This function determines the calling convention from TYPE. */
4930 ix86_get_callcvt (const_tree type)
4932 unsigned int ret = 0;
4937 return IX86_CALLCVT_CDECL;
4939 attrs = TYPE_ATTRIBUTES (type);
4940 if (attrs != NULL_TREE)
4942 if (lookup_attribute ("cdecl", attrs))
4943 ret |= IX86_CALLCVT_CDECL;
4944 else if (lookup_attribute ("stdcall", attrs))
4945 ret |= IX86_CALLCVT_STDCALL;
4946 else if (lookup_attribute ("fastcall", attrs))
4947 ret |= IX86_CALLCVT_FASTCALL;
4948 else if (lookup_attribute ("thiscall", attrs))
4949 ret |= IX86_CALLCVT_THISCALL;
4951 /* Regparm isn't allowed for thiscall and fastcall. */
4952 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
4954 if (lookup_attribute ("regparm", attrs))
4955 ret |= IX86_CALLCVT_REGPARM;
4956 if (lookup_attribute ("sseregparm", attrs))
4957 ret |= IX86_CALLCVT_SSEREGPARM;
4960 if (IX86_BASE_CALLCVT(ret) != 0)
4964 is_stdarg = stdarg_p (type);
4965 if (TARGET_RTD && !is_stdarg)
4966 return IX86_CALLCVT_STDCALL | ret;
4970 || TREE_CODE (type) != METHOD_TYPE
4971 || ix86_function_type_abi (type) != MS_ABI)
4972 return IX86_CALLCVT_CDECL | ret;
4974 return IX86_CALLCVT_THISCALL;
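/* A sketch of the resulting classification: on 32-bit targets

     void __attribute__ ((stdcall)) f (int);

   yields IX86_CALLCVT_STDCALL, a plain prototype yields
   IX86_CALLCVT_CDECL, and with -mrtd every non-stdarg function
   defaults to stdcall, as the code above implements.  */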
4977 /* Return 0 if the attributes for two types are incompatible, 1 if they
4978 are compatible, and 2 if they are nearly compatible (which causes a
4979 warning to be generated). */
4982 ix86_comp_type_attributes (const_tree type1, const_tree type2)
4984 unsigned int ccvt1, ccvt2;
4986 if (TREE_CODE (type1) != FUNCTION_TYPE
4987 && TREE_CODE (type1) != METHOD_TYPE)
4990 ccvt1 = ix86_get_callcvt (type1);
4991 ccvt2 = ix86_get_callcvt (type2);
4994 if (ix86_function_regparm (type1, NULL)
4995 != ix86_function_regparm (type2, NULL))
5001 /* Return the regparm value for a function with the indicated TYPE and DECL.
5002 DECL may be NULL when calling function indirectly
5003 or considering a libcall. */
5006 ix86_function_regparm (const_tree type, const_tree decl)
5013 return (ix86_function_type_abi (type) == SYSV_ABI
5014 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5015 ccvt = ix86_get_callcvt (type);
5016 regparm = ix86_regparm;
5018 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
5020 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5023 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5027 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5029 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5032 /* Use register calling convention for local functions when possible. */
5034 && TREE_CODE (decl) == FUNCTION_DECL
5036 && !(profile_flag && !flag_fentry))
5038 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5039 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
5040 if (i && i->local && i->can_change_signature)
5042 int local_regparm, globals = 0, regno;
5044 /* Make sure no regparm register is taken by a
5045 fixed register variable. */
5046 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
5047 if (fixed_regs[local_regparm])
5050 /* We don't want to use regparm(3) for nested functions as
5051 these use a static chain pointer in the third argument. */
5052 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
5055 /* In 32-bit mode save a register for the split stack. */
5056 if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
5059 /* Each fixed register usage increases register pressure,
5060 so fewer registers should be used for argument passing.
5061 This functionality can be overridden by an explicit
5063 for (regno = 0; regno <= DI_REG; regno++)
5064 if (fixed_regs[regno])
5068 = globals < local_regparm ? local_regparm - globals : 0;
5070 if (local_regparm > regparm)
5071 regparm = local_regparm;
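/* For example,

     int __attribute__ ((regparm (3))) f (int a, int b, int c);

   passes A, B and C in EAX, EDX and ECX; the heuristic above can pick
   the same convention automatically for functions that cgraph considers
   local and whose signature it may change.  */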
5078 /* Return 1 or 2 if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5079 DFmode (2) arguments in SSE registers for a function with the
5080 indicated TYPE and DECL. DECL may be NULL when calling function
5081 indirectly or considering a libcall. Otherwise return 0. */
5084 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
5086 gcc_assert (!TARGET_64BIT);
5088 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5089 by the sseregparm attribute. */
5090 if (TARGET_SSEREGPARM
5091 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
5098 error ("calling %qD with attribute sseregparm without "
5099 "SSE/SSE2 enabled", decl);
5101 error ("calling %qT with attribute sseregparm without "
5102 "SSE/SSE2 enabled", type);
5110 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5111 (and DFmode for SSE2) arguments in SSE registers. */
5112 if (decl && TARGET_SSE_MATH && optimize
5113 && !(profile_flag && !flag_fentry))
5115 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5116 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
5117 if (i && i->local && i->can_change_signature)
5118 return TARGET_SSE2 ? 2 : 1;
5124 /* Return true if EAX is live at the start of the function. Used by
5125 ix86_expand_prologue to determine if we need special help before
5126 calling allocate_stack_worker. */
5129 ix86_eax_live_at_start_p (void)
5131 /* Cheat. Don't bother working forward from ix86_function_regparm
5132 to the function type to whether an actual argument is located in
5133 eax. Instead just look at cfg info, which is still close enough
5134 to correct at this point. This gives false positives for broken
5135 functions that might use uninitialized data that happens to be
5136 allocated in eax, but who cares? */
5137 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
5141 ix86_keep_aggregate_return_pointer (tree fntype)
5147 attr = lookup_attribute ("callee_pop_aggregate_return",
5148 TYPE_ATTRIBUTES (fntype));
5150 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
5152 /* For 32-bit MS-ABI the default is to keep aggregate
5154 if (ix86_function_type_abi (fntype) == MS_ABI)
5157 return KEEP_AGGREGATE_RETURN_POINTER != 0;
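/* For instance, on 32-bit targets

     struct S { int v[4]; };
     struct S __attribute__ ((callee_pop_aggregate_return (0))) f (void);

   asks that the caller pop the hidden return-slot pointer, while an
   argument of 1 makes the callee pop it, matching the check above.  */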
5160 /* Value is the number of bytes of arguments automatically
5161 popped when returning from a subroutine call.
5162 FUNDECL is the declaration node of the function (as a tree),
5163 FUNTYPE is the data type of the function (as a tree),
5164 or for a library call it is an identifier node for the subroutine name.
5165 SIZE is the number of bytes of arguments passed on the stack.
5167 On the 80386, the RTD insn may be used to pop them if the number
5168 of args is fixed, but if the number is variable then the caller
5169 must pop them all. RTD can't be used for library calls now
5170 because the library is compiled with the Unix compiler.
5171 Use of RTD is a selectable option, since it is incompatible with
5172 standard Unix calling sequences. If the option is not selected,
5173 the caller must always pop the args.
5175 The attribute stdcall is equivalent to RTD on a per module basis. */
5178 ix86_return_pops_args (tree fundecl, tree funtype, int size)
5182 /* None of the 64-bit ABIs pop arguments. */
5186 ccvt = ix86_get_callcvt (funtype);
5188 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
5189 | IX86_CALLCVT_THISCALL)) != 0
5190 && ! stdarg_p (funtype))
5193 /* Lose any fake structure return argument if it is passed on the stack. */
5194 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
5195 && !ix86_keep_aggregate_return_pointer (funtype))
5197 int nregs = ix86_function_regparm (funtype, fundecl);
5199 return GET_MODE_SIZE (Pmode);
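/* Concretely, on 32-bit targets

     void __attribute__ ((stdcall)) f (int a, int b);

   returns with "ret 8", popping its 8 bytes of arguments itself,
   whereas a cdecl function returns with a plain "ret" and leaves the
   cleanup to the caller.  */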
5205 /* Argument support functions. */
5207 /* Return true when a register may be used to pass function parameters. */
5209 ix86_function_arg_regno_p (int regno)
5212 const int *parm_regs;
5217 return (regno < REGPARM_MAX
5218 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
5220 return (regno < REGPARM_MAX
5221 || (TARGET_MMX && MMX_REGNO_P (regno)
5222 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
5223 || (TARGET_SSE && SSE_REGNO_P (regno)
5224 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
5229 if (SSE_REGNO_P (regno) && TARGET_SSE)
5234 if (TARGET_SSE && SSE_REGNO_P (regno)
5235 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
5239 /* TODO: The function should depend on current function ABI but
5240 builtins.c would need updating then. Therefore we use the default ABI. */
5243 /* RAX is used as hidden argument to va_arg functions. */
5244 if (ix86_abi == SYSV_ABI && regno == AX_REG)
5247 if (ix86_abi == MS_ABI)
5248 parm_regs = x86_64_ms_abi_int_parameter_registers;
5250 parm_regs = x86_64_int_parameter_registers;
5251 for (i = 0; i < (ix86_abi == MS_ABI
5252 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
5253 if (regno == parm_regs[i])
5258 /* Return true if we do not know how to pass TYPE solely in registers. */
5261 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
5263 if (must_pass_in_stack_var_size_or_pad (mode, type))
5266 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5267 The layout_type routine is crafty and tries to trick us into passing
5268 currently unsupported vector types on the stack by using TImode. */
5269 return (!TARGET_64BIT && mode == TImode
5270 && type && TREE_CODE (type) != VECTOR_TYPE);
5273 /* Return the size, in bytes, of the area reserved for arguments passed
5274 in registers for the function represented by FNDECL, dependent on the used
5277 ix86_reg_parm_stack_space (const_tree fndecl)
5279 enum calling_abi call_abi = SYSV_ABI;
5280 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
5281 call_abi = ix86_function_abi (fndecl);
5283 call_abi = ix86_function_type_abi (fndecl);
5284 if (TARGET_64BIT && call_abi == MS_ABI)
5289 /* Returns SYSV_ABI or MS_ABI, dependent on FNTYPE, specifying the
5292 ix86_function_type_abi (const_tree fntype)
5294 if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
5296 enum calling_abi abi = ix86_abi;
5297 if (abi == SYSV_ABI)
5299 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
5302 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
5310 ix86_function_ms_hook_prologue (const_tree fn)
5312 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
5314 if (decl_function_context (fn) != NULL_TREE)
5315 error_at (DECL_SOURCE_LOCATION (fn),
5316 "ms_hook_prologue is not compatible with nested function");
5323 static enum calling_abi
5324 ix86_function_abi (const_tree fndecl)
5328 return ix86_function_type_abi (TREE_TYPE (fndecl));
5331 /* Returns SYSV_ABI or MS_ABI, dependent on cfun, specifying the
5334 ix86_cfun_abi (void)
5338 return cfun->machine->call_abi;
5341 /* Write the extra assembler code needed to declare a function properly. */
5344 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
5347 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
5351 int i, filler_count = (TARGET_64BIT ? 32 : 16);
5352 unsigned int filler_cc = 0xcccccccc;
5354 for (i = 0; i < filler_count; i += 4)
5355 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
5358 #ifdef SUBTARGET_ASM_UNWIND_INIT
5359 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
5362 ASM_OUTPUT_LABEL (asm_out_file, fname);
5364 /* Output the magic byte marker if the hot-patch attribute is set. */
5369 /* leaq [%rsp + 0], %rsp */
5370 asm_fprintf (asm_out_file, ASM_BYTE
5371 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
5375 /* movl.s %edi, %edi
5377 movl.s %esp, %ebp */
5378 asm_fprintf (asm_out_file, ASM_BYTE
5379 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
5385 extern void init_regs (void);
5387 /* Implementation of the call ABI switching target hook. The call
5388 register sets specific to FNDECL are set up. See also
5389 ix86_conditional_register_usage for more details. */
5391 ix86_call_abi_override (const_tree fndecl)
5393 if (fndecl == NULL_TREE)
5394 cfun->machine->call_abi = ix86_abi;
5396 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
5399 /* The 64-bit MS and SYSV ABIs have different sets of call-used registers. Avoid
5400 expensive re-initialization of init_regs each time we switch function context
5401 since this is needed only during RTL expansion. */
5403 ix86_maybe_switch_abi (void)
5406 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
5410 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5411 for a call to a function whose data type is FNTYPE.
5412 For a library call, FNTYPE is 0. */
5415 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
5416 tree fntype, /* tree ptr for function decl */
5417 rtx libname, /* SYMBOL_REF of library name or 0 */
5421 struct cgraph_local_info *i;
5424 memset (cum, 0, sizeof (*cum));
5426 /* Initialize for the current callee. */
5429 cfun->machine->callee_pass_avx256_p = false;
5430 cfun->machine->callee_return_avx256_p = false;
5435 i = cgraph_local_info (fndecl);
5436 cum->call_abi = ix86_function_abi (fndecl);
5437 fnret_type = TREE_TYPE (TREE_TYPE (fndecl));
5442 cum->call_abi = ix86_function_type_abi (fntype);
5444 fnret_type = TREE_TYPE (fntype);
5449 if (TARGET_VZEROUPPER && fnret_type)
5451 rtx fnret_value = ix86_function_value (fnret_type, fntype,
5453 if (function_pass_avx256_p (fnret_value))
5455 /* The return value of this function uses 256bit AVX modes. */
5457 cfun->machine->callee_return_avx256_p = true;
5459 cfun->machine->caller_return_avx256_p = true;
5463 cum->caller = caller;
5465 /* Set up the number of registers to use for passing arguments. */
5467 if (TARGET_64BIT && cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
5468 sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
5469 "or subtarget optimization implying it");
5470 cum->nregs = ix86_regparm;
5473 cum->nregs = (cum->call_abi == SYSV_ABI
5474 ? X86_64_REGPARM_MAX
5475 : X86_64_MS_REGPARM_MAX);
5479 cum->sse_nregs = SSE_REGPARM_MAX;
5482 cum->sse_nregs = (cum->call_abi == SYSV_ABI
5483 ? X86_64_SSE_REGPARM_MAX
5484 : X86_64_MS_SSE_REGPARM_MAX);
5488 cum->mmx_nregs = MMX_REGPARM_MAX;
5489 cum->warn_avx = true;
5490 cum->warn_sse = true;
5491 cum->warn_mmx = true;
5493 /* Because the type might mismatch between caller and callee, we need to
5494 use the actual type of the function for local calls.
5495 FIXME: cgraph_analyze can be told to actually record if function uses
5496 va_start so for local functions maybe_vaarg can be made aggressive
5498 FIXME: once the type system is fixed, we won't need this code anymore. */
5499 if (i && i->local && i->can_change_signature)
5500 fntype = TREE_TYPE (fndecl);
5501 cum->maybe_vaarg = (fntype
5502 ? (!prototype_p (fntype) || stdarg_p (fntype))
5507 /* If there are variable arguments, then we won't pass anything
5508 in registers in 32-bit mode. */
5509 if (stdarg_p (fntype))
5520 /* Use the ecx and edx registers if the function has the fastcall attribute,
5521 else look for regparm information. */
5524 unsigned int ccvt = ix86_get_callcvt (fntype);
5525 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5528 cum->fastcall = 1; /* Same first register as in fastcall. */
5530 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5536 cum->nregs = ix86_function_regparm (fntype, fndecl);
5539 /* Set up the number of SSE registers used for passing SFmode
5540 and DFmode arguments. Warn for mismatching ABI. */
5541 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
5545 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
5546 But in the case of vector types, it is some vector mode.
5548 When we have only some of our vector isa extensions enabled, then there
5549 are some modes for which vector_mode_supported_p is false. For these
5550 modes, the generic vector support in gcc will choose some non-vector mode
5551 in order to implement the type. By computing the natural mode, we'll
5552 select the proper ABI location for the operand and not depend on whatever
5553 the middle-end decides to do with these vector types.
5555 The middle-end can't deal with vector types > 16 bytes. In this
5556 case, we return the original mode and warn ABI change if CUM isn't
5559 static enum machine_mode
5560 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum)
5562 enum machine_mode mode = TYPE_MODE (type);
5564 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
5566 HOST_WIDE_INT size = int_size_in_bytes (type);
5567 if ((size == 8 || size == 16 || size == 32)
5568 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
5569 && TYPE_VECTOR_SUBPARTS (type) > 1)
5571 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
5573 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
5574 mode = MIN_MODE_VECTOR_FLOAT;
5576 mode = MIN_MODE_VECTOR_INT;
5578 /* Get the mode which has this inner mode and number of units. */
5579 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
5580 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
5581 && GET_MODE_INNER (mode) == innermode)
5583 if (size == 32 && !TARGET_AVX)
5585 static bool warnedavx;
5592 warning (0, "AVX vector argument without AVX "
5593 "enabled changes the ABI");
5595 return TYPE_MODE (type);
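/* Illustrative example (a sketch, not part of the original code): with
-mno-sse, a type such as

     typedef float v4sf __attribute__((vector_size (16)));

has no supported vector mode, so generic code implements it in some
non-vector mode; type_natural_mode still returns V4SFmode, keeping the
ABI location of the operand stable.  With !TARGET_AVX, a 32-byte vector
argument instead triggers the "changes the ABI" warning above and the
original mode is kept.  */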
5608 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
5609 this may not agree with the mode that the type system has chosen for the
5610 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
5611 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
5614 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
5619 if (orig_mode != BLKmode)
5620 tmp = gen_rtx_REG (orig_mode, regno);
5623 tmp = gen_rtx_REG (mode, regno);
5624 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
5625 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
/* x86-64 register passing implementation.  See the x86-64 psABI for details.
The goal of this code is to classify each eightbyte of the incoming argument
by register class and assign registers accordingly.  */
5635 /* Return the union class of CLASS1 and CLASS2.
5636 See the x86-64 PS ABI for details. */
5638 static enum x86_64_reg_class
5639 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
5641 /* Rule #1: If both classes are equal, this is the resulting class. */
5642 if (class1 == class2)
5645 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
5647 if (class1 == X86_64_NO_CLASS)
5649 if (class2 == X86_64_NO_CLASS)
5652 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
5653 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
5654 return X86_64_MEMORY_CLASS;
5656 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
5657 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
5658 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
5659 return X86_64_INTEGERSI_CLASS;
5660 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
5661 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
5662 return X86_64_INTEGER_CLASS;
5664 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
5666 if (class1 == X86_64_X87_CLASS
5667 || class1 == X86_64_X87UP_CLASS
5668 || class1 == X86_64_COMPLEX_X87_CLASS
5669 || class2 == X86_64_X87_CLASS
5670 || class2 == X86_64_X87UP_CLASS
5671 || class2 == X86_64_COMPLEX_X87_CLASS)
5672 return X86_64_MEMORY_CLASS;
5674 /* Rule #6: Otherwise class SSE is used. */
5675 return X86_64_SSE_CLASS;
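/* Worked examples of the merge rules above (for reference):
     merge_classes (NO_CLASS,  SSE)    -> SSE        (rule #2)
     merge_classes (INTEGERSI, SSESF)  -> INTEGERSI  (rule #4)
     merge_classes (INTEGER,   SSE)    -> INTEGER    (rule #4)
     merge_classes (X87,       SSE)    -> MEMORY     (rule #5)
     merge_classes (SSESF,     SSEDF)  -> SSE        (rule #6)  */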
5678 /* Classify the argument of type TYPE and mode MODE.
5679 CLASSES will be filled by the register class used to pass each word
5680 of the operand. The number of words is returned. In case the parameter
5681 should be passed in memory, 0 is returned. As a special case for zero
5682 sized containers, classes[0] will be NO_CLASS and 1 is returned.
BIT_OFFSET is used internally for handling records and specifies the
offset in bits, modulo 256, to avoid overflow cases.
5687 See the x86-64 PS ABI for details.
5691 classify_argument (enum machine_mode mode, const_tree type,
5692 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
5694 HOST_WIDE_INT bytes =
5695 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5696 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5698 /* Variable sized entities are always passed/returned in memory. */
5702 if (mode != VOIDmode
5703 && targetm.calls.must_pass_in_stack (mode, type))
5706 if (type && AGGREGATE_TYPE_P (type))
5710 enum x86_64_reg_class subclasses[MAX_CLASSES];
5712 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
5716 for (i = 0; i < words; i++)
5717 classes[i] = X86_64_NO_CLASS;
/* Zero-sized arrays or structures are NO_CLASS.  We return 0 to
signal memory class, so handle it as a special case.  */
5723 classes[0] = X86_64_NO_CLASS;
5727 /* Classify each field of record and merge classes. */
5728 switch (TREE_CODE (type))
5731 /* And now merge the fields of structure. */
5732 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5734 if (TREE_CODE (field) == FIELD_DECL)
5738 if (TREE_TYPE (field) == error_mark_node)
5741 /* Bitfields are always classified as integer. Handle them
5742 early, since later code would consider them to be
5743 misaligned integers. */
5744 if (DECL_BIT_FIELD (field))
5746 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5747 i < ((int_bit_position (field) + (bit_offset % 64))
5748 + tree_low_cst (DECL_SIZE (field), 0)
5751 merge_classes (X86_64_INTEGER_CLASS,
5758 type = TREE_TYPE (field);
5760 /* Flexible array member is ignored. */
5761 if (TYPE_MODE (type) == BLKmode
5762 && TREE_CODE (type) == ARRAY_TYPE
5763 && TYPE_SIZE (type) == NULL_TREE
5764 && TYPE_DOMAIN (type) != NULL_TREE
5765 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
5770 if (!warned && warn_psabi)
5773 inform (input_location,
5774 "the ABI of passing struct with"
5775 " a flexible array member has"
5776 " changed in GCC 4.4");
5780 num = classify_argument (TYPE_MODE (type), type,
5782 (int_bit_position (field)
5783 + bit_offset) % 256);
5786 pos = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5787 for (i = 0; i < num && (i + pos) < words; i++)
5789 merge_classes (subclasses[i], classes[i + pos]);
5796 /* Arrays are handled as small records. */
5799 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
5800 TREE_TYPE (type), subclasses, bit_offset);
5804 /* The partial classes are now full classes. */
5805 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
5806 subclasses[0] = X86_64_SSE_CLASS;
5807 if (subclasses[0] == X86_64_INTEGERSI_CLASS
5808 && !((bit_offset % 64) == 0 && bytes == 4))
5809 subclasses[0] = X86_64_INTEGER_CLASS;
5811 for (i = 0; i < words; i++)
5812 classes[i] = subclasses[i % num];
5817 case QUAL_UNION_TYPE:
5818 /* Unions are similar to RECORD_TYPE but offset is always 0.
5820 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5822 if (TREE_CODE (field) == FIELD_DECL)
5826 if (TREE_TYPE (field) == error_mark_node)
5829 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
5830 TREE_TYPE (field), subclasses,
5834 for (i = 0; i < num; i++)
5835 classes[i] = merge_classes (subclasses[i], classes[i]);
/* When size > 16 bytes, if the first one isn't
X86_64_SSE_CLASS or any of the other ones isn't
X86_64_SSEUP_CLASS, everything should be passed in
5850 if (classes[0] != X86_64_SSE_CLASS)
5853 for (i = 1; i < words; i++)
5854 if (classes[i] != X86_64_SSEUP_CLASS)
5858 /* Final merger cleanup. */
5859 for (i = 0; i < words; i++)
5861 /* If one class is MEMORY, everything should be passed in
5863 if (classes[i] == X86_64_MEMORY_CLASS)
5866 /* The X86_64_SSEUP_CLASS should be always preceded by
5867 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
5868 if (classes[i] == X86_64_SSEUP_CLASS
5869 && classes[i - 1] != X86_64_SSE_CLASS
5870 && classes[i - 1] != X86_64_SSEUP_CLASS)
5872 /* The first one should never be X86_64_SSEUP_CLASS. */
5873 gcc_assert (i != 0);
5874 classes[i] = X86_64_SSE_CLASS;
5877 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
5878 everything should be passed in memory. */
5879 if (classes[i] == X86_64_X87UP_CLASS
5880 && (classes[i - 1] != X86_64_X87_CLASS))
5884 /* The first one should never be X86_64_X87UP_CLASS. */
5885 gcc_assert (i != 0);
5886 if (!warned && warn_psabi)
5889 inform (input_location,
5890 "the ABI of passing union with long double"
5891 " has changed in GCC 4.4");
/* Compute the alignment needed.  We align all types to natural boundaries
with the exception of XFmode, which is aligned to 64 bits.  */
5901 if (mode != VOIDmode && mode != BLKmode)
5903 int mode_alignment = GET_MODE_BITSIZE (mode);
5906 mode_alignment = 128;
5907 else if (mode == XCmode)
5908 mode_alignment = 256;
5909 if (COMPLEX_MODE_P (mode))
5910 mode_alignment /= 2;
5911 /* Misaligned fields are always returned in memory. */
5912 if (bit_offset % mode_alignment)
/* For V1xx modes, just use the base mode.  */
5917 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
5918 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
5919 mode = GET_MODE_INNER (mode);
5921 /* Classification of atomic types. */
5926 classes[0] = X86_64_SSE_CLASS;
5929 classes[0] = X86_64_SSE_CLASS;
5930 classes[1] = X86_64_SSEUP_CLASS;
int size = (bit_offset % 64) + (int) GET_MODE_BITSIZE (mode);
5944 classes[0] = X86_64_INTEGERSI_CLASS;
5947 else if (size <= 64)
5949 classes[0] = X86_64_INTEGER_CLASS;
5952 else if (size <= 64+32)
5954 classes[0] = X86_64_INTEGER_CLASS;
5955 classes[1] = X86_64_INTEGERSI_CLASS;
5958 else if (size <= 64+64)
5960 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5968 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5972 /* OImode shouldn't be used directly. */
5977 if (!(bit_offset % 64))
5978 classes[0] = X86_64_SSESF_CLASS;
5980 classes[0] = X86_64_SSE_CLASS;
5983 classes[0] = X86_64_SSEDF_CLASS;
5986 classes[0] = X86_64_X87_CLASS;
5987 classes[1] = X86_64_X87UP_CLASS;
5990 classes[0] = X86_64_SSE_CLASS;
5991 classes[1] = X86_64_SSEUP_CLASS;
5994 classes[0] = X86_64_SSE_CLASS;
5995 if (!(bit_offset % 64))
6001 if (!warned && warn_psabi)
6004 inform (input_location,
6005 "the ABI of passing structure with complex float"
6006 " member has changed in GCC 4.4");
6008 classes[1] = X86_64_SSESF_CLASS;
6012 classes[0] = X86_64_SSEDF_CLASS;
6013 classes[1] = X86_64_SSEDF_CLASS;
6016 classes[0] = X86_64_COMPLEX_X87_CLASS;
/* This mode is larger than 16 bytes.  */
6027 classes[0] = X86_64_SSE_CLASS;
6028 classes[1] = X86_64_SSEUP_CLASS;
6029 classes[2] = X86_64_SSEUP_CLASS;
6030 classes[3] = X86_64_SSEUP_CLASS;
6038 classes[0] = X86_64_SSE_CLASS;
6039 classes[1] = X86_64_SSEUP_CLASS;
6047 classes[0] = X86_64_SSE_CLASS;
6053 gcc_assert (VECTOR_MODE_P (mode));
6058 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
6060 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
6061 classes[0] = X86_64_INTEGERSI_CLASS;
6063 classes[0] = X86_64_INTEGER_CLASS;
6064 classes[1] = X86_64_INTEGER_CLASS;
6065 return 1 + (bytes > 8);
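/* Worked examples of the classification above under the SysV x86-64 ABI
(an illustrative sketch; the struct types are hypothetical):
     struct { long l; double d; }  -> eightbyte 0 INTEGER, eightbyte 1 SSE:
                                      one GPR plus one SSE register.
     struct { float f; float g; }  -> a single SSE eightbyte:
                                      one SSE register.
     struct { char c[24]; }        -> larger than 16 bytes and not
                                      SSE/SSEUP: memory, 0 is returned.
     zero-sized struct             -> classes[0] = NO_CLASS, 1 is returned.  */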
/* Examine the argument and set the number of registers required in each
class.  Return 0 iff the parameter should be passed in memory.  */
6072 examine_argument (enum machine_mode mode, const_tree type, int in_return,
6073 int *int_nregs, int *sse_nregs)
6075 enum x86_64_reg_class regclass[MAX_CLASSES];
6076 int n = classify_argument (mode, type, regclass, 0);
6082 for (n--; n >= 0; n--)
6083 switch (regclass[n])
6085 case X86_64_INTEGER_CLASS:
6086 case X86_64_INTEGERSI_CLASS:
6089 case X86_64_SSE_CLASS:
6090 case X86_64_SSESF_CLASS:
6091 case X86_64_SSEDF_CLASS:
6094 case X86_64_NO_CLASS:
6095 case X86_64_SSEUP_CLASS:
6097 case X86_64_X87_CLASS:
6098 case X86_64_X87UP_CLASS:
6102 case X86_64_COMPLEX_X87_CLASS:
6103 return in_return ? 2 : 0;
6104 case X86_64_MEMORY_CLASS:
6110 /* Construct container for the argument used by GCC interface. See
6111 FUNCTION_ARG for the detailed description. */
6114 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
6115 const_tree type, int in_return, int nintregs, int nsseregs,
6116 const int *intreg, int sse_regno)
6118 /* The following variables hold the static issued_error state. */
6119 static bool issued_sse_arg_error;
6120 static bool issued_sse_ret_error;
6121 static bool issued_x87_ret_error;
6123 enum machine_mode tmpmode;
6125 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6126 enum x86_64_reg_class regclass[MAX_CLASSES];
6130 int needed_sseregs, needed_intregs;
6131 rtx exp[MAX_CLASSES];
6134 n = classify_argument (mode, type, regclass, 0);
6137 if (!examine_argument (mode, type, in_return, &needed_intregs,
6140 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
6143 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
6144 some less clueful developer tries to use floating-point anyway. */
6145 if (needed_sseregs && !TARGET_SSE)
6149 if (!issued_sse_ret_error)
6151 error ("SSE register return with SSE disabled");
6152 issued_sse_ret_error = true;
6155 else if (!issued_sse_arg_error)
6157 error ("SSE register argument with SSE disabled");
6158 issued_sse_arg_error = true;
6163 /* Likewise, error if the ABI requires us to return values in the
6164 x87 registers and the user specified -mno-80387. */
6165 if (!TARGET_80387 && in_return)
6166 for (i = 0; i < n; i++)
6167 if (regclass[i] == X86_64_X87_CLASS
6168 || regclass[i] == X86_64_X87UP_CLASS
6169 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
6171 if (!issued_x87_ret_error)
6173 error ("x87 register return with x87 disabled");
6174 issued_x87_ret_error = true;
/* First construct the simple cases.  Avoid SCmode, since we want to use
a single register to pass this type.  */
6181 if (n == 1 && mode != SCmode)
6182 switch (regclass[0])
6184 case X86_64_INTEGER_CLASS:
6185 case X86_64_INTEGERSI_CLASS:
6186 return gen_rtx_REG (mode, intreg[0]);
6187 case X86_64_SSE_CLASS:
6188 case X86_64_SSESF_CLASS:
6189 case X86_64_SSEDF_CLASS:
6190 if (mode != BLKmode)
6191 return gen_reg_or_parallel (mode, orig_mode,
6192 SSE_REGNO (sse_regno));
6194 case X86_64_X87_CLASS:
6195 case X86_64_COMPLEX_X87_CLASS:
6196 return gen_rtx_REG (mode, FIRST_STACK_REG);
6197 case X86_64_NO_CLASS:
6198 /* Zero sized array, struct or class. */
6203 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
6204 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
6205 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
6207 && regclass[0] == X86_64_SSE_CLASS
6208 && regclass[1] == X86_64_SSEUP_CLASS
6209 && regclass[2] == X86_64_SSEUP_CLASS
6210 && regclass[3] == X86_64_SSEUP_CLASS
6212 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
6215 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
6216 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
6217 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
6218 && regclass[1] == X86_64_INTEGER_CLASS
6219 && (mode == CDImode || mode == TImode || mode == TFmode)
6220 && intreg[0] + 1 == intreg[1])
6221 return gen_rtx_REG (mode, intreg[0]);
6223 /* Otherwise figure out the entries of the PARALLEL. */
6224 for (i = 0; i < n; i++)
6228 switch (regclass[i])
6230 case X86_64_NO_CLASS:
6232 case X86_64_INTEGER_CLASS:
6233 case X86_64_INTEGERSI_CLASS:
6234 /* Merge TImodes on aligned occasions here too. */
6235 if (i * 8 + 8 > bytes)
6236 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
6237 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
/* We've requested 24 bytes for which we don't have a mode.  Use DImode.  */
6242 if (tmpmode == BLKmode)
6244 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6245 gen_rtx_REG (tmpmode, *intreg),
6249 case X86_64_SSESF_CLASS:
6250 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6251 gen_rtx_REG (SFmode,
6252 SSE_REGNO (sse_regno)),
6256 case X86_64_SSEDF_CLASS:
6257 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6258 gen_rtx_REG (DFmode,
6259 SSE_REGNO (sse_regno)),
6263 case X86_64_SSE_CLASS:
6271 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
6281 && regclass[1] == X86_64_SSEUP_CLASS
6282 && regclass[2] == X86_64_SSEUP_CLASS
6283 && regclass[3] == X86_64_SSEUP_CLASS);
6290 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6291 gen_rtx_REG (tmpmode,
6292 SSE_REGNO (sse_regno)),
6301 /* Empty aligned struct, union or class. */
6305 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
6306 for (i = 0; i < nexps; i++)
6307 XVECEXP (ret, 0, i) = exp [i];
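/* Illustrative sketch of a container built above (not authoritative):
for a hypothetical struct { long l; double d; } passed as the first
argument, the PARALLEL is roughly

     (parallel [(expr_list (reg:DI rdi)  (const_int 0))
                (expr_list (reg:DF xmm0) (const_int 8))])

i.e. the first eightbyte in a GPR at offset 0 and the second in an SSE
register at offset 8.  */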
6311 /* Update the data in CUM to advance over an argument of mode MODE
6312 and data type TYPE. (TYPE is null for libcalls where that information
6313 may not be available.) */
6316 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6317 const_tree type, HOST_WIDE_INT bytes,
6318 HOST_WIDE_INT words)
6334 cum->words += words;
6335 cum->nregs -= words;
6336 cum->regno += words;
6338 if (cum->nregs <= 0)
6346 /* OImode shouldn't be used directly. */
6350 if (cum->float_in_sse < 2)
6353 if (cum->float_in_sse < 1)
6370 if (!type || !AGGREGATE_TYPE_P (type))
6372 cum->sse_words += words;
6373 cum->sse_nregs -= 1;
6374 cum->sse_regno += 1;
6375 if (cum->sse_nregs <= 0)
6389 if (!type || !AGGREGATE_TYPE_P (type))
6391 cum->mmx_words += words;
6392 cum->mmx_nregs -= 1;
6393 cum->mmx_regno += 1;
6394 if (cum->mmx_nregs <= 0)
6405 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6406 const_tree type, HOST_WIDE_INT words, bool named)
6408 int int_nregs, sse_nregs;
6410 /* Unnamed 256bit vector mode parameters are passed on stack. */
6411 if (!named && VALID_AVX256_REG_MODE (mode))
6414 if (examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
6415 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
6417 cum->nregs -= int_nregs;
6418 cum->sse_nregs -= sse_nregs;
6419 cum->regno += int_nregs;
6420 cum->sse_regno += sse_nregs;
6424 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
6425 cum->words = (cum->words + align - 1) & ~(align - 1);
6426 cum->words += words;
6431 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
6432 HOST_WIDE_INT words)
/* Otherwise, this should be passed indirectly.  */
6435 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
6437 cum->words += words;
6445 /* Update the data in CUM to advance over an argument of mode MODE and
6446 data type TYPE. (TYPE is null for libcalls where that information
6447 may not be available.) */
6450 ix86_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
6451 const_tree type, bool named)
6453 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6454 HOST_WIDE_INT bytes, words;
6456 if (mode == BLKmode)
6457 bytes = int_size_in_bytes (type);
6459 bytes = GET_MODE_SIZE (mode);
6460 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6463 mode = type_natural_mode (type, NULL);
6465 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6466 function_arg_advance_ms_64 (cum, bytes, words);
6467 else if (TARGET_64BIT)
6468 function_arg_advance_64 (cum, mode, type, words, named);
6470 function_arg_advance_32 (cum, mode, type, bytes, words);
6473 /* Define where to put the arguments to a function.
6474 Value is zero to push the argument on the stack,
6475 or a hard register in which to store the argument.
6477 MODE is the argument's machine mode.
6478 TYPE is the data type of the argument (as a tree).
6479 This is null for libcalls where that information may
6481 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6482 the preceding args and about the function being called.
6483 NAMED is nonzero if this argument is a named parameter
6484 (otherwise it is an extra parameter matching an ellipsis). */
6487 function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6488 enum machine_mode orig_mode, const_tree type,
6489 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
6491 static bool warnedsse, warnedmmx;
6493 /* Avoid the AL settings for the Unix64 ABI. */
6494 if (mode == VOIDmode)
6510 if (words <= cum->nregs)
6512 int regno = cum->regno;
6514 /* Fastcall allocates the first two DWORD (SImode) or
6515 smaller arguments to ECX and EDX if it isn't an
6521 || (type && AGGREGATE_TYPE_P (type)))
/* ECX, not EAX, is the first allocated register.  */
6525 if (regno == AX_REG)
6528 return gen_rtx_REG (mode, regno);
6533 if (cum->float_in_sse < 2)
6536 if (cum->float_in_sse < 1)
6540 /* In 32bit, we pass TImode in xmm registers. */
6547 if (!type || !AGGREGATE_TYPE_P (type))
6549 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
6552 warning (0, "SSE vector argument without SSE enabled "
6556 return gen_reg_or_parallel (mode, orig_mode,
6557 cum->sse_regno + FIRST_SSE_REG);
6562 /* OImode shouldn't be used directly. */
6571 if (!type || !AGGREGATE_TYPE_P (type))
6574 return gen_reg_or_parallel (mode, orig_mode,
6575 cum->sse_regno + FIRST_SSE_REG);
6585 if (!type || !AGGREGATE_TYPE_P (type))
6587 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
6590 warning (0, "MMX vector argument without MMX enabled "
6594 return gen_reg_or_parallel (mode, orig_mode,
6595 cum->mmx_regno + FIRST_MMX_REG);
6604 function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6605 enum machine_mode orig_mode, const_tree type, bool named)
/* Handle the hidden AL argument containing the number of SSE registers
used for varargs x86-64 functions.  */
6609 if (mode == VOIDmode)
6610 return GEN_INT (cum->maybe_vaarg
6611 ? (cum->sse_nregs < 0
6612 ? X86_64_SSE_REGPARM_MAX
6627 /* Unnamed 256bit vector mode parameters are passed on stack. */
6633 return construct_container (mode, orig_mode, type, 0, cum->nregs,
6635 &x86_64_int_parameter_registers [cum->regno],
6640 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6641 enum machine_mode orig_mode, bool named,
6642 HOST_WIDE_INT bytes)
/* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
We use the value of -2 to specify that the current function call is MSABI.  */
6648 if (mode == VOIDmode)
6649 return GEN_INT (-2);
6651 /* If we've run out of registers, it goes on the stack. */
6652 if (cum->nregs == 0)
6655 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
6657 /* Only floating point modes are passed in anything but integer regs. */
6658 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
6661 regno = cum->regno + FIRST_SSE_REG;
6666 /* Unnamed floating parameters are passed in both the
6667 SSE and integer registers. */
6668 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
6669 t2 = gen_rtx_REG (mode, regno);
6670 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
6671 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
6672 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
/* Handle aggregate types passed in registers.  */
6676 if (orig_mode == BLKmode)
6678 if (bytes > 0 && bytes <= 8)
6679 mode = (bytes > 4 ? DImode : SImode);
6680 if (mode == BLKmode)
6684 return gen_reg_or_parallel (mode, orig_mode, regno);
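/* Illustration of the MS x64 convention handled above (a sketch; the
prototype is a hypothetical example): argument slots are positional, so
for

     double f (int i, double d);

i goes in %ecx (slot 0) and d goes in %xmm1 (slot 1); %xmm0 stays unused
because slot 0 was taken by an integer.  For varargs, a floating-point
argument is passed in both the SSE and the corresponding integer
register, as the PARALLEL built above shows.  */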
6687 /* Return where to put the arguments to a function.
Return zero to push the argument on the stack, or a hard register in
which to store the argument.
6690 MODE is the argument's machine mode. TYPE is the data type of the
6691 argument. It is null for libcalls where that information may not be
6692 available. CUM gives information about the preceding args and about
6693 the function being called. NAMED is nonzero if this argument is a
6694 named parameter (otherwise it is an extra parameter matching an
6698 ix86_function_arg (cumulative_args_t cum_v, enum machine_mode omode,
6699 const_tree type, bool named)
6701 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6702 enum machine_mode mode = omode;
6703 HOST_WIDE_INT bytes, words;
6706 if (mode == BLKmode)
6707 bytes = int_size_in_bytes (type);
6709 bytes = GET_MODE_SIZE (mode);
6710 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6712 /* To simplify the code below, represent vector types with a vector mode
6713 even if MMX/SSE are not active. */
6714 if (type && TREE_CODE (type) == VECTOR_TYPE)
6715 mode = type_natural_mode (type, cum);
6717 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6718 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
6719 else if (TARGET_64BIT)
6720 arg = function_arg_64 (cum, mode, omode, type, named);
6722 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
6724 if (TARGET_VZEROUPPER && function_pass_avx256_p (arg))
6726 /* This argument uses 256bit AVX modes. */
6728 cfun->machine->callee_pass_avx256_p = true;
6730 cfun->machine->caller_pass_avx256_p = true;
6736 /* A C expression that indicates when an argument must be passed by
6737 reference. If nonzero for an argument, a copy of that argument is
6738 made in memory and a pointer to the argument is passed instead of
6739 the argument itself. The pointer is passed in whatever way is
6740 appropriate for passing a pointer to that type. */
6743 ix86_pass_by_reference (cumulative_args_t cum_v ATTRIBUTE_UNUSED,
6744 enum machine_mode mode ATTRIBUTE_UNUSED,
6745 const_tree type, bool named ATTRIBUTE_UNUSED)
6747 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6749 /* See Windows x64 Software Convention. */
6750 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6752 int msize = (int) GET_MODE_SIZE (mode);
6755 /* Arrays are passed by reference. */
6756 if (TREE_CODE (type) == ARRAY_TYPE)
6759 if (AGGREGATE_TYPE_P (type))
6761 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
6762 are passed by reference. */
6763 msize = int_size_in_bytes (type);
6767 /* __m128 is passed by reference. */
6769 case 1: case 2: case 4: case 8:
6775 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
6781 /* Return true when TYPE should be 128bit aligned for 32bit argument
6782 passing ABI. XXX: This function is obsolete and is only used for
6783 checking psABI compatibility with previous versions of GCC. */
6786 ix86_compat_aligned_value_p (const_tree type)
6788 enum machine_mode mode = TYPE_MODE (type);
6789 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
6793 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
6795 if (TYPE_ALIGN (type) < 128)
6798 if (AGGREGATE_TYPE_P (type))
6800 /* Walk the aggregates recursively. */
6801 switch (TREE_CODE (type))
6805 case QUAL_UNION_TYPE:
6809 /* Walk all the structure fields. */
6810 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6812 if (TREE_CODE (field) == FIELD_DECL
6813 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
/* Just for use if some languages pass arrays by value.  */
6821 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
6832 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
6833 XXX: This function is obsolete and is only used for checking psABI
6834 compatibility with previous versions of GCC. */
6837 ix86_compat_function_arg_boundary (enum machine_mode mode,
6838 const_tree type, unsigned int align)
6840 /* In 32bit, only _Decimal128 and __float128 are aligned to their
6841 natural boundaries. */
6842 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
6844 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
6845 make an exception for SSE modes since these require 128bit
6848 The handling here differs from field_alignment. ICC aligns MMX
6849 arguments to 4 byte boundaries, while structure fields are aligned
6850 to 8 byte boundaries. */
6853 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
6854 align = PARM_BOUNDARY;
6858 if (!ix86_compat_aligned_value_p (type))
6859 align = PARM_BOUNDARY;
6862 if (align > BIGGEST_ALIGNMENT)
6863 align = BIGGEST_ALIGNMENT;
6867 /* Return true when TYPE should be 128bit aligned for 32bit argument
6871 ix86_contains_aligned_value_p (const_tree type)
6873 enum machine_mode mode = TYPE_MODE (type);
6875 if (mode == XFmode || mode == XCmode)
6878 if (TYPE_ALIGN (type) < 128)
6881 if (AGGREGATE_TYPE_P (type))
6883 /* Walk the aggregates recursively. */
6884 switch (TREE_CODE (type))
6888 case QUAL_UNION_TYPE:
6892 /* Walk all the structure fields. */
6893 for (field = TYPE_FIELDS (type);
6895 field = DECL_CHAIN (field))
6897 if (TREE_CODE (field) == FIELD_DECL
6898 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
/* Just for use if some languages pass arrays by value.  */
6906 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
6915 return TYPE_ALIGN (type) >= 128;
6920 /* Gives the alignment boundary, in bits, of an argument with the
6921 specified mode and type. */
6924 ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
/* Since the main variant type is what is used for the call, convert
TYPE to its main variant.  */
6931 type = TYPE_MAIN_VARIANT (type);
6932 align = TYPE_ALIGN (type);
6935 align = GET_MODE_ALIGNMENT (mode);
6936 if (align < PARM_BOUNDARY)
6937 align = PARM_BOUNDARY;
6941 unsigned int saved_align = align;
6945 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
6948 if (mode == XFmode || mode == XCmode)
6949 align = PARM_BOUNDARY;
6951 else if (!ix86_contains_aligned_value_p (type))
6952 align = PARM_BOUNDARY;
6955 align = PARM_BOUNDARY;
6960 && align != ix86_compat_function_arg_boundary (mode, type,
6964 inform (input_location,
6965 "The ABI for passing parameters with %d-byte"
6966 " alignment has changed in GCC 4.6",
6967 align / BITS_PER_UNIT);
6974 /* Return true if N is a possible register number of function value. */
6977 ix86_function_value_regno_p (const unsigned int regno)
6984 case FIRST_FLOAT_REG:
6985 /* TODO: The function should depend on current function ABI but
6986 builtins.c would need updating then. Therefore we use the
6988 if (TARGET_64BIT && ix86_abi == MS_ABI)
6990 return TARGET_FLOAT_RETURNS_IN_80387;
6996 if (TARGET_MACHO || TARGET_64BIT)
7004 /* Define how to find the value returned by a function.
7005 VALTYPE is the data type of the value (as a tree).
7006 If the precise function being called is known, FUNC is its FUNCTION_DECL;
7007 otherwise, FUNC is 0. */
7010 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
7011 const_tree fntype, const_tree fn)
7015 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
7016 we normally prevent this case when mmx is not available. However
7017 some ABIs may require the result to be returned like DImode. */
7018 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7019 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
7021 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
7022 we prevent this case when sse is not available. However some ABIs
7023 may require the result to be returned like integer TImode. */
7024 else if (mode == TImode
7025 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7026 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
7028 /* 32-byte vector modes in %ymm0. */
7029 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
7030 regno = TARGET_AVX ? FIRST_SSE_REG : 0;
7032 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
7033 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
7034 regno = FIRST_FLOAT_REG;
7036 /* Most things go in %eax. */
7039 /* Override FP return register with %xmm0 for local functions when
7040 SSE math is enabled or for functions with sseregparm attribute. */
7041 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
7043 int sse_level = ix86_function_sseregparm (fntype, fn, false);
7044 if ((sse_level >= 1 && mode == SFmode)
7045 || (sse_level == 2 && mode == DFmode))
7046 regno = FIRST_SSE_REG;
7049 /* OImode shouldn't be used directly. */
7050 gcc_assert (mode != OImode);
7052 return gen_rtx_REG (orig_mode, regno);
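/* Example of the ia32 return-value rules above (a sketch): by default a
hypothetical "float f (void)" returns in %st(0); if f is declared with
__attribute__((sseregparm)) (or qualifies for the local-function SSE-math
override), the same value comes back in %xmm0 instead.  */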
7056 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
7061 /* Handle libcalls, which don't provide a type node. */
7062 if (valtype == NULL)
7074 return gen_rtx_REG (mode, FIRST_SSE_REG);
7077 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
7081 return gen_rtx_REG (mode, AX_REG);
7084 else if (POINTER_TYPE_P (valtype))
7086 /* Pointers are always returned in Pmode. */
7090 ret = construct_container (mode, orig_mode, valtype, 1,
7091 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
7092 x86_64_int_return_registers, 0);
/* For zero-sized structures, construct_container returns NULL, but we
need to keep the rest of the compiler happy by returning a meaningful
value.  */
7097 ret = gen_rtx_REG (orig_mode, AX_REG);
7103 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
7105 unsigned int regno = AX_REG;
7109 switch (GET_MODE_SIZE (mode))
7112 if((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7113 && !COMPLEX_MODE_P (mode))
7114 regno = FIRST_SSE_REG;
7118 if (mode == SFmode || mode == DFmode)
7119 regno = FIRST_SSE_REG;
7125 return gen_rtx_REG (orig_mode, regno);
7129 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
7130 enum machine_mode orig_mode, enum machine_mode mode)
7132 const_tree fn, fntype;
7135 if (fntype_or_decl && DECL_P (fntype_or_decl))
7136 fn = fntype_or_decl;
7137 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
7139 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
7140 return function_value_ms_64 (orig_mode, mode);
7141 else if (TARGET_64BIT)
7142 return function_value_64 (orig_mode, mode, valtype);
7144 return function_value_32 (orig_mode, mode, fntype, fn);
7148 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
7149 bool outgoing ATTRIBUTE_UNUSED)
7151 enum machine_mode mode, orig_mode;
7153 orig_mode = TYPE_MODE (valtype);
7154 mode = type_natural_mode (valtype, NULL);
7155 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
7158 /* Pointer function arguments and return values are promoted to Pmode. */
7160 static enum machine_mode
7161 ix86_promote_function_mode (const_tree type, enum machine_mode mode,
7162 int *punsignedp, const_tree fntype,
7165 if (type != NULL_TREE && POINTER_TYPE_P (type))
7167 *punsignedp = POINTERS_EXTEND_UNSIGNED;
7170 return default_promote_function_mode (type, mode, punsignedp, fntype,
7175 ix86_libcall_value (enum machine_mode mode)
7177 return ix86_function_value_1 (NULL, NULL, mode, mode);
7180 /* Return true iff type is returned in memory. */
7182 static bool ATTRIBUTE_UNUSED
7183 return_in_memory_32 (const_tree type, enum machine_mode mode)
7187 if (mode == BLKmode)
7190 size = int_size_in_bytes (type);
7192 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
7195 if (VECTOR_MODE_P (mode) || mode == TImode)
7197 /* User-created vectors small enough to fit in EAX. */
/* MMX/3dNow values are returned in MM0,
except when it doesn't exist or the ABI prescribes otherwise.  */
7204 return !TARGET_MMX || TARGET_VECT8_RETURNS;
7206 /* SSE values are returned in XMM0, except when it doesn't exist. */
7210 /* AVX values are returned in YMM0, except when it doesn't exist. */
7221 /* OImode shouldn't be used directly. */
7222 gcc_assert (mode != OImode);
7227 static bool ATTRIBUTE_UNUSED
7228 return_in_memory_64 (const_tree type, enum machine_mode mode)
7230 int needed_intregs, needed_sseregs;
7231 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
7234 static bool ATTRIBUTE_UNUSED
7235 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
7237 HOST_WIDE_INT size = int_size_in_bytes (type);
7239 /* __m128 is returned in xmm0. */
7240 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7241 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
/* Otherwise, the size must be exactly 1, 2, 4, or 8 bytes.  */
7245 return size != 1 && size != 2 && size != 4 && size != 8;
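/* e.g. under the MS x64 ABI an 8-byte struct is returned in %rax and a
16-byte __m128 in %xmm0, while a hypothetical 12-byte struct is
returned in memory, since 12 is not one of the allowed sizes.  */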
7249 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7251 #ifdef SUBTARGET_RETURN_IN_MEMORY
7252 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
7254 const enum machine_mode mode = type_natural_mode (type, NULL);
7258 if (ix86_function_type_abi (fntype) == MS_ABI)
7259 return return_in_memory_ms_64 (type, mode);
7261 return return_in_memory_64 (type, mode);
7264 return return_in_memory_32 (type, mode);
7268 /* When returning SSE vector types, we have a choice of either
7269 (1) being abi incompatible with a -march switch, or
7270 (2) generating an error.
7271 Given no good solution, I think the safest thing is one warning.
7272 The user won't be able to use -Werror, but....
7274 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
7275 called in response to actually generating a caller or callee that
7276 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
7277 via aggregate_value_p for general type probing from tree-ssa. */
7280 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
7282 static bool warnedsse, warnedmmx;
7284 if (!TARGET_64BIT && type)
7286 /* Look at the return type of the function, not the function type. */
7287 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
7289 if (!TARGET_SSE && !warnedsse)
7292 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7295 warning (0, "SSE vector return without SSE enabled "
7300 if (!TARGET_MMX && !warnedmmx)
7302 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7305 warning (0, "MMX vector return without MMX enabled "
7315 /* Create the va_list data type. */
/* Returns the calling-convention-specific va_list data type.
The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI.  */
7321 ix86_build_builtin_va_list_abi (enum calling_abi abi)
7323 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
/* For i386 we use a plain pointer to the argument area.  */
7326 if (!TARGET_64BIT || abi == MS_ABI)
7327 return build_pointer_type (char_type_node);
7329 record = lang_hooks.types.make_type (RECORD_TYPE);
7330 type_decl = build_decl (BUILTINS_LOCATION,
7331 TYPE_DECL, get_identifier ("__va_list_tag"), record);
7333 f_gpr = build_decl (BUILTINS_LOCATION,
7334 FIELD_DECL, get_identifier ("gp_offset"),
7335 unsigned_type_node);
7336 f_fpr = build_decl (BUILTINS_LOCATION,
7337 FIELD_DECL, get_identifier ("fp_offset"),
7338 unsigned_type_node);
7339 f_ovf = build_decl (BUILTINS_LOCATION,
7340 FIELD_DECL, get_identifier ("overflow_arg_area"),
7342 f_sav = build_decl (BUILTINS_LOCATION,
7343 FIELD_DECL, get_identifier ("reg_save_area"),
7346 va_list_gpr_counter_field = f_gpr;
7347 va_list_fpr_counter_field = f_fpr;
7349 DECL_FIELD_CONTEXT (f_gpr) = record;
7350 DECL_FIELD_CONTEXT (f_fpr) = record;
7351 DECL_FIELD_CONTEXT (f_ovf) = record;
7352 DECL_FIELD_CONTEXT (f_sav) = record;
7354 TYPE_STUB_DECL (record) = type_decl;
7355 TYPE_NAME (record) = type_decl;
7356 TYPE_FIELDS (record) = f_gpr;
7357 DECL_CHAIN (f_gpr) = f_fpr;
7358 DECL_CHAIN (f_fpr) = f_ovf;
7359 DECL_CHAIN (f_ovf) = f_sav;
7361 layout_type (record);
7363 /* The correct type is an array type of one element. */
7364 return build_array_type (record, build_index_type (size_zero_node));
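/* For reference, the record built above corresponds to the familiar
C-level declaration of the SysV x86-64 va_list:

     typedef struct {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __va_list_tag;
     typedef __va_list_tag __builtin_va_list[1];  */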
/* Set up the builtin va_list data type and, for 64-bit, the additional
calling-convention-specific va_list data types.  */
7371 ix86_build_builtin_va_list (void)
7373 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
7375 /* Initialize abi specific va_list builtin types. */
7379 if (ix86_abi == MS_ABI)
7381 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
7382 if (TREE_CODE (t) != RECORD_TYPE)
7383 t = build_variant_type_copy (t);
7384 sysv_va_list_type_node = t;
7389 if (TREE_CODE (t) != RECORD_TYPE)
7390 t = build_variant_type_copy (t);
7391 sysv_va_list_type_node = t;
7393 if (ix86_abi != MS_ABI)
7395 t = ix86_build_builtin_va_list_abi (MS_ABI);
7396 if (TREE_CODE (t) != RECORD_TYPE)
7397 t = build_variant_type_copy (t);
7398 ms_va_list_type_node = t;
7403 if (TREE_CODE (t) != RECORD_TYPE)
7404 t = build_variant_type_copy (t);
7405 ms_va_list_type_node = t;
7412 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
7415 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
7421 /* GPR size of varargs save area. */
7422 if (cfun->va_list_gpr_size)
7423 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
7425 ix86_varargs_gpr_size = 0;
7427 /* FPR size of varargs save area. We don't need it if we don't pass
7428 anything in SSE registers. */
7429 if (TARGET_SSE && cfun->va_list_fpr_size)
7430 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
7432 ix86_varargs_fpr_size = 0;
7434 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
7437 save_area = frame_pointer_rtx;
7438 set = get_varargs_alias_set ();
7440 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
7441 if (max > X86_64_REGPARM_MAX)
7442 max = X86_64_REGPARM_MAX;
7444 for (i = cum->regno; i < max; i++)
7446 mem = gen_rtx_MEM (Pmode,
7447 plus_constant (save_area, i * UNITS_PER_WORD));
7448 MEM_NOTRAP_P (mem) = 1;
7449 set_mem_alias_set (mem, set);
7450 emit_move_insn (mem, gen_rtx_REG (Pmode,
7451 x86_64_int_parameter_registers[i]));
7454 if (ix86_varargs_fpr_size)
7456 enum machine_mode smode;
/* Now emit code to save SSE registers.  The AX parameter contains the
number of SSE parameter registers used to call this function, though all
we actually check here is the zero/non-zero status.  */
7463 label = gen_label_rtx ();
7464 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
7465 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
7468 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
7469 we used movdqa (i.e. TImode) instead? Perhaps even better would
7470 be if we could determine the real mode of the data, via a hook
7471 into pass_stdarg. Ignore all that for now. */
7473 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
7474 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
7476 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
7477 if (max > X86_64_SSE_REGPARM_MAX)
7478 max = X86_64_SSE_REGPARM_MAX;
7480 for (i = cum->sse_regno; i < max; ++i)
7482 mem = plus_constant (save_area, i * 16 + ix86_varargs_gpr_size);
7483 mem = gen_rtx_MEM (smode, mem);
7484 MEM_NOTRAP_P (mem) = 1;
7485 set_mem_alias_set (mem, set);
7486 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
7488 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
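/* Resulting layout of the varargs register save area filled in above
(SysV; a sketch, offsets relative to the save area base, which is
frame_pointer_rtx here):
     [  0 ..  47]  %rdi %rsi %rdx %rcx %r8 %r9   (8 bytes each)
     [ 48 .. 175]  %xmm0 .. %xmm7                (16 bytes each)  */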
7496 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
7498 alias_set_type set = get_varargs_alias_set ();
7501 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
7505 mem = gen_rtx_MEM (Pmode,
7506 plus_constant (virtual_incoming_args_rtx,
7507 i * UNITS_PER_WORD));
7508 MEM_NOTRAP_P (mem) = 1;
7509 set_mem_alias_set (mem, set);
7511 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
7512 emit_move_insn (mem, reg);
7517 ix86_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
7518 tree type, int *pretend_size ATTRIBUTE_UNUSED,
7521 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7522 CUMULATIVE_ARGS next_cum;
7525 /* This argument doesn't appear to be used anymore. Which is good,
7526 because the old code here didn't suppress rtl generation. */
7527 gcc_assert (!no_rtl);
7532 fntype = TREE_TYPE (current_function_decl);
7534 /* For varargs, we do not want to skip the dummy va_dcl argument.
7535 For stdargs, we do want to skip the last named argument. */
7537 if (stdarg_p (fntype))
7538 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
7541 if (cum->call_abi == MS_ABI)
7542 setup_incoming_varargs_ms_64 (&next_cum);
7544 setup_incoming_varargs_64 (&next_cum);
7547 /* Checks if TYPE is of kind va_list char *. */
7550 is_va_list_char_pointer (tree type)
7554 /* For 32-bit it is always true. */
7557 canonic = ix86_canonical_va_list_type (type);
7558 return (canonic == ms_va_list_type_node
7559 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
7562 /* Implement va_start. */
7565 ix86_va_start (tree valist, rtx nextarg)
7567 HOST_WIDE_INT words, n_gpr, n_fpr;
7568 tree f_gpr, f_fpr, f_ovf, f_sav;
7569 tree gpr, fpr, ovf, sav, t;
7573 if (flag_split_stack
7574 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7576 unsigned int scratch_regno;
7578 /* When we are splitting the stack, we can't refer to the stack
7579 arguments using internal_arg_pointer, because they may be on
7580 the old stack. The split stack prologue will arrange to
7581 leave a pointer to the old stack arguments in a scratch
7582 register, which we here copy to a pseudo-register. The split
7583 stack prologue can't set the pseudo-register directly because
7584 it (the prologue) runs before any registers have been saved. */
7586 scratch_regno = split_stack_prologue_scratch_regno ();
7587 if (scratch_regno != INVALID_REGNUM)
7591 reg = gen_reg_rtx (Pmode);
7592 cfun->machine->split_stack_varargs_pointer = reg;
7595 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
7599 push_topmost_sequence ();
7600 emit_insn_after (seq, entry_of_function ());
7601 pop_topmost_sequence ();
7605 /* Only 64bit target needs something special. */
7606 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7608 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7609 std_expand_builtin_va_start (valist, nextarg);
7614 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
7615 next = expand_binop (ptr_mode, add_optab,
7616 cfun->machine->split_stack_varargs_pointer,
7617 crtl->args.arg_offset_rtx,
7618 NULL_RTX, 0, OPTAB_LIB_WIDEN);
7619 convert_move (va_r, next, 0);
7624 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7625 f_fpr = DECL_CHAIN (f_gpr);
7626 f_ovf = DECL_CHAIN (f_fpr);
7627 f_sav = DECL_CHAIN (f_ovf);
7629 valist = build_simple_mem_ref (valist);
7630 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
7631 /* The following should be folded into the MEM_REF offset. */
7632 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
7634 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
7636 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
7638 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
7641 /* Count number of gp and fp argument registers used. */
7642 words = crtl->args.info.words;
7643 n_gpr = crtl->args.info.regno;
7644 n_fpr = crtl->args.info.sse_regno;
7646 if (cfun->va_list_gpr_size)
7648 type = TREE_TYPE (gpr);
7649 t = build2 (MODIFY_EXPR, type,
7650 gpr, build_int_cst (type, n_gpr * 8));
7651 TREE_SIDE_EFFECTS (t) = 1;
7652 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7655 if (TARGET_SSE && cfun->va_list_fpr_size)
7657 type = TREE_TYPE (fpr);
7658 t = build2 (MODIFY_EXPR, type, fpr,
7659 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
7660 TREE_SIDE_EFFECTS (t) = 1;
7661 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7664 /* Find the overflow area. */
7665 type = TREE_TYPE (ovf);
7666 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7667 ovf_rtx = crtl->args.internal_arg_pointer;
7669 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
7670 t = make_tree (type, ovf_rtx);
7672 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
7673 t = build2 (MODIFY_EXPR, type, ovf, t);
7674 TREE_SIDE_EFFECTS (t) = 1;
7675 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7677 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
/* Find the register save area.
The prologue of the function saves it right above the stack frame.  */
7681 type = TREE_TYPE (sav);
7682 t = make_tree (type, frame_pointer_rtx);
7683 if (!ix86_varargs_gpr_size)
7684 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
7685 t = build2 (MODIFY_EXPR, type, sav, t);
7686 TREE_SIDE_EFFECTS (t) = 1;
7687 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
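/* Example of the counters initialized above (a sketch): in a
hypothetical "int f (int a, double b, ...)", the named arguments use one
GPR and one SSE register, so va_start leaves gp_offset = 8 and
fp_offset = 48 + 16 = 64, with overflow_arg_area pointing just past the
named stack arguments.  */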
7691 /* Implement va_arg. */
7694 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7697 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
7698 tree f_gpr, f_fpr, f_ovf, f_sav;
7699 tree gpr, fpr, ovf, sav, t;
7701 tree lab_false, lab_over = NULL_TREE;
7706 enum machine_mode nat_mode;
7707 unsigned int arg_boundary;
7709 /* Only 64bit target needs something special. */
7710 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7711 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
7713 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7714 f_fpr = DECL_CHAIN (f_gpr);
7715 f_ovf = DECL_CHAIN (f_fpr);
7716 f_sav = DECL_CHAIN (f_ovf);
7718 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
7719 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
7720 valist = build_va_arg_indirect_ref (valist);
7721 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
7722 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
7723 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
7725 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
7727 type = build_pointer_type (type);
7728 size = int_size_in_bytes (type);
7729 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7731 nat_mode = type_natural_mode (type, NULL);
7740 /* Unnamed 256bit vector mode parameters are passed on stack. */
7741 if (!TARGET_64BIT_MS_ABI)
7748 container = construct_container (nat_mode, TYPE_MODE (type),
7749 type, 0, X86_64_REGPARM_MAX,
7750 X86_64_SSE_REGPARM_MAX, intreg,
7755 /* Pull the value out of the saved registers. */
7757 addr = create_tmp_var (ptr_type_node, "addr");
7761 int needed_intregs, needed_sseregs;
7763 tree int_addr, sse_addr;
7765 lab_false = create_artificial_label (UNKNOWN_LOCATION);
7766 lab_over = create_artificial_label (UNKNOWN_LOCATION);
7768 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
7770 need_temp = (!REG_P (container)
7771 && ((needed_intregs && TYPE_ALIGN (type) > 64)
7772 || TYPE_ALIGN (type) > 128));
/* In case we are passing a structure, verify that it occupies a
consecutive block of the register save area.  If not, we need to do moves.  */
7776 if (!need_temp && !REG_P (container))
/* Verify that all registers are strictly consecutive.  */
7779 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
7783 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7785 rtx slot = XVECEXP (container, 0, i);
7786 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
7787 || INTVAL (XEXP (slot, 1)) != i * 16)
7795 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7797 rtx slot = XVECEXP (container, 0, i);
7798 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
7799 || INTVAL (XEXP (slot, 1)) != i * 8)
7811 int_addr = create_tmp_var (ptr_type_node, "int_addr");
7812 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
7815 /* First ensure that we fit completely in registers. */
7818 t = build_int_cst (TREE_TYPE (gpr),
7819 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
7820 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
7821 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7822 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7823 gimplify_and_add (t, pre_p);
7827 t = build_int_cst (TREE_TYPE (fpr),
7828 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
7829 + X86_64_REGPARM_MAX * 8);
7830 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
7831 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7832 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7833 gimplify_and_add (t, pre_p);
7836 /* Compute index to start of area used for integer regs. */
7839 /* int_addr = gpr + sav; */
7840 t = fold_build_pointer_plus (sav, gpr);
7841 gimplify_assign (int_addr, t, pre_p);
7845 /* sse_addr = fpr + sav; */
7846 t = fold_build_pointer_plus (sav, fpr);
7847 gimplify_assign (sse_addr, t, pre_p);
7851 int i, prev_size = 0;
7852 tree temp = create_tmp_var (type, "va_arg_tmp");
7855 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
7856 gimplify_assign (addr, t, pre_p);
7858 for (i = 0; i < XVECLEN (container, 0); i++)
7860 rtx slot = XVECEXP (container, 0, i);
7861 rtx reg = XEXP (slot, 0);
7862 enum machine_mode mode = GET_MODE (reg);
7868 tree dest_addr, dest;
7869 int cur_size = GET_MODE_SIZE (mode);
7871 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
7872 prev_size = INTVAL (XEXP (slot, 1));
7873 if (prev_size + cur_size > size)
7875 cur_size = size - prev_size;
7876 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
7877 if (mode == BLKmode)
7880 piece_type = lang_hooks.types.type_for_mode (mode, 1);
7881 if (mode == GET_MODE (reg))
7882 addr_type = build_pointer_type (piece_type);
7884 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
7886 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
7889 if (SSE_REGNO_P (REGNO (reg)))
7891 src_addr = sse_addr;
7892 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
7896 src_addr = int_addr;
7897 src_offset = REGNO (reg) * 8;
7899 src_addr = fold_convert (addr_type, src_addr);
7900 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
7902 dest_addr = fold_convert (daddr_type, addr);
7903 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
7904 if (cur_size == GET_MODE_SIZE (mode))
7906 src = build_va_arg_indirect_ref (src_addr);
7907 dest = build_va_arg_indirect_ref (dest_addr);
7909 gimplify_assign (dest, src, pre_p);
7914 = build_call_expr (implicit_built_in_decls[BUILT_IN_MEMCPY],
7915 3, dest_addr, src_addr,
7916 size_int (cur_size));
7917 gimplify_and_add (copy, pre_p);
7919 prev_size += cur_size;
7925 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
7926 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
7927 gimplify_assign (gpr, t, pre_p);
7932 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
7933 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
7934 gimplify_assign (fpr, t, pre_p);
7937 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
7939 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
7942 /* ... otherwise out of the overflow area. */
/* When we align a parameter on the stack for the caller, if the
parameter's alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will
be aligned at MAX_SUPPORTED_STACK_ALIGNMENT instead.  Match the callee
here with the caller.  */
7948 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
7949 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
7950 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
7952 /* Care for on-stack alignment if needed. */
7953 if (arg_boundary <= 64 || size == 0)
7957 HOST_WIDE_INT align = arg_boundary / 8;
7958 t = fold_build_pointer_plus_hwi (ovf, align - 1);
7959 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
7960 build_int_cst (TREE_TYPE (t), -align));
7963 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
7964 gimplify_assign (addr, t, pre_p);
7966 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
7967 gimplify_assign (unshare_expr (ovf), t, pre_p);
7970 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
7972 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
7973 addr = fold_convert (ptrtype, addr);
7976 addr = build_va_arg_indirect_ref (addr);
7977 return build_va_arg_indirect_ref (addr);
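/* Rough sketch of the sequence gimplified above for va_arg (ap, int)
(illustrative pseudo-C, not the exact trees):

     if (ap->gp_offset >= 6 * 8) goto overflow;
     addr = ap->reg_save_area + ap->gp_offset;
     ap->gp_offset += 8;
     goto done;
   overflow:
     addr = ap->overflow_arg_area;        // aligned first if needed
     ap->overflow_arg_area = addr + 8;
   done:
     result = *(int *) addr;  */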
7980 /* Return true if OPNUM's MEM should be matched
7981 in movabs* patterns. */
7984 ix86_check_movabs (rtx insn, int opnum)
7988 set = PATTERN (insn);
7989 if (GET_CODE (set) == PARALLEL)
7990 set = XVECEXP (set, 0, 0);
7991 gcc_assert (GET_CODE (set) == SET);
7992 mem = XEXP (set, opnum);
7993 while (GET_CODE (mem) == SUBREG)
7994 mem = SUBREG_REG (mem);
7995 gcc_assert (MEM_P (mem));
7996 return volatile_ok || !MEM_VOLATILE_P (mem);
7999 /* Initialize the table of extra 80387 mathematical constants. */
8002 init_ext_80387_constants (void)
8004 static const char * cst[5] =
8006 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
8007 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
8008 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
8009 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
8010 "3.1415926535897932385128089594061862044", /* 4: fldpi */
8014 for (i = 0; i < 5; i++)
8016 real_from_string (&ext_80387_constants_table[i], cst[i]);
8017 /* Ensure each constant is rounded to XFmode precision. */
8018 real_convert (&ext_80387_constants_table[i],
8019 XFmode, &ext_80387_constants_table[i]);
8022 ext_80387_constants_init = 1;
8025 /* Return non-zero if the constant is something that
8026 can be loaded with a special instruction. */
8029 standard_80387_constant_p (rtx x)
8031 enum machine_mode mode = GET_MODE (x);
8035 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
8038 if (x == CONST0_RTX (mode))
8040 if (x == CONST1_RTX (mode))
8043 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8045 /* For XFmode constants, try to find a special 80387 instruction when
8046 optimizing for size or on those CPUs that benefit from them. */
8048 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
8052 if (! ext_80387_constants_init)
8053 init_ext_80387_constants ();
8055 for (i = 0; i < 5; i++)
8056 if (real_identical (&r, &ext_80387_constants_table[i]))
/* A load of the constant -0.0 or -1.0 will be split into an
fldz;fchs or fld1;fchs sequence.  */
8062 if (real_isnegzero (&r))
8064 if (real_identical (&r, &dconstm1))
8070 /* Return the opcode of the special instruction to be used to load
8074 standard_80387_constant_opcode (rtx x)
8076 switch (standard_80387_constant_p (x))
8100 /* Return the CONST_DOUBLE representing the 80387 constant that is
8101 loaded by the specified special instruction. The argument IDX
8102 matches the return value from standard_80387_constant_p. */
8105 standard_80387_constant_rtx (int idx)
8109 if (! ext_80387_constants_init)
8110 init_ext_80387_constants ();
8126 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
/* Return 1 if X is all zeros and 2 if X is all ones
in a supported SSE vector mode.  */
8134 standard_sse_constant_p (rtx x)
8136 enum machine_mode mode = GET_MODE (x);
8138 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
8140 if (vector_all_ones_operand (x, mode))
8156 /* Return the opcode of the special instruction to be used to load
8160 standard_sse_constant_opcode (rtx insn, rtx x)
8162 switch (standard_sse_constant_p (x))
8165 switch (get_attr_mode (insn))
8168 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8169 return "%vpxor\t%0, %d0";
8171 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8172 return "%vxorpd\t%0, %d0";
8174 return "%vxorps\t%0, %d0";
8177 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8178 return "vpxor\t%x0, %x0, %x0";
8180 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8181 return "vxorpd\t%x0, %x0, %x0";
8183 return "vxorps\t%x0, %x0, %x0";
8190 return "%vpcmpeqd\t%0, %d0";
8197 /* Returns true if OP contains a symbol reference. */
8200 symbolic_reference_mentioned_p (rtx op)
8205 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
8208 fmt = GET_RTX_FORMAT (GET_CODE (op));
8209 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
8215 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
8216 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
8220 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
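/* Illustration of the walk above: for (plus (reg) (symbol_ref "x")) the
   recursion through the 'e' operands finds the SYMBOL_REF and the
   function returns true, while (plus (reg) (const_int 4)) contains no
   symbolic subexpression and yields false.  */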
8227 /* Return true if it is appropriate to emit `ret' instructions in the
8228 body of a function. Do this only if the epilogue is simple, needing a
8229 couple of insns. Prior to reloading, we can't tell how many registers
8230 must be saved, so return false then. Return false if there is no frame
8231 marker to de-allocate. */
8234 ix86_can_use_return_insn_p (void)
8236 struct ix86_frame frame;
8238 if (! reload_completed || frame_pointer_needed)
8241 /* Don't allow more than 32k pop, since that's all we can do
8242 with one instruction. */
8243 if (crtl->args.pops_args && crtl->args.size >= 32768)
8246 ix86_compute_frame_layout (&frame);
8247 return (frame.stack_pointer_offset == UNITS_PER_WORD
8248 && (frame.nregs + frame.nsseregs) == 0);
8251 /* Value should be nonzero if functions must have frame pointers.
8252 Zero means the frame pointer need not be set up (and parms may
8253 be accessed via the stack pointer) in functions that seem suitable. */
8256 ix86_frame_pointer_required (void)
8258 /* If we accessed previous frames, then the generated code expects
8259 to be able to access the saved ebp value in our frame. */
8260 if (cfun->machine->accesses_prev_frame)
8263 /* Several x86 OSes need a frame pointer for other reasons,
8264 usually pertaining to setjmp. */
8265 if (SUBTARGET_FRAME_POINTER_REQUIRED)
8268 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
8269 turns off the frame pointer by default. Turn it back on now if
8270 we've not got a leaf function. */
8271 if (TARGET_OMIT_LEAF_FRAME_POINTER
8272 && (!current_function_is_leaf
8273 || ix86_current_function_calls_tls_descriptor))
8276 if (crtl->profile && !flag_fentry)
8282 /* Record that the current function accesses previous call frames. */
8285 ix86_setup_frame_addresses (void)
8287 cfun->machine->accesses_prev_frame = 1;
8290 #ifndef USE_HIDDEN_LINKONCE
8291 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
8292 # define USE_HIDDEN_LINKONCE 1
8294 # define USE_HIDDEN_LINKONCE 0
8298 static int pic_labels_used;
8300 /* Fills in the label name that should be used for a pc thunk for
8301 the given register. */
8304 get_pc_thunk_name (char name[32], unsigned int regno)
8306 gcc_assert (!TARGET_64BIT);
8308 if (USE_HIDDEN_LINKONCE)
8309 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
8311 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
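/* For example, with the usual reg_names[] contents, regno == BX_REG
   produces "__i686.get_pc_thunk.bx" in the hidden-linkonce case, and an
   internal label derived from "LPR" and the register number otherwise.  */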
8315 /* This function generates code for -fpic that loads %ebx with
8316 the return address of the caller and then returns. */
8319 ix86_code_end (void)
8324 #ifdef TARGET_SOLARIS
8325 solaris_code_end ();
8328 for (regno = AX_REG; regno <= SP_REG; regno++)
8333 if (!(pic_labels_used & (1 << regno)))
8336 get_pc_thunk_name (name, regno);
8338 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
8339 get_identifier (name),
8340 build_function_type_list (void_type_node, NULL_TREE));
8341 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
8342 NULL_TREE, void_type_node);
8343 TREE_PUBLIC (decl) = 1;
8344 TREE_STATIC (decl) = 1;
8349 switch_to_section (darwin_sections[text_coal_section]);
8350 fputs ("\t.weak_definition\t", asm_out_file);
8351 assemble_name (asm_out_file, name);
8352 fputs ("\n\t.private_extern\t", asm_out_file);
8353 assemble_name (asm_out_file, name);
8354 putc ('\n', asm_out_file);
8355 ASM_OUTPUT_LABEL (asm_out_file, name);
8356 DECL_WEAK (decl) = 1;
8360 if (USE_HIDDEN_LINKONCE)
8362 DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
8364 targetm.asm_out.unique_section (decl, 0);
8365 switch_to_section (get_named_section (decl, NULL, 0));
8367 targetm.asm_out.globalize_label (asm_out_file, name);
8368 fputs ("\t.hidden\t", asm_out_file);
8369 assemble_name (asm_out_file, name);
8370 putc ('\n', asm_out_file);
8371 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
8375 switch_to_section (text_section);
8376 ASM_OUTPUT_LABEL (asm_out_file, name);
8379 DECL_INITIAL (decl) = make_node (BLOCK);
8380 current_function_decl = decl;
8381 init_function_start (decl);
8382 first_function_block_is_cold = false;
8383 /* Make sure unwind info is emitted for the thunk if needed. */
8384 final_start_function (emit_barrier (), asm_out_file, 1);
8386 /* Pad stack IP move with 4 instructions (two NOPs count
8387 as one instruction). */
8388 if (TARGET_PAD_SHORT_FUNCTION)
8393 fputs ("\tnop\n", asm_out_file);
8396 xops[0] = gen_rtx_REG (Pmode, regno);
8397 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
8398 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
8399 fputs ("\tret\n", asm_out_file);
8400 final_end_function ();
8401 init_insn_lengths ();
8402 free_after_compilation (cfun);
8404 current_function_decl = NULL;
8407 if (flag_split_stack)
8408 file_end_indicate_split_stack ();
8411 /* Emit code for the SET_GOT patterns. */
8414 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
8420 if (TARGET_VXWORKS_RTP && flag_pic)
8422 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
8423 xops[2] = gen_rtx_MEM (Pmode,
8424 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
8425 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
8427 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
8428 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
8429 an unadorned address. */
8430 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
8431 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
8432 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
8436 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
8440 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
8442 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
8445 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8446 is what will be referenced by the Mach-O PIC subsystem. */
8448 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8451 targetm.asm_out.internal_label (asm_out_file, "L",
8452 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
8457 get_pc_thunk_name (name, REGNO (dest));
8458 pic_labels_used |= 1 << REGNO (dest);
8460 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
8461 xops[2] = gen_rtx_MEM (QImode, xops[2]);
8462 output_asm_insn ("call\t%X2", xops);
8463 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8464 is what will be referenced by the Mach-O PIC subsystem. */
8467 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8469 targetm.asm_out.internal_label (asm_out_file, "L",
8470 CODE_LABEL_NUMBER (label));
8475 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
8480 /* Generate an "push" pattern for input ARG. */
8485 struct machine_function *m = cfun->machine;
8487 if (m->fs.cfa_reg == stack_pointer_rtx)
8488 m->fs.cfa_offset += UNITS_PER_WORD;
8489 m->fs.sp_offset += UNITS_PER_WORD;
8491 return gen_rtx_SET (VOIDmode,
8493 gen_rtx_PRE_DEC (Pmode,
8494 stack_pointer_rtx)),
8498 /* Generate an "pop" pattern for input ARG. */
8503 return gen_rtx_SET (VOIDmode,
8506 gen_rtx_POST_INC (Pmode,
8507 stack_pointer_rtx)));
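/* The generated RTL is a plain (set (mem (pre_dec sp)) arg) for a push
   and (set arg (mem (post_inc sp))) for a pop; note that gen_push also
   updates the CFA/SP offsets tracked in cfun->machine->fs, while gen_pop
   leaves that bookkeeping to its callers.  */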
8510 /* Return >= 0 if there is an unused call-clobbered register available
8511 for the entire function. */
8514 ix86_select_alt_pic_regnum (void)
8516 if (current_function_is_leaf
8518 && !ix86_current_function_calls_tls_descriptor)
8521 /* Can't use the same register for both PIC and DRAP. */
8523 drap = REGNO (crtl->drap_reg);
8526 for (i = 2; i >= 0; --i)
8527 if (i != drap && !df_regs_ever_live_p (i))
8531 return INVALID_REGNUM;
8534 /* Return TRUE if we need to save REGNO. */
8537 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
8539 if (pic_offset_table_rtx
8540 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
8541 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
8543 || crtl->calls_eh_return
8544 || crtl->uses_const_pool))
8545 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
8547 if (crtl->calls_eh_return && maybe_eh_return)
8552 unsigned test = EH_RETURN_DATA_REGNO (i);
8553 if (test == INVALID_REGNUM)
8560 if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
8563 return (df_regs_ever_live_p (regno)
8564 && !call_used_regs[regno]
8565 && !fixed_regs[regno]
8566 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
8569 /* Return the number of saved general purpose registers. */
8572 ix86_nsaved_regs (void)
8577 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8578 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8583 /* Return the number of saved SSE registers. */
8586 ix86_nsaved_sseregs (void)
8591 if (!TARGET_64BIT_MS_ABI)
8593 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8594 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8599 /* Given FROM and TO register numbers, say whether this elimination is
8600 allowed. If stack alignment is needed, we can only replace argument
8601 pointer with hard frame pointer, or replace frame pointer with stack
8602 pointer. Otherwise, frame pointer elimination is automatically
8603 handled and all other eliminations are valid. */
8606 ix86_can_eliminate (const int from, const int to)
8608 if (stack_realign_fp)
8609 return ((from == ARG_POINTER_REGNUM
8610 && to == HARD_FRAME_POINTER_REGNUM)
8611 || (from == FRAME_POINTER_REGNUM
8612 && to == STACK_POINTER_REGNUM));
8614 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
8617 /* Return the offset between two registers, one to be eliminated, and the other
8618 its replacement, at the start of a routine. */
8621 ix86_initial_elimination_offset (int from, int to)
8623 struct ix86_frame frame;
8624 ix86_compute_frame_layout (&frame);
8626 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8627 return frame.hard_frame_pointer_offset;
8628 else if (from == FRAME_POINTER_REGNUM
8629 && to == HARD_FRAME_POINTER_REGNUM)
8630 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
8633 gcc_assert (to == STACK_POINTER_REGNUM);
8635 if (from == ARG_POINTER_REGNUM)
8636 return frame.stack_pointer_offset;
8638 gcc_assert (from == FRAME_POINTER_REGNUM);
8639 return frame.stack_pointer_offset - frame.frame_pointer_offset;
8643 /* In a dynamically-aligned function, we can't know the offset from
8644 stack pointer to frame pointer, so we must ensure that setjmp
8645 eliminates fp against the hard fp (%ebp) rather than trying to
8646 index from %esp up to the top of the frame across a gap that is
8647 of unknown (at compile-time) size. */
8649 ix86_builtin_setjmp_frame_value (void)
8651 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
8654 /* When using -fsplit-stack, the allocation routines set a field in
8655 the TCB to the bottom of the stack plus this much space, measured
8658 #define SPLIT_STACK_AVAILABLE 256
8660 /* Fill structure ix86_frame with information about the frame of the currently computed function. */
8663 ix86_compute_frame_layout (struct ix86_frame *frame)
8665 unsigned int stack_alignment_needed;
8666 HOST_WIDE_INT offset;
8667 unsigned int preferred_alignment;
8668 HOST_WIDE_INT size = get_frame_size ();
8669 HOST_WIDE_INT to_allocate;
8671 frame->nregs = ix86_nsaved_regs ();
8672 frame->nsseregs = ix86_nsaved_sseregs ();
8674 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
8675 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
8677 /* The 64-bit MS ABI seems to require stack alignment to always be 16,
8678 except in function prologues and in leaf functions. */
8679 if ((TARGET_64BIT_MS_ABI && preferred_alignment < 16)
8680 && (!current_function_is_leaf || cfun->calls_alloca != 0
8681 || ix86_current_function_calls_tls_descriptor))
8683 preferred_alignment = 16;
8684 stack_alignment_needed = 16;
8685 crtl->preferred_stack_boundary = 128;
8686 crtl->stack_alignment_needed = 128;
8689 gcc_assert (!size || stack_alignment_needed);
8690 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
8691 gcc_assert (preferred_alignment <= stack_alignment_needed);
8693 /* For SEH we have to limit the amount of code movement into the prologue.
8694 At present we do this via a BLOCKAGE, at which point there's very little
8695 scheduling that can be done, which means that there's very little point
8696 in doing anything except PUSHs. */
8698 cfun->machine->use_fast_prologue_epilogue = false;
8700 /* During reload iteration the number of saved registers can change.
8701 Recompute the value as needed. Do not recompute it when the number of
8702 registers didn't change, as reload does multiple calls to the function
8703 and does not expect the decision to change within a single iteration. */
8704 else if (!optimize_function_for_size_p (cfun)
8705 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
8707 int count = frame->nregs;
8708 struct cgraph_node *node = cgraph_get_node (current_function_decl);
8710 cfun->machine->use_fast_prologue_epilogue_nregs = count;
8712 /* The fast prologue uses move instead of push to save registers. This
8713 is significantly longer, but also executes faster as modern hardware
8714 can execute the moves in parallel, but can't do that for push/pop.
8716 Be careful about choosing what prologue to emit: When function takes
8717 many instructions to execute we may use slow version as well as in
8718 case function is known to be outside hot spot (this is known with
8719 feedback only). Weight the size of function by number of registers
8720 to save as it is cheap to use one or two push instructions but very
8721 slow to use many of them. */
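  /* For instance, "push %ebx" encodes in a single byte while an
     equivalent "movl %ebx, disp(%esp)" takes several bytes, so with many
     registers the push sequence is considerably smaller; the moves only
     pay off in functions hot enough for the extra parallelism to
     matter.  */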
8723 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
8724 if (node->frequency < NODE_FREQUENCY_NORMAL
8725 || (flag_branch_probabilities
8726 && node->frequency < NODE_FREQUENCY_HOT))
8727 cfun->machine->use_fast_prologue_epilogue = false;
8729 cfun->machine->use_fast_prologue_epilogue
8730 = !expensive_function_p (count);
8732 if (TARGET_PROLOGUE_USING_MOVE
8733 && cfun->machine->use_fast_prologue_epilogue)
8734 frame->save_regs_using_mov = true;
8736 frame->save_regs_using_mov = false;
8738 /* If static stack checking is enabled and done with probes, the registers
8739 need to be saved before allocating the frame. */
8740 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
8741 frame->save_regs_using_mov = false;
8743 /* Skip return address. */
8744 offset = UNITS_PER_WORD;
8746 /* Skip pushed static chain. */
8747 if (ix86_static_chain_on_stack)
8748 offset += UNITS_PER_WORD;
8750 /* Skip saved base pointer. */
8751 if (frame_pointer_needed)
8752 offset += UNITS_PER_WORD;
8753 frame->hfp_save_offset = offset;
8755 /* The traditional frame pointer location is at the top of the frame. */
8756 frame->hard_frame_pointer_offset = offset;
8758 /* Register save area */
8759 offset += frame->nregs * UNITS_PER_WORD;
8760 frame->reg_save_offset = offset;
8762 /* Align and set SSE register save area. */
8763 if (frame->nsseregs)
8765 /* The only ABI that has saved SSE registers (Win64) also has a
8766 16-byte aligned default stack, and thus we don't need to be
8767 within the re-aligned local stack frame to save them. */
8768 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
8769 offset = (offset + 16 - 1) & -16;
8770 offset += frame->nsseregs * 16;
8772 frame->sse_reg_save_offset = offset;
8774 /* The re-aligned stack starts here. Values before this point are not
8775 directly comparable with values below this point. In order to make
8776 sure that no value happens to be the same before and after, force
8777 the alignment computation below to add a non-zero value. */
8778 if (stack_realign_fp)
8779 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
8782 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
8783 offset += frame->va_arg_size;
8785 /* Align start of frame for local function. */
8786 if (stack_realign_fp
8787 || offset != frame->sse_reg_save_offset
8789 || !current_function_is_leaf
8790 || cfun->calls_alloca
8791 || ix86_current_function_calls_tls_descriptor)
8792 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
8794 /* Frame pointer points here. */
8795 frame->frame_pointer_offset = offset;
8799 /* Add outgoing arguments area. Can be skipped if we eliminated
8800 all the function calls as dead code.
8801 Skipping is, however, impossible when the function calls alloca:
8802 the alloca expander assumes that the last crtl->outgoing_args_size
8803 bytes of the stack frame are unused. */
8804 if (ACCUMULATE_OUTGOING_ARGS
8805 && (!current_function_is_leaf || cfun->calls_alloca
8806 || ix86_current_function_calls_tls_descriptor))
8808 offset += crtl->outgoing_args_size;
8809 frame->outgoing_arguments_size = crtl->outgoing_args_size;
8812 frame->outgoing_arguments_size = 0;
8814 /* Align stack boundary. Only needed if we're calling another function
8816 if (!current_function_is_leaf || cfun->calls_alloca
8817 || ix86_current_function_calls_tls_descriptor)
8818 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
8820 /* We've reached end of stack frame. */
8821 frame->stack_pointer_offset = offset;
8823 /* Size prologue needs to allocate. */
8824 to_allocate = offset - frame->sse_reg_save_offset;
8826 if ((!to_allocate && frame->nregs <= 1)
8827 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
8828 frame->save_regs_using_mov = false;
8830 if (ix86_using_red_zone ()
8831 && current_function_sp_is_unchanging
8832 && current_function_is_leaf
8833 && !ix86_current_function_calls_tls_descriptor)
8835 frame->red_zone_size = to_allocate;
8836 if (frame->save_regs_using_mov)
8837 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
8838 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
8839 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
8842 frame->red_zone_size = 0;
8843 frame->stack_pointer_offset -= frame->red_zone_size;
8845 /* The SEH frame pointer location is near the bottom of the frame.
8846 This is enforced by the fact that the difference between the
8847 stack pointer and the frame pointer is limited to 240 bytes in
8848 the unwind data structure. */
8853 /* If we can leave the frame pointer where it is, do so. */
8854 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
8855 if (diff > 240 || (diff & 15) != 0)
8857 /* Ideally we'd determine what portion of the local stack frame
8858 (within the constraint of the lowest 240) is most heavily used.
8859 But without that complication, simply bias the frame pointer
8860 by 128 bytes so as to maximize the amount of the local stack
8861 frame that is addressable with 8-bit offsets. */
8862 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
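/* A sketch of the frame layout computed above (the stack grows downward;
   offsets are measured from the CFA):

	return address			<- UNITS_PER_WORD
	[pushed static chain]
	[saved frame pointer]		<- hard_frame_pointer_offset
	saved GP registers		<- reg_save_offset
	saved SSE registers		<- sse_reg_save_offset (16-byte aligned)
	va_arg register save area
	local variables			<- frame_pointer_offset
	outgoing arguments		<- stack_pointer_offset

   with the caveat that under SEH the hard frame pointer may be biased
   toward the bottom of the frame, as arranged just above.  */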
8867 /* This is semi-inlined memory_address_length, but simplified
8868 since we know that we're always dealing with reg+offset, and
8869 to avoid having to create and discard all that rtl. */
8872 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
8878 /* EBP and R13 cannot be encoded without an offset. */
8879 len = (regno == BP_REG || regno == R13_REG);
8881 else if (IN_RANGE (offset, -128, 127))
8884 /* ESP and R12 must be encoded with a SIB byte. */
8885 if (regno == SP_REG || regno == R12_REG)
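/* Worked examples, assuming the elided tail adds one byte for the SIB
   case: (%esp, disp 0) costs 1 byte for the SIB; (%ebp, disp 0) costs 1
   byte because EBP/R13 cannot be encoded without a displacement;
   (%ebx, disp 50) costs 1 byte for the disp8; (%ebx, disp 200) costs 4
   bytes for the disp32; (%ebx, disp 0) costs nothing extra.  */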
8891 /* Return an RTX that points to CFA_OFFSET within the stack frame.
8892 The valid base registers are taken from CFUN->MACHINE->FS. */
8895 choose_baseaddr (HOST_WIDE_INT cfa_offset)
8897 const struct machine_function *m = cfun->machine;
8898 rtx base_reg = NULL;
8899 HOST_WIDE_INT base_offset = 0;
8901 if (m->use_fast_prologue_epilogue)
8903 /* Choose the base register most likely to allow the most scheduling
8904 opportunities. Generally FP is valid throughout the function,
8905 while DRAP must be reloaded within the epilogue. But choose either
8906 over the SP due to increased encoding size. */
8910 base_reg = hard_frame_pointer_rtx;
8911 base_offset = m->fs.fp_offset - cfa_offset;
8913 else if (m->fs.drap_valid)
8915 base_reg = crtl->drap_reg;
8916 base_offset = 0 - cfa_offset;
8918 else if (m->fs.sp_valid)
8920 base_reg = stack_pointer_rtx;
8921 base_offset = m->fs.sp_offset - cfa_offset;
8926 HOST_WIDE_INT toffset;
8929 /* Choose the base register with the smallest address encoding.
8930 With a tie, choose FP > DRAP > SP. */
8933 base_reg = stack_pointer_rtx;
8934 base_offset = m->fs.sp_offset - cfa_offset;
8935 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
8937 if (m->fs.drap_valid)
8939 toffset = 0 - cfa_offset;
8940 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
8943 base_reg = crtl->drap_reg;
8944 base_offset = toffset;
8950 toffset = m->fs.fp_offset - cfa_offset;
8951 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
8954 base_reg = hard_frame_pointer_rtx;
8955 base_offset = toffset;
8960 gcc_assert (base_reg != NULL);
8962 return plus_constant (base_reg, base_offset);
8965 /* Emit code to save registers in the prologue. */
8968 ix86_emit_save_regs (void)
8973 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
8974 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8976 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
8977 RTX_FRAME_RELATED_P (insn) = 1;
8981 /* Emit a single register save at CFA - CFA_OFFSET. */
8984 ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
8985 HOST_WIDE_INT cfa_offset)
8987 struct machine_function *m = cfun->machine;
8988 rtx reg = gen_rtx_REG (mode, regno);
8989 rtx mem, addr, base, insn;
8991 addr = choose_baseaddr (cfa_offset);
8992 mem = gen_frame_mem (mode, addr);
8994 /* For SSE saves, we need to indicate the 128-bit alignment. */
8995 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
8997 insn = emit_move_insn (mem, reg);
8998 RTX_FRAME_RELATED_P (insn) = 1;
9001 if (GET_CODE (base) == PLUS)
9002 base = XEXP (base, 0);
9003 gcc_checking_assert (REG_P (base));
9005 /* When saving registers into a re-aligned local stack frame, avoid
9006 any tricky guessing by dwarf2out. */
9007 if (m->fs.realigned)
9009 gcc_checking_assert (stack_realign_drap);
9011 if (regno == REGNO (crtl->drap_reg))
9013 /* A bit of a hack. We force the DRAP register to be saved in
9014 the re-aligned stack frame, which provides us with a copy
9015 of the CFA that will last past the prologue. Install it. */
9016 gcc_checking_assert (cfun->machine->fs.fp_valid);
9017 addr = plus_constant (hard_frame_pointer_rtx,
9018 cfun->machine->fs.fp_offset - cfa_offset);
9019 mem = gen_rtx_MEM (mode, addr);
9020 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
9024 /* The frame pointer is a stable reference within the
9025 aligned frame. Use it. */
9026 gcc_checking_assert (cfun->machine->fs.fp_valid);
9027 addr = plus_constant (hard_frame_pointer_rtx,
9028 cfun->machine->fs.fp_offset - cfa_offset);
9029 mem = gen_rtx_MEM (mode, addr);
9030 add_reg_note (insn, REG_CFA_EXPRESSION,
9031 gen_rtx_SET (VOIDmode, mem, reg));
9035 /* The memory may not be relative to the current CFA register,
9036 which means that we may need to generate a new pattern for
9037 use by the unwind info. */
9038 else if (base != m->fs.cfa_reg)
9040 addr = plus_constant (m->fs.cfa_reg, m->fs.cfa_offset - cfa_offset);
9041 mem = gen_rtx_MEM (mode, addr);
9042 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
9046 /* Emit code to save registers using MOV insns.
9047 First register is stored at CFA - CFA_OFFSET. */
9049 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
9053 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9054 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9056 ix86_emit_save_reg_using_mov (Pmode, regno, cfa_offset);
9057 cfa_offset -= UNITS_PER_WORD;
9061 /* Emit code to save SSE registers using MOV insns.
9062 First register is stored at CFA - CFA_OFFSET. */
9064 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
9068 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9069 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9071 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
9076 static GTY(()) rtx queued_cfa_restores;
9078 /* Add a REG_CFA_RESTORE note for REG to INSN, or queue it until the next
9079 stack manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
9080 Don't add the note if the previously saved value will be left untouched
9081 within the stack red zone until return, as unwinders can find the same
9082 value in the register and on the stack. */
9085 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
9087 if (cfa_offset <= cfun->machine->fs.red_zone_offset)
9092 add_reg_note (insn, REG_CFA_RESTORE, reg);
9093 RTX_FRAME_RELATED_P (insn) = 1;
9097 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
9100 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
9103 ix86_add_queued_cfa_restore_notes (rtx insn)
9106 if (!queued_cfa_restores)
9108 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
9110 XEXP (last, 1) = REG_NOTES (insn);
9111 REG_NOTES (insn) = queued_cfa_restores;
9112 queued_cfa_restores = NULL_RTX;
9113 RTX_FRAME_RELATED_P (insn) = 1;
9116 /* Expand prologue or epilogue stack adjustment.
9117 The pattern exists to put a dependency on all ebp-based memory accesses.
9118 STYLE should be negative if instructions should be marked as frame related,
9119 zero if the %r11 register is live and cannot be freely used, and positive otherwise. */
9123 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
9124 int style, bool set_cfa)
9126 struct machine_function *m = cfun->machine;
9128 bool add_frame_related_expr = false;
9131 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
9132 else if (x86_64_immediate_operand (offset, DImode))
9133 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
9137 /* r11 is used by indirect sibcall return as well, set before the
9138 epilogue and used after the epilogue. */
9140 tmp = gen_rtx_REG (DImode, R11_REG);
9143 gcc_assert (src != hard_frame_pointer_rtx
9144 && dest != hard_frame_pointer_rtx);
9145 tmp = hard_frame_pointer_rtx;
9147 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
9149 add_frame_related_expr = true;
9151 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
9154 insn = emit_insn (insn);
9156 ix86_add_queued_cfa_restore_notes (insn);
9162 gcc_assert (m->fs.cfa_reg == src);
9163 m->fs.cfa_offset += INTVAL (offset);
9164 m->fs.cfa_reg = dest;
9166 r = gen_rtx_PLUS (Pmode, src, offset);
9167 r = gen_rtx_SET (VOIDmode, dest, r);
9168 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
9169 RTX_FRAME_RELATED_P (insn) = 1;
9173 RTX_FRAME_RELATED_P (insn) = 1;
9174 if (add_frame_related_expr)
9176 rtx r = gen_rtx_PLUS (Pmode, src, offset);
9177 r = gen_rtx_SET (VOIDmode, dest, r);
9178 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
9182 if (dest == stack_pointer_rtx)
9184 HOST_WIDE_INT ooffset = m->fs.sp_offset;
9185 bool valid = m->fs.sp_valid;
9187 if (src == hard_frame_pointer_rtx)
9189 valid = m->fs.fp_valid;
9190 ooffset = m->fs.fp_offset;
9192 else if (src == crtl->drap_reg)
9194 valid = m->fs.drap_valid;
9199 /* Else there are two possibilities: SP itself, which we set
9200 up as the default above, or EH_RETURN_STACKADJ_RTX, which is
9201 taken care of by hand along the eh_return path. */
9202 gcc_checking_assert (src == stack_pointer_rtx
9203 || offset == const0_rtx);
9206 m->fs.sp_offset = ooffset - INTVAL (offset);
9207 m->fs.sp_valid = valid;
9211 /* Find an available register to be used as the dynamic realign argument
9212 pointer register. Such a register will be written in the prologue and
9213 used at the beginning of the body, so it must not be
9214 1. a parameter-passing register.
9216 We reuse the static-chain register if it is available. Otherwise, we
9217 use DI for i386 and R13 for x86-64. We chose R13 since it has
9220 Return: the regno of chosen register. */
9223 find_drap_reg (void)
9225 tree decl = cfun->decl;
9229 /* Use R13 for a nested function or a function that needs a static chain.
9230 Since a function with a tail call may use any caller-saved
9231 register in the epilogue, DRAP must not use a caller-saved
9232 register in such a case. */
9233 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
9240 /* Use DI for a nested function or a function that needs a static chain.
9241 Since a function with a tail call may use any caller-saved
9242 register in the epilogue, DRAP must not use a caller-saved
9243 register in such a case. */
9244 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
9247 /* Reuse static chain register if it isn't used for parameter
9249 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
9251 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
9252 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
9259 /* Return minimum incoming stack alignment. */
9262 ix86_minimum_incoming_stack_boundary (bool sibcall)
9264 unsigned int incoming_stack_boundary;
9266 /* Prefer the one specified at command line. */
9267 if (ix86_user_incoming_stack_boundary)
9268 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
9269 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
9270 if -mstackrealign is used, this isn't a sibcall check, and the
9271 estimated stack alignment is 128 bits. */
9274 && ix86_force_align_arg_pointer
9275 && crtl->stack_alignment_estimated == 128)
9276 incoming_stack_boundary = MIN_STACK_BOUNDARY;
9278 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
9280 /* Incoming stack alignment can be changed on individual functions
9281 via force_align_arg_pointer attribute. We use the smallest
9282 incoming stack boundary. */
9283 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
9284 && lookup_attribute (ix86_force_align_arg_pointer_string,
9285 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
9286 incoming_stack_boundary = MIN_STACK_BOUNDARY;
9288 /* The incoming stack frame has to be aligned at least at
9289 parm_stack_boundary. */
9290 if (incoming_stack_boundary < crtl->parm_stack_boundary)
9291 incoming_stack_boundary = crtl->parm_stack_boundary;
9293 /* The stack at the entry of main is aligned by the runtime. We use
9294 the smallest incoming stack boundary. */
9295 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
9296 && DECL_NAME (current_function_decl)
9297 && MAIN_NAME_P (DECL_NAME (current_function_decl))
9298 && DECL_FILE_SCOPE_P (current_function_decl))
9299 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
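  /* To summarize the precedence above: an explicit user setting wins,
     the force_align_arg_pointer attribute (and the 32-bit -mstackrealign
     case) can lower the result to MIN_STACK_BOUNDARY, parm_stack_boundary
     imposes a floor, and main () is finally clamped down to
     MAIN_STACK_BOUNDARY.  */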
9301 return incoming_stack_boundary;
9304 /* Update incoming stack boundary and estimated stack alignment. */
9307 ix86_update_stack_boundary (void)
9309 ix86_incoming_stack_boundary
9310 = ix86_minimum_incoming_stack_boundary (false);
9312 /* x86_64 varargs need 16-byte stack alignment for the register save
9316 && crtl->stack_alignment_estimated < 128)
9317 crtl->stack_alignment_estimated = 128;
9320 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
9321 needed or an rtx for DRAP otherwise. */
9324 ix86_get_drap_rtx (void)
9326 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
9327 crtl->need_drap = true;
9329 if (stack_realign_drap)
9331 /* Assign DRAP to vDRAP and return vDRAP. */
9332 unsigned int regno = find_drap_reg ();
9337 arg_ptr = gen_rtx_REG (Pmode, regno);
9338 crtl->drap_reg = arg_ptr;
9341 drap_vreg = copy_to_reg (arg_ptr);
9345 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
9348 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
9349 RTX_FRAME_RELATED_P (insn) = 1;
9357 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
9360 ix86_internal_arg_pointer (void)
9362 return virtual_incoming_args_rtx;
9365 struct scratch_reg {
9370 /* Return a short-lived scratch register for use on function entry.
9371 In 32-bit mode, it is valid only after the registers are saved
9372 in the prologue. This register must be released by means of
9373 release_scratch_register_on_entry once it is dead. */
9376 get_scratch_register_on_entry (struct scratch_reg *sr)
9384 /* We always use R11 in 64-bit mode. */
9389 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
9391 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
9392 bool static_chain_p = DECL_STATIC_CHAIN (decl);
9393 int regparm = ix86_function_regparm (fntype, decl);
9395 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
9397 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
9398 for the static chain register. */
9399 if ((regparm < 1 || (fastcall_p && !static_chain_p))
9400 && drap_regno != AX_REG)
9402 else if (regparm < 2 && drap_regno != DX_REG)
9404 /* ecx is the static chain register. */
9405 else if (regparm < 3 && !fastcall_p && !static_chain_p
9406 && drap_regno != CX_REG)
9408 else if (ix86_save_reg (BX_REG, true))
9410 /* esi is the static chain register. */
9411 else if (!(regparm == 3 && static_chain_p)
9412 && ix86_save_reg (SI_REG, true))
9414 else if (ix86_save_reg (DI_REG, true))
9418 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
9423 sr->reg = gen_rtx_REG (Pmode, regno);
9426 rtx insn = emit_insn (gen_push (sr->reg));
9427 RTX_FRAME_RELATED_P (insn) = 1;
9431 /* Release a scratch register obtained from the preceding function. */
9434 release_scratch_register_on_entry (struct scratch_reg *sr)
9438 rtx x, insn = emit_insn (gen_pop (sr->reg));
9440 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
9441 RTX_FRAME_RELATED_P (insn) = 1;
9442 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
9443 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
9444 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
9448 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
9450 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
9453 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
9455 /* We skip the probe for the first interval + a small dope of 4 words and
9456 probe that many bytes past the specified size to maintain a protection
9457 area at the bottom of the stack. */
9458 const int dope = 4 * UNITS_PER_WORD;
9459 rtx size_rtx = GEN_INT (size), last;
9461 /* See if we have a constant small number of probes to generate. If so,
9462 that's the easy case. The run-time loop is made up of 11 insns in the
9463 generic case while the compile-time loop is made up of 3+2*(n-1) insns
9464 for n # of intervals. */
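  /* For example, assuming the default PROBE_INTERVAL of 4096 bytes, a
     12 KiB allocation takes the unrolled path below with three probes,
     while a 64 KiB allocation falls through to the run-time loop.  */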
9465 if (size <= 5 * PROBE_INTERVAL)
9467 HOST_WIDE_INT i, adjust;
9468 bool first_probe = true;
9470 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
9471 values of N from 1 until it exceeds SIZE. If only one probe is
9472 needed, this will not generate any code. Then adjust and probe
9473 to PROBE_INTERVAL + SIZE. */
9474 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
9478 adjust = 2 * PROBE_INTERVAL + dope;
9479 first_probe = false;
9482 adjust = PROBE_INTERVAL;
9484 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9485 plus_constant (stack_pointer_rtx, -adjust)));
9486 emit_stack_probe (stack_pointer_rtx);
9490 adjust = size + PROBE_INTERVAL + dope;
9492 adjust = size + PROBE_INTERVAL - i;
9494 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9495 plus_constant (stack_pointer_rtx, -adjust)));
9496 emit_stack_probe (stack_pointer_rtx);
9498 /* Adjust back to account for the additional first interval. */
9499 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9500 plus_constant (stack_pointer_rtx,
9501 PROBE_INTERVAL + dope)));
9504 /* Otherwise, do the same as above, but in a loop. Note that we must be
9505 extra careful with variables wrapping around because we might be at
9506 the very top (or the very bottom) of the address space and we have
9507 to be able to handle this case properly; in particular, we use an
9508 equality test for the loop condition. */
9511 HOST_WIDE_INT rounded_size;
9512 struct scratch_reg sr;
9514 get_scratch_register_on_entry (&sr);
9517 /* Step 1: round SIZE to the previous multiple of the interval. */
9519 rounded_size = size & -PROBE_INTERVAL;
9522 /* Step 2: compute initial and final value of the loop counter. */
9524 /* SP = SP_0 + PROBE_INTERVAL. */
9525 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9526 plus_constant (stack_pointer_rtx,
9527 - (PROBE_INTERVAL + dope))));
9529 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
9530 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
9531 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
9532 gen_rtx_PLUS (Pmode, sr.reg,
9533 stack_pointer_rtx)));
9538 while (SP != LAST_ADDR)
9539 {
9540 SP = SP + PROBE_INTERVAL
9541 probe at SP
9542 }
9544 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
9545 values of N from 1 until it is equal to ROUNDED_SIZE. */
9547 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
9550 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
9551 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
9553 if (size != rounded_size)
9555 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9556 plus_constant (stack_pointer_rtx,
9557 rounded_size - size)));
9558 emit_stack_probe (stack_pointer_rtx);
9561 /* Adjust back to account for the additional first interval. */
9562 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9563 plus_constant (stack_pointer_rtx,
9564 PROBE_INTERVAL + dope)));
9566 release_scratch_register_on_entry (&sr);
9569 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
9571 /* Even if the stack pointer isn't the CFA register, we need to correctly
9572 describe the adjustments made to it, in particular differentiate the
9573 frame-related ones from the frame-unrelated ones. */
9576 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
9577 XVECEXP (expr, 0, 0)
9578 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9579 plus_constant (stack_pointer_rtx, -size));
9580 XVECEXP (expr, 0, 1)
9581 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9582 plus_constant (stack_pointer_rtx,
9583 PROBE_INTERVAL + dope + size));
9584 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
9585 RTX_FRAME_RELATED_P (last) = 1;
9587 cfun->machine->fs.sp_offset += size;
9590 /* Make sure nothing is scheduled before we are done. */
9591 emit_insn (gen_blockage ());
9594 /* Adjust the stack pointer up to REG while probing it. */
9597 output_adjust_stack_and_probe (rtx reg)
9599 static int labelno = 0;
9600 char loop_lab[32], end_lab[32];
9603 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
9604 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
9606 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
9608 /* Jump to END_LAB if SP == LAST_ADDR. */
9609 xops[0] = stack_pointer_rtx;
9611 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
9612 fputs ("\tje\t", asm_out_file);
9613 assemble_name_raw (asm_out_file, end_lab);
9614 fputc ('\n', asm_out_file);
9616 /* SP = SP + PROBE_INTERVAL. */
9617 xops[1] = GEN_INT (PROBE_INTERVAL);
9618 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
9621 xops[1] = const0_rtx;
9622 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
9624 fprintf (asm_out_file, "\tjmp\t");
9625 assemble_name_raw (asm_out_file, loop_lab);
9626 fputc ('\n', asm_out_file);
9628 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
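/* The loop printed above therefore looks roughly like this (32-bit AT&T
   syntax, assuming PROBE_INTERVAL == 4096 and %eax as the scratch
   register holding LAST_ADDR; label numbers are hypothetical):

	.LPSRL0:
		cmpl	%eax, %esp
		je	.LPSRE0
		subl	$4096, %esp
		orl	$0, (%esp)
		jmp	.LPSRL0
	.LPSRE0:
 */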
9633 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
9634 inclusive. These are offsets from the current stack pointer. */
9637 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
9639 /* See if we have a constant small number of probes to generate. If so,
9640 that's the easy case. The run-time loop is made up of 7 insns in the
9641 generic case while the compile-time loop is made up of n insns for n #
9643 if (size <= 7 * PROBE_INTERVAL)
9647 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
9648 it exceeds SIZE. If only one probe is needed, this will not
9649 generate any code. Then probe at FIRST + SIZE. */
9650 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
9651 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + i)));
9653 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + size)));
9656 /* Otherwise, do the same as above, but in a loop. Note that we must be
9657 extra careful with variables wrapping around because we might be at
9658 the very top (or the very bottom) of the address space and we have
9659 to be able to handle this case properly; in particular, we use an
9660 equality test for the loop condition. */
9663 HOST_WIDE_INT rounded_size, last;
9664 struct scratch_reg sr;
9666 get_scratch_register_on_entry (&sr);
9669 /* Step 1: round SIZE to the previous multiple of the interval. */
9671 rounded_size = size & -PROBE_INTERVAL;
9674 /* Step 2: compute initial and final value of the loop counter. */
9676 /* TEST_OFFSET = FIRST. */
9677 emit_move_insn (sr.reg, GEN_INT (-first));
9679 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
9680 last = first + rounded_size;
9685 while (TEST_ADDR != LAST_ADDR)
9686 {
9687 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
9688 probe at TEST_ADDR
9689 }
9691 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
9692 until it is equal to ROUNDED_SIZE. */
9694 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
9697 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
9698 that SIZE is equal to ROUNDED_SIZE. */
9700 if (size != rounded_size)
9701 emit_stack_probe (plus_constant (gen_rtx_PLUS (Pmode,
9704 rounded_size - size));
9706 release_scratch_register_on_entry (&sr);
9709 /* Make sure nothing is scheduled before we are done. */
9710 emit_insn (gen_blockage ());
9713 /* Probe a range of stack addresses from REG to END, inclusive. These are
9714 offsets from the current stack pointer. */
9717 output_probe_stack_range (rtx reg, rtx end)
9719 static int labelno = 0;
9720 char loop_lab[32], end_lab[32];
9723 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
9724 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
9726 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
9728 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
9731 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
9732 fputs ("\tje\t", asm_out_file);
9733 assemble_name_raw (asm_out_file, end_lab);
9734 fputc ('\n', asm_out_file);
9736 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
9737 xops[1] = GEN_INT (PROBE_INTERVAL);
9738 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
9740 /* Probe at TEST_ADDR. */
9741 xops[0] = stack_pointer_rtx;
9743 xops[2] = const0_rtx;
9744 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
9746 fprintf (asm_out_file, "\tjmp\t");
9747 assemble_name_raw (asm_out_file, loop_lab);
9748 fputc ('\n', asm_out_file);
9750 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
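/* Similarly, a rough rendition of the output (32-bit AT&T syntax, with
   the hypothetical choice of %eax for the test offset and %edx for the
   last offset, both negated as set up by the caller):

	.LPSRL1:
		cmpl	%edx, %eax
		je	.LPSRE1
		subl	$4096, %eax
		orl	$0, (%esp,%eax)
		jmp	.LPSRL1
	.LPSRE1:
 */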
9755 /* Finalize the stack_realign_needed flag, which guides generation of the
9756 prologue/epilogue in the correct form. */
9758 ix86_finalize_stack_realign_flags (void)
9760 /* Check whether stack realignment is really needed after reload, and
9761 store the result in cfun. */
9762 unsigned int incoming_stack_boundary
9763 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
9764 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
9765 unsigned int stack_realign = (incoming_stack_boundary
9766 < (current_function_is_leaf
9767 ? crtl->max_used_stack_slot_alignment
9768 : crtl->stack_alignment_needed));
9770 if (crtl->stack_realign_finalized)
9772 /* After stack_realign_needed is finalized, we can no longer vary it. */
9774 gcc_assert (crtl->stack_realign_needed == stack_realign);
9778 crtl->stack_realign_needed = stack_realign;
9779 crtl->stack_realign_finalized = true;
9783 /* Expand the prologue into a bunch of separate insns. */
9786 ix86_expand_prologue (void)
9788 struct machine_function *m = cfun->machine;
9791 struct ix86_frame frame;
9792 HOST_WIDE_INT allocate;
9793 bool int_registers_saved;
9795 ix86_finalize_stack_realign_flags ();
9797 /* DRAP should not coexist with stack_realign_fp */
9798 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
9800 memset (&m->fs, 0, sizeof (m->fs));
9802 /* Initialize CFA state for before the prologue. */
9803 m->fs.cfa_reg = stack_pointer_rtx;
9804 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
9806 /* Track SP offset to the CFA. We continue tracking this after we've
9807 swapped the CFA register away from SP. In the case of re-alignment
9808 this is fudged; we're interested in offsets within the local frame. */
9809 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
9810 m->fs.sp_valid = true;
9812 ix86_compute_frame_layout (&frame);
9814 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
9816 /* We should have already generated an error for any use of
9817 ms_hook on a nested function. */
9818 gcc_checking_assert (!ix86_static_chain_on_stack);
9820 /* Check if profiling is active and whether we shall use the
9821 profiling-before-prologue variant; if so, issue a sorry. */
9822 if (crtl->profile && flag_fentry != 0)
9823 sorry ("ms_hook_prologue attribute isn%'t compatible "
9824 "with -mfentry for 32-bit");
9826 /* In ix86_asm_output_function_label we emitted:
9827 8b ff movl.s %edi,%edi
9828 55 push %ebp
9829 8b ec movl.s %esp,%ebp
9831 This matches the hookable function prologue in Win32 API
9832 functions in Microsoft Windows XP Service Pack 2 and newer.
9833 Wine uses this to enable Windows apps to hook the Win32 API
9834 functions provided by Wine.
9836 What that means is that we've already set up the frame pointer. */
9838 if (frame_pointer_needed
9839 && !(crtl->drap_reg && crtl->stack_realign_needed))
9843 /* We've decided to use the frame pointer already set up.
9844 Describe this to the unwinder by pretending that both
9845 push and mov insns happen right here.
9847 Putting the unwind info here at the end of the ms_hook
9848 is done so that we can make absolutely certain we get
9849 the required byte sequence at the start of the function,
9850 rather than relying on an assembler that can produce
9851 the exact encoding required.
9853 However it does mean (in the unpatched case) that we have
9854 a 1 insn window where the asynchronous unwind info is
9855 incorrect. However, if we placed the unwind info at
9856 its correct location we would have incorrect unwind info
9857 in the patched case. Which is probably all moot since
9858 I don't expect Wine generates dwarf2 unwind info for the
9859 system libraries that use this feature. */
9861 insn = emit_insn (gen_blockage ());
9863 push = gen_push (hard_frame_pointer_rtx);
9864 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
9866 RTX_FRAME_RELATED_P (push) = 1;
9867 RTX_FRAME_RELATED_P (mov) = 1;
9869 RTX_FRAME_RELATED_P (insn) = 1;
9870 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
9871 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
9873 /* Note that gen_push incremented m->fs.cfa_offset, even
9874 though we didn't emit the push insn here. */
9875 m->fs.cfa_reg = hard_frame_pointer_rtx;
9876 m->fs.fp_offset = m->fs.cfa_offset;
9877 m->fs.fp_valid = true;
9881 /* The frame pointer is not needed so pop %ebp again.
9882 This leaves us with a pristine state. */
9883 emit_insn (gen_pop (hard_frame_pointer_rtx));
9887 /* The first insn of a function that accepts its static chain on the
9888 stack is to push the register that would be filled in by a direct
9889 call. This insn will be skipped by the trampoline. */
9890 else if (ix86_static_chain_on_stack)
9892 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
9893 emit_insn (gen_blockage ());
9895 /* We don't want to interpret this push insn as a register save,
9896 only as a stack adjustment. The real copy of the register as
9897 a save will be done later, if needed. */
9898 t = plus_constant (stack_pointer_rtx, -UNITS_PER_WORD);
9899 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
9900 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
9901 RTX_FRAME_RELATED_P (insn) = 1;
9904 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
9905 DRAP is needed and stack realignment is really needed after reload. */
9906 if (stack_realign_drap)
9908 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
9910 /* Only need to push parameter pointer reg if it is caller saved. */
9911 if (!call_used_regs[REGNO (crtl->drap_reg)])
9913 /* Push arg pointer reg */
9914 insn = emit_insn (gen_push (crtl->drap_reg));
9915 RTX_FRAME_RELATED_P (insn) = 1;
9918 /* Grab the argument pointer. */
9919 t = plus_constant (stack_pointer_rtx, m->fs.sp_offset);
9920 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
9921 RTX_FRAME_RELATED_P (insn) = 1;
9922 m->fs.cfa_reg = crtl->drap_reg;
9923 m->fs.cfa_offset = 0;
9925 /* Align the stack. */
9926 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
9928 GEN_INT (-align_bytes)));
9929 RTX_FRAME_RELATED_P (insn) = 1;
9931 /* Replicate the return address on the stack so that the return
9932 address can be reached via the (argp - 1) slot. This is needed
9933 to implement macro RETURN_ADDR_RTX and intrinsic function
9934 expand_builtin_return_addr etc. */
9935 t = plus_constant (crtl->drap_reg, -UNITS_PER_WORD);
9936 t = gen_frame_mem (Pmode, t);
9937 insn = emit_insn (gen_push (t));
9938 RTX_FRAME_RELATED_P (insn) = 1;
9940 /* For the purposes of frame and register save area addressing,
9941 we've started over with a new frame. */
9942 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
9943 m->fs.realigned = true;
9946 if (frame_pointer_needed && !m->fs.fp_valid)
9948 /* Note: AT&T enter does NOT have reversed args. Enter is probably
9949 slower on all targets. Also sdb doesn't like it. */
9950 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
9951 RTX_FRAME_RELATED_P (insn) = 1;
9953 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
9955 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
9956 RTX_FRAME_RELATED_P (insn) = 1;
9958 if (m->fs.cfa_reg == stack_pointer_rtx)
9959 m->fs.cfa_reg = hard_frame_pointer_rtx;
9960 m->fs.fp_offset = m->fs.sp_offset;
9961 m->fs.fp_valid = true;
9965 int_registers_saved = (frame.nregs == 0);
9967 if (!int_registers_saved)
9969 /* If saving registers via PUSH, do so now. */
9970 if (!frame.save_regs_using_mov)
9972 ix86_emit_save_regs ();
9973 int_registers_saved = true;
9974 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
9977 /* When using the red zone we may start register saving before allocating
9978 the stack frame, saving one cycle of the prologue. However, avoid
9979 doing this if we have to probe the stack; at least on x86_64 the
9980 stack probe can turn into a call that clobbers a red zone location. */
9981 else if (ix86_using_red_zone ()
9982 && (! TARGET_STACK_PROBE
9983 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
9985 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
9986 int_registers_saved = true;
9990 if (stack_realign_fp)
9992 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
9993 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
9995 /* The computation of the size of the re-aligned stack frame means
9996 that we must allocate the size of the register save area before
9997 performing the actual alignment. Otherwise we cannot guarantee
9998 that there's enough storage above the realignment point. */
9999 if (m->fs.sp_offset != frame.sse_reg_save_offset)
10000 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10001 GEN_INT (m->fs.sp_offset
10002 - frame.sse_reg_save_offset),
10005 /* Align the stack. */
10006 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10008 GEN_INT (-align_bytes)));
10010 /* For the purposes of register save area addressing, the stack
10011 pointer is no longer valid. As for the value of sp_offset,
10012 see ix86_compute_frame_layout, which we need to match in order
10013 to pass verification of stack_pointer_offset at the end. */
10014 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
10015 m->fs.sp_valid = false;
10018 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
10020 if (flag_stack_usage_info)
10022 /* We start to count from ARG_POINTER. */
10023 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
10025 /* If it was realigned, take into account the fake frame. */
10026 if (stack_realign_drap)
10028 if (ix86_static_chain_on_stack)
10029 stack_size += UNITS_PER_WORD;
10031 if (!call_used_regs[REGNO (crtl->drap_reg)])
10032 stack_size += UNITS_PER_WORD;
10034 /* This over-estimates by 1 minimal-stack-alignment-unit but
10035 mitigates that by counting in the new return address slot. */
10036 current_function_dynamic_stack_size
10037 += crtl->stack_alignment_needed / BITS_PER_UNIT;
10040 current_function_static_stack_size = stack_size;
10043 /* The stack has already been decremented by the instruction calling us
10044 so probe if the size is non-negative to preserve the protection area. */
10045 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
10047 /* We expect the registers to be saved when probes are used. */
10048 gcc_assert (int_registers_saved);
10050 if (STACK_CHECK_MOVING_SP)
10052 ix86_adjust_stack_and_probe (allocate);
10057 HOST_WIDE_INT size = allocate;
10059 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
10060 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
10062 if (TARGET_STACK_PROBE)
10063 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
10065 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
10071 else if (!ix86_target_stack_probe ()
10072 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
10074 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10075 GEN_INT (-allocate), -1,
10076 m->fs.cfa_reg == stack_pointer_rtx);
10080 rtx eax = gen_rtx_REG (Pmode, AX_REG);
10082 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
10084 bool eax_live = false;
10085 bool r10_live = false;
10088 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
10089 if (!TARGET_64BIT_MS_ABI)
10090 eax_live = ix86_eax_live_at_start_p ();
10094 emit_insn (gen_push (eax));
10095 allocate -= UNITS_PER_WORD;
10099 r10 = gen_rtx_REG (Pmode, R10_REG);
10100 emit_insn (gen_push (r10));
10101 allocate -= UNITS_PER_WORD;
10104 emit_move_insn (eax, GEN_INT (allocate));
10105 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
10107 /* Use the fact that AX still contains ALLOCATE. */
10108 adjust_stack_insn = (TARGET_64BIT
10109 ? gen_pro_epilogue_adjust_stack_di_sub
10110 : gen_pro_epilogue_adjust_stack_si_sub);
10112 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
10113 stack_pointer_rtx, eax));
10115 /* Note that SEH directives need to continue tracking the stack
10116 pointer even after the frame pointer has been set up. */
10117 if (m->fs.cfa_reg == stack_pointer_rtx || TARGET_SEH)
10119 if (m->fs.cfa_reg == stack_pointer_rtx)
10120 m->fs.cfa_offset += allocate;
10122 RTX_FRAME_RELATED_P (insn) = 1;
10123 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10124 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10125 plus_constant (stack_pointer_rtx,
10128 m->fs.sp_offset += allocate;
10130 if (r10_live && eax_live)
10132 t = choose_baseaddr (m->fs.sp_offset - allocate);
10133 emit_move_insn (r10, gen_frame_mem (Pmode, t));
10134 t = choose_baseaddr (m->fs.sp_offset - allocate - UNITS_PER_WORD);
10135 emit_move_insn (eax, gen_frame_mem (Pmode, t));
10137 else if (eax_live || r10_live)
10139 t = choose_baseaddr (m->fs.sp_offset - allocate);
10140 emit_move_insn ((eax_live ? eax : r10), gen_frame_mem (Pmode, t));
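  /* In other words, on the large-allocation path the size travels to the
     probing worker in %eax and the final subtraction of %eax from the
     stack pointer doubles as the frame allocation; %eax (and %r10, the
     64-bit static-chain register) are spilled around the call only when
     they carry live values.  */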
10143 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
10145 /* If we haven't already set up the frame pointer, do so now. */
10146 if (frame_pointer_needed && !m->fs.fp_valid)
10148 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
10149 GEN_INT (frame.stack_pointer_offset
10150 - frame.hard_frame_pointer_offset));
10151 insn = emit_insn (insn);
10152 RTX_FRAME_RELATED_P (insn) = 1;
10153 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
10155 if (m->fs.cfa_reg == stack_pointer_rtx)
10156 m->fs.cfa_reg = hard_frame_pointer_rtx;
10157 m->fs.fp_offset = frame.hard_frame_pointer_offset;
10158 m->fs.fp_valid = true;
10161 if (!int_registers_saved)
10162 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10163 if (frame.nsseregs)
10164 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
10166 pic_reg_used = false;
10167 if (pic_offset_table_rtx
10168 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
10171 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
10173 if (alt_pic_reg_used != INVALID_REGNUM)
10174 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
10176 pic_reg_used = true;
10183 if (ix86_cmodel == CM_LARGE_PIC)
10185 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
10186 rtx label = gen_label_rtx ();
10187 emit_label (label);
10188 LABEL_PRESERVE_P (label) = 1;
10189 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
10190 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
10191 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
10192 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
10193 pic_offset_table_rtx, tmp_reg));
10196 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
10200 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
10201 RTX_FRAME_RELATED_P (insn) = 1;
10202 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
10206 /* In the pic_reg_used case, make sure that the got load isn't deleted
10207 when mcount needs it. Blockage to avoid call movement across mcount
call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
note.  */
10210 if (crtl->profile && !flag_fentry && pic_reg_used)
10211 emit_insn (gen_prologue_use (pic_offset_table_rtx));
10213 if (crtl->drap_reg && !crtl->stack_realign_needed)
/* vDRAP is set up, but after reload it turns out that stack
   realignment isn't necessary.  Emit the prologue to set up the
   DRAP without the stack realignment adjustment.  */
10218 t = choose_baseaddr (0);
10219 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10222 /* Prevent instructions from being scheduled into register save push
10223 sequence when access to the redzone area is done through frame pointer.
10224 The offset between the frame pointer and the stack pointer is calculated
10225 relative to the value of the stack pointer at the end of the function
10226 prologue, and moving instructions that access redzone area via frame
10227 pointer inside push sequence violates this assumption. */
10228 if (frame_pointer_needed && frame.red_zone_size)
10229 emit_insn (gen_memory_blockage ());
10231 /* Emit cld instruction if stringops are used in the function. */
10232 if (TARGET_CLD && ix86_current_function_needs_cld)
10233 emit_insn (gen_cld ());
10235 /* SEH requires that the prologue end within 256 bytes of the start of
10236 the function. Prevent instruction schedules that would extend that. */
10238 emit_insn (gen_blockage ());
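/* For orientation, a sketch (not verbatim output) of what the code
   above produces for a simple 64-bit function that needs a frame
   pointer, saves one register and allocates N bytes of locals:

	pushq	%rbp
	movq	%rsp, %rbp
	pushq	%rbx
	subq	$N, %rsp

   with RTX_FRAME_RELATED_P / REG_CFA_* notes attached at each step so
   the unwinder can track the CFA through the sequence.  */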
10241 /* Emit code to restore REG using a POP insn. */
10244 ix86_emit_restore_reg_using_pop (rtx reg)
10246 struct machine_function *m = cfun->machine;
10247 rtx insn = emit_insn (gen_pop (reg));
10249 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
10250 m->fs.sp_offset -= UNITS_PER_WORD;
10252 if (m->fs.cfa_reg == crtl->drap_reg
10253 && REGNO (reg) == REGNO (crtl->drap_reg))
10255 /* Previously we'd represented the CFA as an expression
10256 like *(%ebp - 8). We've just popped that value from
10257 the stack, which means we need to reset the CFA to
10258 the drap register. This will remain until we restore
10259 the stack pointer. */
10260 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
10261 RTX_FRAME_RELATED_P (insn) = 1;
10263 /* This means that the DRAP register is valid for addressing too. */
10264 m->fs.drap_valid = true;
10268 if (m->fs.cfa_reg == stack_pointer_rtx)
10270 rtx x = plus_constant (stack_pointer_rtx, UNITS_PER_WORD);
10271 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10272 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
10273 RTX_FRAME_RELATED_P (insn) = 1;
10275 m->fs.cfa_offset -= UNITS_PER_WORD;
10278 /* When the frame pointer is the CFA, and we pop it, we are
10279 swapping back to the stack pointer as the CFA. This happens
10280 for stack frames that don't allocate other data, so we assume
10281 the stack pointer is now pointing at the return address, i.e.
10282 the function entry state, which makes the offset be 1 word. */
10283 if (reg == hard_frame_pointer_rtx)
10285 m->fs.fp_valid = false;
10286 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
10288 m->fs.cfa_reg = stack_pointer_rtx;
10289 m->fs.cfa_offset -= UNITS_PER_WORD;
10291 add_reg_note (insn, REG_CFA_DEF_CFA,
10292 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10293 GEN_INT (m->fs.cfa_offset)));
10294 RTX_FRAME_RELATED_P (insn) = 1;
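/* A worked instance of the sp-based CFA bookkeeping above, assuming
   64-bit mode (UNITS_PER_WORD == 8): if the CFA is %rsp + 24 before a
   "popq %rbx", the pop raises %rsp by 8, so the REG_CFA_ADJUST_CFA
   note re-expresses the same CFA as %rsp + 16; cfa_offset drops from
   24 to 16 accordingly.  */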
10299 /* Emit code to restore saved registers using POP insns. */
10302 ix86_emit_restore_regs_using_pop (void)
10304 unsigned int regno;
10306 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10307 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
10308 ix86_emit_restore_reg_using_pop (gen_rtx_REG (Pmode, regno));
10311 /* Emit code and notes for the LEAVE instruction. */
10314 ix86_emit_leave (void)
10316 struct machine_function *m = cfun->machine;
10317 rtx insn = emit_insn (ix86_gen_leave ());
10319 ix86_add_queued_cfa_restore_notes (insn);
10321 gcc_assert (m->fs.fp_valid);
10322 m->fs.sp_valid = true;
10323 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
10324 m->fs.fp_valid = false;
10326 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
10328 m->fs.cfa_reg = stack_pointer_rtx;
10329 m->fs.cfa_offset = m->fs.sp_offset;
10331 add_reg_note (insn, REG_CFA_DEF_CFA,
10332 plus_constant (stack_pointer_rtx, m->fs.sp_offset));
10333 RTX_FRAME_RELATED_P (insn) = 1;
ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
			   m->fs.fp_offset);
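/* For reference: "leave" behaves like "mov %rbp, %rsp; pop %rbp" (or
   the %ebp equivalents), which is why sp becomes valid again exactly
   one word above the frame pointer save slot.  E.g. with fp_offset ==
   16 on 64-bit, sp_offset ends up as 8.  */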
10339 /* Emit code to restore saved registers using MOV insns.
10340 First register is restored from CFA - CFA_OFFSET. */
10342 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
10343 bool maybe_eh_return)
10345 struct machine_function *m = cfun->machine;
10346 unsigned int regno;
10348 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10349 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
10351 rtx reg = gen_rtx_REG (Pmode, regno);
10354 mem = choose_baseaddr (cfa_offset);
10355 mem = gen_frame_mem (Pmode, mem);
10356 insn = emit_move_insn (reg, mem);
10358 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
/* Previously we'd represented the CFA as an expression
   like *(%ebp - 8).  We've just loaded that value from
   the stack, which means we need to reset the CFA to
   the drap register.  This will remain until we restore
   the stack pointer.  */
10365 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
10366 RTX_FRAME_RELATED_P (insn) = 1;
10368 /* This means that the DRAP register is valid for addressing. */
10369 m->fs.drap_valid = true;
10372 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
10374 cfa_offset -= UNITS_PER_WORD;
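/* An illustration of the loop above, assuming 64-bit mode with %rbx
   and %r12 to restore and an initial cfa_offset of 16: %rbx is loaded
   from CFA - 16 and %r12 from CFA - 8, cfa_offset stepping down by
   UNITS_PER_WORD after each restore.  */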
/* Emit code to restore saved SSE registers using MOV insns.
   The first register is restored from CFA - CFA_OFFSET.  */
10381 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
10382 bool maybe_eh_return)
10384 unsigned int regno;
10386 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10387 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
10389 rtx reg = gen_rtx_REG (V4SFmode, regno);
10392 mem = choose_baseaddr (cfa_offset);
10393 mem = gen_rtx_MEM (V4SFmode, mem);
10394 set_mem_align (mem, 128);
10395 emit_move_insn (reg, mem);
10397 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
10403 /* Restore function stack, frame, and registers. */
10406 ix86_expand_epilogue (int style)
10408 struct machine_function *m = cfun->machine;
10409 struct machine_frame_state frame_state_save = m->fs;
10410 struct ix86_frame frame;
10411 bool restore_regs_via_mov;
10414 ix86_finalize_stack_realign_flags ();
10415 ix86_compute_frame_layout (&frame);
10417 m->fs.sp_valid = (!frame_pointer_needed
10418 || (current_function_sp_is_unchanging
10419 && !stack_realign_fp));
10420 gcc_assert (!m->fs.sp_valid
10421 || m->fs.sp_offset == frame.stack_pointer_offset);
10423 /* The FP must be valid if the frame pointer is present. */
10424 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
10425 gcc_assert (!m->fs.fp_valid
10426 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
10428 /* We must have *some* valid pointer to the stack frame. */
10429 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
10431 /* The DRAP is never valid at this point. */
10432 gcc_assert (!m->fs.drap_valid);
10434 /* See the comment about red zone and frame
10435 pointer usage in ix86_expand_prologue. */
10436 if (frame_pointer_needed && frame.red_zone_size)
10437 emit_insn (gen_memory_blockage ());
10439 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
10440 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
10442 /* Determine the CFA offset of the end of the red-zone. */
10443 m->fs.red_zone_offset = 0;
10444 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
10446 /* The red-zone begins below the return address. */
10447 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
10449 /* When the register save area is in the aligned portion of
10450 the stack, determine the maximum runtime displacement that
10451 matches up with the aligned frame. */
10452 if (stack_realign_drap)
10453 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
10457 /* Special care must be taken for the normal return case of a function
10458 using eh_return: the eax and edx registers are marked as saved, but
10459 not restored along this path. Adjust the save location to match. */
10460 if (crtl->calls_eh_return && style != 2)
10461 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
10463 /* EH_RETURN requires the use of moves to function properly. */
10464 if (crtl->calls_eh_return)
10465 restore_regs_via_mov = true;
10466 /* SEH requires the use of pops to identify the epilogue. */
10467 else if (TARGET_SEH)
10468 restore_regs_via_mov = false;
/* If we're only restoring one register and sp is not valid, then
   use a move instruction to restore the register, since it's
   less work than reloading sp and popping the register.  */
10472 else if (!m->fs.sp_valid && frame.nregs <= 1)
10473 restore_regs_via_mov = true;
10474 else if (TARGET_EPILOGUE_USING_MOVE
10475 && cfun->machine->use_fast_prologue_epilogue
10476 && (frame.nregs > 1
10477 || m->fs.sp_offset != frame.reg_save_offset))
10478 restore_regs_via_mov = true;
10479 else if (frame_pointer_needed
10481 && m->fs.sp_offset != frame.reg_save_offset)
10482 restore_regs_via_mov = true;
10483 else if (frame_pointer_needed
10484 && TARGET_USE_LEAVE
10485 && cfun->machine->use_fast_prologue_epilogue
10486 && frame.nregs == 1)
10487 restore_regs_via_mov = true;
10489 restore_regs_via_mov = false;
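/* To make the chain above concrete: eh_return always forces the mov
   path, and SEH always forces the pop path so the unwinder can
   recognize the epilogue; the remaining cases are cost heuristics,
   e.g. a function with a single saved register and an invalid sp
   restores it with one mov rather than reloading sp and popping.  */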
10491 if (restore_regs_via_mov || frame.nsseregs)
10493 /* Ensure that the entire register save area is addressable via
10494 the stack pointer, if we will restore via sp. */
10496 && m->fs.sp_offset > 0x7fffffff
10497 && !(m->fs.fp_valid || m->fs.drap_valid)
10498 && (frame.nsseregs + frame.nregs) != 0)
10500 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10501 GEN_INT (m->fs.sp_offset
10502 - frame.sse_reg_save_offset),
10504 m->fs.cfa_reg == stack_pointer_rtx);
10508 /* If there are any SSE registers to restore, then we have to do it
10509 via moves, since there's obviously no pop for SSE regs. */
10510 if (frame.nsseregs)
10511 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
10514 if (restore_regs_via_mov)
10519 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
10521 /* eh_return epilogues need %ecx added to the stack pointer. */
10524 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
10526 /* Stack align doesn't work with eh_return. */
10527 gcc_assert (!stack_realign_drap);
/* Neither do regparm nested functions.  */
10529 gcc_assert (!ix86_static_chain_on_stack);
10531 if (frame_pointer_needed)
10533 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
10534 t = plus_constant (t, m->fs.fp_offset - UNITS_PER_WORD);
10535 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
10537 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
10538 insn = emit_move_insn (hard_frame_pointer_rtx, t);
10540 /* Note that we use SA as a temporary CFA, as the return
10541 address is at the proper place relative to it. We
10542 pretend this happens at the FP restore insn because
10543 prior to this insn the FP would be stored at the wrong
10544 offset relative to SA, and after this insn we have no
10545 other reasonable register to use for the CFA. We don't
10546 bother resetting the CFA to the SP for the duration of
10547 the return insn. */
10548 add_reg_note (insn, REG_CFA_DEF_CFA,
10549 plus_constant (sa, UNITS_PER_WORD));
10550 ix86_add_queued_cfa_restore_notes (insn);
10551 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
10552 RTX_FRAME_RELATED_P (insn) = 1;
10554 m->fs.cfa_reg = sa;
10555 m->fs.cfa_offset = UNITS_PER_WORD;
10556 m->fs.fp_valid = false;
10558 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
10559 const0_rtx, style, false);
10563 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
10564 t = plus_constant (t, m->fs.sp_offset - UNITS_PER_WORD);
10565 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
10566 ix86_add_queued_cfa_restore_notes (insn);
10568 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
10569 if (m->fs.cfa_offset != UNITS_PER_WORD)
10571 m->fs.cfa_offset = UNITS_PER_WORD;
10572 add_reg_note (insn, REG_CFA_DEF_CFA,
10573 plus_constant (stack_pointer_rtx,
10575 RTX_FRAME_RELATED_P (insn) = 1;
10578 m->fs.sp_offset = UNITS_PER_WORD;
10579 m->fs.sp_valid = true;
10584 /* SEH requires that the function end with (1) a stack adjustment
10585 if necessary, (2) a sequence of pops, and (3) a return or
10586 jump instruction. Prevent insns from the function body from
10587 being scheduled into this sequence. */
10590 /* Prevent a catch region from being adjacent to the standard
epilogue sequence.  Unfortunately, neither crtl->uses_eh_lsda nor
several other flags that would be interesting to test are
set up yet.  */
10594 if (flag_non_call_exceptions)
10595 emit_insn (gen_nops (const1_rtx));
10597 emit_insn (gen_blockage ());
10600 /* First step is to deallocate the stack frame so that we can
10601 pop the registers. */
10602 if (!m->fs.sp_valid)
10604 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
10605 GEN_INT (m->fs.fp_offset
10606 - frame.reg_save_offset),
10609 else if (m->fs.sp_offset != frame.reg_save_offset)
10611 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10612 GEN_INT (m->fs.sp_offset
10613 - frame.reg_save_offset),
10615 m->fs.cfa_reg == stack_pointer_rtx);
10618 ix86_emit_restore_regs_using_pop ();
/* If we used a frame pointer and haven't already got rid of it,
   then do so now.  */
10623 if (m->fs.fp_valid)
10625 /* If the stack pointer is valid and pointing at the frame
10626 pointer store address, then we only need a pop. */
10627 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
10628 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10629 /* Leave results in shorter dependency chains on CPUs that are
10630 able to grok it fast. */
10631 else if (TARGET_USE_LEAVE
10632 || optimize_function_for_size_p (cfun)
10633 || !cfun->machine->use_fast_prologue_epilogue)
10634 ix86_emit_leave ();
10637 pro_epilogue_adjust_stack (stack_pointer_rtx,
10638 hard_frame_pointer_rtx,
10639 const0_rtx, style, !using_drap);
10640 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10646 int param_ptr_offset = UNITS_PER_WORD;
10649 gcc_assert (stack_realign_drap);
10651 if (ix86_static_chain_on_stack)
10652 param_ptr_offset += UNITS_PER_WORD;
10653 if (!call_used_regs[REGNO (crtl->drap_reg)])
10654 param_ptr_offset += UNITS_PER_WORD;
10656 insn = emit_insn (gen_rtx_SET
10657 (VOIDmode, stack_pointer_rtx,
10658 gen_rtx_PLUS (Pmode,
10660 GEN_INT (-param_ptr_offset))));
10661 m->fs.cfa_reg = stack_pointer_rtx;
10662 m->fs.cfa_offset = param_ptr_offset;
10663 m->fs.sp_offset = param_ptr_offset;
10664 m->fs.realigned = false;
10666 add_reg_note (insn, REG_CFA_DEF_CFA,
10667 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10668 GEN_INT (param_ptr_offset)));
10669 RTX_FRAME_RELATED_P (insn) = 1;
10671 if (!call_used_regs[REGNO (crtl->drap_reg)])
10672 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
10675 /* At this point the stack pointer must be valid, and we must have
10676 restored all of the registers. We may not have deallocated the
10677 entire stack frame. We've delayed this until now because it may
10678 be possible to merge the local stack deallocation with the
10679 deallocation forced by ix86_static_chain_on_stack. */
10680 gcc_assert (m->fs.sp_valid);
10681 gcc_assert (!m->fs.fp_valid);
10682 gcc_assert (!m->fs.realigned);
10683 if (m->fs.sp_offset != UNITS_PER_WORD)
10685 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10686 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
10690 /* Sibcall epilogues don't want a return instruction. */
10693 m->fs = frame_state_save;
10697 /* Emit vzeroupper if needed. */
10698 if (TARGET_VZEROUPPER
10699 && !TREE_THIS_VOLATILE (cfun->decl)
10700 && !cfun->machine->caller_return_avx256_p)
10701 emit_insn (gen_avx_vzeroupper (GEN_INT (call_no_avx256)));
10703 if (crtl->args.pops_args && crtl->args.size)
10705 rtx popc = GEN_INT (crtl->args.pops_args);
10707 /* i386 can only pop 64K bytes. If asked to pop more, pop return
10708 address, do explicit add, and jump indirectly to the caller. */
10710 if (crtl->args.pops_args >= 65536)
10712 rtx ecx = gen_rtx_REG (SImode, CX_REG);
10715 /* There is no "pascal" calling convention in any 64bit ABI. */
10716 gcc_assert (!TARGET_64BIT);
10718 insn = emit_insn (gen_pop (ecx));
10719 m->fs.cfa_offset -= UNITS_PER_WORD;
10720 m->fs.sp_offset -= UNITS_PER_WORD;
10722 add_reg_note (insn, REG_CFA_ADJUST_CFA,
10723 copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
10724 add_reg_note (insn, REG_CFA_REGISTER,
10725 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
10726 RTX_FRAME_RELATED_P (insn) = 1;
10728 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10730 emit_jump_insn (gen_return_indirect_internal (ecx));
10733 emit_jump_insn (gen_return_pop_internal (popc));
10736 emit_jump_insn (gen_return_internal ());
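/* For illustration, the ia32 sequence emitted above when more than
   64K bytes of arguments must be popped (N == crtl->args.pops_args):

	popl	%ecx		# return address into %ecx
	addl	$N, %esp	# pop the arguments
	jmp	*%ecx		# return to the caller

   versus the single "ret $N" of the common case.  */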
10738 /* Restore the state back to the state from the prologue,
10739 so that it's correct for the next epilogue. */
10740 m->fs = frame_state_save;
10743 /* Reset from the function's potential modifications. */
10746 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
10747 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
10749 if (pic_offset_table_rtx)
10750 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
10752 /* Mach-O doesn't support labels at the end of objects, so if
10753 it looks like we might want one, insert a NOP. */
10755 rtx insn = get_last_insn ();
10758 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
10759 insn = PREV_INSN (insn);
10763 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
10764 fputs ("\tnop\n", file);
/* Return a scratch register to use in the split stack prologue.  The
   split stack prologue is used for -fsplit-stack.  It consists of the
   first instructions in the function, even before the regular prologue.
   The scratch register can be any caller-saved register which is not
   used for parameters or for the static chain.  */
10776 static unsigned int
10777 split_stack_prologue_scratch_regno (void)
10786 is_fastcall = (lookup_attribute ("fastcall",
10787 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
10789 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
10793 if (DECL_STATIC_CHAIN (cfun->decl))
10795 sorry ("-fsplit-stack does not support fastcall with "
10796 "nested function");
10797 return INVALID_REGNUM;
10801 else if (regparm < 3)
10803 if (!DECL_STATIC_CHAIN (cfun->decl))
10809 sorry ("-fsplit-stack does not support 2 register "
10810 " parameters for a nested function");
10811 return INVALID_REGNUM;
10818 /* FIXME: We could make this work by pushing a register
10819 around the addition and comparison. */
10820 sorry ("-fsplit-stack does not support 3 register parameters");
10821 return INVALID_REGNUM;
/* A SYMBOL_REF for the function which allocates new stack space for
   split stack.  */
10829 static GTY(()) rtx split_stack_fn;
/* A SYMBOL_REF for the __morestack function to call when using the
   large code model.  */
10834 static GTY(()) rtx split_stack_fn_large;
10836 /* Handle -fsplit-stack. These are the first instructions in the
10837 function, even before the regular prologue. */
10840 ix86_expand_split_stack_prologue (void)
10842 struct ix86_frame frame;
10843 HOST_WIDE_INT allocate;
10844 unsigned HOST_WIDE_INT args_size;
10845 rtx label, limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
10846 rtx scratch_reg = NULL_RTX;
10847 rtx varargs_label = NULL_RTX;
10850 gcc_assert (flag_split_stack && reload_completed);
10852 ix86_finalize_stack_realign_flags ();
10853 ix86_compute_frame_layout (&frame);
10854 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
10856 /* This is the label we will branch to if we have enough stack
10857 space. We expect the basic block reordering pass to reverse this
10858 branch if optimizing, so that we branch in the unlikely case. */
10859 label = gen_label_rtx ();
10861 /* We need to compare the stack pointer minus the frame size with
10862 the stack boundary in the TCB. The stack boundary always gives
10863 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
10864 can compare directly. Otherwise we need to do an addition. */
10866 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
10867 UNSPEC_STACK_CHECK);
10868 limit = gen_rtx_CONST (Pmode, limit);
10869 limit = gen_rtx_MEM (Pmode, limit);
10870 if (allocate < SPLIT_STACK_AVAILABLE)
10871 current = stack_pointer_rtx;
10874 unsigned int scratch_regno;
10877 /* We need a scratch register to hold the stack pointer minus
10878 the required frame size. Since this is the very start of the
10879 function, the scratch register can be any caller-saved
10880 register which is not used for parameters. */
10881 offset = GEN_INT (- allocate);
10882 scratch_regno = split_stack_prologue_scratch_regno ();
10883 if (scratch_regno == INVALID_REGNUM)
10885 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
10886 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
10888 /* We don't use ix86_gen_add3 in this case because it will
10889 want to split to lea, but when not optimizing the insn
10890 will not be split after this point. */
10891 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
10892 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10897 emit_move_insn (scratch_reg, offset);
10898 emit_insn (gen_adddi3 (scratch_reg, scratch_reg,
10899 stack_pointer_rtx));
10901 current = scratch_reg;
10904 ix86_expand_branch (GEU, current, limit, label);
10905 jump_insn = get_last_insn ();
10906 JUMP_LABEL (jump_insn) = label;
10908 /* Mark the jump as very likely to be taken. */
10909 add_reg_note (jump_insn, REG_BR_PROB,
10910 GEN_INT (REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100));
10912 if (split_stack_fn == NULL_RTX)
10913 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
10914 fn = split_stack_fn;
10916 /* Get more stack space. We pass in the desired stack space and the
10917 size of the arguments to copy to the new stack. In 32-bit mode
10918 we push the parameters; __morestack will return on a new stack
anyhow.  In 64-bit mode we pass the parameters in r10 and
r11.  */
10921 allocate_rtx = GEN_INT (allocate);
10922 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
10923 call_fusage = NULL_RTX;
10928 reg10 = gen_rtx_REG (Pmode, R10_REG);
10929 reg11 = gen_rtx_REG (Pmode, R11_REG);
10931 /* If this function uses a static chain, it will be in %r10.
10932 Preserve it across the call to __morestack. */
10933 if (DECL_STATIC_CHAIN (cfun->decl))
10937 rax = gen_rtx_REG (Pmode, AX_REG);
10938 emit_move_insn (rax, reg10);
10939 use_reg (&call_fusage, rax);
10942 if (ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
10944 HOST_WIDE_INT argval;
10946 /* When using the large model we need to load the address
10947 into a register, and we've run out of registers. So we
10948 switch to a different calling convention, and we call a
10949 different function: __morestack_large. We pass the
10950 argument size in the upper 32 bits of r10 and pass the
10951 frame size in the lower 32 bits. */
10952 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
10953 gcc_assert ((args_size & 0xffffffff) == args_size);
10955 if (split_stack_fn_large == NULL_RTX)
10956 split_stack_fn_large =
10957 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
10959 if (ix86_cmodel == CM_LARGE_PIC)
10963 label = gen_label_rtx ();
10964 emit_label (label);
10965 LABEL_PRESERVE_P (label) = 1;
10966 emit_insn (gen_set_rip_rex64 (reg10, label));
10967 emit_insn (gen_set_got_offset_rex64 (reg11, label));
10968 emit_insn (gen_adddi3 (reg10, reg10, reg11));
10969 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
10971 x = gen_rtx_CONST (Pmode, x);
10972 emit_move_insn (reg11, x);
10973 x = gen_rtx_PLUS (Pmode, reg10, reg11);
10974 x = gen_const_mem (Pmode, x);
10975 emit_move_insn (reg11, x);
10978 emit_move_insn (reg11, split_stack_fn_large);
10982 argval = ((args_size << 16) << 16) + allocate;
10983 emit_move_insn (reg10, GEN_INT (argval));
10987 emit_move_insn (reg10, allocate_rtx);
10988 emit_move_insn (reg11, GEN_INT (args_size));
10989 use_reg (&call_fusage, reg11);
10992 use_reg (&call_fusage, reg10);
10996 emit_insn (gen_push (GEN_INT (args_size)));
10997 emit_insn (gen_push (allocate_rtx));
10999 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
11000 GEN_INT (UNITS_PER_WORD), constm1_rtx,
11002 add_function_usage_to (call_insn, call_fusage);
11004 /* In order to make call/return prediction work right, we now need
11005 to execute a return instruction. See
11006 libgcc/config/i386/morestack.S for the details on how this works.
11008 For flow purposes gcc must not see this as a return
11009 instruction--we need control flow to continue at the subsequent
11010 label. Therefore, we use an unspec. */
11011 gcc_assert (crtl->args.pops_args < 65536);
11012 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
11014 /* If we are in 64-bit mode and this function uses a static chain,
we saved %r10 in %rax before calling __morestack.  */
11016 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
11017 emit_move_insn (gen_rtx_REG (Pmode, R10_REG),
11018 gen_rtx_REG (Pmode, AX_REG));
11020 /* If this function calls va_start, we need to store a pointer to
11021 the arguments on the old stack, because they may not have been
11022 all copied to the new stack. At this point the old stack can be
11023 found at the frame pointer value used by __morestack, because
11024 __morestack has set that up before calling back to us. Here we
11025 store that pointer in a scratch register, and in
ix86_expand_prologue we store the scratch register in a stack
slot.  */
11028 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11030 unsigned int scratch_regno;
11034 scratch_regno = split_stack_prologue_scratch_regno ();
11035 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11036 frame_reg = gen_rtx_REG (Pmode, BP_REG);
/* 64-bit:
   fp -> old fp
	 return address within this function
	 return address of caller of this function
	 stack arguments
   So we add three words to get to the stack arguments.

   32-bit:
   fp -> old fp
	 return address within this function
	 first argument to __morestack
	 second argument to __morestack
	 return address of caller of this function
	 stack arguments
   So we add five words to get to the stack arguments.  */
11054 words = TARGET_64BIT ? 3 : 5;
11055 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11056 gen_rtx_PLUS (Pmode, frame_reg,
11057 GEN_INT (words * UNITS_PER_WORD))));
11059 varargs_label = gen_label_rtx ();
11060 emit_jump_insn (gen_jump (varargs_label));
11061 JUMP_LABEL (get_last_insn ()) = varargs_label;
11066 emit_label (label);
11067 LABEL_NUSES (label) = 1;
11069 /* If this function calls va_start, we now have to set the scratch
11070 register for the case where we do not call __morestack. In this
11071 case we need to set it based on the stack pointer. */
11072 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11074 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11075 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11076 GEN_INT (UNITS_PER_WORD))));
11078 emit_label (varargs_label);
11079 LABEL_NUSES (varargs_label) = 1;
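/* A sketch of the split-stack prologue built above, assuming x86-64,
   the small code model and no static chain; the TCB guard offset and
   the label name are illustrative, since the actual limit is whatever
   UNSPEC_STACK_CHECK resolves to for the target libc:

	cmpq	%fs:<guard-offset>, %rsp
	jae	.Lenough
	movq	$frame_size, %r10
	movq	$args_size, %r11
	callq	__morestack
	retq			# consumed by __morestack; control
				# resumes at .Lenough on the new stack
   .Lenough:
	...regular prologue...  */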
11083 /* We may have to tell the dataflow pass that the split stack prologue
11084 is initializing a scratch register. */
11087 ix86_live_on_entry (bitmap regs)
11089 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11091 gcc_assert (flag_split_stack);
11092 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
/* Extract the parts of an RTL expression that is a valid memory address
   for an instruction.  Return 0 if the structure of the address is
   grossly off.  Return -1 if the address contains ASHIFT, so it is not
   strictly valid, but still used for computing the length of lea
   instructions.  */
11102 ix86_decompose_address (rtx addr, struct ix86_address *out)
11104 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
11105 rtx base_reg, index_reg;
11106 HOST_WIDE_INT scale = 1;
11107 rtx scale_rtx = NULL_RTX;
11110 enum ix86_address_seg seg = SEG_DEFAULT;
11114 else if (GET_CODE (addr) == SUBREG)
11116 /* Allow only subregs of DImode hard regs. */
11117 if (register_no_elim_operand (SUBREG_REG (addr), DImode))
11122 else if (GET_CODE (addr) == PLUS)
11124 rtx addends[4], op;
11132 addends[n++] = XEXP (op, 1);
11135 while (GET_CODE (op) == PLUS);
11140 for (i = n; i >= 0; --i)
11143 switch (GET_CODE (op))
11148 index = XEXP (op, 0);
11149 scale_rtx = XEXP (op, 1);
11155 index = XEXP (op, 0);
11156 tmp = XEXP (op, 1);
11157 if (!CONST_INT_P (tmp))
11159 scale = INTVAL (tmp);
11160 if ((unsigned HOST_WIDE_INT) scale > 3)
11162 scale = 1 << scale;
11166 if (XINT (op, 1) == UNSPEC_TP
11167 && TARGET_TLS_DIRECT_SEG_REFS
11168 && seg == SEG_DEFAULT)
11169 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
11175 /* Allow only subregs of DImode hard regs in PLUS chains. */
11176 if (!register_no_elim_operand (SUBREG_REG (op), DImode))
11203 else if (GET_CODE (addr) == MULT)
11205 index = XEXP (addr, 0); /* index*scale */
11206 scale_rtx = XEXP (addr, 1);
11208 else if (GET_CODE (addr) == ASHIFT)
11210 /* We're called for lea too, which implements ashift on occasion. */
11211 index = XEXP (addr, 0);
11212 tmp = XEXP (addr, 1);
11213 if (!CONST_INT_P (tmp))
11215 scale = INTVAL (tmp);
11216 if ((unsigned HOST_WIDE_INT) scale > 3)
11218 scale = 1 << scale;
11222 disp = addr; /* displacement */
11228 /* Allow only subregs of DImode hard regs. */
11229 else if (GET_CODE (index) == SUBREG
11230 && register_no_elim_operand (SUBREG_REG (index), DImode))
11236 /* Extract the integral value of scale. */
11239 if (!CONST_INT_P (scale_rtx))
11241 scale = INTVAL (scale_rtx);
11244 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
11245 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
11247 /* Avoid useless 0 displacement. */
11248 if (disp == const0_rtx && (base || index))
/* Allow arg pointer and stack pointer as index if there is no scaling.  */
11252 if (base_reg && index_reg && scale == 1
11253 && (index_reg == arg_pointer_rtx
11254 || index_reg == frame_pointer_rtx
11255 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
11258 tmp = base, base = index, index = tmp;
11259 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
11262 /* Special case: %ebp cannot be encoded as a base without a displacement.
11266 && (base_reg == hard_frame_pointer_rtx
11267 || base_reg == frame_pointer_rtx
11268 || base_reg == arg_pointer_rtx
11269 || (REG_P (base_reg)
11270 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
11271 || REGNO (base_reg) == R13_REG))))
11274 /* Special case: on K6, [%esi] makes the instruction vector decoded.
11275 Avoid this by transforming to [%esi+0].
11276 Reload calls address legitimization without cfun defined, so we need
11277 to test cfun for being non-NULL. */
11278 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
11279 && base_reg && !index_reg && !disp
11280 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
11283 /* Special case: encode reg+reg instead of reg*2. */
11284 if (!base && index && scale == 2)
11285 base = index, base_reg = index_reg, scale = 1;
11287 /* Special case: scaling cannot be encoded without base or displacement. */
11288 if (!base && !disp && index && scale != 1)
11292 out->index = index;
11294 out->scale = scale;
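/* A minimal usage sketch, kept out of the build with #if 0: how a
   caller decomposes the 32-bit address 12(%ebx,%eax,4).  The RTX is
   built by hand here purely for illustration.  */
#if 0
{
  struct ix86_address parts;
  rtx addr
    = gen_rtx_PLUS (SImode,
		    gen_rtx_PLUS (SImode,
				  gen_rtx_MULT (SImode,
						gen_rtx_REG (SImode, AX_REG),
						GEN_INT (4)),
				  gen_rtx_REG (SImode, BX_REG)),
		    GEN_INT (12));

  if (ix86_decompose_address (addr, &parts) > 0)
    {
      /* Here parts.base is %ebx, parts.index is %eax, parts.scale is 4,
	 parts.disp is (const_int 12) and parts.seg is SEG_DEFAULT.  */
    }
}
#endif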
/* Return the cost of the memory address x.
   For i386, it is better to use a complex address than let gcc copy
   the address into a reg and make a new pseudo.  But not if the address
   requires two regs - that would mean more pseudos with longer
   lifetimes.  */
11306 ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
11308 struct ix86_address parts;
11310 int ok = ix86_decompose_address (x, &parts);
11314 if (parts.base && GET_CODE (parts.base) == SUBREG)
11315 parts.base = SUBREG_REG (parts.base);
11316 if (parts.index && GET_CODE (parts.index) == SUBREG)
11317 parts.index = SUBREG_REG (parts.index);
11319 /* Attempt to minimize number of registers in the address. */
11321 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
11323 && (!REG_P (parts.index)
11324 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
11328 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
11330 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
11331 && parts.base != parts.index)
/* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
   since its predecode logic can't detect the length of instructions
   and it degenerates to vector decoded.  Increase the cost of such
   addresses here.  The penalty is minimally 2 cycles.  It may be
   worthwhile to split such addresses or even refuse them at all.

   The following addressing modes are affected:
    [base+scale*index]
    [scale*index+disp]
    [base+index]

   The first and last cases may be avoidable by explicitly coding the
   zero in the memory address, but I don't have an AMD-K6 machine handy
   to check this theory.  */
11350 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
11351 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
11352 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
/* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
   this is used to form addresses to local data when -fPIC is in
   effect.  */
11363 darwin_local_data_pic (rtx disp)
11365 return (GET_CODE (disp) == UNSPEC
11366 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
11369 /* Determine if a given RTX is a valid constant. We already know this
11370 satisfies CONSTANT_P. */
11373 ix86_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
11375 switch (GET_CODE (x))
11380 if (GET_CODE (x) == PLUS)
11382 if (!CONST_INT_P (XEXP (x, 1)))
11387 if (TARGET_MACHO && darwin_local_data_pic (x))
11390 /* Only some unspecs are valid as "constants". */
11391 if (GET_CODE (x) == UNSPEC)
11392 switch (XINT (x, 1))
11395 case UNSPEC_GOTOFF:
11396 case UNSPEC_PLTOFF:
11397 return TARGET_64BIT;
11399 case UNSPEC_NTPOFF:
11400 x = XVECEXP (x, 0, 0);
11401 return (GET_CODE (x) == SYMBOL_REF
11402 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11403 case UNSPEC_DTPOFF:
11404 x = XVECEXP (x, 0, 0);
11405 return (GET_CODE (x) == SYMBOL_REF
11406 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
11411 /* We must have drilled down to a symbol. */
11412 if (GET_CODE (x) == LABEL_REF)
11414 if (GET_CODE (x) != SYMBOL_REF)
11419 /* TLS symbols are never valid. */
11420 if (SYMBOL_REF_TLS_MODEL (x))
11423 /* DLLIMPORT symbols are never valid. */
11424 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
11425 && SYMBOL_REF_DLLIMPORT_P (x))
11429 /* mdynamic-no-pic */
11430 if (MACHO_DYNAMIC_NO_PIC_P)
11431 return machopic_symbol_defined_p (x);
11436 if (GET_MODE (x) == TImode
11437 && x != CONST0_RTX (TImode)
11443 if (!standard_sse_constant_p (x))
11450 /* Otherwise we handle everything else in the move patterns. */
11454 /* Determine if it's legal to put X into the constant pool. This
11455 is not possible for the address of thread-local symbols, which
11456 is checked above. */
11459 ix86_cannot_force_const_mem (enum machine_mode mode, rtx x)
11461 /* We can always put integral constants and vectors in memory. */
11462 switch (GET_CODE (x))
11472 return !ix86_legitimate_constant_p (mode, x);
11476 /* Nonzero if the constant value X is a legitimate general operand
11477 when generating PIC code. It is given that flag_pic is on and
11478 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
11481 legitimate_pic_operand_p (rtx x)
11485 switch (GET_CODE (x))
11488 inner = XEXP (x, 0);
11489 if (GET_CODE (inner) == PLUS
11490 && CONST_INT_P (XEXP (inner, 1)))
11491 inner = XEXP (inner, 0);
11493 /* Only some unspecs are valid as "constants". */
11494 if (GET_CODE (inner) == UNSPEC)
11495 switch (XINT (inner, 1))
11498 case UNSPEC_GOTOFF:
11499 case UNSPEC_PLTOFF:
11500 return TARGET_64BIT;
11502 x = XVECEXP (inner, 0, 0);
11503 return (GET_CODE (x) == SYMBOL_REF
11504 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11505 case UNSPEC_MACHOPIC_OFFSET:
11506 return legitimate_pic_address_disp_p (x);
11514 return legitimate_pic_address_disp_p (x);
/* Determine if a given CONST RTX is a valid memory displacement
   in PIC mode.  */
11525 legitimate_pic_address_disp_p (rtx disp)
11529 /* In 64bit mode we can allow direct addresses of symbols and labels
11530 when they are not dynamic symbols. */
11533 rtx op0 = disp, op1;
11535 switch (GET_CODE (disp))
11541 if (GET_CODE (XEXP (disp, 0)) != PLUS)
11543 op0 = XEXP (XEXP (disp, 0), 0);
11544 op1 = XEXP (XEXP (disp, 0), 1);
11545 if (!CONST_INT_P (op1)
11546 || INTVAL (op1) >= 16*1024*1024
11547 || INTVAL (op1) < -16*1024*1024)
11549 if (GET_CODE (op0) == LABEL_REF)
11551 if (GET_CODE (op0) != SYMBOL_REF)
11556 /* TLS references should always be enclosed in UNSPEC. */
11557 if (SYMBOL_REF_TLS_MODEL (op0))
11559 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
11560 && ix86_cmodel != CM_LARGE_PIC)
11568 if (GET_CODE (disp) != CONST)
11570 disp = XEXP (disp, 0);
/* It is unsafe to allow PLUS expressions here; the displacement
   range of GOT table references is limited, and we should not need
   these anyway.  */
11576 if (GET_CODE (disp) != UNSPEC
11577 || (XINT (disp, 1) != UNSPEC_GOTPCREL
11578 && XINT (disp, 1) != UNSPEC_GOTOFF
11579 && XINT (disp, 1) != UNSPEC_PCREL
11580 && XINT (disp, 1) != UNSPEC_PLTOFF))
11583 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
11584 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
11590 if (GET_CODE (disp) == PLUS)
11592 if (!CONST_INT_P (XEXP (disp, 1)))
11594 disp = XEXP (disp, 0);
11598 if (TARGET_MACHO && darwin_local_data_pic (disp))
11601 if (GET_CODE (disp) != UNSPEC)
11604 switch (XINT (disp, 1))
/* We need to check for both symbols and labels because VxWorks loads
   text labels with @GOT rather than @GOTOFF.  See gotoff_operand for
   details.  */
11612 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
11613 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
11614 case UNSPEC_GOTOFF:
/* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
   While the ABI also specifies 32bit relocations, we don't produce
   them in the small PIC model at all.  */
11618 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
11619 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
11621 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
11623 case UNSPEC_GOTTPOFF:
11624 case UNSPEC_GOTNTPOFF:
11625 case UNSPEC_INDNTPOFF:
11628 disp = XVECEXP (disp, 0, 0);
11629 return (GET_CODE (disp) == SYMBOL_REF
11630 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
11631 case UNSPEC_NTPOFF:
11632 disp = XVECEXP (disp, 0, 0);
11633 return (GET_CODE (disp) == SYMBOL_REF
11634 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
11635 case UNSPEC_DTPOFF:
11636 disp = XVECEXP (disp, 0, 0);
11637 return (GET_CODE (disp) == SYMBOL_REF
11638 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
/* Recognizes RTL expressions that are valid memory addresses for an
   instruction.  The MODE argument is the machine mode for the MEM
   expression that wants to use this address.

   It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS
   should convert common non-canonical forms to canonical form so that
   they will be recognized.  */
11653 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
11654 rtx addr, bool strict)
11656 struct ix86_address parts;
11657 rtx base, index, disp;
11658 HOST_WIDE_INT scale;
11660 if (ix86_decompose_address (addr, &parts) <= 0)
11661 /* Decomposition failed. */
11665 index = parts.index;
11667 scale = parts.scale;
11669 /* Validate base register. */
11676 else if (GET_CODE (base) == SUBREG && REG_P (SUBREG_REG (base)))
11678 reg = SUBREG_REG (base);
11679 gcc_assert (register_no_elim_operand (reg, DImode));
11682 /* Base is not a register. */
11685 if (GET_MODE (base) != SImode && GET_MODE (base) != DImode)
11688 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
11689 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
11690 /* Base is not valid. */
11694 /* Validate index register. */
11701 else if (GET_CODE (index) == SUBREG && REG_P (SUBREG_REG (index)))
11703 reg = SUBREG_REG (index);
11704 gcc_assert (register_no_elim_operand (reg, DImode));
11707 /* Index is not a register. */
11710 if (GET_MODE (index) != SImode && GET_MODE (index) != DImode)
11713 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
11714 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
11715 /* Index is not valid. */
11719 /* Index and base should have the same mode. */
11721 && GET_MODE (base) != GET_MODE (index))
11724 /* Validate scale factor. */
11728 /* Scale without index. */
11731 if (scale != 2 && scale != 4 && scale != 8)
11732 /* Scale is not a valid multiplier. */
11736 /* Validate displacement. */
11739 if (GET_CODE (disp) == CONST
11740 && GET_CODE (XEXP (disp, 0)) == UNSPEC
11741 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
11742 switch (XINT (XEXP (disp, 0), 1))
/* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
   used.  While the ABI also specifies 32bit relocations, we don't
   produce them at all and use IP relative addressing instead.  */
11748 case UNSPEC_GOTOFF:
11749 gcc_assert (flag_pic);
11751 goto is_legitimate_pic;
11753 /* 64bit address unspec. */
11756 case UNSPEC_GOTPCREL:
11758 gcc_assert (flag_pic);
11759 goto is_legitimate_pic;
11761 case UNSPEC_GOTTPOFF:
11762 case UNSPEC_GOTNTPOFF:
11763 case UNSPEC_INDNTPOFF:
11764 case UNSPEC_NTPOFF:
11765 case UNSPEC_DTPOFF:
11768 case UNSPEC_STACK_CHECK:
11769 gcc_assert (flag_split_stack);
11773 /* Invalid address unspec. */
11777 else if (SYMBOLIC_CONST (disp)
11781 && MACHOPIC_INDIRECT
11782 && !machopic_operand_p (disp)
11788 if (TARGET_64BIT && (index || base))
11790 /* foo@dtpoff(%rX) is ok. */
11791 if (GET_CODE (disp) != CONST
11792 || GET_CODE (XEXP (disp, 0)) != PLUS
11793 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
11794 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
11795 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
11796 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
11797 /* Non-constant pic memory reference. */
11800 else if ((!TARGET_MACHO || flag_pic)
11801 && ! legitimate_pic_address_disp_p (disp))
11802 /* Displacement is an invalid pic construct. */
11805 else if (MACHO_DYNAMIC_NO_PIC_P
11806 && !ix86_legitimate_constant_p (Pmode, disp))
/* Displacement must be referenced via non_lazy_pointer.  */
/* This code used to verify that a symbolic pic displacement
   includes the pic_offset_table_rtx register.

   While this is a good idea, unfortunately these constructs may
   be created by the "adds using lea" optimization for incorrect
   code.  Such code is nonsensical, but results in addressing the
   GOT table with a pic_offset_table_rtx base.  We can't just
   refuse it easily, since it gets matched by the "addsi3" pattern,
   which later gets split to lea in the case the output register
   differs from the input.  While this can be handled by a separate
   addsi pattern for this case that never results in lea, disabling
   this test seems to be the easier and correct fix for the crash.  */
11833 else if (GET_CODE (disp) != LABEL_REF
11834 && !CONST_INT_P (disp)
11835 && (GET_CODE (disp) != CONST
11836 || !ix86_legitimate_constant_p (Pmode, disp))
11837 && (GET_CODE (disp) != SYMBOL_REF
11838 || !ix86_legitimate_constant_p (Pmode, disp)))
11839 /* Displacement is not constant. */
11841 else if (TARGET_64BIT
11842 && !x86_64_immediate_operand (disp, VOIDmode))
11843 /* Displacement is out of range. */
11847 /* Everything looks valid. */
11851 /* Determine if a given RTX is a valid constant address. */
11854 constant_address_p (rtx x)
11856 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
11859 /* Return a unique alias set for the GOT. */
11861 static alias_set_type
11862 ix86_GOT_alias_set (void)
11864 static alias_set_type set = -1;
11866 set = new_alias_set ();
11870 /* Return a legitimate reference for ORIG (an address) using the
11871 register REG. If REG is 0, a new pseudo is generated.
11873 There are two types of references that must be handled:
11875 1. Global data references must load the address from the GOT, via
the PIC reg.  An insn is emitted to do this load, and the reg is
returned.
11879 2. Static data references, constant pool addresses, and code labels
11880 compute the address as an offset from the GOT, whose base is in
11881 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
11882 differentiate them from global data objects. The returned
11883 address is the PIC reg + an unspec constant.
11885 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
11886 reg also appears in the address. */
11889 legitimize_pic_address (rtx orig, rtx reg)
11892 rtx new_rtx = orig;
11896 if (TARGET_MACHO && !TARGET_64BIT)
11899 reg = gen_reg_rtx (Pmode);
11900 /* Use the generic Mach-O PIC machinery. */
11901 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
11905 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
11907 else if (TARGET_64BIT
11908 && ix86_cmodel != CM_SMALL_PIC
11909 && gotoff_operand (addr, Pmode))
11912 /* This symbol may be referenced via a displacement from the PIC
11913 base address (@GOTOFF). */
11915 if (reload_in_progress)
11916 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11917 if (GET_CODE (addr) == CONST)
11918 addr = XEXP (addr, 0);
11919 if (GET_CODE (addr) == PLUS)
11921 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
11923 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
11926 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
11927 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11929 tmpreg = gen_reg_rtx (Pmode);
11932 emit_move_insn (tmpreg, new_rtx);
11936 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
11937 tmpreg, 1, OPTAB_DIRECT);
11940 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
11942 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
11944 /* This symbol may be referenced via a displacement from the PIC
11945 base address (@GOTOFF). */
11947 if (reload_in_progress)
11948 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11949 if (GET_CODE (addr) == CONST)
11950 addr = XEXP (addr, 0);
11951 if (GET_CODE (addr) == PLUS)
11953 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
11955 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
11958 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
11959 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11960 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
11964 emit_move_insn (reg, new_rtx);
11968 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
11969 /* We can't use @GOTOFF for text labels on VxWorks;
11970 see gotoff_operand. */
11971 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
11973 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
11975 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
11976 return legitimize_dllimport_symbol (addr, true);
11977 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
11978 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
11979 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
11981 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
11982 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
/* For x64 PE-COFF there is no GOT table, so we use the address
   directly.  */
11988 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
11990 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
11991 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11994 reg = gen_reg_rtx (Pmode);
11995 emit_move_insn (reg, new_rtx);
11998 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
12000 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
12001 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12002 new_rtx = gen_const_mem (Pmode, new_rtx);
12003 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
12006 reg = gen_reg_rtx (Pmode);
/* Use gen_movsi directly, otherwise the address is loaded into a
   register for CSE.  We don't want to CSE this address; instead we
   CSE addresses from the GOT table, so skip this.  */
12010 emit_insn (gen_movsi (reg, new_rtx));
12015 /* This symbol must be referenced via a load from the
12016 Global Offset Table (@GOT). */
12018 if (reload_in_progress)
12019 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12020 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
12021 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12023 new_rtx = force_reg (Pmode, new_rtx);
12024 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12025 new_rtx = gen_const_mem (Pmode, new_rtx);
12026 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
12029 reg = gen_reg_rtx (Pmode);
12030 emit_move_insn (reg, new_rtx);
12036 if (CONST_INT_P (addr)
12037 && !x86_64_immediate_operand (addr, VOIDmode))
12041 emit_move_insn (reg, addr);
12045 new_rtx = force_reg (Pmode, addr);
12047 else if (GET_CODE (addr) == CONST)
12049 addr = XEXP (addr, 0);
12051 /* We must match stuff we generate before. Assume the only
12052 unspecs that can get here are ours. Not that we could do
12053 anything with them anyway.... */
12054 if (GET_CODE (addr) == UNSPEC
12055 || (GET_CODE (addr) == PLUS
12056 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
12058 gcc_assert (GET_CODE (addr) == PLUS);
12060 if (GET_CODE (addr) == PLUS)
12062 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
12064 /* Check first to see if this is a constant offset from a @GOTOFF
12065 symbol reference. */
12066 if (gotoff_operand (op0, Pmode)
12067 && CONST_INT_P (op1))
12071 if (reload_in_progress)
12072 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12073 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
12075 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
12076 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12077 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12081 emit_move_insn (reg, new_rtx);
12087 if (INTVAL (op1) < -16*1024*1024
12088 || INTVAL (op1) >= 16*1024*1024)
12090 if (!x86_64_immediate_operand (op1, Pmode))
12091 op1 = force_reg (Pmode, op1);
12092 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
12098 base = legitimize_pic_address (XEXP (addr, 0), reg);
12099 new_rtx = legitimize_pic_address (XEXP (addr, 1),
12100 base == reg ? NULL_RTX : reg);
12102 if (CONST_INT_P (new_rtx))
12103 new_rtx = plus_constant (base, INTVAL (new_rtx));
12106 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
12108 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
12109 new_rtx = XEXP (new_rtx, 1);
12111 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
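/* By way of example (32-bit ELF, %ebx holding the PIC base): a global
   symbol is reached through a load from its GOT slot,

	movl	sym@GOT(%ebx), %eax

   while a local symbol is just a displacement from the PIC base,

	leal	sym@GOTOFF(%ebx), %eax

   matching the two kinds of references described above.  */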
12119 /* Load the thread pointer. If TO_REG is true, force it into a register. */
12122 get_thread_pointer (bool to_reg)
12126 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
12130 reg = gen_reg_rtx (Pmode);
12131 insn = gen_rtx_SET (VOIDmode, reg, tp);
12132 insn = emit_insn (insn);
12137 /* Construct the SYMBOL_REF for the tls_get_addr function. */
12139 static GTY(()) rtx ix86_tls_symbol;
12142 ix86_tls_get_addr (void)
12144 if (!ix86_tls_symbol)
12147 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
12148 ? "___tls_get_addr" : "__tls_get_addr");
12150 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
12153 return ix86_tls_symbol;
12156 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
12158 static GTY(()) rtx ix86_tls_module_base_symbol;
12161 ix86_tls_module_base (void)
12163 if (!ix86_tls_module_base_symbol)
12165 ix86_tls_module_base_symbol
12166 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
12168 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
12169 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
12172 return ix86_tls_module_base_symbol;
12175 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
12176 false if we expect this to be used for a memory address and true if
12177 we expect to load the address into a register. */
12180 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
12182 rtx dest, base, off;
12183 rtx pic = NULL_RTX, tp = NULL_RTX;
12188 case TLS_MODEL_GLOBAL_DYNAMIC:
12189 dest = gen_reg_rtx (Pmode);
12194 pic = pic_offset_table_rtx;
12197 pic = gen_reg_rtx (Pmode);
12198 emit_insn (gen_set_got (pic));
12202 if (TARGET_GNU2_TLS)
12205 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
12207 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
12209 tp = get_thread_pointer (true);
12210 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
12212 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
12216 rtx caddr = ix86_tls_get_addr ();
12220 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
12223 emit_call_insn (gen_tls_global_dynamic_64 (rax, x, caddr));
12224 insns = get_insns ();
12227 RTL_CONST_CALL_P (insns) = 1;
12228 emit_libcall_block (insns, dest, rax, x);
12231 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
12235 case TLS_MODEL_LOCAL_DYNAMIC:
12236 base = gen_reg_rtx (Pmode);
12241 pic = pic_offset_table_rtx;
12244 pic = gen_reg_rtx (Pmode);
12245 emit_insn (gen_set_got (pic));
12249 if (TARGET_GNU2_TLS)
12251 rtx tmp = ix86_tls_module_base ();
12254 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
12256 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
12258 tp = get_thread_pointer (true);
12259 set_unique_reg_note (get_last_insn (), REG_EQUIV,
12260 gen_rtx_MINUS (Pmode, tmp, tp));
12264 rtx caddr = ix86_tls_get_addr ();
12268 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, eqv;
12271 emit_call_insn (gen_tls_local_dynamic_base_64 (rax, caddr));
12272 insns = get_insns ();
12275 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
12276 share the LD_BASE result with other LD model accesses. */
12277 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
12278 UNSPEC_TLS_LD_BASE);
12280 RTL_CONST_CALL_P (insns) = 1;
12281 emit_libcall_block (insns, base, rax, eqv);
12284 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
12287 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
12288 off = gen_rtx_CONST (Pmode, off);
12290 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
12292 if (TARGET_GNU2_TLS)
12294 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
12296 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
12300 case TLS_MODEL_INITIAL_EXEC:
12303 if (TARGET_SUN_TLS)
12305 /* The Sun linker took the AMD64 TLS spec literally
12306 and can only handle %rax as destination of the
12307 initial executable code sequence. */
12309 dest = gen_reg_rtx (Pmode);
12310 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
12315 type = UNSPEC_GOTNTPOFF;
12319 if (reload_in_progress)
12320 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12321 pic = pic_offset_table_rtx;
12322 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
12324 else if (!TARGET_ANY_GNU_TLS)
12326 pic = gen_reg_rtx (Pmode);
12327 emit_insn (gen_set_got (pic));
12328 type = UNSPEC_GOTTPOFF;
12333 type = UNSPEC_INDNTPOFF;
12336 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
12337 off = gen_rtx_CONST (Pmode, off);
12339 off = gen_rtx_PLUS (Pmode, pic, off);
12340 off = gen_const_mem (Pmode, off);
12341 set_mem_alias_set (off, ix86_GOT_alias_set ());
12343 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12345 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
12346 off = force_reg (Pmode, off);
12347 return gen_rtx_PLUS (Pmode, base, off);
12351 base = get_thread_pointer (true);
12352 dest = gen_reg_rtx (Pmode);
12353 emit_insn (gen_subsi3 (dest, base, off));
12357 case TLS_MODEL_LOCAL_EXEC:
12358 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
12359 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12360 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
12361 off = gen_rtx_CONST (Pmode, off);
12363 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12365 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
12366 return gen_rtx_PLUS (Pmode, base, off);
12370 base = get_thread_pointer (true);
12371 dest = gen_reg_rtx (Pmode);
12372 emit_insn (gen_subsi3 (dest, base, off));
12377 gcc_unreachable ();
12383 /* Create or return the unique __imp_DECL dllimport symbol corresponding
12386 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
12387 htab_t dllimport_map;
12390 get_dllimport_decl (tree decl)
12392 struct tree_map *h, in;
12395 const char *prefix;
12396 size_t namelen, prefixlen;
12401 if (!dllimport_map)
12402 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
12404 in.hash = htab_hash_pointer (decl);
12405 in.base.from = decl;
12406 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
12407 h = (struct tree_map *) *loc;
12411 *loc = h = ggc_alloc_tree_map ();
12413 h->base.from = decl;
12414 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
12415 VAR_DECL, NULL, ptr_type_node);
12416 DECL_ARTIFICIAL (to) = 1;
12417 DECL_IGNORED_P (to) = 1;
12418 DECL_EXTERNAL (to) = 1;
12419 TREE_READONLY (to) = 1;
12421 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
12422 name = targetm.strip_name_encoding (name);
12423 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
12424 ? "*__imp_" : "*__imp__";
12425 namelen = strlen (name);
12426 prefixlen = strlen (prefix);
12427 imp_name = (char *) alloca (namelen + prefixlen + 1);
12428 memcpy (imp_name, prefix, prefixlen);
12429 memcpy (imp_name + prefixlen, name, namelen + 1);
12431 name = ggc_alloc_string (imp_name, namelen + prefixlen);
12432 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
12433 SET_SYMBOL_REF_DECL (rtl, to);
12434 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
12436 rtl = gen_const_mem (Pmode, rtl);
12437 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
12439 SET_DECL_RTL (to, rtl);
12440 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
12445 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
12446 true if we require the result be a register. */
12449 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
12454 gcc_assert (SYMBOL_REF_DECL (symbol));
12455 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
12457 x = DECL_RTL (imp_decl);
12459 x = force_reg (Pmode, x);
12463 /* Try machine-dependent ways of modifying an illegitimate address
12464 to be legitimate. If we find one, return the new, valid address.
12465 This function is used in only one place: `memory_address' in explow.c.
12467 OLDX is the address as it was before break_out_memory_refs was called.
12468 In some cases it is useful to look at this to decide what needs to be done.
12470 It is always safe for this function to do nothing. It exists to recognize
12471 opportunities to optimize the output.
12473 For the 80386, we handle X+REG by loading X into a register R and
12474 using R+REG. R will go in a general reg and indexing will be used.
12475 However, if REG is a broken-out memory address or multiplication,
12476 nothing needs to be done because REG can certainly go in a general reg.
12478 When -fpic is used, special handling is needed for symbolic references.
12479 See comments by legitimize_pic_address in i386.c for details. */
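/* For example, an address such as (plus (symbol_ref "x") (reg A))
   would typically be rewritten by loading the symbol into a fresh
   register R and returning (plus R (reg A)), which fits the
   machine's base+index addressing form.  */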
12482 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
12483 enum machine_mode mode)
12488 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
12490 return legitimize_tls_address (x, (enum tls_model) log, false);
12491 if (GET_CODE (x) == CONST
12492 && GET_CODE (XEXP (x, 0)) == PLUS
12493 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
12494 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
12496 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
12497 (enum tls_model) log, false);
12498 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
12501 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
12503 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
12504 return legitimize_dllimport_symbol (x, true);
12505 if (GET_CODE (x) == CONST
12506 && GET_CODE (XEXP (x, 0)) == PLUS
12507 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
12508 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
12510 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
12511 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
12515 if (flag_pic && SYMBOLIC_CONST (x))
12516 return legitimize_pic_address (x, 0);
12519 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
12520 return machopic_indirect_data_reference (x, 0);
12523 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
12524 if (GET_CODE (x) == ASHIFT
12525 && CONST_INT_P (XEXP (x, 1))
12526 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
12529 log = INTVAL (XEXP (x, 1));
12530 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
12531 GEN_INT (1 << log));
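/* For instance, (ashift (reg) (const_int 2)) becomes
   (mult (reg) (const_int 4)); the resulting multiplier of 1, 2, 4
   or 8 matches the scale field of the x86 addressing mode.  */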
12534 if (GET_CODE (x) == PLUS)
12536 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
12538 if (GET_CODE (XEXP (x, 0)) == ASHIFT
12539 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
12540 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
12543 log = INTVAL (XEXP (XEXP (x, 0), 1));
12544 XEXP (x, 0) = gen_rtx_MULT (Pmode,
12545 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
12546 GEN_INT (1 << log));
12549 if (GET_CODE (XEXP (x, 1)) == ASHIFT
12550 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
12551 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
12554 log = INTVAL (XEXP (XEXP (x, 1), 1));
12555 XEXP (x, 1) = gen_rtx_MULT (Pmode,
12556 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
12557 GEN_INT (1 << log));
12560 /* Put multiply first if it isn't already. */
12561 if (GET_CODE (XEXP (x, 1)) == MULT)
12563 rtx tmp = XEXP (x, 0);
12564 XEXP (x, 0) = XEXP (x, 1);
12569 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
12570 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
12571 created by virtual register instantiation, register elimination, and
12572 similar optimizations. */
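/* A concrete case: an address like
   (plus (mult (reg A) (const_int 4)) (plus (reg B) (const_int 12)))
   is rebuilt below as
   (plus (plus (mult (reg A) (const_int 4)) (reg B)) (const_int 12)),
   i.e. roughly 12(B,A,4) in AT&T syntax.  */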
12573 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
12576 x = gen_rtx_PLUS (Pmode,
12577 gen_rtx_PLUS (Pmode, XEXP (x, 0),
12578 XEXP (XEXP (x, 1), 0)),
12579 XEXP (XEXP (x, 1), 1));
12583 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
12584 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
12585 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
12586 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
12587 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
12588 && CONSTANT_P (XEXP (x, 1)))
12591 rtx other = NULL_RTX;
12593 if (CONST_INT_P (XEXP (x, 1)))
12595 constant = XEXP (x, 1);
12596 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
12598 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
12600 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
12601 other = XEXP (x, 1);
12609 x = gen_rtx_PLUS (Pmode,
12610 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
12611 XEXP (XEXP (XEXP (x, 0), 1), 0)),
12612 plus_constant (other, INTVAL (constant)));
12616 if (changed && ix86_legitimate_address_p (mode, x, false))
12619 if (GET_CODE (XEXP (x, 0)) == MULT)
12622 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
12625 if (GET_CODE (XEXP (x, 1)) == MULT)
12628 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
12632 && REG_P (XEXP (x, 1))
12633 && REG_P (XEXP (x, 0)))
12636 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
12639 x = legitimize_pic_address (x, 0);
12642 if (changed && ix86_legitimate_address_p (mode, x, false))
12645 if (REG_P (XEXP (x, 0)))
12647 rtx temp = gen_reg_rtx (Pmode);
12648 rtx val = force_operand (XEXP (x, 1), temp);
12651 if (GET_MODE (val) != Pmode)
12652 val = convert_to_mode (Pmode, val, 1);
12653 emit_move_insn (temp, val);
12656 XEXP (x, 1) = temp;
12660 else if (REG_P (XEXP (x, 1)))
12662 rtx temp = gen_reg_rtx (Pmode);
12663 rtx val = force_operand (XEXP (x, 0), temp);
12666 if (GET_MODE (val) != Pmode)
12667 val = convert_to_mode (Pmode, val, 1);
12668 emit_move_insn (temp, val);
12671 XEXP (x, 0) = temp;
12679 /* Print an integer constant expression in assembler syntax. Addition
12680 and subtraction are the only arithmetic that may appear in these
12681 expressions. FILE is the stdio stream to write to, X is the rtx, and
12682 CODE is the operand print code from the output string. */
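/* For example, a SYMBOL_REF wrapped in an UNSPEC_GOTOFF prints as
   "foo@GOTOFF", and a non-local symbol printed with the 'P' code
   under PIC typically gets an "@PLT" suffix.  */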
12685 output_pic_addr_const (FILE *file, rtx x, int code)
12689 switch (GET_CODE (x))
12692 gcc_assert (flag_pic);
12697 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
12698 output_addr_const (file, x);
12701 const char *name = XSTR (x, 0);
12703 /* Mark the decl as referenced so that cgraph will
12704 output the function. */
12705 if (SYMBOL_REF_DECL (x))
12706 mark_decl_referenced (SYMBOL_REF_DECL (x));
12709 if (MACHOPIC_INDIRECT
12710 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
12711 name = machopic_indirection_name (x, /*stub_p=*/true);
12713 assemble_name (file, name);
12715 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
12716 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
12717 fputs ("@PLT", file);
12724 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
12725 assemble_name (asm_out_file, buf);
12729 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
12733 /* This used to output parentheses around the expression,
12734 but that does not work on the 386 (either ATT or BSD assembler). */
12735 output_pic_addr_const (file, XEXP (x, 0), code);
12739 if (GET_MODE (x) == VOIDmode)
12741 /* We can use %d if the number is <32 bits and positive. */
12742 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
12743 fprintf (file, "0x%lx%08lx",
12744 (unsigned long) CONST_DOUBLE_HIGH (x),
12745 (unsigned long) CONST_DOUBLE_LOW (x));
12747 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
12750 /* We can't handle floating point constants;
12751 TARGET_PRINT_OPERAND must handle them. */
12752 output_operand_lossage ("floating constant misused");
12756 /* Some assemblers need integer constants to appear first. */
12757 if (CONST_INT_P (XEXP (x, 0)))
12759 output_pic_addr_const (file, XEXP (x, 0), code);
12761 output_pic_addr_const (file, XEXP (x, 1), code);
12765 gcc_assert (CONST_INT_P (XEXP (x, 1)));
12766 output_pic_addr_const (file, XEXP (x, 1), code);
12768 output_pic_addr_const (file, XEXP (x, 0), code);
12774 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
12775 output_pic_addr_const (file, XEXP (x, 0), code);
12777 output_pic_addr_const (file, XEXP (x, 1), code);
12779 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
12783 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
12785 bool f = i386_asm_output_addr_const_extra (file, x);
12790 gcc_assert (XVECLEN (x, 0) == 1);
12791 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
12792 switch (XINT (x, 1))
12795 fputs ("@GOT", file);
12797 case UNSPEC_GOTOFF:
12798 fputs ("@GOTOFF", file);
12800 case UNSPEC_PLTOFF:
12801 fputs ("@PLTOFF", file);
12804 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12805 "(%rip)" : "[rip]", file);
12807 case UNSPEC_GOTPCREL:
12808 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12809 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
12811 case UNSPEC_GOTTPOFF:
12812 /* FIXME: This might be @TPOFF in Sun ld too. */
12813 fputs ("@gottpoff", file);
12816 fputs ("@tpoff", file);
12818 case UNSPEC_NTPOFF:
12820 fputs ("@tpoff", file);
12822 fputs ("@ntpoff", file);
12824 case UNSPEC_DTPOFF:
12825 fputs ("@dtpoff", file);
12827 case UNSPEC_GOTNTPOFF:
12829 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12830 "@gottpoff(%rip)": "@gottpoff[rip]", file);
12832 fputs ("@gotntpoff", file);
12834 case UNSPEC_INDNTPOFF:
12835 fputs ("@indntpoff", file);
12838 case UNSPEC_MACHOPIC_OFFSET:
12840 machopic_output_function_base_name (file);
12844 output_operand_lossage ("invalid UNSPEC as operand");
12850 output_operand_lossage ("invalid expression as operand");
12854 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
12855 We need to emit DTP-relative relocations. */
12857 static void ATTRIBUTE_UNUSED
12858 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
12860 fputs (ASM_LONG, file);
12861 output_addr_const (file, x);
12862 fputs ("@dtpoff", file);
12868 fputs (", 0", file);
12871 gcc_unreachable ();
12875 /* Return true if X is a representation of the PIC register. This copes
12876 with calls from ix86_find_base_term, where the register might have
12877 been replaced by a cselib value. */
12880 ix86_pic_register_p (rtx x)
12882 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
12883 return (pic_offset_table_rtx
12884 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
12886 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
12889 /* Helper function for ix86_delegitimize_address.
12890 Attempt to delegitimize TLS local-exec accesses. */
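/* Roughly, a local-exec access such as %fs:x@tpoff -- a PLUS whose
   displacement is (const (unspec [x] UNSPEC_NTPOFF)) with an FS/GS
   segment override -- is unwound back to the plain SYMBOL_REF x,
   plus whatever base/index had been folded into the address.  */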
12893 ix86_delegitimize_tls_address (rtx orig_x)
12895 rtx x = orig_x, unspec;
12896 struct ix86_address addr;
12898 if (!TARGET_TLS_DIRECT_SEG_REFS)
12902 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
12904 if (ix86_decompose_address (x, &addr) == 0
12905 || addr.seg != (TARGET_64BIT ? SEG_FS : SEG_GS)
12906 || addr.disp == NULL_RTX
12907 || GET_CODE (addr.disp) != CONST)
12909 unspec = XEXP (addr.disp, 0);
12910 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
12911 unspec = XEXP (unspec, 0);
12912 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
12914 x = XVECEXP (unspec, 0, 0);
12915 gcc_assert (GET_CODE (x) == SYMBOL_REF);
12916 if (unspec != XEXP (addr.disp, 0))
12917 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
12920 rtx idx = addr.index;
12921 if (addr.scale != 1)
12922 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
12923 x = gen_rtx_PLUS (Pmode, idx, x);
12926 x = gen_rtx_PLUS (Pmode, addr.base, x);
12927 if (MEM_P (orig_x))
12928 x = replace_equiv_address_nv (orig_x, x);
12932 /* In the name of slightly smaller debug output, and to cater to
12933 general assembler lossage, recognize PIC+GOTOFF and turn it back
12934 into a direct symbol reference.
12936 On Darwin, this is necessary to avoid a crash, because Darwin
12937 has a different PIC label for each routine but the DWARF debugging
12938 information is not associated with any particular routine, so it's
12939 necessary to remove references to the PIC label from RTL stored by
12940 the DWARF output code. */
12943 ix86_delegitimize_address (rtx x)
12945 rtx orig_x = delegitimize_mem_from_attrs (x);
12946 /* addend is NULL or some rtx if x is something+GOTOFF where
12947 something doesn't include the PIC register. */
12948 rtx addend = NULL_RTX;
12949 /* reg_addend is NULL or a multiple of some register. */
12950 rtx reg_addend = NULL_RTX;
12951 /* const_addend is NULL or a const_int. */
12952 rtx const_addend = NULL_RTX;
12953 /* This is the result, or NULL. */
12954 rtx result = NULL_RTX;
12963 if (GET_CODE (x) != CONST
12964 || GET_CODE (XEXP (x, 0)) != UNSPEC
12965 || (XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
12966 && XINT (XEXP (x, 0), 1) != UNSPEC_PCREL)
12967 || !MEM_P (orig_x))
12968 return ix86_delegitimize_tls_address (orig_x);
12969 x = XVECEXP (XEXP (x, 0), 0, 0);
12970 if (GET_MODE (orig_x) != Pmode)
12972 x = simplify_gen_subreg (GET_MODE (orig_x), x, Pmode, 0);
12979 if (GET_CODE (x) != PLUS
12980 || GET_CODE (XEXP (x, 1)) != CONST)
12981 return ix86_delegitimize_tls_address (orig_x);
12983 if (ix86_pic_register_p (XEXP (x, 0)))
12984 /* %ebx + GOT/GOTOFF */
12986 else if (GET_CODE (XEXP (x, 0)) == PLUS)
12988 /* %ebx + %reg * scale + GOT/GOTOFF */
12989 reg_addend = XEXP (x, 0);
12990 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
12991 reg_addend = XEXP (reg_addend, 1);
12992 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
12993 reg_addend = XEXP (reg_addend, 0);
12996 reg_addend = NULL_RTX;
12997 addend = XEXP (x, 0);
13001 addend = XEXP (x, 0);
13003 x = XEXP (XEXP (x, 1), 0);
13004 if (GET_CODE (x) == PLUS
13005 && CONST_INT_P (XEXP (x, 1)))
13007 const_addend = XEXP (x, 1);
13011 if (GET_CODE (x) == UNSPEC
13012 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
13013 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
13014 result = XVECEXP (x, 0, 0);
13016 if (TARGET_MACHO && darwin_local_data_pic (x)
13017 && !MEM_P (orig_x))
13018 result = XVECEXP (x, 0, 0);
13021 return ix86_delegitimize_tls_address (orig_x);
13024 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
13026 result = gen_rtx_PLUS (Pmode, reg_addend, result);
13029 /* If the rest of original X doesn't involve the PIC register, add
13030 addend and subtract pic_offset_table_rtx. This can happen e.g.
13032 leal (%ebx, %ecx, 4), %ecx
13034 movl foo@GOTOFF(%ecx), %edx
13035 in which case we return (%ecx - %ebx) + foo. */
13036 if (pic_offset_table_rtx)
13037 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
13038 pic_offset_table_rtx),
13043 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
13045 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
13046 if (result == NULL_RTX)
13052 /* If X is a machine specific address (i.e. a symbol or label being
13053 referenced as a displacement from the GOT implemented using an
13054 UNSPEC), then return the base term. Otherwise return X. */
13057 ix86_find_base_term (rtx x)
13063 if (GET_CODE (x) != CONST)
13065 term = XEXP (x, 0);
13066 if (GET_CODE (term) == PLUS
13067 && (CONST_INT_P (XEXP (term, 1))
13068 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
13069 term = XEXP (term, 0);
13070 if (GET_CODE (term) != UNSPEC
13071 || (XINT (term, 1) != UNSPEC_GOTPCREL
13072 && XINT (term, 1) != UNSPEC_PCREL))
13075 return XVECEXP (term, 0, 0);
13078 return ix86_delegitimize_address (x);
13082 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
13083 int fp, FILE *file)
13085 const char *suffix;
13087 if (mode == CCFPmode || mode == CCFPUmode)
13089 code = ix86_fp_compare_code_to_integer (code);
13093 code = reverse_condition (code);
13144 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
13148 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
13149 Those same assemblers have the same but opposite lossage on cmov. */
13150 if (mode == CCmode)
13151 suffix = fp ? "nbe" : "a";
13152 else if (mode == CCCmode)
13155 gcc_unreachable ();
13171 gcc_unreachable ();
13175 gcc_assert (mode == CCmode || mode == CCCmode);
13192 gcc_unreachable ();
13196 /* ??? As above. */
13197 gcc_assert (mode == CCmode || mode == CCCmode);
13198 suffix = fp ? "nb" : "ae";
13201 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
13205 /* ??? As above. */
13206 if (mode == CCmode)
13208 else if (mode == CCCmode)
13209 suffix = fp ? "nb" : "ae";
13211 gcc_unreachable ();
13214 suffix = fp ? "u" : "p";
13217 suffix = fp ? "nu" : "np";
13220 gcc_unreachable ();
13222 fputs (suffix, file);
13225 /* Print the name of register X to FILE based on its machine mode and number.
13226 If CODE is 'w', pretend the mode is HImode.
13227 If CODE is 'b', pretend the mode is QImode.
13228 If CODE is 'k', pretend the mode is SImode.
13229 If CODE is 'q', pretend the mode is DImode.
13230 If CODE is 'x', pretend the mode is V4SFmode.
13231 If CODE is 't', pretend the mode is V8SFmode.
13232 If CODE is 'h', pretend the reg is the 'high' byte register.
13233 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
13234 If CODE is 'd', duplicate the operand for AVX instruction.
13238 print_reg (rtx x, int code, FILE *file)
13241 bool duplicated = code == 'd' && TARGET_AVX;
13243 gcc_assert (x == pc_rtx
13244 || (REGNO (x) != ARG_POINTER_REGNUM
13245 && REGNO (x) != FRAME_POINTER_REGNUM
13246 && REGNO (x) != FLAGS_REG
13247 && REGNO (x) != FPSR_REG
13248 && REGNO (x) != FPCR_REG));
13250 if (ASSEMBLER_DIALECT == ASM_ATT)
13255 gcc_assert (TARGET_64BIT);
13256 fputs ("rip", file);
13260 if (code == 'w' || MMX_REG_P (x))
13262 else if (code == 'b')
13264 else if (code == 'k')
13266 else if (code == 'q')
13268 else if (code == 'y')
13270 else if (code == 'h')
13272 else if (code == 'x')
13274 else if (code == 't')
13277 code = GET_MODE_SIZE (GET_MODE (x));
13279 /* Irritatingly, the AMD extended registers use a different naming
13280 convention from the normal registers. */
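/* For example, where the legacy family is named %al/%ax/%eax/%rax,
   the extended registers are spelled %r8b/%r8w/%r8d/%r8, which is
   what the fprintf calls below produce.  */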
13281 if (REX_INT_REG_P (x))
13283 gcc_assert (TARGET_64BIT);
13287 error ("extended registers have no high halves");
13290 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
13293 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
13296 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
13299 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
13302 error ("unsupported operand size for extended register");
13312 if (STACK_TOP_P (x))
13321 if (! ANY_FP_REG_P (x))
13322 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
13327 reg = hi_reg_name[REGNO (x)];
13330 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
13332 reg = qi_reg_name[REGNO (x)];
13335 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
13337 reg = qi_high_reg_name[REGNO (x)];
13342 gcc_assert (!duplicated);
13344 fputs (hi_reg_name[REGNO (x)] + 1, file);
13349 gcc_unreachable ();
13355 if (ASSEMBLER_DIALECT == ASM_ATT)
13356 fprintf (file, ", %%%s", reg);
13358 fprintf (file, ", %s", reg);
13362 /* Locate some local-dynamic symbol still in use by this function
13363 so that we can print its name in some tls_local_dynamic_base
13367 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
13371 if (GET_CODE (x) == SYMBOL_REF
13372 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
13374 cfun->machine->some_ld_name = XSTR (x, 0);
13381 static const char *
13382 get_some_local_dynamic_name (void)
13386 if (cfun->machine->some_ld_name)
13387 return cfun->machine->some_ld_name;
13389 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
13390 if (NONDEBUG_INSN_P (insn)
13391 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
13392 return cfun->machine->some_ld_name;
13397 /* Meaning of CODE:
13398 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
13399 C -- print opcode suffix for set/cmov insn.
13400 c -- like C, but print reversed condition
13401 F,f -- likewise, but for floating-point.
13402 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
13404 R -- print the prefix for register names.
13405 z -- print the opcode suffix for the size of the current operand.
13406 Z -- likewise, with special suffixes for x87 instructions.
13407 * -- print a star (in certain assembler syntax)
13408 A -- print an absolute memory reference.
13409 w -- print the operand as if it's a "word" (HImode) even if it isn't.
13410 s -- print a shift double count, followed by the assembler's argument
13412 b -- print the QImode name of the register for the indicated operand.
13413 %b0 would print %al if operands[0] is reg 0.
13414 w -- likewise, print the HImode name of the register.
13415 k -- likewise, print the SImode name of the register.
13416 q -- likewise, print the DImode name of the register.
13417 x -- likewise, print the V4SFmode name of the register.
13418 t -- likewise, print the V8SFmode name of the register.
13419 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
13420 y -- print "st(0)" instead of "st" as a register.
13421 d -- print duplicated register operand for AVX instruction.
13422 D -- print condition for SSE cmp instruction.
13423 P -- if PIC, print an @PLT suffix.
13424 p -- print raw symbol name.
13425 X -- don't print any sort of PIC '@' suffix for a symbol.
13426 & -- print some in-use local-dynamic symbol name.
13427 H -- print a memory address offset by 8; used for sse high-parts
13428 Y -- print condition for XOP pcom* instruction.
13429 + -- print a branch hint as 'cs' or 'ds' prefix
13430 ; -- print a semicolon (after prefixes, due to a bug in older gas).
13431 @ -- print a segment register of thread base pointer load
13435 ix86_print_operand (FILE *file, rtx x, int code)
13442 if (ASSEMBLER_DIALECT == ASM_ATT)
13448 const char *name = get_some_local_dynamic_name ();
13450 output_operand_lossage ("'%%&' used without any "
13451 "local dynamic TLS references");
13453 assemble_name (file, name);
13458 switch (ASSEMBLER_DIALECT)
13465 /* Intel syntax. For absolute addresses, registers should not
13466 be surrounded by braces. */
13470 ix86_print_operand (file, x, 0);
13477 gcc_unreachable ();
13480 ix86_print_operand (file, x, 0);
13485 if (ASSEMBLER_DIALECT == ASM_ATT)
13490 if (ASSEMBLER_DIALECT == ASM_ATT)
13495 if (ASSEMBLER_DIALECT == ASM_ATT)
13500 if (ASSEMBLER_DIALECT == ASM_ATT)
13505 if (ASSEMBLER_DIALECT == ASM_ATT)
13510 if (ASSEMBLER_DIALECT == ASM_ATT)
13515 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
13517 /* Opcodes don't get size suffixes if using Intel opcodes. */
13518 if (ASSEMBLER_DIALECT == ASM_INTEL)
13521 switch (GET_MODE_SIZE (GET_MODE (x)))
13540 output_operand_lossage
13541 ("invalid operand size for operand code '%c'", code);
13546 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
13548 (0, "non-integer operand used with operand code '%c'", code);
13552 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
13553 if (ASSEMBLER_DIALECT == ASM_INTEL)
13556 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
13558 switch (GET_MODE_SIZE (GET_MODE (x)))
13561 #ifdef HAVE_AS_IX86_FILDS
13571 #ifdef HAVE_AS_IX86_FILDQ
13574 fputs ("ll", file);
13582 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
13584 /* 387 opcodes don't get size suffixes
13585 if the operands are registers. */
13586 if (STACK_REG_P (x))
13589 switch (GET_MODE_SIZE (GET_MODE (x)))
13610 output_operand_lossage
13611 ("invalid operand type used with operand code '%c'", code);
13615 output_operand_lossage
13616 ("invalid operand size for operand code '%c'", code);
13634 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
13636 ix86_print_operand (file, x, 0);
13637 fputs (", ", file);
13642 /* A little bit of brain damage here. The SSE compare instructions
13643 use completely different names for the comparisons than the
13644 fp conditional moves do. */
13647 switch (GET_CODE (x))
13650 fputs ("eq", file);
13653 fputs ("eq_us", file);
13656 fputs ("lt", file);
13659 fputs ("nge", file);
13662 fputs ("le", file);
13665 fputs ("ngt", file);
13668 fputs ("unord", file);
13671 fputs ("neq", file);
13674 fputs ("neq_oq", file);
13677 fputs ("ge", file);
13680 fputs ("nlt", file);
13683 fputs ("gt", file);
13686 fputs ("nle", file);
13689 fputs ("ord", file);
13692 output_operand_lossage ("operand is not a condition code, "
13693 "invalid operand code 'D'");
13699 switch (GET_CODE (x))
13703 fputs ("eq", file);
13707 fputs ("lt", file);
13711 fputs ("le", file);
13714 fputs ("unord", file);
13718 fputs ("neq", file);
13722 fputs ("nlt", file);
13726 fputs ("nle", file);
13729 fputs ("ord", file);
13732 output_operand_lossage ("operand is not a condition code, "
13733 "invalid operand code 'D'");
13739 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13740 if (ASSEMBLER_DIALECT == ASM_ATT)
13742 switch (GET_MODE (x))
13744 case HImode: putc ('w', file); break;
13746 case SFmode: putc ('l', file); break;
13748 case DFmode: putc ('q', file); break;
13749 default: gcc_unreachable ();
13756 if (!COMPARISON_P (x))
13758 output_operand_lossage ("operand is neither a constant nor a "
13759 "condition code, invalid operand code "
13763 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
13766 if (!COMPARISON_P (x))
13768 output_operand_lossage ("operand is neither a constant nor a "
13769 "condition code, invalid operand code "
13773 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13774 if (ASSEMBLER_DIALECT == ASM_ATT)
13777 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
13780 /* Like above, but reverse condition */
13782 /* Check to see if argument to %c is really a constant
13783 and not a condition code which needs to be reversed. */
13784 if (!COMPARISON_P (x))
13786 output_operand_lossage ("operand is neither a constant nor a "
13787 "condition code, invalid operand "
13791 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
13794 if (!COMPARISON_P (x))
13796 output_operand_lossage ("operand is neither a constant nor a "
13797 "condition code, invalid operand "
13801 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13802 if (ASSEMBLER_DIALECT == ASM_ATT)
13805 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
13809 /* It doesn't actually matter what mode we use here, as we're
13810 only going to use this for printing. */
13811 x = adjust_address_nv (x, DImode, 8);
13819 || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS)
13822 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
13825 int pred_val = INTVAL (XEXP (x, 0));
13827 if (pred_val < REG_BR_PROB_BASE * 45 / 100
13828 || pred_val > REG_BR_PROB_BASE * 55 / 100)
13830 int taken = pred_val > REG_BR_PROB_BASE / 2;
13831 int cputaken = final_forward_branch_p (current_output_insn) == 0;
13833 /* Emit hints only in the case where the default branch prediction
13834 heuristics would fail. */
13835 if (taken != cputaken)
13837 /* We use 3e (DS) prefix for taken branches and
13838 2e (CS) prefix for not taken branches. */
13840 fputs ("ds ; ", file);
13842 fputs ("cs ; ", file);
13850 switch (GET_CODE (x))
13853 fputs ("neq", file);
13856 fputs ("eq", file);
13860 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
13864 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
13868 fputs ("le", file);
13872 fputs ("lt", file);
13875 fputs ("unord", file);
13878 fputs ("ord", file);
13881 fputs ("ueq", file);
13884 fputs ("nlt", file);
13887 fputs ("nle", file);
13890 fputs ("ule", file);
13893 fputs ("ult", file);
13896 fputs ("une", file);
13899 output_operand_lossage ("operand is not a condition code, "
13900 "invalid operand code 'Y'");
13906 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
13912 if (ASSEMBLER_DIALECT == ASM_ATT)
13915 /* The kernel uses a different segment register for performance
13916 reasons; a system call would not have to trash the userspace
13917 segment register, which would be expensive. */
13918 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
13919 fputs ("fs", file);
13921 fputs ("gs", file);
13925 output_operand_lossage ("invalid operand code '%c'", code);
13930 print_reg (x, code, file);
13932 else if (MEM_P (x))
13934 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
13935 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
13936 && GET_MODE (x) != BLKmode)
13939 switch (GET_MODE_SIZE (GET_MODE (x)))
13941 case 1: size = "BYTE"; break;
13942 case 2: size = "WORD"; break;
13943 case 4: size = "DWORD"; break;
13944 case 8: size = "QWORD"; break;
13945 case 12: size = "TBYTE"; break;
13947 if (GET_MODE (x) == XFmode)
13952 case 32: size = "YMMWORD"; break;
13954 gcc_unreachable ();
13957 /* Check for explicit size override (codes 'b', 'w' and 'k') */
13960 else if (code == 'w')
13962 else if (code == 'k')
13965 fputs (size, file);
13966 fputs (" PTR ", file);
13970 /* Avoid (%rip) for call operands. */
13971 if (CONSTANT_ADDRESS_P (x) && code == 'P'
13972 && !CONST_INT_P (x))
13973 output_addr_const (file, x);
13974 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
13975 output_operand_lossage ("invalid constraints for operand");
13977 output_address (x);
13980 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
13985 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
13986 REAL_VALUE_TO_TARGET_SINGLE (r, l);
13988 if (ASSEMBLER_DIALECT == ASM_ATT)
13990 /* Sign extend 32bit SFmode immediate to 8 bytes. */
13992 fprintf (file, "0x%08llx", (unsigned long long) (int) l);
13994 fprintf (file, "0x%08x", (unsigned int) l);
13997 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
14002 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
14003 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
14005 if (ASSEMBLER_DIALECT == ASM_ATT)
14007 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
14010 /* These float cases don't actually occur as immediate operands. */
14011 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
14015 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
14016 fputs (dstr, file);
14021 /* We have patterns that allow zero sets of memory, for instance.
14022 In 64-bit mode, we should probably support all 8-byte vectors,
14023 since we can in fact encode that into an immediate. */
14024 if (GET_CODE (x) == CONST_VECTOR)
14026 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
14030 if (code != 'P' && code != 'p')
14032 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
14034 if (ASSEMBLER_DIALECT == ASM_ATT)
14037 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
14038 || GET_CODE (x) == LABEL_REF)
14040 if (ASSEMBLER_DIALECT == ASM_ATT)
14043 fputs ("OFFSET FLAT:", file);
14046 if (CONST_INT_P (x))
14047 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14048 else if (flag_pic || MACHOPIC_INDIRECT)
14049 output_pic_addr_const (file, x, code);
14051 output_addr_const (file, x);
14056 ix86_print_operand_punct_valid_p (unsigned char code)
14058 return (code == '@' || code == '*' || code == '+'
14059 || code == '&' || code == ';');
14062 /* Print a memory operand whose address is ADDR. */
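/* For example, a full address prints as disp(base,index,scale) in
   AT&T syntax, e.g. 12(%ebx,%eax,4), and roughly as
   [base+index*scale+disp] in Intel syntax; displacement-only and
   %rip-relative forms are special-cased below.  */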
14065 ix86_print_operand_address (FILE *file, rtx addr)
14067 struct ix86_address parts;
14068 rtx base, index, disp;
14070 int ok = ix86_decompose_address (addr, &parts);
14075 index = parts.index;
14077 scale = parts.scale;
14085 if (ASSEMBLER_DIALECT == ASM_ATT)
14087 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
14090 gcc_unreachable ();
14093 /* Use one byte shorter RIP relative addressing for 64bit mode. */
14094 if (TARGET_64BIT && !base && !index)
14098 if (GET_CODE (disp) == CONST
14099 && GET_CODE (XEXP (disp, 0)) == PLUS
14100 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
14101 symbol = XEXP (XEXP (disp, 0), 0);
14103 if (GET_CODE (symbol) == LABEL_REF
14104 || (GET_CODE (symbol) == SYMBOL_REF
14105 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
14108 if (!base && !index)
14110 /* A displacement-only address requires special attention. */
14112 if (CONST_INT_P (disp))
14114 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
14115 fputs ("ds:", file);
14116 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
14119 output_pic_addr_const (file, disp, 0);
14121 output_addr_const (file, disp);
14125 /* Print DImode registers on 64bit targets to avoid addr32 prefixes. */
14126 int code = TARGET_64BIT ? 'q' : 0;
14128 if (ASSEMBLER_DIALECT == ASM_ATT)
14133 output_pic_addr_const (file, disp, 0);
14134 else if (GET_CODE (disp) == LABEL_REF)
14135 output_asm_label (disp);
14137 output_addr_const (file, disp);
14142 print_reg (base, code, file);
14146 print_reg (index, code, file);
14148 fprintf (file, ",%d", scale);
14154 rtx offset = NULL_RTX;
14158 /* Pull out the offset of a symbol; print any symbol itself. */
14159 if (GET_CODE (disp) == CONST
14160 && GET_CODE (XEXP (disp, 0)) == PLUS
14161 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
14163 offset = XEXP (XEXP (disp, 0), 1);
14164 disp = gen_rtx_CONST (VOIDmode,
14165 XEXP (XEXP (disp, 0), 0));
14169 output_pic_addr_const (file, disp, 0);
14170 else if (GET_CODE (disp) == LABEL_REF)
14171 output_asm_label (disp);
14172 else if (CONST_INT_P (disp))
14175 output_addr_const (file, disp);
14181 print_reg (base, code, file);
14184 if (INTVAL (offset) >= 0)
14186 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
14190 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
14197 print_reg (index, code, file);
14199 fprintf (file, "*%d", scale);
14206 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
14209 i386_asm_output_addr_const_extra (FILE *file, rtx x)
14213 if (GET_CODE (x) != UNSPEC)
14216 op = XVECEXP (x, 0, 0);
14217 switch (XINT (x, 1))
14219 case UNSPEC_GOTTPOFF:
14220 output_addr_const (file, op);
14221 /* FIXME: This might be @TPOFF in Sun ld. */
14222 fputs ("@gottpoff", file);
14225 output_addr_const (file, op);
14226 fputs ("@tpoff", file);
14228 case UNSPEC_NTPOFF:
14229 output_addr_const (file, op);
14231 fputs ("@tpoff", file);
14233 fputs ("@ntpoff", file);
14235 case UNSPEC_DTPOFF:
14236 output_addr_const (file, op);
14237 fputs ("@dtpoff", file);
14239 case UNSPEC_GOTNTPOFF:
14240 output_addr_const (file, op);
14242 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14243 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
14245 fputs ("@gotntpoff", file);
14247 case UNSPEC_INDNTPOFF:
14248 output_addr_const (file, op);
14249 fputs ("@indntpoff", file);
14252 case UNSPEC_MACHOPIC_OFFSET:
14253 output_addr_const (file, op);
14255 machopic_output_function_base_name (file);
14259 case UNSPEC_STACK_CHECK:
14263 gcc_assert (flag_split_stack);
14265 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
14266 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
14268 gcc_unreachable ();
14271 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
14282 /* Split one or more double-mode RTL references into pairs of half-mode
14283 references. The RTL can be REG, offsettable MEM, integer constant, or
14284 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
14285 split and "num" is its length. lo_half and hi_half are output arrays
14286 that parallel "operands". */
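/* For example, a DImode register on ia32 splits into SImode lo/hi
   subregs at byte offsets 0 and 4, while an offsettable DImode MEM
   at address ADDR splits into SImode MEMs at ADDR and ADDR+4.  */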
14289 split_double_mode (enum machine_mode mode, rtx operands[],
14290 int num, rtx lo_half[], rtx hi_half[])
14292 enum machine_mode half_mode;
14298 half_mode = DImode;
14301 half_mode = SImode;
14304 gcc_unreachable ();
14307 byte = GET_MODE_SIZE (half_mode);
14311 rtx op = operands[num];
14313 /* simplify_subreg refuses to split volatile memory addresses,
14314 but we still have to handle them. */
14317 lo_half[num] = adjust_address (op, half_mode, 0);
14318 hi_half[num] = adjust_address (op, half_mode, byte);
14322 lo_half[num] = simplify_gen_subreg (half_mode, op,
14323 GET_MODE (op) == VOIDmode
14324 ? mode : GET_MODE (op), 0);
14325 hi_half[num] = simplify_gen_subreg (half_mode, op,
14326 GET_MODE (op) == VOIDmode
14327 ? mode : GET_MODE (op), byte);
14332 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
14333 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
14334 is the expression of the binary operation. The output may either be
14335 emitted here, or returned to the caller, like all output_* functions.
14337 There is no guarantee that the operands are the same mode, as they
14338 might be within FLOAT or FLOAT_EXTEND expressions. */
14340 #ifndef SYSV386_COMPAT
14341 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
14342 wants to fix the assemblers because that causes incompatibility
14343 with gcc. No-one wants to fix gcc because that causes
14344 incompatibility with assemblers... You can use the option of
14345 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
14346 #define SYSV386_COMPAT 1
14350 output_387_binary_op (rtx insn, rtx *operands)
14352 static char buf[40];
14355 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
14357 #ifdef ENABLE_CHECKING
14358 /* Even if we do not want to check the inputs, this documents the input
14359 constraints, which helps in understanding the following code. */
14360 if (STACK_REG_P (operands[0])
14361 && ((REG_P (operands[1])
14362 && REGNO (operands[0]) == REGNO (operands[1])
14363 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
14364 || (REG_P (operands[2])
14365 && REGNO (operands[0]) == REGNO (operands[2])
14366 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
14367 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
14370 gcc_assert (is_sse);
14373 switch (GET_CODE (operands[3]))
14376 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14377 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14385 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14386 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14394 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14395 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14403 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14404 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14412 gcc_unreachable ();
14419 strcpy (buf, ssep);
14420 if (GET_MODE (operands[0]) == SFmode)
14421 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
14423 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
14427 strcpy (buf, ssep + 1);
14428 if (GET_MODE (operands[0]) == SFmode)
14429 strcat (buf, "ss\t{%2, %0|%0, %2}");
14431 strcat (buf, "sd\t{%2, %0|%0, %2}");
14437 switch (GET_CODE (operands[3]))
14441 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
14443 rtx temp = operands[2];
14444 operands[2] = operands[1];
14445 operands[1] = temp;
14448 /* We know operands[0] == operands[1]. */
14450 if (MEM_P (operands[2]))
14456 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
14458 if (STACK_TOP_P (operands[0]))
14459 /* How is it that we are storing to a dead operand[2]?
14460 Well, presumably operands[1] is dead too. We can't
14461 store the result to st(0) as st(0) gets popped on this
14462 instruction. Instead store to operands[2] (which I
14463 think has to be st(1)). st(1) will be popped later.
14464 gcc <= 2.8.1 didn't have this check and generated
14465 assembly code that the Unixware assembler rejected. */
14466 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
14468 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
14472 if (STACK_TOP_P (operands[0]))
14473 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
14475 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
14480 if (MEM_P (operands[1]))
14486 if (MEM_P (operands[2]))
14492 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
14495 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
14496 derived assemblers, confusingly reverse the direction of
14497 the operation for fsub{r} and fdiv{r} when the
14498 destination register is not st(0). The Intel assembler
14499 doesn't have this brain damage. Read !SYSV386_COMPAT to
14500 figure out what the hardware really does. */
14501 if (STACK_TOP_P (operands[0]))
14502 p = "{p\t%0, %2|rp\t%2, %0}";
14504 p = "{rp\t%2, %0|p\t%0, %2}";
14506 if (STACK_TOP_P (operands[0]))
14507 /* As above for fmul/fadd, we can't store to st(0). */
14508 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
14510 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
14515 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
14518 if (STACK_TOP_P (operands[0]))
14519 p = "{rp\t%0, %1|p\t%1, %0}";
14521 p = "{p\t%1, %0|rp\t%0, %1}";
14523 if (STACK_TOP_P (operands[0]))
14524 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
14526 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
14531 if (STACK_TOP_P (operands[0]))
14533 if (STACK_TOP_P (operands[1]))
14534 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
14536 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
14539 else if (STACK_TOP_P (operands[1]))
14542 p = "{\t%1, %0|r\t%0, %1}";
14544 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
14550 p = "{r\t%2, %0|\t%0, %2}";
14552 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
14558 gcc_unreachable ();
14565 /* Return needed mode for entity in optimize_mode_switching pass. */
14568 ix86_mode_needed (int entity, rtx insn)
14570 enum attr_i387_cw mode;
14572 /* The mode UNINITIALIZED is used to store the control word after a
14573 function call or ASM pattern. The mode ANY specifies that the function
14574 has no requirements on the control word and makes no changes in the
14575 bits we are interested in. */
14578 || (NONJUMP_INSN_P (insn)
14579 && (asm_noperands (PATTERN (insn)) >= 0
14580 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
14581 return I387_CW_UNINITIALIZED;
14583 if (recog_memoized (insn) < 0)
14584 return I387_CW_ANY;
14586 mode = get_attr_i387_cw (insn);
14591 if (mode == I387_CW_TRUNC)
14596 if (mode == I387_CW_FLOOR)
14601 if (mode == I387_CW_CEIL)
14606 if (mode == I387_CW_MASK_PM)
14611 gcc_unreachable ();
14614 return I387_CW_ANY;
14617 /* Output code to initialize control word copies used by trunc?f?i and
14618 rounding patterns. CURRENT_MODE is set to the current control word,
14619 while NEW_MODE is set to the new control word. */
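/* For reference: bits 10-11 of the 387 control word select the
   rounding mode -- 00 nearest, 01 down (floor), 10 up (ceil),
   11 toward zero (trunc) -- hence the 0x0c00, 0x0400 and 0x0800
   masks below; bit 5 (0x0020) masks the precision exception,
   which is what nearbyint needs.  */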
14622 emit_i387_cw_initialization (int mode)
14624 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
14627 enum ix86_stack_slot slot;
14629 rtx reg = gen_reg_rtx (HImode);
14631 emit_insn (gen_x86_fnstcw_1 (stored_mode));
14632 emit_move_insn (reg, copy_rtx (stored_mode));
14634 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
14635 || optimize_function_for_size_p (cfun))
14639 case I387_CW_TRUNC:
14640 /* round toward zero (truncate) */
14641 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
14642 slot = SLOT_CW_TRUNC;
14645 case I387_CW_FLOOR:
14646 /* round down toward -oo */
14647 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
14648 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
14649 slot = SLOT_CW_FLOOR;
14653 /* round up toward +oo */
14654 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
14655 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
14656 slot = SLOT_CW_CEIL;
14659 case I387_CW_MASK_PM:
14660 /* mask precision exception for nearbyint() */
14661 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
14662 slot = SLOT_CW_MASK_PM;
14666 gcc_unreachable ();
14673 case I387_CW_TRUNC:
14674 /* round toward zero (truncate) */
14675 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
14676 slot = SLOT_CW_TRUNC;
14679 case I387_CW_FLOOR:
14680 /* round down toward -oo */
14681 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
14682 slot = SLOT_CW_FLOOR;
14686 /* round up toward +oo */
14687 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
14688 slot = SLOT_CW_CEIL;
14691 case I387_CW_MASK_PM:
14692 /* mask precision exception for nearbyint() */
14693 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
14694 slot = SLOT_CW_MASK_PM;
14698 gcc_unreachable ();
14702 gcc_assert (slot < MAX_386_STACK_LOCALS);
14704 new_mode = assign_386_stack_local (HImode, slot);
14705 emit_move_insn (new_mode, reg);
14708 /* Output code for INSN to convert a float to a signed int. OPERANDS
14709 are the insn operands. The output may be [HSD]Imode and the input
14710 operand may be [SDX]Fmode. */
14713 output_fix_trunc (rtx insn, rtx *operands, bool fisttp)
14715 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
14716 int dimode_p = GET_MODE (operands[0]) == DImode;
14717 int round_mode = get_attr_i387_cw (insn);
14719 /* Jump through a hoop or two for DImode, since the hardware has no
14720 non-popping instruction. We used to do this a different way, but
14721 that was somewhat fragile and broke with post-reload splitters. */
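/* Note that fistp and fisttp always pop; so when the value in
   st(0) must survive (it does not die here), the fld below first
   duplicates it and the popping store then consumes the copy.  */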
14722 if ((dimode_p || fisttp) && !stack_top_dies)
14723 output_asm_insn ("fld\t%y1", operands);
14725 gcc_assert (STACK_TOP_P (operands[1]));
14726 gcc_assert (MEM_P (operands[0]));
14727 gcc_assert (GET_MODE (operands[1]) != TFmode);
14730 output_asm_insn ("fisttp%Z0\t%0", operands);
14733 if (round_mode != I387_CW_ANY)
14734 output_asm_insn ("fldcw\t%3", operands);
14735 if (stack_top_dies || dimode_p)
14736 output_asm_insn ("fistp%Z0\t%0", operands);
14738 output_asm_insn ("fist%Z0\t%0", operands);
14739 if (round_mode != I387_CW_ANY)
14740 output_asm_insn ("fldcw\t%2", operands);
14746 /* Output code for x87 ffreep insn. The OPNO argument, which may only
14747 have the values zero or one, indicates the ffreep insn's operand
14748 from the OPERANDS array. */
14750 static const char *
14751 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
14753 if (TARGET_USE_FFREEP)
14754 #ifdef HAVE_AS_IX86_FFREEP
14755 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
14758 static char retval[32];
14759 int regno = REGNO (operands[opno]);
14761 gcc_assert (FP_REGNO_P (regno));
14763 regno -= FIRST_STACK_REG;
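/* The string below hand-encodes the opcode: e.g. regno 1 yields
   the 16-bit word 0xc1df, stored little-endian as the bytes
   0xdf 0xc1, i.e. ffreep %st(1) (opcode DF C0+i), for assemblers
   that lack the ffreep mnemonic.  */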
14765 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
14770 return opno ? "fstp\t%y1" : "fstp\t%y0";
14774 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
14775 should be used. UNORDERED_P is true when fucom should be used. */
14778 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
14780 int stack_top_dies;
14781 rtx cmp_op0, cmp_op1;
14782 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
14786 cmp_op0 = operands[0];
14787 cmp_op1 = operands[1];
14791 cmp_op0 = operands[1];
14792 cmp_op1 = operands[2];
14797 static const char ucomiss[] = "vucomiss\t{%1, %0|%0, %1}";
14798 static const char ucomisd[] = "vucomisd\t{%1, %0|%0, %1}";
14799 static const char comiss[] = "vcomiss\t{%1, %0|%0, %1}";
14800 static const char comisd[] = "vcomisd\t{%1, %0|%0, %1}";
14802 if (GET_MODE (operands[0]) == SFmode)
14804 return &ucomiss[TARGET_AVX ? 0 : 1];
14806 return &comiss[TARGET_AVX ? 0 : 1];
14809 return &ucomisd[TARGET_AVX ? 0 : 1];
14811 return &comisd[TARGET_AVX ? 0 : 1];
14814 gcc_assert (STACK_TOP_P (cmp_op0));
14816 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
14818 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
14820 if (stack_top_dies)
14822 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
14823 return output_387_ffreep (operands, 1);
14826 return "ftst\n\tfnstsw\t%0";
14829 if (STACK_REG_P (cmp_op1)
14831 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
14832 && REGNO (cmp_op1) != FIRST_STACK_REG)
14834 /* If both the top of the 387 stack and the other operand (also a
14835 stack register) die, then this must be a `fcompp' float
14836 compare. */
14840 /* There is no double-popping fcomi variant. Fortunately,
14841 eflags is immune to the fstp's cc clobbering. */
14843 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
14845 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
14846 return output_387_ffreep (operands, 0);
14851 return "fucompp\n\tfnstsw\t%0";
14853 return "fcompp\n\tfnstsw\t%0";
14858 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
14860 static const char * const alt[16] =
14862 "fcom%Z2\t%y2\n\tfnstsw\t%0",
14863 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
14864 "fucom%Z2\t%y2\n\tfnstsw\t%0",
14865 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
14867 "ficom%Z2\t%y2\n\tfnstsw\t%0",
14868 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
14872 "fcomi\t{%y1, %0|%0, %y1}",
14873 "fcomip\t{%y1, %0|%0, %y1}",
14874 "fucomi\t{%y1, %0|%0, %y1}",
14875 "fucomip\t{%y1, %0|%0, %y1}",
14886 mask = eflags_p << 3;
14887 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
14888 mask |= unordered_p << 1;
14889 mask |= stack_top_dies;
14891 gcc_assert (mask < 16);
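/* Worked example: eflags_p = 1, a floating-point (non-integer)
   operand, unordered_p = 1 and a dying stack top give
   mask = 8 | 0 | 2 | 1 = 11, selecting the popping "fucomip"
   entry above.  */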
14900 ix86_output_addr_vec_elt (FILE *file, int value)
14902 const char *directive = ASM_LONG;
14906 directive = ASM_QUAD;
14908 gcc_assert (!TARGET_64BIT);
14911 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
14915 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
14917 const char *directive = ASM_LONG;
14920 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
14921 directive = ASM_QUAD;
14923 gcc_assert (!TARGET_64BIT);
14925 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
14926 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
14927 fprintf (file, "%s%s%d-%s%d\n",
14928 directive, LPREFIX, value, LPREFIX, rel);
14929 else if (HAVE_AS_GOTOFF_IN_DATA)
14930 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
14932 else if (TARGET_MACHO)
14934 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
14935 machopic_output_function_base_name (file);
14940 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
14941 GOT_SYMBOL_NAME, LPREFIX, value);
14944 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
14948 ix86_expand_clear (rtx dest)
14952 /* We play register width games, which are only valid after reload. */
14953 gcc_assert (reload_completed);
14955 /* Avoid HImode and its attendant prefix byte. */
14956 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
14957 dest = gen_rtx_REG (SImode, REGNO (dest));
14958 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
14960 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
14961 if (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ())
14963 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
14964 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
14970 /* X is an unchanging MEM. If it is a constant pool reference, return
14971 the constant pool rtx, else NULL. */
14974 maybe_get_pool_constant (rtx x)
14976 x = ix86_delegitimize_address (XEXP (x, 0));
14978 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
14979 return get_pool_constant (x);
14985 ix86_expand_move (enum machine_mode mode, rtx operands[])
14988 enum tls_model model;
14993 if (GET_CODE (op1) == SYMBOL_REF)
14995 model = SYMBOL_REF_TLS_MODEL (op1);
14998 op1 = legitimize_tls_address (op1, model, true);
14999 op1 = force_operand (op1, op0);
15002 if (GET_MODE (op1) != mode)
15003 op1 = convert_to_mode (mode, op1, 1);
15005 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15006 && SYMBOL_REF_DLLIMPORT_P (op1))
15007 op1 = legitimize_dllimport_symbol (op1, false);
15009 else if (GET_CODE (op1) == CONST
15010 && GET_CODE (XEXP (op1, 0)) == PLUS
15011 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
15013 rtx addend = XEXP (XEXP (op1, 0), 1);
15014 rtx symbol = XEXP (XEXP (op1, 0), 0);
15017 model = SYMBOL_REF_TLS_MODEL (symbol);
15019 tmp = legitimize_tls_address (symbol, model, true);
15020 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15021 && SYMBOL_REF_DLLIMPORT_P (symbol))
15022 tmp = legitimize_dllimport_symbol (symbol, true);
15026 tmp = force_operand (tmp, NULL);
15027 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
15028 op0, 1, OPTAB_DIRECT);
15034 if ((flag_pic || MACHOPIC_INDIRECT)
15035 && mode == Pmode && symbolic_operand (op1, Pmode))
15037 if (TARGET_MACHO && !TARGET_64BIT)
15040 /* dynamic-no-pic */
15041 if (MACHOPIC_INDIRECT)
15043 rtx temp = ((reload_in_progress
15044 || ((op0 && REG_P (op0))
15046 ? op0 : gen_reg_rtx (Pmode));
15047 op1 = machopic_indirect_data_reference (op1, temp);
15049 op1 = machopic_legitimize_pic_address (op1, mode,
15050 temp == op1 ? 0 : temp);
15052 if (op0 != op1 && GET_CODE (op0) != MEM)
15054 rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
15058 if (GET_CODE (op0) == MEM)
15059 op1 = force_reg (Pmode, op1);
15063 if (GET_CODE (temp) != REG)
15064 temp = gen_reg_rtx (Pmode);
15065 temp = legitimize_pic_address (op1, temp);
15070 /* dynamic-no-pic */
15076 op1 = force_reg (Pmode, op1);
15077 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
15079 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
15080 op1 = legitimize_pic_address (op1, reg);
15089 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
15090 || !push_operand (op0, mode))
15092 op1 = force_reg (mode, op1);
15094 if (push_operand (op0, mode)
15095 && ! general_no_elim_operand (op1, mode))
15096 op1 = copy_to_mode_reg (mode, op1);
15098 /* Force large constants in 64bit compilation into a register
15099 to get them CSEd. */
15100 if (can_create_pseudo_p ()
15101 && (mode == DImode) && TARGET_64BIT
15102 && immediate_operand (op1, mode)
15103 && !x86_64_zext_immediate_operand (op1, VOIDmode)
15104 && !register_operand (op0, mode)
15106 op1 = copy_to_mode_reg (mode, op1);
15108 if (can_create_pseudo_p ()
15109 && FLOAT_MODE_P (mode)
15110 && GET_CODE (op1) == CONST_DOUBLE)
15112 /* If we are loading a floating point constant to a register,
15113 force the value to memory now, since we'll get better code
15114 out of the back end. */
15116 op1 = validize_mem (force_const_mem (mode, op1));
15117 if (!register_operand (op0, mode))
15119 rtx temp = gen_reg_rtx (mode);
15120 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
15121 emit_move_insn (op0, temp);
15127 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
15131 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
15133 rtx op0 = operands[0], op1 = operands[1];
15134 unsigned int align = GET_MODE_ALIGNMENT (mode);
15136 /* Force constants other than zero into memory. We do not know how
15137 the instructions used to build constants modify the upper 64 bits
15138 of the register; once we have that information, we may be able
15139 to handle some of them more efficiently. */
15140 if (can_create_pseudo_p ()
15141 && register_operand (op0, mode)
15142 && (CONSTANT_P (op1)
15143 || (GET_CODE (op1) == SUBREG
15144 && CONSTANT_P (SUBREG_REG (op1))))
15145 && !standard_sse_constant_p (op1))
15146 op1 = validize_mem (force_const_mem (mode, op1));
/* We need to check memory alignment for SSE mode since attributes
15149 can make operands unaligned. */
15150 if (can_create_pseudo_p ()
15151 && SSE_REG_MODE_P (mode)
15152 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
15153 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
15157 /* ix86_expand_vector_move_misalign() does not like constants ... */
15158 if (CONSTANT_P (op1)
15159 || (GET_CODE (op1) == SUBREG
15160 && CONSTANT_P (SUBREG_REG (op1))))
15161 op1 = validize_mem (force_const_mem (mode, op1));
15163 /* ... nor both arguments in memory. */
15164 if (!register_operand (op0, mode)
15165 && !register_operand (op1, mode))
15166 op1 = force_reg (mode, op1);
15168 tmp[0] = op0; tmp[1] = op1;
15169 ix86_expand_vector_move_misalign (mode, tmp);
15173 /* Make operand1 a register if it isn't already. */
15174 if (can_create_pseudo_p ()
15175 && !register_operand (op0, mode)
15176 && !register_operand (op1, mode))
15178 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
15182 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
15185 /* Split 32-byte AVX unaligned load and store if needed. */
15188 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
15191 rtx (*extract) (rtx, rtx, rtx);
15192 rtx (*move_unaligned) (rtx, rtx);
15193 enum machine_mode mode;
15195 switch (GET_MODE (op0))
15198 gcc_unreachable ();
15200 extract = gen_avx_vextractf128v32qi;
15201 move_unaligned = gen_avx_movdqu256;
15205 extract = gen_avx_vextractf128v8sf;
15206 move_unaligned = gen_avx_movups256;
15210 extract = gen_avx_vextractf128v4df;
15211 move_unaligned = gen_avx_movupd256;
15216 if (MEM_P (op1) && TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
15218 rtx r = gen_reg_rtx (mode);
15219 m = adjust_address (op1, mode, 0);
15220 emit_move_insn (r, m);
15221 m = adjust_address (op1, mode, 16);
15222 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
15223 emit_move_insn (op0, r);
15225 else if (MEM_P (op0) && TARGET_AVX256_SPLIT_UNALIGNED_STORE)
15227 m = adjust_address (op0, mode, 0);
15228 emit_insn (extract (m, op1, const0_rtx));
15229 m = adjust_address (op0, mode, 16);
15230 emit_insn (extract (m, op1, const1_rtx));
15233 emit_insn (move_unaligned (op0, op1));
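/* For illustration only: with TARGET_AVX256_SPLIT_UNALIGNED_LOAD, the
   load path above emits roughly (AT&T syntax)

       vmovups     (mem), %xmm0                   # low 16 bytes
       vinsertf128 $1, 16(mem), %ymm0, %ymm0      # high 16 bytes

   and the split store path emits two vextractf128 stores; otherwise a
   single unaligned 32-byte move is used.  */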
15236 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
15237 straight to ix86_expand_vector_move. */
/* Code generation for scalar reg-reg moves of single and double precision data:
     if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
       movaps reg, reg
     else
       movss reg, reg
     if (x86_sse_partial_reg_dependency == true)
       movapd reg, reg
     else
       movsd reg, reg

   Code generation for scalar loads of double precision data:
     if (x86_sse_split_regs == true)
       movlpd mem, reg      (gas syntax)
     else
       movsd mem, reg

   Code generation for unaligned packed loads of single precision data
   (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
     if (x86_sse_unaligned_move_optimal)
       movups mem, reg
     if (x86_sse_partial_reg_dependency == true)
       xorps reg, reg / movlps mem, reg / movhps mem+8, reg
     else
       movlps mem, reg / movhps mem+8, reg

   Code generation for unaligned packed loads of double precision data
   (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
     if (x86_sse_unaligned_move_optimal)
       movupd mem, reg
     if (x86_sse_split_regs == true)
       movlpd mem, reg / movhpd mem+8, reg
     else
       movsd mem, reg / movhpd mem+8, reg  */
15289 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
15298 switch (GET_MODE_CLASS (mode))
15300 case MODE_VECTOR_INT:
15302 switch (GET_MODE_SIZE (mode))
15305 /* If we're optimizing for size, movups is the smallest. */
15306 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15308 op0 = gen_lowpart (V4SFmode, op0);
15309 op1 = gen_lowpart (V4SFmode, op1);
15310 emit_insn (gen_sse_movups (op0, op1));
15313 op0 = gen_lowpart (V16QImode, op0);
15314 op1 = gen_lowpart (V16QImode, op1);
15315 emit_insn (gen_sse2_movdqu (op0, op1));
15318 op0 = gen_lowpart (V32QImode, op0);
15319 op1 = gen_lowpart (V32QImode, op1);
15320 ix86_avx256_split_vector_move_misalign (op0, op1);
15323 gcc_unreachable ();
15326 case MODE_VECTOR_FLOAT:
15327 op0 = gen_lowpart (mode, op0);
15328 op1 = gen_lowpart (mode, op1);
15333 emit_insn (gen_sse_movups (op0, op1));
15336 ix86_avx256_split_vector_move_misalign (op0, op1);
15339 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15341 op0 = gen_lowpart (V4SFmode, op0);
15342 op1 = gen_lowpart (V4SFmode, op1);
15343 emit_insn (gen_sse_movups (op0, op1));
15346 emit_insn (gen_sse2_movupd (op0, op1));
15349 ix86_avx256_split_vector_move_misalign (op0, op1);
15352 gcc_unreachable ();
15357 gcc_unreachable ();
15365 /* If we're optimizing for size, movups is the smallest. */
15366 if (optimize_insn_for_size_p ()
15367 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15369 op0 = gen_lowpart (V4SFmode, op0);
15370 op1 = gen_lowpart (V4SFmode, op1);
15371 emit_insn (gen_sse_movups (op0, op1));
15375 /* ??? If we have typed data, then it would appear that using
movdqu is the only way to get unaligned data loaded with
   integer type instructions.  */
15378 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
15380 op0 = gen_lowpart (V16QImode, op0);
15381 op1 = gen_lowpart (V16QImode, op1);
15382 emit_insn (gen_sse2_movdqu (op0, op1));
15386 if (TARGET_SSE2 && mode == V2DFmode)
15390 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
15392 op0 = gen_lowpart (V2DFmode, op0);
15393 op1 = gen_lowpart (V2DFmode, op1);
15394 emit_insn (gen_sse2_movupd (op0, op1));
15398 /* When SSE registers are split into halves, we can avoid
15399 writing to the top half twice. */
15400 if (TARGET_SSE_SPLIT_REGS)
15402 emit_clobber (op0);
15407 /* ??? Not sure about the best option for the Intel chips.
15408 The following would seem to satisfy; the register is
15409 entirely cleared, breaking the dependency chain. We
15410 then store to the upper half, with a dependency depth
15411 of one. A rumor has it that Intel recommends two movsd
15412 followed by an unpacklpd, but this is unconfirmed. And
15413 given that the dependency depth of the unpacklpd would
15414 still be one, I'm not sure why this would be better. */
15415 zero = CONST0_RTX (V2DFmode);
15418 m = adjust_address (op1, DFmode, 0);
15419 emit_insn (gen_sse2_loadlpd (op0, zero, m));
15420 m = adjust_address (op1, DFmode, 8);
15421 emit_insn (gen_sse2_loadhpd (op0, op0, m));
15425 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
15427 op0 = gen_lowpart (V4SFmode, op0);
15428 op1 = gen_lowpart (V4SFmode, op1);
15429 emit_insn (gen_sse_movups (op0, op1));
15433 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
15434 emit_move_insn (op0, CONST0_RTX (mode));
15436 emit_clobber (op0);
15438 if (mode != V4SFmode)
15439 op0 = gen_lowpart (V4SFmode, op0);
15440 m = adjust_address (op1, V2SFmode, 0);
15441 emit_insn (gen_sse_loadlps (op0, op0, m));
15442 m = adjust_address (op1, V2SFmode, 8);
15443 emit_insn (gen_sse_loadhps (op0, op0, m));
15446 else if (MEM_P (op0))
15448 /* If we're optimizing for size, movups is the smallest. */
15449 if (optimize_insn_for_size_p ()
15450 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15452 op0 = gen_lowpart (V4SFmode, op0);
15453 op1 = gen_lowpart (V4SFmode, op1);
15454 emit_insn (gen_sse_movups (op0, op1));
/* ??? Similar to above, only less clear because of
   "typeless stores".  */
15460 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
15461 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
15463 op0 = gen_lowpart (V16QImode, op0);
15464 op1 = gen_lowpart (V16QImode, op1);
15465 emit_insn (gen_sse2_movdqu (op0, op1));
15469 if (TARGET_SSE2 && mode == V2DFmode)
15471 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
15473 op0 = gen_lowpart (V2DFmode, op0);
15474 op1 = gen_lowpart (V2DFmode, op1);
15475 emit_insn (gen_sse2_movupd (op0, op1));
15479 m = adjust_address (op0, DFmode, 0);
15480 emit_insn (gen_sse2_storelpd (m, op1));
15481 m = adjust_address (op0, DFmode, 8);
15482 emit_insn (gen_sse2_storehpd (m, op1));
15487 if (mode != V4SFmode)
15488 op1 = gen_lowpart (V4SFmode, op1);
15490 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
15492 op0 = gen_lowpart (V4SFmode, op0);
15493 emit_insn (gen_sse_movups (op0, op1));
15497 m = adjust_address (op0, V2SFmode, 0);
15498 emit_insn (gen_sse_storelps (m, op1));
15499 m = adjust_address (op0, V2SFmode, 8);
15500 emit_insn (gen_sse_storehps (m, op1));
15505 gcc_unreachable ();
15508 /* Expand a push in MODE. This is some mode for which we do not support
15509 proper push instructions, at least from the registers that we expect
15510 the value to live in. */
15513 ix86_expand_push (enum machine_mode mode, rtx x)
15517 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
15518 GEN_INT (-GET_MODE_SIZE (mode)),
15519 stack_pointer_rtx, 1, OPTAB_DIRECT);
15520 if (tmp != stack_pointer_rtx)
15521 emit_move_insn (stack_pointer_rtx, tmp);
15523 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
/* When we push an operand onto the stack, it has to be aligned at
   least at the function argument boundary.  However since we don't
   have the argument type, we can't determine the actual argument
   boundary.  */
15529 emit_move_insn (tmp, x);
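/* As a sketch, pushing a TImode value held in an SSE register on
   x86_64 expands to something like

       subq    $16, %rsp
       movups  %xmm0, (%rsp)

   i.e. an explicit stack-pointer adjustment followed by an ordinary
   store, since there is no 16-byte push instruction.  */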
15532 /* Helper function of ix86_fixup_binary_operands to canonicalize
15533 operand order. Returns true if the operands should be swapped. */
15536 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
15539 rtx dst = operands[0];
15540 rtx src1 = operands[1];
15541 rtx src2 = operands[2];
15543 /* If the operation is not commutative, we can't do anything. */
15544 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
15547 /* Highest priority is that src1 should match dst. */
15548 if (rtx_equal_p (dst, src1))
15550 if (rtx_equal_p (dst, src2))
15553 /* Next highest priority is that immediate constants come second. */
15554 if (immediate_operand (src2, mode))
15556 if (immediate_operand (src1, mode))
15559 /* Lowest priority is that memory references should come second. */
15569 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
15570 destination to use for the operation. If different from the true
15571 destination in operands[0], a copy operation will be required. */
15574 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
15577 rtx dst = operands[0];
15578 rtx src1 = operands[1];
15579 rtx src2 = operands[2];
15581 /* Canonicalize operand order. */
15582 if (ix86_swap_binary_operands_p (code, mode, operands))
15586 /* It is invalid to swap operands of different modes. */
15587 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
15594 /* Both source operands cannot be in memory. */
15595 if (MEM_P (src1) && MEM_P (src2))
15597 /* Optimization: Only read from memory once. */
15598 if (rtx_equal_p (src1, src2))
15600 src2 = force_reg (mode, src2);
15604 src2 = force_reg (mode, src2);
15607 /* If the destination is memory, and we do not have matching source
15608 operands, do things in registers. */
15609 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
15610 dst = gen_reg_rtx (mode);
15612 /* Source 1 cannot be a constant. */
15613 if (CONSTANT_P (src1))
15614 src1 = force_reg (mode, src1);
15616 /* Source 1 cannot be a non-matching memory. */
15617 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
15618 src1 = force_reg (mode, src1);
15620 operands[1] = src1;
15621 operands[2] = src2;
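/* For example, expanding "a = b + a" in SImode: PLUS is commutative,
   so the operands are swapped to make src1 match the destination,
   turning it into the matching form "a = a + b" that the add pattern
   accepts without an extra copy.  */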
15625 /* Similarly, but assume that the destination has already been
15626 set up properly. */
15629 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
15630 enum machine_mode mode, rtx operands[])
15632 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
15633 gcc_assert (dst == operands[0]);
15636 /* Attempt to expand a binary operator. Make the expansion closer to the
actual machine than just general_operand, which would allow 3 separate
15638 memory references (one output, two input) in a single insn. */
15641 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
15644 rtx src1, src2, dst, op, clob;
15646 dst = ix86_fixup_binary_operands (code, mode, operands);
15647 src1 = operands[1];
15648 src2 = operands[2];
15650 /* Emit the instruction. */
15652 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
15653 if (reload_in_progress)
15655 /* Reload doesn't know about the flags register, and doesn't know that
15656 it doesn't want to clobber it. We can only do this with PLUS. */
15657 gcc_assert (code == PLUS);
15660 else if (reload_completed
15662 && !rtx_equal_p (dst, src1))
15664 /* This is going to be an LEA; avoid splitting it later. */
15669 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15670 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
15673 /* Fix up the destination if needed. */
15674 if (dst != operands[0])
15675 emit_move_insn (operands[0], dst);
15678 /* Return TRUE or FALSE depending on whether the binary operator meets the
15679 appropriate constraints. */
15682 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
15685 rtx dst = operands[0];
15686 rtx src1 = operands[1];
15687 rtx src2 = operands[2];
15689 /* Both source operands cannot be in memory. */
15690 if (MEM_P (src1) && MEM_P (src2))
15693 /* Canonicalize operand order for commutative operators. */
15694 if (ix86_swap_binary_operands_p (code, mode, operands))
15701 /* If the destination is memory, we must have a matching source operand. */
15702 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
15705 /* Source 1 cannot be a constant. */
15706 if (CONSTANT_P (src1))
15709 /* Source 1 cannot be a non-matching memory. */
15710 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
15712 /* Support "andhi/andsi/anddi" as a zero-extending move. */
15713 return (code == AND
15716 || (TARGET_64BIT && mode == DImode))
15717 && CONST_INT_P (src2)
15718 && (INTVAL (src2) == 0xff
15719 || INTVAL (src2) == 0xffff));
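/* Example of the zero-extend special case above: "reg = mem & 0xff"
   has a non-matching memory source, which is normally rejected, but
   it is accepted here because it can be emitted as a zero-extending
   load, roughly

       movzbl  mem, %reg

   instead of a load followed by an AND.  */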
15725 /* Attempt to expand a unary operator. Make the expansion closer to the
actual machine than just general_operand, which would allow 2 separate
15727 memory references (one output, one input) in a single insn. */
15730 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
15733 int matching_memory;
15734 rtx src, dst, op, clob;
15739 /* If the destination is memory, and we do not have matching source
15740 operands, do things in registers. */
15741 matching_memory = 0;
15744 if (rtx_equal_p (dst, src))
15745 matching_memory = 1;
15747 dst = gen_reg_rtx (mode);
15750 /* When source operand is memory, destination must match. */
15751 if (MEM_P (src) && !matching_memory)
15752 src = force_reg (mode, src);
15754 /* Emit the instruction. */
15756 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
15757 if (reload_in_progress || code == NOT)
15759 /* Reload doesn't know about the flags register, and doesn't know that
15760 it doesn't want to clobber it. */
15761 gcc_assert (code == NOT);
15766 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15767 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
15770 /* Fix up the destination if needed. */
15771 if (dst != operands[0])
15772 emit_move_insn (operands[0], dst);
15775 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
15776 divisor are within the range [0-255]. */
15779 ix86_split_idivmod (enum machine_mode mode, rtx operands[],
15782 rtx end_label, qimode_label;
15783 rtx insn, div, mod;
15784 rtx scratch, tmp0, tmp1, tmp2;
15785 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
15786 rtx (*gen_zero_extend) (rtx, rtx);
15787 rtx (*gen_test_ccno_1) (rtx, rtx);
15792 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
15793 gen_test_ccno_1 = gen_testsi_ccno_1;
15794 gen_zero_extend = gen_zero_extendqisi2;
15797 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
15798 gen_test_ccno_1 = gen_testdi_ccno_1;
15799 gen_zero_extend = gen_zero_extendqidi2;
15802 gcc_unreachable ();
15805 end_label = gen_label_rtx ();
15806 qimode_label = gen_label_rtx ();
15808 scratch = gen_reg_rtx (mode);
/* Use 8bit unsigned divmod if dividend and divisor are within
15811 the range [0-255]. */
15812 emit_move_insn (scratch, operands[2]);
15813 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
15814 scratch, 1, OPTAB_DIRECT);
15815 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
15816 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
15817 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
15818 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
15819 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
15821 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
15822 predict_jump (REG_BR_PROB_BASE * 50 / 100);
15823 JUMP_LABEL (insn) = qimode_label;
/* Generate original signed/unsigned divmod. */
15826 div = gen_divmod4_1 (operands[0], operands[1],
15827 operands[2], operands[3]);
15830 /* Branch to the end. */
15831 emit_jump_insn (gen_jump (end_label));
15834 /* Generate 8bit unsigned divide. */
15835 emit_label (qimode_label);
15836 /* Don't use operands[0] for result of 8bit divide since not all
15837 registers support QImode ZERO_EXTRACT. */
15838 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
15839 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
15840 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
15841 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
15845 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
15846 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
15850 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
15851 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
15854 /* Extract remainder from AH. */
15855 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
15856 if (REG_P (operands[1]))
15857 insn = emit_move_insn (operands[1], tmp1);
/* Need a new scratch register since the old one has the result
   of the 8bit divide.  */
15862 scratch = gen_reg_rtx (mode);
15863 emit_move_insn (scratch, tmp1);
15864 insn = emit_move_insn (operands[1], scratch);
15866 set_unique_reg_note (insn, REG_EQUAL, mod);
15868 /* Zero extend quotient from AL. */
15869 tmp1 = gen_lowpart (QImode, tmp0);
15870 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
15871 set_unique_reg_note (insn, REG_EQUAL, div);
15873 emit_label (end_label);
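/* Sketch of the emitted control flow for an unsigned SImode division
   (illustrative only, not literal output):

       if (((a | b) & ~0xffU) == 0)
         {
           // 8bit divide: AL gets the quotient, AH the remainder.
           quot = (unsigned char) a / (unsigned char) b;
           rem = (unsigned char) a % (unsigned char) b;
         }
       else
         ... full 32bit (or 64bit) divide ...
*/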
15876 #define LEA_SEARCH_THRESHOLD 12
/* Search backward for a non-agu definition of register number REGNO1
15879 or register number REGNO2 in INSN's basic block until
15880 1. Pass LEA_SEARCH_THRESHOLD instructions, or
15881 2. Reach BB boundary, or
15882 3. Reach agu definition.
15883 Returns the distance between the non-agu definition point and INSN.
If there is no definition point, returns -1. */
15887 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
15890 basic_block bb = BLOCK_FOR_INSN (insn);
15893 enum attr_type insn_type;
15895 if (insn != BB_HEAD (bb))
15897 rtx prev = PREV_INSN (insn);
15898 while (prev && distance < LEA_SEARCH_THRESHOLD)
15900 if (NONDEBUG_INSN_P (prev))
15903 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
15904 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
15905 && !DF_REF_IS_ARTIFICIAL (*def_rec)
15906 && (regno1 == DF_REF_REGNO (*def_rec)
15907 || regno2 == DF_REF_REGNO (*def_rec)))
15909 insn_type = get_attr_type (prev);
15910 if (insn_type != TYPE_LEA)
15914 if (prev == BB_HEAD (bb))
15916 prev = PREV_INSN (prev);
15920 if (distance < LEA_SEARCH_THRESHOLD)
15924 bool simple_loop = false;
15926 FOR_EACH_EDGE (e, ei, bb->preds)
15929 simple_loop = true;
15935 rtx prev = BB_END (bb);
15938 && distance < LEA_SEARCH_THRESHOLD)
15940 if (NONDEBUG_INSN_P (prev))
15943 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
15944 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
15945 && !DF_REF_IS_ARTIFICIAL (*def_rec)
15946 && (regno1 == DF_REF_REGNO (*def_rec)
15947 || regno2 == DF_REF_REGNO (*def_rec)))
15949 insn_type = get_attr_type (prev);
15950 if (insn_type != TYPE_LEA)
15954 prev = PREV_INSN (prev);
15962 /* get_attr_type may modify recog data. We want to make sure
15963 that recog data is valid for instruction INSN, on which
15964 distance_non_agu_define is called. INSN is unchanged here. */
15965 extract_insn_cached (insn);
15969 /* Return the distance between INSN and the next insn that uses
register number REGNO0 in a memory address.  Return -1 if no such
use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
15974 distance_agu_use (unsigned int regno0, rtx insn)
15976 basic_block bb = BLOCK_FOR_INSN (insn);
15981 if (insn != BB_END (bb))
15983 rtx next = NEXT_INSN (insn);
15984 while (next && distance < LEA_SEARCH_THRESHOLD)
15986 if (NONDEBUG_INSN_P (next))
15990 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
15991 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
15992 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
15993 && regno0 == DF_REF_REGNO (*use_rec))
/* Return DISTANCE if OP0 is used in a memory
15996 address in NEXT. */
16000 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
16001 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
16002 && !DF_REF_IS_ARTIFICIAL (*def_rec)
16003 && regno0 == DF_REF_REGNO (*def_rec))
16005 /* Return -1 if OP0 is set in NEXT. */
16009 if (next == BB_END (bb))
16011 next = NEXT_INSN (next);
16015 if (distance < LEA_SEARCH_THRESHOLD)
16019 bool simple_loop = false;
16021 FOR_EACH_EDGE (e, ei, bb->succs)
16024 simple_loop = true;
16030 rtx next = BB_HEAD (bb);
16033 && distance < LEA_SEARCH_THRESHOLD)
16035 if (NONDEBUG_INSN_P (next))
16039 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
16040 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
16041 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
16042 && regno0 == DF_REF_REGNO (*use_rec))
/* Return DISTANCE if OP0 is used in a memory
16045 address in NEXT. */
16049 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
16050 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
16051 && !DF_REF_IS_ARTIFICIAL (*def_rec)
16052 && regno0 == DF_REF_REGNO (*def_rec))
16054 /* Return -1 if OP0 is set in NEXT. */
16059 next = NEXT_INSN (next);
/* Define this macro to tune LEA priority vs ADD; it takes effect when
   there is a choice between LEA and ADD.
   Negative value: ADD is preferred over LEA.
   Zero: neutral.
   Positive value: LEA is preferred over ADD.  */
16072 #define IX86_LEA_PRIORITY 2
/* Return true if it is ok to optimize an ADD operation to an LEA
   operation to avoid flag register consumption.  For most processors,
   ADD is faster than LEA.  For processors like Atom, if the
   destination register of the LEA holds an actual address which will
   be used soon, LEA is better; otherwise ADD is better. */
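/* For example, with a = a + c the choice is between (illustrative)

       addl    %ecx, %eax              # one ALU insn, clobbers flags

   and

       leal    (%eax,%ecx), %eax       # executes in the AGU

   On Atom the LEA form wins when %eax is about to be used in a memory
   address nearby, which is what the distance checks below estimate.  */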
16081 ix86_lea_for_add_ok (rtx insn, rtx operands[])
16083 unsigned int regno0 = true_regnum (operands[0]);
16084 unsigned int regno1 = true_regnum (operands[1]);
16085 unsigned int regno2 = true_regnum (operands[2]);
/* If a = b + c (a != b && a != c), we must use the lea form.  */
16088 if (regno0 != regno1 && regno0 != regno2)
16091 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
16095 int dist_define, dist_use;
16097 /* Return false if REGNO0 isn't used in memory address. */
16098 dist_use = distance_agu_use (regno0, insn);
16102 dist_define = distance_non_agu_define (regno1, regno2, insn);
16103 if (dist_define <= 0)
/* If this insn has both a backward non-agu dependence and a forward
   agu dependence, the one with the shorter distance takes effect. */
16108 if ((dist_define + IX86_LEA_PRIORITY) < dist_use)
/* Return true if destination reg of SET_BODY is shift count of
   USE_BODY.  */
16119 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
16125 /* Retrieve destination of SET_BODY. */
16126 switch (GET_CODE (set_body))
16129 set_dest = SET_DEST (set_body);
16130 if (!set_dest || !REG_P (set_dest))
16134 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
16135 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
16143 /* Retrieve shift count of USE_BODY. */
16144 switch (GET_CODE (use_body))
16147 shift_rtx = XEXP (use_body, 1);
16150 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
16151 if (ix86_dep_by_shift_count_body (set_body,
16152 XVECEXP (use_body, 0, i)))
16160 && (GET_CODE (shift_rtx) == ASHIFT
16161 || GET_CODE (shift_rtx) == LSHIFTRT
16162 || GET_CODE (shift_rtx) == ASHIFTRT
16163 || GET_CODE (shift_rtx) == ROTATE
16164 || GET_CODE (shift_rtx) == ROTATERT))
16166 rtx shift_count = XEXP (shift_rtx, 1);
16168 /* Return true if shift count is dest of SET_BODY. */
16169 if (REG_P (shift_count)
16170 && true_regnum (set_dest) == true_regnum (shift_count))
/* Return true if destination reg of SET_INSN is shift count of
   USE_INSN.  */
16181 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
16183 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
16184 PATTERN (use_insn));
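/* Illustrative positive case for the above:

       movl    %esi, %ecx      # SET_INSN defines %ecx
       sall    %cl, %eax       # USE_INSN shifts by %cl

   The destination register of the first insn is the shift count of
   the second, so true is returned.  */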
16187 /* Return TRUE or FALSE depending on whether the unary operator meets the
16188 appropriate constraints. */
16191 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
16192 enum machine_mode mode ATTRIBUTE_UNUSED,
16193 rtx operands[2] ATTRIBUTE_UNUSED)
/* If one of the operands is memory, source and destination must match. */
16196 if ((MEM_P (operands[0])
16197 || MEM_P (operands[1]))
16198 && ! rtx_equal_p (operands[0], operands[1]))
16203 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
16204 are ok, keeping in mind the possible movddup alternative. */
16207 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
16209 if (MEM_P (operands[0]))
16210 return rtx_equal_p (operands[0], operands[1 + high]);
16211 if (MEM_P (operands[1]) && MEM_P (operands[2]))
16212 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
16216 /* Post-reload splitter for converting an SF or DFmode value in an
16217 SSE register into an unsigned SImode. */
16220 ix86_split_convert_uns_si_sse (rtx operands[])
16222 enum machine_mode vecmode;
16223 rtx value, large, zero_or_two31, input, two31, x;
16225 large = operands[1];
16226 zero_or_two31 = operands[2];
16227 input = operands[3];
16228 two31 = operands[4];
16229 vecmode = GET_MODE (large);
16230 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
16232 /* Load up the value into the low element. We must ensure that the other
16233 elements are valid floats -- zero is the easiest such value. */
16236 if (vecmode == V4SFmode)
16237 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
16239 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
16243 input = gen_rtx_REG (vecmode, REGNO (input));
16244 emit_move_insn (value, CONST0_RTX (vecmode));
16245 if (vecmode == V4SFmode)
16246 emit_insn (gen_sse_movss (value, value, input));
16248 emit_insn (gen_sse2_movsd (value, value, input));
16251 emit_move_insn (large, two31);
16252 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
16254 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
16255 emit_insn (gen_rtx_SET (VOIDmode, large, x));
16257 x = gen_rtx_AND (vecmode, zero_or_two31, large);
16258 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
16260 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
16261 emit_insn (gen_rtx_SET (VOIDmode, value, x));
16263 large = gen_rtx_REG (V4SImode, REGNO (large));
16264 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
16266 x = gen_rtx_REG (V4SImode, REGNO (value));
16267 if (vecmode == V4SFmode)
16268 emit_insn (gen_sse2_cvttps2dq (x, value));
16270 emit_insn (gen_sse2_cvttpd2dq (x, value));
16273 emit_insn (gen_xorv4si3 (value, value, large));
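/* Net effect, sketched in C for one element (the sign bit is biased
   away before the signed truncating convert, then restored):

       if (value >= 0x1.0p31)
         result = (unsigned int) (int) (value - 0x1.0p31) ^ 0x80000000u;
       else
         result = (unsigned int) (int) value;
*/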
16276 /* Convert an unsigned DImode value into a DFmode, using only SSE.
16277 Expects the 64-bit DImode to be supplied in a pair of integral
16278 registers. Requires SSE2; will use SSE3 if available. For x86_32,
16279 -mfpmath=sse, !optimize_size only. */
16282 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
16284 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
16285 rtx int_xmm, fp_xmm;
16286 rtx biases, exponents;
16289 int_xmm = gen_reg_rtx (V4SImode);
16290 if (TARGET_INTER_UNIT_MOVES)
16291 emit_insn (gen_movdi_to_sse (int_xmm, input));
16292 else if (TARGET_SSE_SPLIT_REGS)
16294 emit_clobber (int_xmm);
16295 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
16299 x = gen_reg_rtx (V2DImode);
16300 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
16301 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
16304 x = gen_rtx_CONST_VECTOR (V4SImode,
16305 gen_rtvec (4, GEN_INT (0x43300000UL),
16306 GEN_INT (0x45300000UL),
16307 const0_rtx, const0_rtx));
16308 exponents = validize_mem (force_const_mem (V4SImode, x));
16310 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
16311 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
16313 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
16314 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
16315 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
16316 (0x1.0p84 + double(fp_value_hi_xmm)).
16317 Note these exponents differ by 32. */
16319 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
16321 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
16322 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
16323 real_ldexp (&bias_lo_rvt, &dconst1, 52);
16324 real_ldexp (&bias_hi_rvt, &dconst1, 84);
16325 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
16326 x = const_double_from_real_value (bias_hi_rvt, DFmode);
16327 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
16328 biases = validize_mem (force_const_mem (V2DFmode, biases));
16329 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
16331 /* Add the upper and lower DFmode values together. */
16333 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
16336 x = copy_to_mode_reg (V2DFmode, fp_xmm);
16337 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
16338 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
16341 ix86_expand_vector_extract (false, target, fp_xmm, 0);
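/* The arithmetic behind the bias trick, for input a = hi * 2^32 + lo
   (unsigned 32-bit halves): both partial values are exact in DFmode,

       (0x1.0p52 + (double) lo) - 0x1.0p52 == (double) lo
       (0x1.0p84 + (double) hi * 0x1.0p32) - 0x1.0p84 == (double) hi * 0x1.0p32

   and the final add (haddpd when SSE3 is available) combines them
   into (double) a with at most one rounding.  */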
16344 /* Not used, but eases macroization of patterns. */
16346 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
16347 rtx input ATTRIBUTE_UNUSED)
16349 gcc_unreachable ();
16352 /* Convert an unsigned SImode value into a DFmode. Only currently used
16353 for SSE, but applicable anywhere. */
16356 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
16358 REAL_VALUE_TYPE TWO31r;
16361 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
16362 NULL, 1, OPTAB_DIRECT);
16364 fp = gen_reg_rtx (DFmode);
16365 emit_insn (gen_floatsidf2 (fp, x));
16367 real_ldexp (&TWO31r, &dconst1, 31);
16368 x = const_double_from_real_value (TWO31r, DFmode);
16370 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
16372 emit_move_insn (target, x);
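/* Equivalently, as a sketch (exact, since 2^31 and the 32-bit
   intermediate are representable in DFmode):

       result = (double) (int) (a - 0x80000000u) + 0x1.0p31;

   The subtraction recenters the unsigned input into signed range so
   that the ordinary signed SImode->DFmode convert can be used.  */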
16375 /* Convert a signed DImode value into a DFmode. Only used for SSE in
16376 32-bit mode; otherwise we have a direct convert instruction. */
16379 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
16381 REAL_VALUE_TYPE TWO32r;
16382 rtx fp_lo, fp_hi, x;
16384 fp_lo = gen_reg_rtx (DFmode);
16385 fp_hi = gen_reg_rtx (DFmode);
16387 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
16389 real_ldexp (&TWO32r, &dconst1, 32);
16390 x = const_double_from_real_value (TWO32r, DFmode);
16391 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
16393 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
16395 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
16398 emit_move_insn (target, x);
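/* The identity used, as a sketch for the signed 64-bit input a:

       result = (double) (int) (a >> 32) * 0x1.0p32
                + (double) (unsigned int) a;

   The high half carries the sign; the low half goes through the
   unsigned SImode conversion above.  */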
/* Convert an unsigned SImode value into an SFmode, using only SSE.
16402 For x86_32, -mfpmath=sse, !optimize_size only. */
16404 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
16406 REAL_VALUE_TYPE ONE16r;
16407 rtx fp_hi, fp_lo, int_hi, int_lo, x;
16409 real_ldexp (&ONE16r, &dconst1, 16);
16410 x = const_double_from_real_value (ONE16r, SFmode);
16411 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
16412 NULL, 0, OPTAB_DIRECT);
16413 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
16414 NULL, 0, OPTAB_DIRECT);
16415 fp_hi = gen_reg_rtx (SFmode);
16416 fp_lo = gen_reg_rtx (SFmode);
16417 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
16418 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
16419 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
16421 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
16423 if (!rtx_equal_p (target, fp_hi))
16424 emit_move_insn (target, fp_hi);
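/* The identity used, as a sketch (each 16-bit half converts to SFmode
   exactly, and the multiply by 2^16 is exact; only the final add can
   round):

       result = (float) (a >> 16) * 0x1.0p16f + (float) (a & 0xffff);
*/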
16427 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
then replicate the value for all elements of the vector
   register.  */
16432 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
16439 v = gen_rtvec (4, value, value, value, value);
16440 return gen_rtx_CONST_VECTOR (V4SImode, v);
16444 v = gen_rtvec (2, value, value);
16445 return gen_rtx_CONST_VECTOR (V2DImode, v);
16449 v = gen_rtvec (8, value, value, value, value,
16450 value, value, value, value);
16452 v = gen_rtvec (8, value, CONST0_RTX (SFmode),
16453 CONST0_RTX (SFmode), CONST0_RTX (SFmode),
16454 CONST0_RTX (SFmode), CONST0_RTX (SFmode),
16455 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
16456 return gen_rtx_CONST_VECTOR (V8SFmode, v);
16460 v = gen_rtvec (4, value, value, value, value);
16462 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
16463 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
16464 return gen_rtx_CONST_VECTOR (V4SFmode, v);
16468 v = gen_rtvec (4, value, value, value, value);
16470 v = gen_rtvec (4, value, CONST0_RTX (DFmode),
16471 CONST0_RTX (DFmode), CONST0_RTX (DFmode));
16472 return gen_rtx_CONST_VECTOR (V4DFmode, v);
16476 v = gen_rtvec (2, value, value);
16478 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
16479 return gen_rtx_CONST_VECTOR (V2DFmode, v);
16482 gcc_unreachable ();
16486 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
16487 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
16488 for an SSE register. If VECT is true, then replicate the mask for
16489 all elements of the vector register. If INVERT is true, then create
16490 a mask excluding the sign bit. */
16493 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
16495 enum machine_mode vec_mode, imode;
16496 HOST_WIDE_INT hi, lo;
16501 /* Find the sign bit, sign extended to 2*HWI. */
16508 mode = GET_MODE_INNER (mode);
16510 lo = 0x80000000, hi = lo < 0;
16517 mode = GET_MODE_INNER (mode);
16519 if (HOST_BITS_PER_WIDE_INT >= 64)
16520 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
16522 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
16527 vec_mode = VOIDmode;
16528 if (HOST_BITS_PER_WIDE_INT >= 64)
16531 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
16538 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
16542 lo = ~lo, hi = ~hi;
16548 mask = immed_double_const (lo, hi, imode);
16550 vec = gen_rtvec (2, v, mask);
16551 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
16552 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
16559 gcc_unreachable ();
16563 lo = ~lo, hi = ~hi;
16565 /* Force this value into the low part of a fp vector constant. */
16566 mask = immed_double_const (lo, hi, imode);
16567 mask = gen_lowpart (mode, mask);
16569 if (vec_mode == VOIDmode)
16570 return force_reg (mode, mask);
16572 v = ix86_build_const_vector (vec_mode, vect, mask);
16573 return force_reg (vec_mode, v);
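/* Concretely: for DFmode this builds 0x8000000000000000 (just the
   sign bit), or 0x7fffffffffffffff when INVERT is true; for SFmode,
   0x80000000 resp. 0x7fffffff, replicated across the vector when
   VECT is set.  */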
16576 /* Generate code for floating point ABS or NEG. */
16579 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
16582 rtx mask, set, dst, src;
16583 bool use_sse = false;
16584 bool vector_mode = VECTOR_MODE_P (mode);
16585 enum machine_mode vmode = mode;
16589 else if (mode == TFmode)
16591 else if (TARGET_SSE_MATH)
16593 use_sse = SSE_FLOAT_MODE_P (mode);
16594 if (mode == SFmode)
16596 else if (mode == DFmode)
16600 /* NEG and ABS performed with SSE use bitwise mask operations.
16601 Create the appropriate mask now. */
16603 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
16610 set = gen_rtx_fmt_e (code, mode, src);
16611 set = gen_rtx_SET (VOIDmode, dst, set);
16618 use = gen_rtx_USE (VOIDmode, mask);
16620 par = gen_rtvec (2, set, use);
16623 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16624 par = gen_rtvec (3, set, use, clob);
16626 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
16632 /* Expand a copysign operation. Special case operand 0 being a constant. */
16635 ix86_expand_copysign (rtx operands[])
16637 enum machine_mode mode, vmode;
16638 rtx dest, op0, op1, mask, nmask;
16640 dest = operands[0];
16644 mode = GET_MODE (dest);
16646 if (mode == SFmode)
16648 else if (mode == DFmode)
16653 if (GET_CODE (op0) == CONST_DOUBLE)
16655 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
16657 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
16658 op0 = simplify_unary_operation (ABS, mode, op0, mode);
16660 if (mode == SFmode || mode == DFmode)
16662 if (op0 == CONST0_RTX (mode))
16663 op0 = CONST0_RTX (vmode);
16666 rtx v = ix86_build_const_vector (vmode, false, op0);
16668 op0 = force_reg (vmode, v);
16671 else if (op0 != CONST0_RTX (mode))
16672 op0 = force_reg (mode, op0);
16674 mask = ix86_build_signbit_mask (vmode, 0, 0);
16676 if (mode == SFmode)
16677 copysign_insn = gen_copysignsf3_const;
16678 else if (mode == DFmode)
16679 copysign_insn = gen_copysigndf3_const;
16681 copysign_insn = gen_copysigntf3_const;
16683 emit_insn (copysign_insn (dest, op0, op1, mask));
16687 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
16689 nmask = ix86_build_signbit_mask (vmode, 0, 1);
16690 mask = ix86_build_signbit_mask (vmode, 0, 0);
16692 if (mode == SFmode)
16693 copysign_insn = gen_copysignsf3_var;
16694 else if (mode == DFmode)
16695 copysign_insn = gen_copysigndf3_var;
16697 copysign_insn = gen_copysigntf3_var;
16699 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
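/* Both paths implement the usual bit-level identity (sketch, with
   smask the sign-bit mask built above):

       copysign (x, y) == (x & ~smask) | (y & smask)

   In the constant case the (x & ~smask) term is already folded into
   the absolute-value constant op0.  */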
16703 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
16704 be a constant, and so has already been expanded into a vector constant. */
16707 ix86_split_copysign_const (rtx operands[])
16709 enum machine_mode mode, vmode;
16710 rtx dest, op0, mask, x;
16712 dest = operands[0];
16714 mask = operands[3];
16716 mode = GET_MODE (dest);
16717 vmode = GET_MODE (mask);
16719 dest = simplify_gen_subreg (vmode, dest, mode, 0);
16720 x = gen_rtx_AND (vmode, dest, mask);
16721 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16723 if (op0 != CONST0_RTX (vmode))
16725 x = gen_rtx_IOR (vmode, dest, op0);
16726 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16730 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
16731 so we have to do two masks. */
16734 ix86_split_copysign_var (rtx operands[])
16736 enum machine_mode mode, vmode;
16737 rtx dest, scratch, op0, op1, mask, nmask, x;
16739 dest = operands[0];
16740 scratch = operands[1];
16743 nmask = operands[4];
16744 mask = operands[5];
16746 mode = GET_MODE (dest);
16747 vmode = GET_MODE (mask);
16749 if (rtx_equal_p (op0, op1))
16751 /* Shouldn't happen often (it's useless, obviously), but when it does
16752 we'd generate incorrect code if we continue below. */
16753 emit_move_insn (dest, op0);
16757 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
16759 gcc_assert (REGNO (op1) == REGNO (scratch));
16761 x = gen_rtx_AND (vmode, scratch, mask);
16762 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
16765 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
16766 x = gen_rtx_NOT (vmode, dest);
16767 x = gen_rtx_AND (vmode, x, op0);
16768 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16772 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
16774 x = gen_rtx_AND (vmode, scratch, mask);
16776 else /* alternative 2,4 */
16778 gcc_assert (REGNO (mask) == REGNO (scratch));
16779 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
16780 x = gen_rtx_AND (vmode, scratch, op1);
16782 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
16784 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
16786 dest = simplify_gen_subreg (vmode, op0, mode, 0);
16787 x = gen_rtx_AND (vmode, dest, nmask);
16789 else /* alternative 3,4 */
16791 gcc_assert (REGNO (nmask) == REGNO (dest));
16793 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
16794 x = gen_rtx_AND (vmode, dest, op0);
16796 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16799 x = gen_rtx_IOR (vmode, dest, scratch);
16800 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16803 /* Return TRUE or FALSE depending on whether the first SET in INSN
16804 has source and destination with matching CC modes, and that the
16805 CC mode is at least as constrained as REQ_MODE. */
16808 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
16811 enum machine_mode set_mode;
16813 set = PATTERN (insn);
16814 if (GET_CODE (set) == PARALLEL)
16815 set = XVECEXP (set, 0, 0);
16816 gcc_assert (GET_CODE (set) == SET);
16817 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
16819 set_mode = GET_MODE (SET_DEST (set));
16823 if (req_mode != CCNOmode
16824 && (req_mode != CCmode
16825 || XEXP (SET_SRC (set), 1) != const0_rtx))
16829 if (req_mode == CCGCmode)
16833 if (req_mode == CCGOCmode || req_mode == CCNOmode)
16837 if (req_mode == CCZmode)
16847 if (set_mode != req_mode)
16852 gcc_unreachable ();
16855 return GET_MODE (SET_SRC (set)) == set_mode;
16858 /* Generate insn patterns to do an integer compare of OPERANDS. */
16861 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
16863 enum machine_mode cmpmode;
16866 cmpmode = SELECT_CC_MODE (code, op0, op1);
16867 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
16869 /* This is very simple, but making the interface the same as in the
16870 FP case makes the rest of the code easier. */
16871 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
16872 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
16874 /* Return the test that should be put into the flags user, i.e.
16875 the bcc, scc, or cmov instruction. */
16876 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
16879 /* Figure out whether to use ordered or unordered fp comparisons.
16880 Return the appropriate mode to use. */
16883 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
16885 /* ??? In order to make all comparisons reversible, we do all comparisons
16886 non-trapping when compiling for IEEE. Once gcc is able to distinguish
between all forms of trapping and nontrapping comparisons, we can make inequality
16888 comparisons trapping again, since it results in better code when using
16889 FCOM based compares. */
16890 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
16894 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
16896 enum machine_mode mode = GET_MODE (op0);
16898 if (SCALAR_FLOAT_MODE_P (mode))
16900 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
16901 return ix86_fp_compare_mode (code);
16906 /* Only zero flag is needed. */
16907 case EQ: /* ZF=0 */
16908 case NE: /* ZF!=0 */
16910 /* Codes needing carry flag. */
16911 case GEU: /* CF=0 */
16912 case LTU: /* CF=1 */
16913 /* Detect overflow checks. They need just the carry flag. */
16914 if (GET_CODE (op0) == PLUS
16915 && rtx_equal_p (op1, XEXP (op0, 0)))
16919 case GTU: /* CF=0 & ZF=0 */
16920 case LEU: /* CF=1 | ZF=1 */
16921 /* Detect overflow checks. They need just the carry flag. */
16922 if (GET_CODE (op0) == MINUS
16923 && rtx_equal_p (op1, XEXP (op0, 0)))
16927 /* Codes possibly doable only with sign flag when
16928 comparing against zero. */
16929 case GE: /* SF=OF or SF=0 */
16930 case LT: /* SF<>OF or SF=1 */
16931 if (op1 == const0_rtx)
16934 /* For other cases Carry flag is not required. */
/* Codes doable only with the sign flag when comparing
   against zero, but we lack a jump instruction for it,
   so we need to use relational tests against the overflow
   flag, which thus needs to be zero. */
16940 case GT: /* ZF=0 & SF=OF */
16941 case LE: /* ZF=1 | SF<>OF */
16942 if (op1 == const0_rtx)
/* strcmp patterns do (use flags), and combine may ask us for the
   proper mode.  */
16951 gcc_unreachable ();
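/* For instance, the overflow-check idiom "if (a + b < a)" is an LTU
   compare of a PLUS against one of its own operands; only the carry
   flag is meaningful afterwards, so a carry-only mode (CCCmode) is
   selected for it above.  */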
16955 /* Return the fixed registers used for condition codes. */
16958 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
16965 /* If two condition code modes are compatible, return a condition code
mode which is compatible with both.  Otherwise, return
   VOIDmode.  */
16969 static enum machine_mode
16970 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
16975 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
16978 if ((m1 == CCGCmode && m2 == CCGOCmode)
16979 || (m1 == CCGOCmode && m2 == CCGCmode))
16985 gcc_unreachable ();
/* These are only compatible with themselves, which we already
   checked.  */
/* Return a comparison we can do that is equivalent to
   swap_condition (code), apart possibly from orderedness.
   But never change orderedness if TARGET_IEEE_FP, returning
   UNKNOWN in that case if necessary. */
17027 static enum rtx_code
17028 ix86_fp_swap_condition (enum rtx_code code)
17032 case GT: /* GTU - CF=0 & ZF=0 */
17033 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
17034 case GE: /* GEU - CF=0 */
17035 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
17036 case UNLT: /* LTU - CF=1 */
17037 return TARGET_IEEE_FP ? UNKNOWN : GT;
17038 case UNLE: /* LEU - CF=1 | ZF=1 */
17039 return TARGET_IEEE_FP ? UNKNOWN : GE;
17041 return swap_condition (code);
17045 /* Return cost of comparison CODE using the best strategy for performance.
All the following functions use the number of instructions as a cost metric.
   In the future this should be tweaked to compute bytes for optimize_size and
   to take into account the performance of various instructions on various CPUs. */
17051 ix86_fp_comparison_cost (enum rtx_code code)
17055 /* The cost of code using bit-twiddling on %ah. */
17072 arith_cost = TARGET_IEEE_FP ? 5 : 4;
17076 arith_cost = TARGET_IEEE_FP ? 6 : 4;
17079 gcc_unreachable ();
17082 switch (ix86_fp_comparison_strategy (code))
17084 case IX86_FPCMP_COMI:
17085 return arith_cost > 4 ? 3 : 2;
17086 case IX86_FPCMP_SAHF:
17087 return arith_cost > 4 ? 4 : 3;
/* Return the strategy to use for a floating-point comparison.  We
   assume that fcomi is always preferable where available, since that
   is also true when looking at size
17095 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
17097 enum ix86_fpcmp_strategy
17098 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
17100 /* Do fcomi/sahf based test when profitable. */
17103 return IX86_FPCMP_COMI;
17105 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_function_for_size_p (cfun)))
17106 return IX86_FPCMP_SAHF;
17108 return IX86_FPCMP_ARITH;
17111 /* Swap, force into registers, or otherwise massage the two operands
17112 to a fp comparison. The operands are updated in place; the new
17113 comparison code is returned. */
17115 static enum rtx_code
17116 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
17118 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
17119 rtx op0 = *pop0, op1 = *pop1;
17120 enum machine_mode op_mode = GET_MODE (op0);
17121 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
17123 /* All of the unordered compare instructions only work on registers.
17124 The same is true of the fcomi compare instructions. The XFmode
17125 compare instructions require registers except when comparing
against zero or when converting operand 1 from fixed point to
   floating point.  */
17130 && (fpcmp_mode == CCFPUmode
17131 || (op_mode == XFmode
17132 && ! (standard_80387_constant_p (op0) == 1
17133 || standard_80387_constant_p (op1) == 1)
17134 && GET_CODE (op1) != FLOAT)
17135 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
17137 op0 = force_reg (op_mode, op0);
17138 op1 = force_reg (op_mode, op1);
17142 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
17143 things around if they appear profitable, otherwise force op0
17144 into a register. */
17146 if (standard_80387_constant_p (op0) == 0
17148 && ! (standard_80387_constant_p (op1) == 0
17151 enum rtx_code new_code = ix86_fp_swap_condition (code);
17152 if (new_code != UNKNOWN)
17155 tmp = op0, op0 = op1, op1 = tmp;
17161 op0 = force_reg (op_mode, op0);
17163 if (CONSTANT_P (op1))
17165 int tmp = standard_80387_constant_p (op1);
17167 op1 = validize_mem (force_const_mem (op_mode, op1));
17171 op1 = force_reg (op_mode, op1);
17174 op1 = force_reg (op_mode, op1);
17178 /* Try to rearrange the comparison to make it cheaper. */
17179 if (ix86_fp_comparison_cost (code)
17180 > ix86_fp_comparison_cost (swap_condition (code))
17181 && (REG_P (op1) || can_create_pseudo_p ()))
17184 tmp = op0, op0 = op1, op1 = tmp;
17185 code = swap_condition (code);
17187 op0 = force_reg (op_mode, op0);
17195 /* Convert comparison codes we use to represent FP comparison to integer
code that will result in a proper branch.  Return UNKNOWN if no such
   code is available.  */
17200 ix86_fp_compare_code_to_integer (enum rtx_code code)
17229 /* Generate insn patterns to do a floating point compare of OPERANDS. */
17232 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
17234 enum machine_mode fpcmp_mode, intcmp_mode;
17237 fpcmp_mode = ix86_fp_compare_mode (code);
17238 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
17240 /* Do fcomi/sahf based test when profitable. */
17241 switch (ix86_fp_comparison_strategy (code))
17243 case IX86_FPCMP_COMI:
17244 intcmp_mode = fpcmp_mode;
17245 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
17246 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
17251 case IX86_FPCMP_SAHF:
17252 intcmp_mode = fpcmp_mode;
17253 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
17254 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
17258 scratch = gen_reg_rtx (HImode);
17259 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
17260 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
17263 case IX86_FPCMP_ARITH:
17264 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
17265 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
17266 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
17268 scratch = gen_reg_rtx (HImode);
17269 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
17271 /* In the unordered case, we have to check C2 for NaN's, which
17272 doesn't happen to work out to anything nice combination-wise.
17273 So do some bit twiddling on the value we've got in AH to come
17274 up with an appropriate set of condition codes. */
17276 intcmp_mode = CCNOmode;
17281 if (code == GT || !TARGET_IEEE_FP)
17283 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
17288 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17289 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
17290 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
17291 intcmp_mode = CCmode;
17297 if (code == LT && TARGET_IEEE_FP)
17299 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17300 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
17301 intcmp_mode = CCmode;
17306 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
17312 if (code == GE || !TARGET_IEEE_FP)
17314 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
17319 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17320 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
17326 if (code == LE && TARGET_IEEE_FP)
17328 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17329 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
17330 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
17331 intcmp_mode = CCmode;
17336 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
17342 if (code == EQ && TARGET_IEEE_FP)
17344 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17345 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
17346 intcmp_mode = CCmode;
17351 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
17357 if (code == NE && TARGET_IEEE_FP)
17359 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17360 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
17366 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
17372 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
17376 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
17381 gcc_unreachable ();
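/* Key to the magic constants above: after fnstsw, AH holds the 8087
   status-word flags C0 = 0x01 (below), C2 = 0x04 (unordered) and
   C3 = 0x40 (equal), so e.g. 0x45 tests C3|C2|C0 at once.  */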
17389 /* Return the test that should be put into the flags user, i.e.
17390 the bcc, scc, or cmov instruction. */
17391 return gen_rtx_fmt_ee (code, VOIDmode,
17392 gen_rtx_REG (intcmp_mode, FLAGS_REG),
17397 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
17401 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
17402 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
17404 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
17406 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
17407 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
17410 ret = ix86_expand_int_compare (code, op0, op1);
17416 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
17418 enum machine_mode mode = GET_MODE (op0);
17430 tmp = ix86_expand_compare (code, op0, op1);
17431 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
17432 gen_rtx_LABEL_REF (VOIDmode, label),
17434 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
17441 /* Expand DImode branch into multiple compare+branch. */
17443 rtx lo[2], hi[2], label2;
17444 enum rtx_code code1, code2, code3;
17445 enum machine_mode submode;
17447 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
17449 tmp = op0, op0 = op1, op1 = tmp;
17450 code = swap_condition (code);
17453 split_double_mode (mode, &op0, 1, lo+0, hi+0);
17454 split_double_mode (mode, &op1, 1, lo+1, hi+1);
17456 submode = mode == DImode ? SImode : DImode;
17458 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
17459 avoid two branches. This costs one extra insn, so disable when
17460 optimizing for size. */
17462 if ((code == EQ || code == NE)
17463 && (!optimize_insn_for_size_p ()
17464 || hi[1] == const0_rtx || lo[1] == const0_rtx))
17469 if (hi[1] != const0_rtx)
17470 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
17471 NULL_RTX, 0, OPTAB_WIDEN);
17474 if (lo[1] != const0_rtx)
17475 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
17476 NULL_RTX, 0, OPTAB_WIDEN);
17478 tmp = expand_binop (submode, ior_optab, xor1, xor0,
17479 NULL_RTX, 0, OPTAB_WIDEN);
17481 ix86_expand_branch (code, tmp, const0_rtx, label);
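/* E.g. for a 32-bit target, a DImode "a == b" branch becomes, in
   outline,

       xorl    hi(b), hi(a)
       xorl    lo(b), lo(a)
       orl     hi(a), lo(a)
       je      label

   trading the second compare-and-branch for one OR.  */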
17485 /* Otherwise, if we are doing less-than or greater-or-equal-than,
17486 op1 is a constant and the low word is zero, then we can just
17487 examine the high word. Similarly for low word -1 and
17488 less-or-equal-than or greater-than. */
17490 if (CONST_INT_P (hi[1]))
17493 case LT: case LTU: case GE: case GEU:
17494 if (lo[1] == const0_rtx)
17496 ix86_expand_branch (code, hi[0], hi[1], label);
17500 case LE: case LEU: case GT: case GTU:
17501 if (lo[1] == constm1_rtx)
17503 ix86_expand_branch (code, hi[0], hi[1], label);
17511 /* Otherwise, we need two or three jumps. */
17513 label2 = gen_label_rtx ();
17516 code2 = swap_condition (code);
17517 code3 = unsigned_condition (code);
17521 case LT: case GT: case LTU: case GTU:
17524 case LE: code1 = LT; code2 = GT; break;
17525 case GE: code1 = GT; code2 = LT; break;
17526 case LEU: code1 = LTU; code2 = GTU; break;
17527 case GEU: code1 = GTU; code2 = LTU; break;
17529 case EQ: code1 = UNKNOWN; code2 = NE; break;
17530 case NE: code2 = UNKNOWN; break;
17533 gcc_unreachable ();
/*
 * a < b =>
 *    if (hi(a) < hi(b)) goto true;
 *    if (hi(a) > hi(b)) goto false;
 *    if (lo(a) < lo(b)) goto true;
 * false:
 */
17544 if (code1 != UNKNOWN)
17545 ix86_expand_branch (code1, hi[0], hi[1], label);
17546 if (code2 != UNKNOWN)
17547 ix86_expand_branch (code2, hi[0], hi[1], label2);
17549 ix86_expand_branch (code3, lo[0], lo[1], label);
17551 if (code2 != UNKNOWN)
17552 emit_label (label2);
17557 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
17562 /* Split branch based on floating point condition. */
17564 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
17565 rtx target1, rtx target2, rtx tmp, rtx pushed)
17570 if (target2 != pc_rtx)
17573 code = reverse_condition_maybe_unordered (code);
17578 condition = ix86_expand_fp_compare (code, op1, op2,
17581 /* Remove pushed operand from stack. */
17583 ix86_free_from_memory (GET_MODE (pushed));
17585 i = emit_jump_insn (gen_rtx_SET
17587 gen_rtx_IF_THEN_ELSE (VOIDmode,
17588 condition, target1, target2)));
17589 if (split_branch_probability >= 0)
17590 add_reg_note (i, REG_BR_PROB, GEN_INT (split_branch_probability));
17594 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
17598 gcc_assert (GET_MODE (dest) == QImode);
17600 ret = ix86_expand_compare (code, op0, op1);
17601 PUT_MODE (ret, QImode);
17602 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
17605 /* Expand comparison setting or clearing carry flag. Return true when
successful, and set *pop for the operation. */
17608 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
17610 enum machine_mode mode =
17611 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
/* Do not handle double-mode compares that go through a special path. */
17614 if (mode == (TARGET_64BIT ? TImode : DImode))
17617 if (SCALAR_FLOAT_MODE_P (mode))
17619 rtx compare_op, compare_seq;
17621 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
17623 /* Shortcut: the following common codes never translate
17624 into carry flag compares. */
17625 if (code == EQ || code == NE || code == UNEQ || code == LTGT
17626 || code == ORDERED || code == UNORDERED)
17629 /* These comparisons require the zero flag; swap operands so they won't. */
17630 if ((code == GT || code == UNLE || code == LE || code == UNGT)
17631 && !TARGET_IEEE_FP)
17636 code = swap_condition (code);
17639 /* Try to expand the comparison and verify that we end up with
17640 a carry flag based comparison.  This fails to be true only when
17641 we decide to expand the comparison using arithmetic, which is
17642 not a common scenario. */
17644 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
17645 compare_seq = get_insns ();
17648 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
17649 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
17650 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
17652 code = GET_CODE (compare_op);
17654 if (code != LTU && code != GEU)
17657 emit_insn (compare_seq);
17662 if (!INTEGRAL_MODE_P (mode))
17671 /* Convert a==0 into (unsigned)a<1. */
17674 if (op1 != const0_rtx)
17677 code = (code == EQ ? LTU : GEU);
17680 /* Convert a>b into b<a or a>=b-1. */
17683 if (CONST_INT_P (op1))
17685 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
17686 /* Bail out on overflow.  We still can swap operands but that
17687 would force loading of the constant into a register. */
17688 if (op1 == const0_rtx
17689 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
17691 code = (code == GTU ? GEU : LTU);
17698 code = (code == GTU ? LTU : GEU);
17702 /* Convert a>=0 into (unsigned)a<0x80000000. */
17705 if (mode == DImode || op1 != const0_rtx)
17707 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
17708 code = (code == LT ? GEU : LTU);
17712 if (mode == DImode || op1 != constm1_rtx)
17714 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
17715 code = (code == LE ? GEU : LTU);
17721 /* Swapping operands may cause a constant to appear as the first operand. */
17722 if (!nonimmediate_operand (op0, VOIDmode))
17724 if (!can_create_pseudo_p ())
17726 op0 = force_reg (mode, op0);
17728 *pop = ix86_expand_compare (code, op0, op1);
17729 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
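/* Summarizing the rewrites in this function (an editorial restatement of
   the cases above, not new behavior): each one reduces to a bare carry
   test, i.e. LTU or GEU:

       a == 0   <=>  (unsigned) a < 1
       a >u b   <=>  b <u a
       a >u C   <=>  a >=u C + 1      (when C + 1 does not wrap to zero)
       a >= 0   <=>  (unsigned) a < 0x80000000

   so the caller can consume the carry flag directly with adc/sbb.  */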
17734 ix86_expand_int_movcc (rtx operands[])
17736 enum rtx_code code = GET_CODE (operands[1]), compare_code;
17737 rtx compare_seq, compare_op;
17738 enum machine_mode mode = GET_MODE (operands[0]);
17739 bool sign_bit_compare_p = false;
17740 rtx op0 = XEXP (operands[1], 0);
17741 rtx op1 = XEXP (operands[1], 1);
17744 compare_op = ix86_expand_compare (code, op0, op1);
17745 compare_seq = get_insns ();
17748 compare_code = GET_CODE (compare_op);
17750 if ((op1 == const0_rtx && (code == GE || code == LT))
17751 || (op1 == constm1_rtx && (code == GT || code == LE)))
17752 sign_bit_compare_p = true;
17754 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
17755 HImode insns, we'd be swallowed in word prefix ops. */
17757 if ((mode != HImode || TARGET_FAST_PREFIX)
17758 && (mode != (TARGET_64BIT ? TImode : DImode))
17759 && CONST_INT_P (operands[2])
17760 && CONST_INT_P (operands[3]))
17762 rtx out = operands[0];
17763 HOST_WIDE_INT ct = INTVAL (operands[2]);
17764 HOST_WIDE_INT cf = INTVAL (operands[3]);
17765 HOST_WIDE_INT diff;
17768 /* Sign bit compares are better done using shifts than by using sbb. */
17770 if (sign_bit_compare_p
17771 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
17773 /* Detect overlap between destination and compare sources. */
17776 if (!sign_bit_compare_p)
17779 bool fpcmp = false;
17781 compare_code = GET_CODE (compare_op);
17783 flags = XEXP (compare_op, 0);
17785 if (GET_MODE (flags) == CCFPmode
17786 || GET_MODE (flags) == CCFPUmode)
17790 = ix86_fp_compare_code_to_integer (compare_code);
17793 /* To simplify the rest of the code, restrict to the GEU case. */
17794 if (compare_code == LTU)
17796 HOST_WIDE_INT tmp = ct;
17799 compare_code = reverse_condition (compare_code);
17800 code = reverse_condition (code);
17805 PUT_CODE (compare_op,
17806 reverse_condition_maybe_unordered
17807 (GET_CODE (compare_op)));
17809 PUT_CODE (compare_op,
17810 reverse_condition (GET_CODE (compare_op)));
17814 if (reg_overlap_mentioned_p (out, op0)
17815 || reg_overlap_mentioned_p (out, op1))
17816 tmp = gen_reg_rtx (mode);
17818 if (mode == DImode)
17819 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
17821 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
17822 flags, compare_op));
17826 if (code == GT || code == GE)
17827 code = reverse_condition (code);
17830 HOST_WIDE_INT tmp = ct;
17835 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
17848 tmp = expand_simple_binop (mode, PLUS,
17850 copy_rtx (tmp), 1, OPTAB_DIRECT);
17861 tmp = expand_simple_binop (mode, IOR,
17863 copy_rtx (tmp), 1, OPTAB_DIRECT);
17865 else if (diff == -1 && ct)
17875 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
17877 tmp = expand_simple_binop (mode, PLUS,
17878 copy_rtx (tmp), GEN_INT (cf),
17879 copy_rtx (tmp), 1, OPTAB_DIRECT);
17887 * andl cf - ct, dest
17897 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
17900 tmp = expand_simple_binop (mode, AND,
17902 gen_int_mode (cf - ct, mode),
17903 copy_rtx (tmp), 1, OPTAB_DIRECT);
17905 tmp = expand_simple_binop (mode, PLUS,
17906 copy_rtx (tmp), GEN_INT (ct),
17907 copy_rtx (tmp), 1, OPTAB_DIRECT);
17910 if (!rtx_equal_p (tmp, out))
17911 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
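/* A plain-C sketch of the sbb-based selection above (an approximation,
   not the exact insn sequence):

       static unsigned
       select_by_carry (unsigned a, unsigned b, unsigned ct, unsigned cf)
       {
         unsigned mask = a < b ? ~0u : 0u;   /* what sbb materializes */
         return cf + (mask & (ct - cf));     /* mask ? ct : cf */
       }

   The PLUS/IOR/AND/NOT special cases above avoid parts of this when one
   of the constants folds away.  */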
17918 enum machine_mode cmp_mode = GET_MODE (op0);
17921 tmp = ct, ct = cf, cf = tmp;
17924 if (SCALAR_FLOAT_MODE_P (cmp_mode))
17926 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
17928 /* We may be reversing an unordered compare to a normal compare;
17929 this is not valid in general (we may convert a non-trapping
17930 condition into a trapping one), but on i386 we currently
17931 emit all comparisons unordered. */
17932 compare_code = reverse_condition_maybe_unordered (compare_code);
17933 code = reverse_condition_maybe_unordered (code);
17937 compare_code = reverse_condition (compare_code);
17938 code = reverse_condition (code);
17942 compare_code = UNKNOWN;
17943 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
17944 && CONST_INT_P (op1))
17946 if (op1 == const0_rtx
17947 && (code == LT || code == GE))
17948 compare_code = code;
17949 else if (op1 == constm1_rtx)
17953 else if (code == GT)
17958 /* Optimize dest = (op0 < 0) ? -1 : cf. */
17959 if (compare_code != UNKNOWN
17960 && GET_MODE (op0) == GET_MODE (out)
17961 && (cf == -1 || ct == -1))
17963 /* If the lea code below could be used, only optimize
17964 if it results in a two-insn sequence. */
17966 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
17967 || diff == 3 || diff == 5 || diff == 9)
17968 || (compare_code == LT && ct == -1)
17969 || (compare_code == GE && cf == -1))
17972 * notl op1 (if necessary)
17980 code = reverse_condition (code);
17983 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
17985 out = expand_simple_binop (mode, IOR,
17987 out, 1, OPTAB_DIRECT);
17988 if (out != operands[0])
17989 emit_move_insn (operands[0], out);
17996 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
17997 || diff == 3 || diff == 5 || diff == 9)
17998 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
18000 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
18006 * lea cf(dest*(ct-cf)),dest
18010 * This also catches the degenerate setcc-only case.
18016 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
18019 /* On x86_64 the lea instruction operates on Pmode, so we need
18020 to get the arithmetic done in the proper mode to match. */
18022 tmp = copy_rtx (out);
18026 out1 = copy_rtx (out);
18027 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
18031 tmp = gen_rtx_PLUS (mode, tmp, out1);
18037 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
18040 if (!rtx_equal_p (tmp, out))
18043 out = force_operand (tmp, copy_rtx (out));
18045 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
18047 if (!rtx_equal_p (out, operands[0]))
18048 emit_move_insn (operands[0], copy_rtx (out));
18054 * General case: Jumpful:
18055 * xorl dest,dest cmpl op1, op2
18056 * cmpl op1, op2 movl ct, dest
18057 * setcc dest jcc 1f
18058 * decl dest movl cf, dest
18059 * andl (cf-ct),dest 1:
18062 * Size 20. Size 14.
18064 * This is reasonably steep, but branch mispredict costs are
18065 * high on modern CPUs, so consider failing only if optimizing for size.
18069 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
18070 && BRANCH_COST (optimize_insn_for_speed_p (),
18075 enum machine_mode cmp_mode = GET_MODE (op0);
18080 if (SCALAR_FLOAT_MODE_P (cmp_mode))
18082 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
18084 /* We may be reversing an unordered compare to a normal compare;
18085 this is not valid in general (we may convert a non-trapping
18086 condition into a trapping one), but on i386 we currently
18087 emit all comparisons unordered. */
18088 code = reverse_condition_maybe_unordered (code);
18092 code = reverse_condition (code);
18093 if (compare_code != UNKNOWN)
18094 compare_code = reverse_condition (compare_code);
18098 if (compare_code != UNKNOWN)
18100 /* notl op1 (if needed)
18105 For x < 0 (resp. x <= -1) there will be no notl,
18106 so if possible swap the constants to get rid of the complement.
18108 True/false will be -1/0, while the code below (store flag
18109 followed by decrement) is 0/-1, so the constants need
18110 to be exchanged once more. */
18112 if (compare_code == GE || !cf)
18114 code = reverse_condition (code);
18119 HOST_WIDE_INT tmp = cf;
18124 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
18128 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
18130 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
18132 copy_rtx (out), 1, OPTAB_DIRECT);
18135 out = expand_simple_binop (mode, AND, copy_rtx (out),
18136 gen_int_mode (cf - ct, mode),
18137 copy_rtx (out), 1, OPTAB_DIRECT);
18139 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
18140 copy_rtx (out), 1, OPTAB_DIRECT);
18141 if (!rtx_equal_p (out, operands[0]))
18142 emit_move_insn (operands[0], copy_rtx (out));
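/* A plain-C sketch of one variant of the store-flag sequence just
   emitted (an approximation, not the exact insn sequence):

       static unsigned
       setcc_select (int cond, unsigned ct, unsigned cf)
       {
         unsigned t = (unsigned) cond - 1;   /* setcc; dec: cond ? 0 : ~0 */
         return (t & (cf - ct)) + ct;        /* and; add: cond ? ct : cf */
       }
*/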
18148 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
18150 /* Try a few more things with specific constants and a variable. */
18153 rtx var, orig_out, out, tmp;
18155 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
18158 /* If one of the two operands is an interesting constant, load a
18159 constant with the above and mask it in with a logical operation. */
18161 if (CONST_INT_P (operands[2]))
18164 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
18165 operands[3] = constm1_rtx, op = and_optab;
18166 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
18167 operands[3] = const0_rtx, op = ior_optab;
18171 else if (CONST_INT_P (operands[3]))
18174 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
18175 operands[2] = constm1_rtx, op = and_optab;
18176 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
18177 operands[2] = const0_rtx, op = ior_optab;
18184 orig_out = operands[0];
18185 tmp = gen_reg_rtx (mode);
18188 /* Recurse to get the constant loaded. */
18189 if (ix86_expand_int_movcc (operands) == 0)
18192 /* Mask in the interesting variable. */
18193 out = expand_binop (mode, op, var, tmp, orig_out, 0,
18195 if (!rtx_equal_p (out, orig_out))
18196 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
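/* An editorial restatement of the trick above: once one arm of the
   conditional move is the absorbing element of AND or IOR, the select
   collapses to a single logical operation against a 0/-1 mask:

       cond ? var : 0    ==  (-(int) cond) & var     (the and_optab case)
       cond ? -1  : var  ==  (-(int) cond) | var     (the ior_optab case)

   which is why the code recurses to load the 0/-1 constant and then
   masks the variable in.  */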
18202 * For comparison with above,
18212 if (! nonimmediate_operand (operands[2], mode))
18213 operands[2] = force_reg (mode, operands[2]);
18214 if (! nonimmediate_operand (operands[3], mode))
18215 operands[3] = force_reg (mode, operands[3]);
18217 if (! register_operand (operands[2], VOIDmode)
18219 || ! register_operand (operands[3], VOIDmode)))
18220 operands[2] = force_reg (mode, operands[2]);
18223 && ! register_operand (operands[3], VOIDmode))
18224 operands[3] = force_reg (mode, operands[3]);
18226 emit_insn (compare_seq);
18227 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
18228 gen_rtx_IF_THEN_ELSE (mode,
18229 compare_op, operands[2],
18234 /* Swap, force into registers, or otherwise massage the two operands
18235 to an sse comparison with a mask result. Thus we differ a bit from
18236 ix86_prepare_fp_compare_args which expects to produce a flags result.
18238 The DEST operand exists to help determine whether to commute commutative
18239 operators. The POP0/POP1 operands are updated in place. The new
18240 comparison code is returned, or UNKNOWN if not implementable. */
18242 static enum rtx_code
18243 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
18244 rtx *pop0, rtx *pop1)
18252 /* We have no LTGT as an operator. We could implement it with
18253 NE & ORDERED, but this requires an extra temporary. It's
18254 not clear that it's worth it. */
18261 /* These are supported directly. */
18268 /* For commutative operators, try to canonicalize the destination
18269 operand to be first in the comparison - this helps reload to
18270 avoid extra moves. */
18271 if (!dest || !rtx_equal_p (dest, *pop1))
18279 /* These are not supported directly. Swap the comparison operands
18280 to transform into something that is supported. */
18284 code = swap_condition (code);
18288 gcc_unreachable ();
18294 /* Detect conditional moves that exactly match min/max operational
18295 semantics. Note that this is IEEE safe, as long as we don't
18296 interchange the operands.
18298 Returns FALSE if this conditional move doesn't match a MIN/MAX,
18299 and TRUE if the operation is successful and instructions are emitted. */
18302 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
18303 rtx cmp_op1, rtx if_true, rtx if_false)
18305 enum machine_mode mode;
18311 else if (code == UNGE)
18314 if_true = if_false;
18320 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
18322 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
18327 mode = GET_MODE (dest);
18329 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
18330 but MODE may be a vector mode and thus not appropriate. */
18331 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
18333 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
18336 if_true = force_reg (mode, if_true);
18337 v = gen_rtvec (2, if_true, if_false);
18338 tmp = gen_rtx_UNSPEC (mode, v, u);
18342 code = is_min ? SMIN : SMAX;
18343 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
18346 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
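/* A note on the SSE semantics relied on here (informal C rendering, an
   approximation): MINSS/MAXSS are not commutative in the presence of
   NaNs and signed zeros; MINSS behaves like

       static float
       sse_min (float a, float b) { return a < b ? a : b; }

   returning the second operand when the compare is unordered.  That is
   why the match above must keep the operands in their original order,
   and why the strict-IEEE path wraps the pair in an UNSPEC instead.  */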
18350 /* Expand an sse vector comparison. Return the register with the result. */
18353 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
18354 rtx op_true, rtx op_false)
18356 enum machine_mode mode = GET_MODE (dest);
18359 cmp_op0 = force_reg (mode, cmp_op0);
18360 if (!nonimmediate_operand (cmp_op1, mode))
18361 cmp_op1 = force_reg (mode, cmp_op1);
18364 || reg_overlap_mentioned_p (dest, op_true)
18365 || reg_overlap_mentioned_p (dest, op_false))
18366 dest = gen_reg_rtx (mode);
18368 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
18369 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18374 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
18375 operations. This is used for both scalar and vector conditional moves. */
18378 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
18380 enum machine_mode mode = GET_MODE (dest);
18383 if (op_false == CONST0_RTX (mode))
18385 op_true = force_reg (mode, op_true);
18386 x = gen_rtx_AND (mode, cmp, op_true);
18387 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18389 else if (op_true == CONST0_RTX (mode))
18391 op_false = force_reg (mode, op_false);
18392 x = gen_rtx_NOT (mode, cmp);
18393 x = gen_rtx_AND (mode, x, op_false);
18394 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18396 else if (TARGET_XOP)
18398 rtx pcmov = gen_rtx_SET (mode, dest,
18399 gen_rtx_IF_THEN_ELSE (mode, cmp,
18406 op_true = force_reg (mode, op_true);
18407 op_false = force_reg (mode, op_false);
18409 t2 = gen_reg_rtx (mode);
18411 t3 = gen_reg_rtx (mode);
18415 x = gen_rtx_AND (mode, op_true, cmp);
18416 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
18418 x = gen_rtx_NOT (mode, cmp);
18419 x = gen_rtx_AND (mode, x, op_false);
18420 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
18422 x = gen_rtx_IOR (mode, t3, t2);
18423 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
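/* A plain-C sketch (not the RTL actually emitted): the and/not-and/ior
   sequence above is the classic bitwise select.  With a per-element
   comparison mask of all-ones or all-zeros it computes

       static unsigned
       bitwise_select (unsigned cmp, unsigned t, unsigned f)
       {
         return (cmp & t) | (~cmp & f);   /* cmp all-ones ? t : f */
       }

   XOP's pcmov performs the same blend in a single instruction, hence the
   TARGET_XOP shortcut above.  */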
18427 /* Expand a floating-point conditional move. Return true if successful. */
18430 ix86_expand_fp_movcc (rtx operands[])
18432 enum machine_mode mode = GET_MODE (operands[0]);
18433 enum rtx_code code = GET_CODE (operands[1]);
18434 rtx tmp, compare_op;
18435 rtx op0 = XEXP (operands[1], 0);
18436 rtx op1 = XEXP (operands[1], 1);
18438 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
18440 enum machine_mode cmode;
18442 /* Since we've no cmove for sse registers, don't force bad register
18443 allocation just to gain access to it. Deny movcc when the
18444 comparison mode doesn't match the move mode. */
18445 cmode = GET_MODE (op0);
18446 if (cmode == VOIDmode)
18447 cmode = GET_MODE (op1);
18451 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
18452 if (code == UNKNOWN)
18455 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
18456 operands[2], operands[3]))
18459 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
18460 operands[2], operands[3]);
18461 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
18465 /* The floating point conditional move instructions don't directly
18466 support conditions resulting from a signed integer comparison. */
18468 compare_op = ix86_expand_compare (code, op0, op1);
18469 if (!fcmov_comparison_operator (compare_op, VOIDmode))
18471 tmp = gen_reg_rtx (QImode);
18472 ix86_expand_setcc (tmp, code, op0, op1);
18474 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
18477 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
18478 gen_rtx_IF_THEN_ELSE (mode, compare_op,
18479 operands[2], operands[3])));
18484 /* Expand a floating-point vector conditional move; a vcond operation
18485 rather than a movcc operation. */
18488 ix86_expand_fp_vcond (rtx operands[])
18490 enum rtx_code code = GET_CODE (operands[3]);
18493 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
18494 &operands[4], &operands[5]);
18495 if (code == UNKNOWN)
18498 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
18499 operands[5], operands[1], operands[2]))
18502 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
18503 operands[1], operands[2]);
18504 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
18508 /* Expand a signed/unsigned integral vector conditional move. */
18511 ix86_expand_int_vcond (rtx operands[])
18513 enum machine_mode mode = GET_MODE (operands[0]);
18514 enum rtx_code code = GET_CODE (operands[3]);
18515 bool negate = false;
18518 cop0 = operands[4];
18519 cop1 = operands[5];
18521 /* XOP supports all of the comparisons on all vector int types. */
18524 /* Canonicalize the comparison to EQ, GT, GTU. */
18535 code = reverse_condition (code);
18541 code = reverse_condition (code);
18547 code = swap_condition (code);
18548 x = cop0, cop0 = cop1, cop1 = x;
18552 gcc_unreachable ();
18555 /* Only SSE4.1/SSE4.2 supports V2DImode. */
18556 if (mode == V2DImode)
18561 /* SSE4.1 supports EQ. */
18562 if (!TARGET_SSE4_1)
18568 /* SSE4.2 supports GT/GTU. */
18569 if (!TARGET_SSE4_2)
18574 gcc_unreachable ();
18578 /* Unsigned parallel compare is not supported by the hardware.
18579 Play some tricks to turn this into a signed comparison against 0. */
18583 cop0 = force_reg (mode, cop0);
18591 rtx (*gen_sub3) (rtx, rtx, rtx);
18593 /* Subtract (-(INT MAX) - 1) from both operands to make them signed. */
18595 mask = ix86_build_signbit_mask (mode, true, false);
18596 gen_sub3 = (mode == V4SImode
18597 ? gen_subv4si3 : gen_subv2di3);
18598 t1 = gen_reg_rtx (mode);
18599 emit_insn (gen_sub3 (t1, cop0, mask));
18601 t2 = gen_reg_rtx (mode);
18602 emit_insn (gen_sub3 (t2, cop1, mask));
18612 /* Perform a parallel unsigned saturating subtraction. */
18613 x = gen_reg_rtx (mode);
18614 emit_insn (gen_rtx_SET (VOIDmode, x,
18615 gen_rtx_US_MINUS (mode, cop0, cop1)));
18618 cop1 = CONST0_RTX (mode);
18624 gcc_unreachable ();
18629 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
18630 operands[1+negate], operands[2-negate]);
18632 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
18633 operands[2-negate]);
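/* A plain-C sketch (not the RTL actually emitted): the sign-bit bias
   used above turns an unsigned vector compare into the signed compare
   the hardware provides.  Per element:

       static int
       gtu_via_signed (unsigned a, unsigned b)
       {
         return (int) (a - 0x80000000u) > (int) (b - 0x80000000u);
       }

   The saturating-subtract path instead tests a >u b as (a -sat b) != 0,
   using US_MINUS followed by a compare against zero.  */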
18637 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
18638 true if we should do zero extension, else sign extension. HIGH_P is
18639 true if we want the N/2 high elements, else the low elements. */
18642 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
18644 enum machine_mode imode = GET_MODE (operands[1]);
18649 rtx (*unpack)(rtx, rtx);
18655 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
18657 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
18661 unpack = gen_sse4_1_zero_extendv4hiv4si2;
18663 unpack = gen_sse4_1_sign_extendv4hiv4si2;
18667 unpack = gen_sse4_1_zero_extendv2siv2di2;
18669 unpack = gen_sse4_1_sign_extendv2siv2di2;
18672 gcc_unreachable ();
18677 /* Shift the higher 8 bytes into the lower 8 bytes. */
18678 tmp = gen_reg_rtx (imode);
18679 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, tmp),
18680 gen_lowpart (V1TImode, operands[1]),
18686 emit_insn (unpack (operands[0], tmp));
18690 rtx (*unpack)(rtx, rtx, rtx);
18696 unpack = gen_vec_interleave_highv16qi;
18698 unpack = gen_vec_interleave_lowv16qi;
18702 unpack = gen_vec_interleave_highv8hi;
18704 unpack = gen_vec_interleave_lowv8hi;
18708 unpack = gen_vec_interleave_highv4si;
18710 unpack = gen_vec_interleave_lowv4si;
18713 gcc_unreachable ();
18716 dest = gen_lowpart (imode, operands[0]);
18719 tmp = force_reg (imode, CONST0_RTX (imode));
18721 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
18722 operands[1], pc_rtx, pc_rtx);
18724 emit_insn (unpack (dest, operands[1], tmp));
18728 /* Expand conditional increment or decrement using adc/sbb instructions.
18729 The default case using setcc followed by the conditional move can be
18730 done by generic code. */
18732 ix86_expand_int_addcc (rtx operands[])
18734 enum rtx_code code = GET_CODE (operands[1]);
18736 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
18738 rtx val = const0_rtx;
18739 bool fpcmp = false;
18740 enum machine_mode mode;
18741 rtx op0 = XEXP (operands[1], 0);
18742 rtx op1 = XEXP (operands[1], 1);
18744 if (operands[3] != const1_rtx
18745 && operands[3] != constm1_rtx)
18747 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
18749 code = GET_CODE (compare_op);
18751 flags = XEXP (compare_op, 0);
18753 if (GET_MODE (flags) == CCFPmode
18754 || GET_MODE (flags) == CCFPUmode)
18757 code = ix86_fp_compare_code_to_integer (code);
18764 PUT_CODE (compare_op,
18765 reverse_condition_maybe_unordered
18766 (GET_CODE (compare_op)));
18768 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
18771 mode = GET_MODE (operands[0]);
18773 /* Construct either adc or sbb insn. */
18774 if ((code == LTU) == (operands[3] == constm1_rtx))
18779 insn = gen_subqi3_carry;
18782 insn = gen_subhi3_carry;
18785 insn = gen_subsi3_carry;
18788 insn = gen_subdi3_carry;
18791 gcc_unreachable ();
18799 insn = gen_addqi3_carry;
18802 insn = gen_addhi3_carry;
18805 insn = gen_addsi3_carry;
18808 insn = gen_adddi3_carry;
18811 gcc_unreachable ();
18814 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
18820 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
18821 but works for floating-point parameters and non-offsettable memories.
18822 For pushes, it returns just stack offsets; the values will be saved
18823 in the right order. Maximally three parts are generated. */
18826 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
18831 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
18833 size = (GET_MODE_SIZE (mode) + 4) / 8;
18835 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
18836 gcc_assert (size >= 2 && size <= 4);
18838 /* Optimize constant pool reference to immediates. This is used by fp
18839 moves that force all constants to memory to allow combining. */
18840 if (MEM_P (operand) && MEM_READONLY_P (operand))
18842 rtx tmp = maybe_get_pool_constant (operand);
18847 if (MEM_P (operand) && !offsettable_memref_p (operand))
18849 /* The only non-offsettable memories we handle are pushes. */
18850 int ok = push_operand (operand, VOIDmode);
18854 operand = copy_rtx (operand);
18855 PUT_MODE (operand, Pmode);
18856 parts[0] = parts[1] = parts[2] = parts[3] = operand;
18860 if (GET_CODE (operand) == CONST_VECTOR)
18862 enum machine_mode imode = int_mode_for_mode (mode);
18863 /* Caution: if we looked through a constant pool memory above,
18864 the operand may actually have a different mode now. That's
18865 ok, since we want to pun this all the way back to an integer. */
18866 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
18867 gcc_assert (operand != NULL);
18873 if (mode == DImode)
18874 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
18879 if (REG_P (operand))
18881 gcc_assert (reload_completed);
18882 for (i = 0; i < size; i++)
18883 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
18885 else if (offsettable_memref_p (operand))
18887 operand = adjust_address (operand, SImode, 0);
18888 parts[0] = operand;
18889 for (i = 1; i < size; i++)
18890 parts[i] = adjust_address (operand, SImode, 4 * i);
18892 else if (GET_CODE (operand) == CONST_DOUBLE)
18897 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
18901 real_to_target (l, &r, mode);
18902 parts[3] = gen_int_mode (l[3], SImode);
18903 parts[2] = gen_int_mode (l[2], SImode);
18906 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
18907 parts[2] = gen_int_mode (l[2], SImode);
18910 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
18913 gcc_unreachable ();
18915 parts[1] = gen_int_mode (l[1], SImode);
18916 parts[0] = gen_int_mode (l[0], SImode);
18919 gcc_unreachable ();
18924 if (mode == TImode)
18925 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
18926 if (mode == XFmode || mode == TFmode)
18928 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
18929 if (REG_P (operand))
18931 gcc_assert (reload_completed);
18932 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
18933 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
18935 else if (offsettable_memref_p (operand))
18937 operand = adjust_address (operand, DImode, 0);
18938 parts[0] = operand;
18939 parts[1] = adjust_address (operand, upper_mode, 8);
18941 else if (GET_CODE (operand) == CONST_DOUBLE)
18946 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
18947 real_to_target (l, &r, mode);
18949 /* Do not use shift by 32 to avoid warning on 32bit systems. */
18950 if (HOST_BITS_PER_WIDE_INT >= 64)
18953 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
18954 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
18957 parts[0] = immed_double_const (l[0], l[1], DImode);
18959 if (upper_mode == SImode)
18960 parts[1] = gen_int_mode (l[2], SImode);
18961 else if (HOST_BITS_PER_WIDE_INT >= 64)
18964 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
18965 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
18968 parts[1] = immed_double_const (l[2], l[3], DImode);
18971 gcc_unreachable ();
18978 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
18979 Return false when normal moves are needed; true when all required
18980 insns have been emitted.  Operands 2-5 contain the input values
18981 in the correct order; operands 6-9 contain the output values. */
18984 ix86_split_long_move (rtx operands[])
18989 int collisions = 0;
18990 enum machine_mode mode = GET_MODE (operands[0]);
18991 bool collisionparts[4];
18993 /* The DFmode expanders may ask us to move double.
18994 For a 64-bit target this is a single move.  By hiding the fact
18995 here we simplify the i386.md splitters. */
18996 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
18998 /* Optimize constant pool reference to immediates. This is used by
18999 fp moves, that force all constants to memory to allow combining. */
19001 if (MEM_P (operands[1])
19002 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
19003 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
19004 operands[1] = get_pool_constant (XEXP (operands[1], 0));
19005 if (push_operand (operands[0], VOIDmode))
19007 operands[0] = copy_rtx (operands[0]);
19008 PUT_MODE (operands[0], Pmode);
19011 operands[0] = gen_lowpart (DImode, operands[0]);
19012 operands[1] = gen_lowpart (DImode, operands[1]);
19013 emit_move_insn (operands[0], operands[1]);
19017 /* The only non-offsettable memory we handle is push. */
19018 if (push_operand (operands[0], VOIDmode))
19021 gcc_assert (!MEM_P (operands[0])
19022 || offsettable_memref_p (operands[0]));
19024 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
19025 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
19027 /* When emitting a push, take care of source operands on the stack. */
19028 if (push && MEM_P (operands[1])
19029 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
19031 rtx src_base = XEXP (part[1][nparts - 1], 0);
19033 /* Compensate for the stack decrement by 4. */
19034 if (!TARGET_64BIT && nparts == 3
19035 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
19036 src_base = plus_constant (src_base, 4);
19038 /* src_base refers to the stack pointer and is
19039 automatically decreased by the emitted pushes. */
19040 for (i = 0; i < nparts; i++)
19041 part[1][i] = change_address (part[1][i],
19042 GET_MODE (part[1][i]), src_base);
19045 /* We need to do the copy in the right order in case an address register
19046 of the source overlaps the destination. */
19047 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
19051 for (i = 0; i < nparts; i++)
19054 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
19055 if (collisionparts[i])
19059 /* A collision in the middle part can be handled by reordering. */
19060 if (collisions == 1 && nparts == 3 && collisionparts [1])
19062 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
19063 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
19065 else if (collisions == 1
19067 && (collisionparts [1] || collisionparts [2]))
19069 if (collisionparts [1])
19071 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
19072 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
19076 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
19077 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
19081 /* If there are more collisions, we can't handle it by reordering.
19082 Do an lea to the last part and use only one colliding move. */
19083 else if (collisions > 1)
19089 base = part[0][nparts - 1];
19091 /* Handle the case when the last part isn't valid for lea.
19092 This happens in 64-bit mode when storing the 12-byte XFmode value. */
19093 if (GET_MODE (base) != Pmode)
19094 base = gen_rtx_REG (Pmode, REGNO (base));
19096 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
19097 part[1][0] = replace_equiv_address (part[1][0], base);
19098 for (i = 1; i < nparts; i++)
19100 tmp = plus_constant (base, UNITS_PER_WORD * i);
19101 part[1][i] = replace_equiv_address (part[1][i], tmp);
19112 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
19113 emit_insn (gen_addsi3 (stack_pointer_rtx,
19114 stack_pointer_rtx, GEN_INT (-4)));
19115 emit_move_insn (part[0][2], part[1][2]);
19117 else if (nparts == 4)
19119 emit_move_insn (part[0][3], part[1][3]);
19120 emit_move_insn (part[0][2], part[1][2]);
19125 /* In 64-bit mode we don't have a 32-bit push available.  If this is
19126 a register, that is OK: we will just use the larger counterpart.
19127 We also retype memory; this comes from an attempt to avoid the
19128 REX prefix on moving the second half of a TFmode value. */
19129 if (GET_MODE (part[1][1]) == SImode)
19131 switch (GET_CODE (part[1][1]))
19134 part[1][1] = adjust_address (part[1][1], DImode, 0);
19138 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
19142 gcc_unreachable ();
19145 if (GET_MODE (part[1][0]) == SImode)
19146 part[1][0] = part[1][1];
19149 emit_move_insn (part[0][1], part[1][1]);
19150 emit_move_insn (part[0][0], part[1][0]);
19154 /* Choose the correct order so as not to overwrite the source before it is copied. */
19155 if ((REG_P (part[0][0])
19156 && REG_P (part[1][1])
19157 && (REGNO (part[0][0]) == REGNO (part[1][1])
19159 && REGNO (part[0][0]) == REGNO (part[1][2]))
19161 && REGNO (part[0][0]) == REGNO (part[1][3]))))
19163 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
19165 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
19167 operands[2 + i] = part[0][j];
19168 operands[6 + i] = part[1][j];
19173 for (i = 0; i < nparts; i++)
19175 operands[2 + i] = part[0][i];
19176 operands[6 + i] = part[1][i];
19180 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
19181 if (optimize_insn_for_size_p ())
19183 for (j = 0; j < nparts - 1; j++)
19184 if (CONST_INT_P (operands[6 + j])
19185 && operands[6 + j] != const0_rtx
19186 && REG_P (operands[2 + j]))
19187 for (i = j; i < nparts - 1; i++)
19188 if (CONST_INT_P (operands[7 + i])
19189 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
19190 operands[7 + i] = operands[2 + j];
19193 for (i = 0; i < nparts; i++)
19194 emit_move_insn (operands[2 + i], operands[6 + i]);
19199 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
19200 left shift by a constant, either using a single shift or
19201 a sequence of add instructions. */
19204 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
19206 rtx (*insn)(rtx, rtx, rtx);
19209 || (count * ix86_cost->add <= ix86_cost->shift_const
19210 && !optimize_insn_for_size_p ()))
19212 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
19213 while (count-- > 0)
19214 emit_insn (insn (operand, operand, operand));
19218 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
19219 emit_insn (insn (operand, operand, GEN_INT (count)));
19224 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
19226 rtx (*gen_ashl3)(rtx, rtx, rtx);
19227 rtx (*gen_shld)(rtx, rtx, rtx);
19228 int half_width = GET_MODE_BITSIZE (mode) >> 1;
19230 rtx low[2], high[2];
19233 if (CONST_INT_P (operands[2]))
19235 split_double_mode (mode, operands, 2, low, high);
19236 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
19238 if (count >= half_width)
19240 emit_move_insn (high[0], low[1]);
19241 emit_move_insn (low[0], const0_rtx);
19243 if (count > half_width)
19244 ix86_expand_ashl_const (high[0], count - half_width, mode);
19248 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
19250 if (!rtx_equal_p (operands[0], operands[1]))
19251 emit_move_insn (operands[0], operands[1]);
19253 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
19254 ix86_expand_ashl_const (low[0], count, mode);
19259 split_double_mode (mode, operands, 1, low, high);
19261 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
19263 if (operands[1] == const1_rtx)
19265 /* Assuming we've chosen QImode-capable registers, then 1 << N
19266 can be done with two 32/64-bit shifts, no branches, no cmoves. */
19267 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
19269 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
19271 ix86_expand_clear (low[0]);
19272 ix86_expand_clear (high[0]);
19273 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
19275 d = gen_lowpart (QImode, low[0]);
19276 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
19277 s = gen_rtx_EQ (QImode, flags, const0_rtx);
19278 emit_insn (gen_rtx_SET (VOIDmode, d, s));
19280 d = gen_lowpart (QImode, high[0]);
19281 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
19282 s = gen_rtx_NE (QImode, flags, const0_rtx);
19283 emit_insn (gen_rtx_SET (VOIDmode, d, s));
19286 /* Otherwise, we can get the same results by manually performing
19287 a bit extract operation on bit 5/6, and then performing the two
19288 shifts. The two methods of getting 0/1 into low/high are exactly
19289 the same size. Avoiding the shift in the bit extract case helps
19290 pentium4 a bit; no one else seems to care much either way. */
19293 enum machine_mode half_mode;
19294 rtx (*gen_lshr3)(rtx, rtx, rtx);
19295 rtx (*gen_and3)(rtx, rtx, rtx);
19296 rtx (*gen_xor3)(rtx, rtx, rtx);
19297 HOST_WIDE_INT bits;
19300 if (mode == DImode)
19302 half_mode = SImode;
19303 gen_lshr3 = gen_lshrsi3;
19304 gen_and3 = gen_andsi3;
19305 gen_xor3 = gen_xorsi3;
19310 half_mode = DImode;
19311 gen_lshr3 = gen_lshrdi3;
19312 gen_and3 = gen_anddi3;
19313 gen_xor3 = gen_xordi3;
19317 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
19318 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
19320 x = gen_lowpart (half_mode, operands[2]);
19321 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
19323 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
19324 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
19325 emit_move_insn (low[0], high[0]);
19326 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
19329 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
19330 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
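/* A plain-C sketch of the bit-extract variant above (an approximation,
   exploiting the fact that x86 shifts mask their count to 5/6 bits):

       static void
       shl1_double (unsigned n, unsigned *lo, unsigned *hi)
       {
         unsigned bit5 = (n >> 5) & 1;   /* is n >= 32?  */
         *hi = bit5;                     /* (lo,hi) = bit5 ? (0,1) : (1,0) */
         *lo = bit5 ^ 1;
         *lo <<= (n & 31);               /* only the selected half moves */
         *hi <<= (n & 31);
       }
*/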
19334 if (operands[1] == constm1_rtx)
19336 /* For -1 << N, we can avoid the shld instruction, because we
19337 know that we're shifting 0...31/63 ones into a -1. */
19338 emit_move_insn (low[0], constm1_rtx);
19339 if (optimize_insn_for_size_p ())
19340 emit_move_insn (high[0], low[0]);
19342 emit_move_insn (high[0], constm1_rtx);
19346 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
19348 if (!rtx_equal_p (operands[0], operands[1]))
19349 emit_move_insn (operands[0], operands[1]);
19351 split_double_mode (mode, operands, 1, low, high);
19352 emit_insn (gen_shld (high[0], low[0], operands[2]));
19355 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
19357 if (TARGET_CMOVE && scratch)
19359 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
19360 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
19362 ix86_expand_clear (scratch);
19363 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
19367 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
19368 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
19370 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
19375 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
19377 rtx (*gen_ashr3)(rtx, rtx, rtx)
19378 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
19379 rtx (*gen_shrd)(rtx, rtx, rtx);
19380 int half_width = GET_MODE_BITSIZE (mode) >> 1;
19382 rtx low[2], high[2];
19385 if (CONST_INT_P (operands[2]))
19387 split_double_mode (mode, operands, 2, low, high);
19388 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
19390 if (count == GET_MODE_BITSIZE (mode) - 1)
19392 emit_move_insn (high[0], high[1]);
19393 emit_insn (gen_ashr3 (high[0], high[0],
19394 GEN_INT (half_width - 1)));
19395 emit_move_insn (low[0], high[0]);
19398 else if (count >= half_width)
19400 emit_move_insn (low[0], high[1]);
19401 emit_move_insn (high[0], low[0]);
19402 emit_insn (gen_ashr3 (high[0], high[0],
19403 GEN_INT (half_width - 1)));
19405 if (count > half_width)
19406 emit_insn (gen_ashr3 (low[0], low[0],
19407 GEN_INT (count - half_width)));
19411 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19413 if (!rtx_equal_p (operands[0], operands[1]))
19414 emit_move_insn (operands[0], operands[1]);
19416 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
19417 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
19422 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19424 if (!rtx_equal_p (operands[0], operands[1]))
19425 emit_move_insn (operands[0], operands[1]);
19427 split_double_mode (mode, operands, 1, low, high);
19429 emit_insn (gen_shrd (low[0], high[0], operands[2]));
19430 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
19432 if (TARGET_CMOVE && scratch)
19434 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
19435 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
19437 emit_move_insn (scratch, high[0]);
19438 emit_insn (gen_ashr3 (scratch, scratch,
19439 GEN_INT (half_width - 1)));
19440 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
19445 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
19446 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
19448 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
19454 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
19456 rtx (*gen_lshr3)(rtx, rtx, rtx)
19457 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
19458 rtx (*gen_shrd)(rtx, rtx, rtx);
19459 int half_width = GET_MODE_BITSIZE (mode) >> 1;
19461 rtx low[2], high[2];
19464 if (CONST_INT_P (operands[2]))
19466 split_double_mode (mode, operands, 2, low, high);
19467 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
19469 if (count >= half_width)
19471 emit_move_insn (low[0], high[1]);
19472 ix86_expand_clear (high[0]);
19474 if (count > half_width)
19475 emit_insn (gen_lshr3 (low[0], low[0],
19476 GEN_INT (count - half_width)));
19480 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19482 if (!rtx_equal_p (operands[0], operands[1]))
19483 emit_move_insn (operands[0], operands[1]);
19485 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
19486 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
19491 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19493 if (!rtx_equal_p (operands[0], operands[1]))
19494 emit_move_insn (operands[0], operands[1]);
19496 split_double_mode (mode, operands, 1, low, high);
19498 emit_insn (gen_shrd (low[0], high[0], operands[2]));
19499 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
19501 if (TARGET_CMOVE && scratch)
19503 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
19504 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
19506 ix86_expand_clear (scratch);
19507 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
19512 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
19513 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
19515 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
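/* A plain-C sketch of the double-word logical right shift being split
   here (an approximation, not the exact insn sequence):

       static void
       lshr_double (unsigned n, unsigned *lo, unsigned *hi)
       {
         if (n >= 32)                    /* the shift_adj fixup */
           {
             *lo = *hi >> (n - 32);
             *hi = 0;
           }
         else if (n)
           {
             *lo = (*lo >> n) | (*hi << (32 - n));   /* shrd */
             *hi >>= n;
           }
       }

   On CMOV targets the n >= 32 fixup is done branchlessly with a scratch
   zero; otherwise a short compare-and-jump sequence is emitted.  */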
19520 /* Predict the just-emitted jump instruction to be taken with probability PROB. */
19522 predict_jump (int prob)
19524 rtx insn = get_last_insn ();
19525 gcc_assert (JUMP_P (insn));
19526 add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
19529 /* Helper function for the string operations below.  Test VARIABLE whether
19530 it is aligned to VALUE bytes. If true, jump to the label. */
19532 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
19534 rtx label = gen_label_rtx ();
19535 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
19536 if (GET_MODE (variable) == DImode)
19537 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
19539 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
19540 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
19543 predict_jump (REG_BR_PROB_BASE * 50 / 100);
19545 predict_jump (REG_BR_PROB_BASE * 90 / 100);
19549 /* Adjust COUNTER by the VALUE. */
19551 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
19553 rtx (*gen_add)(rtx, rtx, rtx)
19554 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
19556 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
19559 /* Zero-extend a possibly SImode EXP to a Pmode register. */
19561 ix86_zero_extend_to_Pmode (rtx exp)
19564 if (GET_MODE (exp) == VOIDmode)
19565 return force_reg (Pmode, exp);
19566 if (GET_MODE (exp) == Pmode)
19567 return copy_to_mode_reg (Pmode, exp);
19568 r = gen_reg_rtx (Pmode);
19569 emit_insn (gen_zero_extendsidi2 (r, exp));
19573 /* Divide COUNTREG by SCALE. */
19575 scale_counter (rtx countreg, int scale)
19581 if (CONST_INT_P (countreg))
19582 return GEN_INT (INTVAL (countreg) / scale);
19583 gcc_assert (REG_P (countreg));
19585 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
19586 GEN_INT (exact_log2 (scale)),
19587 NULL, 1, OPTAB_DIRECT);
19591 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
19592 DImode for constant loop counts. */
19594 static enum machine_mode
19595 counter_mode (rtx count_exp)
19597 if (GET_MODE (count_exp) != VOIDmode)
19598 return GET_MODE (count_exp);
19599 if (!CONST_INT_P (count_exp))
19601 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
19606 /* When SRCPTR is non-NULL, output a simple loop to move memory pointed
19607 to by SRCPTR to DESTPTR via chunks of MODE, unrolled UNROLL times;
19608 the overall size is COUNT, specified in bytes.  When SRCPTR is NULL, output the
19609 equivalent loop to set memory by VALUE (supposed to be in MODE).
19611 The size is rounded down to a whole number of chunks moved at once.
19612 SRCMEM and DESTMEM provide MEM rtx to feed proper aliasing info. */
19616 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
19617 rtx destptr, rtx srcptr, rtx value,
19618 rtx count, enum machine_mode mode, int unroll,
19621 rtx out_label, top_label, iter, tmp;
19622 enum machine_mode iter_mode = counter_mode (count);
19623 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
19624 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
19630 top_label = gen_label_rtx ();
19631 out_label = gen_label_rtx ();
19632 iter = gen_reg_rtx (iter_mode);
19634 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
19635 NULL, 1, OPTAB_DIRECT);
19636 /* Those two should combine. */
19637 if (piece_size == const1_rtx)
19639 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
19641 predict_jump (REG_BR_PROB_BASE * 10 / 100);
19643 emit_move_insn (iter, const0_rtx);
19645 emit_label (top_label);
19647 tmp = convert_modes (Pmode, iter_mode, iter, true);
19648 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
19649 destmem = change_address (destmem, mode, x_addr);
19653 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
19654 srcmem = change_address (srcmem, mode, y_addr);
19656 /* When unrolling for chips that reorder memory reads and writes,
19657 we can save registers by using a single temporary.
19658 Also, using four temporaries is overkill in 32-bit mode. */
19659 if (!TARGET_64BIT && 0)
19661 for (i = 0; i < unroll; i++)
19666 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
19668 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
19670 emit_move_insn (destmem, srcmem);
19676 gcc_assert (unroll <= 4);
19677 for (i = 0; i < unroll; i++)
19679 tmpreg[i] = gen_reg_rtx (mode);
19683 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
19685 emit_move_insn (tmpreg[i], srcmem);
19687 for (i = 0; i < unroll; i++)
19692 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
19694 emit_move_insn (destmem, tmpreg[i]);
19699 for (i = 0; i < unroll; i++)
19703 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
19704 emit_move_insn (destmem, value);
19707 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
19708 true, OPTAB_LIB_WIDEN);
19710 emit_move_insn (iter, tmp);
19712 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
19714 if (expected_size != -1)
19716 expected_size /= GET_MODE_SIZE (mode) * unroll;
19717 if (expected_size == 0)
19719 else if (expected_size > REG_BR_PROB_BASE)
19720 predict_jump (REG_BR_PROB_BASE - 1);
19722 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
19725 predict_jump (REG_BR_PROB_BASE * 80 / 100);
19726 iter = ix86_zero_extend_to_Pmode (iter);
19727 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
19728 true, OPTAB_LIB_WIDEN);
19729 if (tmp != destptr)
19730 emit_move_insn (destptr, tmp);
19733 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
19734 true, OPTAB_LIB_WIDEN);
19736 emit_move_insn (srcptr, tmp);
19738 emit_label (out_label);
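/* An assumed C shape of the loop emitted above for the copy case (PIECE
   being the mode size times the unroll factor; a sketch only):

       size = count & ~(PIECE - 1);      /* whole chunks only */
       iter = 0;
       do
         {
           ... move PIECE bytes from src + iter to dest + iter ...
           iter += PIECE;
         }
       while (iter < size);
       dest += iter;  src += iter;       /* the tail is handled elsewhere */

   For a one-byte piece the loop is additionally guarded against a zero
   SIZE before entry.  */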
19741 /* Output a "rep; mov" instruction.
19742 Arguments have the same meaning as for the previous function. */
19744 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
19745 rtx destptr, rtx srcptr,
19747 enum machine_mode mode)
19752 HOST_WIDE_INT rounded_count;
19754 /* If the size is known, it is shorter to use rep movs. */
19755 if (mode == QImode && CONST_INT_P (count)
19756 && !(INTVAL (count) & 3))
19759 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
19760 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
19761 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
19762 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
19763 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
19764 if (mode != QImode)
19766 destexp = gen_rtx_ASHIFT (Pmode, countreg,
19767 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
19768 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
19769 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
19770 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
19771 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
19775 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
19776 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
19778 if (CONST_INT_P (count))
19780 rounded_count = (INTVAL (count)
19781 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
19782 destmem = shallow_copy_rtx (destmem);
19783 srcmem = shallow_copy_rtx (srcmem);
19784 set_mem_size (destmem, rounded_count);
19785 set_mem_size (srcmem, rounded_count);
19789 if (MEM_SIZE_KNOWN_P (destmem))
19790 clear_mem_size (destmem);
19791 if (MEM_SIZE_KNOWN_P (srcmem))
19792 clear_mem_size (srcmem);
19794 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
19798 /* Output a "rep; stos" instruction.
19799 Arguments have the same meaning as for the previous function. */
19801 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
19802 rtx count, enum machine_mode mode,
19807 HOST_WIDE_INT rounded_count;
19809 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
19810 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
19811 value = force_reg (mode, gen_lowpart (mode, value));
19812 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
19813 if (mode != QImode)
19815 destexp = gen_rtx_ASHIFT (Pmode, countreg,
19816 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
19817 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
19820 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
19821 if (orig_value == const0_rtx && CONST_INT_P (count))
19823 rounded_count = (INTVAL (count)
19824 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
19825 destmem = shallow_copy_rtx (destmem);
19826 set_mem_size (destmem, rounded_count);
19828 else if (MEM_SIZE_KNOWN_P (destmem))
19829 clear_mem_size (destmem);
19830 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
19834 emit_strmov (rtx destmem, rtx srcmem,
19835 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
19837 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
19838 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
19839 emit_insn (gen_strmov (destptr, dest, srcptr, src));
19842 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
19844 expand_movmem_epilogue (rtx destmem, rtx srcmem,
19845 rtx destptr, rtx srcptr, rtx count, int max_size)
19848 if (CONST_INT_P (count))
19850 HOST_WIDE_INT countval = INTVAL (count);
19853 if ((countval & 0x10) && max_size > 16)
19857 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
19858 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
19861 gcc_unreachable ();
19864 if ((countval & 0x08) && max_size > 8)
19867 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
19870 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
19871 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
19875 if ((countval & 0x04) && max_size > 4)
19877 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
19880 if ((countval & 0x02) && max_size > 2)
19882 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
19885 if ((countval & 0x01) && max_size > 1)
19887 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
19894 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
19895 count, 1, OPTAB_DIRECT);
19896 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
19897 count, QImode, 1, 4);
19901 /* When there are stringops, we can cheaply increase dest and src pointers.
19902 Otherwise we save code size by maintaining an offset (zero is readily
19903 available from the preceding rep operation) and using x86 addressing modes.
19905 if (TARGET_SINGLE_STRINGOP)
19909 rtx label = ix86_expand_aligntest (count, 4, true);
19910 src = change_address (srcmem, SImode, srcptr);
19911 dest = change_address (destmem, SImode, destptr);
19912 emit_insn (gen_strmov (destptr, dest, srcptr, src));
19913 emit_label (label);
19914 LABEL_NUSES (label) = 1;
19918 rtx label = ix86_expand_aligntest (count, 2, true);
19919 src = change_address (srcmem, HImode, srcptr);
19920 dest = change_address (destmem, HImode, destptr);
19921 emit_insn (gen_strmov (destptr, dest, srcptr, src));
19922 emit_label (label);
19923 LABEL_NUSES (label) = 1;
19927 rtx label = ix86_expand_aligntest (count, 1, true);
19928 src = change_address (srcmem, QImode, srcptr);
19929 dest = change_address (destmem, QImode, destptr);
19930 emit_insn (gen_strmov (destptr, dest, srcptr, src));
19931 emit_label (label);
19932 LABEL_NUSES (label) = 1;
19937 rtx offset = force_reg (Pmode, const0_rtx);
19942 rtx label = ix86_expand_aligntest (count, 4, true);
19943 src = change_address (srcmem, SImode, srcptr);
19944 dest = change_address (destmem, SImode, destptr);
19945 emit_move_insn (dest, src);
19946 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
19947 true, OPTAB_LIB_WIDEN);
19949 emit_move_insn (offset, tmp);
19950 emit_label (label);
19951 LABEL_NUSES (label) = 1;
19955 rtx label = ix86_expand_aligntest (count, 2, true);
19956 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
19957 src = change_address (srcmem, HImode, tmp);
19958 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
19959 dest = change_address (destmem, HImode, tmp);
19960 emit_move_insn (dest, src);
19961 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
19962 true, OPTAB_LIB_WIDEN);
19964 emit_move_insn (offset, tmp);
19965 emit_label (label);
19966 LABEL_NUSES (label) = 1;
19970 rtx label = ix86_expand_aligntest (count, 1, true);
19971 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
19972 src = change_address (srcmem, QImode, tmp);
19973 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
19974 dest = change_address (destmem, QImode, tmp);
19975 emit_move_insn (dest, src);
19976 emit_label (label);
19977 LABEL_NUSES (label) = 1;
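/* An editorial restatement of the epilogue above: it is a binary
   decomposition of the remaining byte count.  For max_size 8 it behaves
   like

       if (count & 4) copy 4 bytes;
       if (count & 2) copy 2 bytes;
       if (count & 1) copy 1 byte;

   so at most one move of each power-of-two size is emitted, with no
   loop.  */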
19982 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
19984 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
19985 rtx count, int max_size)
19988 expand_simple_binop (counter_mode (count), AND, count,
19989 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
19990 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
19991 gen_lowpart (QImode, value), count, QImode,
19995 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
19997 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
20001 if (CONST_INT_P (count))
20003 HOST_WIDE_INT countval = INTVAL (count);
20006 if ((countval & 0x10) && max_size > 16)
20010 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
20011 emit_insn (gen_strset (destptr, dest, value));
20012 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
20013 emit_insn (gen_strset (destptr, dest, value));
20016 gcc_unreachable ();
20019 if ((countval & 0x08) && max_size > 8)
20023 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
20024 emit_insn (gen_strset (destptr, dest, value));
20028 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
20029 emit_insn (gen_strset (destptr, dest, value));
20030 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
20031 emit_insn (gen_strset (destptr, dest, value));
20035 if ((countval & 0x04) && max_size > 4)
20037 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
20038 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
20041 if ((countval & 0x02) && max_size > 2)
20043 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
20044 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
20047 if ((countval & 0x01) && max_size > 1)
20049 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
20050 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
20057 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
20062 rtx label = ix86_expand_aligntest (count, 16, true);
20065 dest = change_address (destmem, DImode, destptr);
20066 emit_insn (gen_strset (destptr, dest, value));
20067 emit_insn (gen_strset (destptr, dest, value));
20071 dest = change_address (destmem, SImode, destptr);
20072 emit_insn (gen_strset (destptr, dest, value));
20073 emit_insn (gen_strset (destptr, dest, value));
20074 emit_insn (gen_strset (destptr, dest, value));
20075 emit_insn (gen_strset (destptr, dest, value));
20077 emit_label (label);
20078 LABEL_NUSES (label) = 1;
20082 rtx label = ix86_expand_aligntest (count, 8, true);
20085 dest = change_address (destmem, DImode, destptr);
20086 emit_insn (gen_strset (destptr, dest, value));
20090 dest = change_address (destmem, SImode, destptr);
20091 emit_insn (gen_strset (destptr, dest, value));
20092 emit_insn (gen_strset (destptr, dest, value));
20094 emit_label (label);
20095 LABEL_NUSES (label) = 1;
20099 rtx label = ix86_expand_aligntest (count, 4, true);
20100 dest = change_address (destmem, SImode, destptr);
20101 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
20102 emit_label (label);
20103 LABEL_NUSES (label) = 1;
20107 rtx label = ix86_expand_aligntest (count, 2, true);
20108 dest = change_address (destmem, HImode, destptr);
20109 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
20110 emit_label (label);
20111 LABEL_NUSES (label) = 1;
20115 rtx label = ix86_expand_aligntest (count, 1, true);
20116 dest = change_address (destmem, QImode, destptr);
20117 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
20118 emit_label (label);
20119 LABEL_NUSES (label) = 1;
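/* As a rough standalone C model (illustrative, not part of the compiler),
   the jump tree emitted above behaves like this for MAX_SIZE == 32: each
   set bit of COUNT below MAX_SIZE selects exactly one block store, so at
   most five stores run and no loop is needed.  */

static void
setmem_epilogue_model (unsigned char *dst, unsigned char value,
                       unsigned int count)
{
  unsigned int i;
  if (count & 16)
    for (i = 0; i < 16; i++)
      *dst++ = value;
  if (count & 8)
    for (i = 0; i < 8; i++)
      *dst++ = value;
  if (count & 4)
    for (i = 0; i < 4; i++)
      *dst++ = value;
  if (count & 2)
    for (i = 0; i < 2; i++)
      *dst++ = value;
  if (count & 1)
    *dst = value;
}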
20123 /* Copy enough from SRC to DEST to align DEST, known to be aligned by ALIGN,
20124    to DESIRED_ALIGNMENT.  */
20126 expand_movmem_prologue (rtx destmem, rtx srcmem,
20127 rtx destptr, rtx srcptr, rtx count,
20128 int align, int desired_alignment)
20130 if (align <= 1 && desired_alignment > 1)
20132 rtx label = ix86_expand_aligntest (destptr, 1, false);
20133 srcmem = change_address (srcmem, QImode, srcptr);
20134 destmem = change_address (destmem, QImode, destptr);
20135 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
20136 ix86_adjust_counter (count, 1);
20137 emit_label (label);
20138 LABEL_NUSES (label) = 1;
20140 if (align <= 2 && desired_alignment > 2)
20142 rtx label = ix86_expand_aligntest (destptr, 2, false);
20143 srcmem = change_address (srcmem, HImode, srcptr);
20144 destmem = change_address (destmem, HImode, destptr);
20145 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
20146 ix86_adjust_counter (count, 2);
20147 emit_label (label);
20148 LABEL_NUSES (label) = 1;
20150 if (align <= 4 && desired_alignment > 4)
20152 rtx label = ix86_expand_aligntest (destptr, 4, false);
20153 srcmem = change_address (srcmem, SImode, srcptr);
20154 destmem = change_address (destmem, SImode, destptr);
20155 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
20156 ix86_adjust_counter (count, 4);
20157 emit_label (label);
20158 LABEL_NUSES (label) = 1;
20160 gcc_assert (desired_alignment <= 8);
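/* A rough standalone C model (illustrative, not part of the compiler) of
   the prologue above for DESIRED_ALIGNMENT == 8: peel one byte, then two,
   then four, each copy guarded by an alignment test of the destination, so
   that DST ends up 8-byte aligned.  The prologue guard has already ensured
   that COUNT covers the peeled bytes.  The unsigned long cast assumes
   pointers fit in a long, as on the usual ILP32/LP64 targets.  */

static void
align_dest_model (unsigned char **dst, const unsigned char **src,
                  unsigned long *count)
{
  unsigned long i, n;
  for (n = 1; n < 8; n <<= 1)
    if ((unsigned long) *dst & n)
      {
        for (i = 0; i < n; i++)
          (*dst)[i] = (*src)[i];
        *dst += n;
        *src += n;
        *count -= n;
      }
}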
20163 /* Copy enough from SRC to DST to align DST to DESIRED_ALIGN.
20164    ALIGN_BYTES is how many bytes need to be copied to do so.  */
20166 expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
20167 int desired_align, int align_bytes)
20170 rtx orig_dst = dst;
20171 rtx orig_src = src;
20173 int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
20174 if (src_align_bytes >= 0)
20175 src_align_bytes = desired_align - src_align_bytes;
20176 if (align_bytes & 1)
20178 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
20179 src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
20181 emit_insn (gen_strmov (destreg, dst, srcreg, src));
20183 if (align_bytes & 2)
20185 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
20186 src = adjust_automodify_address_nv (src, HImode, srcreg, off);
20187 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
20188 set_mem_align (dst, 2 * BITS_PER_UNIT);
20189 if (src_align_bytes >= 0
20190 && (src_align_bytes & 1) == (align_bytes & 1)
20191 && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
20192 set_mem_align (src, 2 * BITS_PER_UNIT);
20194 emit_insn (gen_strmov (destreg, dst, srcreg, src));
20196 if (align_bytes & 4)
20198 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
20199 src = adjust_automodify_address_nv (src, SImode, srcreg, off);
20200 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
20201 set_mem_align (dst, 4 * BITS_PER_UNIT);
20202 if (src_align_bytes >= 0)
20204 unsigned int src_align = 0;
20205 if ((src_align_bytes & 3) == (align_bytes & 3))
20207 else if ((src_align_bytes & 1) == (align_bytes & 1))
20209 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
20210 set_mem_align (src, src_align * BITS_PER_UNIT);
20213 emit_insn (gen_strmov (destreg, dst, srcreg, src));
20215 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
20216 src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
20217 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
20218 set_mem_align (dst, desired_align * BITS_PER_UNIT);
20219 if (src_align_bytes >= 0)
20221 unsigned int src_align = 0;
20222 if ((src_align_bytes & 7) == (align_bytes & 7))
20224 else if ((src_align_bytes & 3) == (align_bytes & 3))
20226 else if ((src_align_bytes & 1) == (align_bytes & 1))
20228 if (src_align > (unsigned int) desired_align)
20229 src_align = desired_align;
20230 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
20231 set_mem_align (src, src_align * BITS_PER_UNIT);
20233 if (MEM_SIZE_KNOWN_P (orig_dst))
20234 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
20235 if (MEM_SIZE_KNOWN_P (orig_src))
20236 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
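  /* A worked example of the source-alignment deduction above, with
     illustrative numbers: DESIRED_ALIGN == 8, ALIGN_BYTES == 5, and a
     source that needs SRC_ALIGN_BYTES == 3 more bytes to reach 8-byte
     alignment (i.e. the source address is 5 mod 8).  After the 5 peeled
     bytes the source address is 10 mod 8 == 2, so it is 2-byte but not
     4-byte aligned.  And indeed only the low-bit comparison matches:
     (5 & 7) != (3 & 7) and (5 & 3) != (3 & 3), but (5 & 1) == (3 & 1),
     so SRC_ALIGN is recorded as 2.  */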
20241 /* Store enough into DEST to align DEST, known to be aligned by ALIGN,
20242    to DESIRED_ALIGNMENT.  */
20244 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
20245 int align, int desired_alignment)
20247 if (align <= 1 && desired_alignment > 1)
20249 rtx label = ix86_expand_aligntest (destptr, 1, false);
20250 destmem = change_address (destmem, QImode, destptr);
20251 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
20252 ix86_adjust_counter (count, 1);
20253 emit_label (label);
20254 LABEL_NUSES (label) = 1;
20256 if (align <= 2 && desired_alignment > 2)
20258 rtx label = ix86_expand_aligntest (destptr, 2, false);
20259 destmem = change_address (destmem, HImode, destptr);
20260 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
20261 ix86_adjust_counter (count, 2);
20262 emit_label (label);
20263 LABEL_NUSES (label) = 1;
20265 if (align <= 4 && desired_alignment > 4)
20267 rtx label = ix86_expand_aligntest (destptr, 4, false);
20268 destmem = change_address (destmem, SImode, destptr);
20269 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
20270 ix86_adjust_counter (count, 4);
20271 emit_label (label);
20272 LABEL_NUSES (label) = 1;
20274 gcc_assert (desired_alignment <= 8);
20277 /* Store enough into DST to align DST, known to be aligned by ALIGN, to
20278    DESIRED_ALIGN.  ALIGN_BYTES is how many bytes need to be stored.  */
20280 expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
20281 int desired_align, int align_bytes)
20284 rtx orig_dst = dst;
20285 if (align_bytes & 1)
20287 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
20289 emit_insn (gen_strset (destreg, dst,
20290 gen_lowpart (QImode, value)));
20292 if (align_bytes & 2)
20294 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
20295 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
20296 set_mem_align (dst, 2 * BITS_PER_UNIT);
20298 emit_insn (gen_strset (destreg, dst,
20299 gen_lowpart (HImode, value)));
20301 if (align_bytes & 4)
20303 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
20304 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
20305 set_mem_align (dst, 4 * BITS_PER_UNIT);
20307 emit_insn (gen_strset (destreg, dst,
20308 gen_lowpart (SImode, value)));
20310 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
20311 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
20312 set_mem_align (dst, desired_align * BITS_PER_UNIT);
20313 if (MEM_SIZE_KNOWN_P (orig_dst))
20314 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
20318 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
20319 static enum stringop_alg
20320 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
20321 int *dynamic_check)
20323 const struct stringop_algs * algs;
20324 bool optimize_for_speed;
20325 /* Algorithms using the rep prefix want at least edi and ecx;
20326 additionally, memset wants eax and memcpy wants esi. Don't
20327 consider such algorithms if the user has appropriated those
20328 registers for their own purposes. */
20329 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
20331 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
20333 #define ALG_USABLE_P(alg) (rep_prefix_usable \
20334 || (alg != rep_prefix_1_byte \
20335 && alg != rep_prefix_4_byte \
20336 && alg != rep_prefix_8_byte))
20337 const struct processor_costs *cost;
20339 /* Even if the string operation call is cold, we still might spend a lot
20340 of time processing large blocks. */
20341 if (optimize_function_for_size_p (cfun)
20342 || (optimize_insn_for_size_p ()
20343 && expected_size != -1 && expected_size < 256))
20344 optimize_for_speed = false;
20346 optimize_for_speed = true;
20348 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
20350 *dynamic_check = -1;
20352 algs = &cost->memset[TARGET_64BIT != 0];
20354 algs = &cost->memcpy[TARGET_64BIT != 0];
20355 if (ix86_stringop_alg != no_stringop && ALG_USABLE_P (ix86_stringop_alg))
20356 return ix86_stringop_alg;
20357 /* rep; movq or rep; movl is the smallest variant. */
20358 else if (!optimize_for_speed)
20360 if (!count || (count & 3))
20361 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
20363 return rep_prefix_usable ? rep_prefix_4_byte : loop;
20365   /* Very tiny blocks are best handled via the loop; REP is expensive to set up.  */
20367 else if (expected_size != -1 && expected_size < 4)
20368 return loop_1_byte;
20369 else if (expected_size != -1)
20372 enum stringop_alg alg = libcall;
20373 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
20375 /* We get here if the algorithms that were not libcall-based
20376 were rep-prefix based and we are unable to use rep prefixes
20377 based on global register usage. Break out of the loop and
20378 use the heuristic below. */
20379 if (algs->size[i].max == 0)
20381 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
20383 enum stringop_alg candidate = algs->size[i].alg;
20385 if (candidate != libcall && ALG_USABLE_P (candidate))
20387 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
20388 last non-libcall inline algorithm. */
20389 if (TARGET_INLINE_ALL_STRINGOPS)
20391 /* When the current size is best to be copied by a libcall,
20392 but we are still forced to inline, run the heuristic below
20393 that will pick code for medium sized blocks. */
20394 if (alg != libcall)
20398 else if (ALG_USABLE_P (candidate))
20402 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
20404   /* When asked to inline the call anyway, try to pick a meaningful choice.
20405      We look for the maximal size of block that is faster to copy by hand,
20406      and take blocks of at most that size, guessing that the average size
20407      will be roughly half of the maximum.
20409 If this turns out to be bad, we might simply specify the preferred
20410 choice in ix86_costs. */
20411 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
20412 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
20415 enum stringop_alg alg;
20417 bool any_alg_usable_p = true;
20419 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
20421 enum stringop_alg candidate = algs->size[i].alg;
20422 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
20424 if (candidate != libcall && candidate
20425 && ALG_USABLE_P (candidate))
20426 max = algs->size[i].max;
20428 /* If there aren't any usable algorithms, then recursing on
20429 smaller sizes isn't going to find anything. Just return the
20430 simple byte-at-a-time copy loop. */
20431 if (!any_alg_usable_p)
20433 /* Pick something reasonable. */
20434 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
20435 *dynamic_check = 128;
20436 return loop_1_byte;
20440 alg = decide_alg (count, max / 2, memset, dynamic_check);
20441 gcc_assert (*dynamic_check == -1);
20442 gcc_assert (alg != libcall);
20443 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
20444 *dynamic_check = max;
20447 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
20448 #undef ALG_USABLE_P
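/* For instance, with a hypothetical cost-table entry
     {libcall, {{256, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}}
   the code above picks the loop for known sizes up to 256 bytes,
   rep movsl up to 8192 bytes, and a library call for larger or unknown
   sizes (unless inlining is forced, which the heuristic above handles).  */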
20451 /* Decide on alignment. We know that the operand is already aligned to ALIGN
20452 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
20454 decide_alignment (int align,
20455 enum stringop_alg alg,
20458 int desired_align = 0;
20462 gcc_unreachable ();
20464 case unrolled_loop:
20465 desired_align = GET_MODE_SIZE (Pmode);
20467 case rep_prefix_8_byte:
20470 case rep_prefix_4_byte:
20471       /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
20472          copying a whole cache line at once.  */
20473 if (TARGET_PENTIUMPRO)
20478 case rep_prefix_1_byte:
20479       /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
20480          copying a whole cache line at once.  */
20481 if (TARGET_PENTIUMPRO)
20495 if (desired_align < align)
20496 desired_align = align;
20497 if (expected_size != -1 && expected_size < 4)
20498 desired_align = align;
20499 return desired_align;
20502 /* Return the smallest power of 2 greater than VAL. */
20504 smallest_pow2_greater_than (int val)
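{
  /* A minimal sketch of the body, matching the contract in the comment
     above: e.g. 5 -> 8 and 8 -> 16 (strictly greater).  */
  int ret = 1;
  while (ret <= val)
    ret <<= 1;
  return ret;
}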
20512 /* Expand string move (memcpy) operation. Use i386 string operations
20513 when profitable. expand_setmem contains similar code. The code
20514 depends upon architecture, block size and alignment, but always has
20515 the same overall structure:
20517    1) Prologue guard: a conditional that jumps up to the epilogues for
20518       small blocks that can be handled by the epilogue alone.  This is
20519       faster, but also needed for correctness, since the prologue assumes
20520       the block is larger than the desired alignment.
20522       An optional dynamic check for size and a libcall for large
20523       blocks is emitted here too, with -minline-stringops-dynamically.
20525    2) Prologue: copy the first few bytes in order to get the destination
20526       aligned to DESIRED_ALIGN.  It is emitted only when ALIGN is less
20527       than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
20528       copied.  We emit either a jump tree (testing power-of-two sized
20529       chunks) or a byte loop.
20531    3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
20532       with the specified algorithm.
20534    4) Epilogue: code copying the tail of the block that is too small to be
20535       handled by the main body (or up to the size guarded by the prologue guard).  */
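/* In rough C-like pseudo code (illustrative only), the emitted shape is:

     if (count < epilogue_size_needed)           // 1) prologue guard
       goto epilogue;
     while (dest not aligned to desired_align)   // 2) alignment prologue
       copy one small piece;
     while (count >= size_needed)                // 3) main body
       copy a size_needed chunk;
   epilogue:                                     // 4) epilogue
     copy the remaining count & (epilogue_size_needed - 1) bytes;  */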
20538 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
20539 rtx expected_align_exp, rtx expected_size_exp)
20545 rtx jump_around_label = NULL;
20546 HOST_WIDE_INT align = 1;
20547 unsigned HOST_WIDE_INT count = 0;
20548 HOST_WIDE_INT expected_size = -1;
20549 int size_needed = 0, epilogue_size_needed;
20550 int desired_align = 0, align_bytes = 0;
20551 enum stringop_alg alg;
20553 bool need_zero_guard = false;
20555 if (CONST_INT_P (align_exp))
20556 align = INTVAL (align_exp);
20557   /* i386 can do misaligned access at a reasonably increased cost.  */
20558 if (CONST_INT_P (expected_align_exp)
20559 && INTVAL (expected_align_exp) > align)
20560 align = INTVAL (expected_align_exp);
20561 /* ALIGN is the minimum of destination and source alignment, but we care here
20562 just about destination alignment. */
20563 else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
20564 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
20566 if (CONST_INT_P (count_exp))
20567 count = expected_size = INTVAL (count_exp);
20568 if (CONST_INT_P (expected_size_exp) && count == 0)
20569 expected_size = INTVAL (expected_size_exp);
20571 /* Make sure we don't need to care about overflow later on. */
20572 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
20575 /* Step 0: Decide on preferred algorithm, desired alignment and
20576 size of chunks to be copied by main loop. */
20578 alg = decide_alg (count, expected_size, false, &dynamic_check);
20579 desired_align = decide_alignment (align, alg, expected_size);
20581 if (!TARGET_ALIGN_STRINGOPS)
20582 align = desired_align;
20584 if (alg == libcall)
20586 gcc_assert (alg != no_stringop);
20588 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
20589 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
20590 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
20595 gcc_unreachable ();
20597 need_zero_guard = true;
20598 size_needed = GET_MODE_SIZE (Pmode);
20600 case unrolled_loop:
20601 need_zero_guard = true;
20602 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
20604 case rep_prefix_8_byte:
20607 case rep_prefix_4_byte:
20610 case rep_prefix_1_byte:
20614 need_zero_guard = true;
20619 epilogue_size_needed = size_needed;
20621 /* Step 1: Prologue guard. */
20623   /* The alignment code needs count to be in a register.  */
20624 if (CONST_INT_P (count_exp) && desired_align > align)
20626 if (INTVAL (count_exp) > desired_align
20627 && INTVAL (count_exp) > size_needed)
20630 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
20631 if (align_bytes <= 0)
20634 align_bytes = desired_align - align_bytes;
20636 if (align_bytes == 0)
20637 count_exp = force_reg (counter_mode (count_exp), count_exp);
20639 gcc_assert (desired_align >= 1 && align >= 1);
20641 /* Ensure that alignment prologue won't copy past end of block. */
20642 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
20644 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
20645      /* The epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
20646         Make sure it is a power of 2.  */
20647 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
20651 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
20653      /* If the main algorithm works on QImode, no epilogue is needed.
20654         For small sizes just don't align anything.  */
20655 if (size_needed == 1)
20656 desired_align = align;
20663 label = gen_label_rtx ();
20664 emit_cmp_and_jump_insns (count_exp,
20665 GEN_INT (epilogue_size_needed),
20666 LTU, 0, counter_mode (count_exp), 1, label);
20667 if (expected_size == -1 || expected_size < epilogue_size_needed)
20668 predict_jump (REG_BR_PROB_BASE * 60 / 100);
20670 predict_jump (REG_BR_PROB_BASE * 20 / 100);
20674   /* Emit code to decide at runtime whether a library call or inline code should be used.  */
20676 if (dynamic_check != -1)
20678 if (CONST_INT_P (count_exp))
20680 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
20682 emit_block_move_via_libcall (dst, src, count_exp, false);
20683 count_exp = const0_rtx;
20689 rtx hot_label = gen_label_rtx ();
20690 jump_around_label = gen_label_rtx ();
20691 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
20692 LEU, 0, GET_MODE (count_exp), 1, hot_label);
20693 predict_jump (REG_BR_PROB_BASE * 90 / 100);
20694 emit_block_move_via_libcall (dst, src, count_exp, false);
20695 emit_jump (jump_around_label);
20696 emit_label (hot_label);
20700 /* Step 2: Alignment prologue. */
20702 if (desired_align > align)
20704 if (align_bytes == 0)
20706	  /* Except for the first move in the epilogue, we no longer know the
20707	     constant offset in the aliasing info.  It doesn't seem worth the
20708	     pain to maintain it for the first move, so throw away the info early.  */
20710 src = change_address (src, BLKmode, srcreg);
20711 dst = change_address (dst, BLKmode, destreg);
20712 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
20717 /* If we know how many bytes need to be stored before dst is
20718 sufficiently aligned, maintain aliasing info accurately. */
20719 dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
20720 desired_align, align_bytes);
20721 count_exp = plus_constant (count_exp, -align_bytes);
20722 count -= align_bytes;
20724 if (need_zero_guard
20725 && (count < (unsigned HOST_WIDE_INT) size_needed
20726 || (align_bytes == 0
20727 && count < ((unsigned HOST_WIDE_INT) size_needed
20728 + desired_align - align))))
20730       /* It is possible that we copied enough so the main loop will not execute.  */
20732 gcc_assert (size_needed > 1);
20733 if (label == NULL_RTX)
20734 label = gen_label_rtx ();
20735 emit_cmp_and_jump_insns (count_exp,
20736 GEN_INT (size_needed),
20737 LTU, 0, counter_mode (count_exp), 1, label);
20738 if (expected_size == -1
20739 || expected_size < (desired_align - align) / 2 + size_needed)
20740 predict_jump (REG_BR_PROB_BASE * 20 / 100);
20742 predict_jump (REG_BR_PROB_BASE * 60 / 100);
20745 if (label && size_needed == 1)
20747 emit_label (label);
20748 LABEL_NUSES (label) = 1;
20750 epilogue_size_needed = 1;
20752 else if (label == NULL_RTX)
20753 epilogue_size_needed = size_needed;
20755 /* Step 3: Main loop. */
20761 gcc_unreachable ();
20763 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
20764 count_exp, QImode, 1, expected_size);
20767 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
20768 count_exp, Pmode, 1, expected_size);
20770 case unrolled_loop:
20771 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
20772 registers for 4 temporaries anyway. */
20773 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
20774 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
20777 case rep_prefix_8_byte:
20778 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
20781 case rep_prefix_4_byte:
20782 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
20785 case rep_prefix_1_byte:
20786 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
20790   /* Properly adjust the offsets of the src and dest memory for aliasing.  */
20791 if (CONST_INT_P (count_exp))
20793 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
20794 (count / size_needed) * size_needed);
20795 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
20796 (count / size_needed) * size_needed);
20800 src = change_address (src, BLKmode, srcreg);
20801 dst = change_address (dst, BLKmode, destreg);
20804 /* Step 4: Epilogue to copy the remaining bytes. */
20808   /* When the main loop is done, COUNT_EXP might hold the original count,
20809      while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
20810      The epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
20811      bytes.  Compensate if needed.  */
20813 if (size_needed < epilogue_size_needed)
20816 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
20817 GEN_INT (size_needed - 1), count_exp, 1,
20819 if (tmp != count_exp)
20820 emit_move_insn (count_exp, tmp);
20822 emit_label (label);
20823 LABEL_NUSES (label) = 1;
20826 if (count_exp != const0_rtx && epilogue_size_needed > 1)
20827 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
20828 epilogue_size_needed);
20829 if (jump_around_label)
20830 emit_label (jump_around_label);
20834 /* Helper function for memset.  For a QImode value 0xXY produce
20835    0xXYXYXYXY of the width specified by MODE.  This is essentially
20836    a multiplication by 0x01010101, but we can do slightly better than
20837    synth_mult by unwinding the sequence by hand on CPUs with slow multiply.  */
20840 promote_duplicated_reg (enum machine_mode mode, rtx val)
20842 enum machine_mode valmode = GET_MODE (val);
20844 int nops = mode == DImode ? 3 : 2;
20846 gcc_assert (mode == SImode || mode == DImode);
20847 if (val == const0_rtx)
20848 return copy_to_mode_reg (mode, const0_rtx);
20849 if (CONST_INT_P (val))
20851 HOST_WIDE_INT v = INTVAL (val) & 255;
20855 if (mode == DImode)
20856 v |= (v << 16) << 16;
20857 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
20860 if (valmode == VOIDmode)
20862 if (valmode != QImode)
20863 val = gen_lowpart (QImode, val);
20864 if (mode == QImode)
20866 if (!TARGET_PARTIAL_REG_STALL)
20868 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
20869 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
20870 <= (ix86_cost->shift_const + ix86_cost->add) * nops
20871 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
20873 rtx reg = convert_modes (mode, QImode, val, true);
20874 tmp = promote_duplicated_reg (mode, const1_rtx);
20875 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
20880 rtx reg = convert_modes (mode, QImode, val, true);
20882 if (!TARGET_PARTIAL_REG_STALL)
20883 if (mode == SImode)
20884 emit_insn (gen_movsi_insv_1 (reg, reg));
20886 emit_insn (gen_movdi_insv_1 (reg, reg));
20889 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
20890 NULL, 1, OPTAB_DIRECT);
20892 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
20894 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
20895 NULL, 1, OPTAB_DIRECT);
20896 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
20897 if (mode == SImode)
20899 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
20900 NULL, 1, OPTAB_DIRECT);
20901 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
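/* In plain C terms, the shift/IOR sequence above computes (standalone
   sketch, not part of the compiler):  */

static unsigned long long
broadcast_byte_model (unsigned char v)
{
  unsigned long long x = v;     /* 0x00000000000000XY */
  x |= x << 8;                  /* 0x000000000000XYXY */
  x |= x << 16;                 /* 0x00000000XYXYXYXY */
  x |= x << 32;                 /* DImode only: 0xXYXYXYXYXYXYXYXY */
  return x;
}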
20906 /* Duplicate value VAL using promote_duplicated_reg into the maximal size that
20907    will be needed by the main loop copying SIZE_NEEDED chunks and by the
20908    prologue getting the alignment from ALIGN to DESIRED_ALIGN.  */
20910 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
20915 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
20916 promoted_val = promote_duplicated_reg (DImode, val);
20917 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
20918 promoted_val = promote_duplicated_reg (SImode, val);
20919 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
20920 promoted_val = promote_duplicated_reg (HImode, val);
20922 promoted_val = val;
20924 return promoted_val;
20927 /* Expand string clear operation (bzero).  Use i386 string operations when
20928    profitable.  See the expand_movmem comment for an explanation of the
20929    individual steps performed.  */
20931 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
20932 rtx expected_align_exp, rtx expected_size_exp)
20937 rtx jump_around_label = NULL;
20938 HOST_WIDE_INT align = 1;
20939 unsigned HOST_WIDE_INT count = 0;
20940 HOST_WIDE_INT expected_size = -1;
20941 int size_needed = 0, epilogue_size_needed;
20942 int desired_align = 0, align_bytes = 0;
20943 enum stringop_alg alg;
20944 rtx promoted_val = NULL;
20945 bool force_loopy_epilogue = false;
20947 bool need_zero_guard = false;
20949 if (CONST_INT_P (align_exp))
20950 align = INTVAL (align_exp);
20951   /* i386 can do misaligned access at a reasonably increased cost.  */
20952 if (CONST_INT_P (expected_align_exp)
20953 && INTVAL (expected_align_exp) > align)
20954 align = INTVAL (expected_align_exp);
20955 if (CONST_INT_P (count_exp))
20956 count = expected_size = INTVAL (count_exp);
20957 if (CONST_INT_P (expected_size_exp) && count == 0)
20958 expected_size = INTVAL (expected_size_exp);
20960 /* Make sure we don't need to care about overflow later on. */
20961 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
20964 /* Step 0: Decide on preferred algorithm, desired alignment and
20965 size of chunks to be copied by main loop. */
20967 alg = decide_alg (count, expected_size, true, &dynamic_check);
20968 desired_align = decide_alignment (align, alg, expected_size);
20970 if (!TARGET_ALIGN_STRINGOPS)
20971 align = desired_align;
20973 if (alg == libcall)
20975 gcc_assert (alg != no_stringop);
20977 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
20978 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
20983 gcc_unreachable ();
20985 need_zero_guard = true;
20986 size_needed = GET_MODE_SIZE (Pmode);
20988 case unrolled_loop:
20989 need_zero_guard = true;
20990 size_needed = GET_MODE_SIZE (Pmode) * 4;
20992 case rep_prefix_8_byte:
20995 case rep_prefix_4_byte:
20998 case rep_prefix_1_byte:
21002 need_zero_guard = true;
21006 epilogue_size_needed = size_needed;
21008 /* Step 1: Prologue guard. */
21011   /* The alignment code needs count to be in a register.  */
21011 if (CONST_INT_P (count_exp) && desired_align > align)
21013 if (INTVAL (count_exp) > desired_align
21014 && INTVAL (count_exp) > size_needed)
21017 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
21018 if (align_bytes <= 0)
21021 align_bytes = desired_align - align_bytes;
21023 if (align_bytes == 0)
21025 enum machine_mode mode = SImode;
21026 if (TARGET_64BIT && (count & ~0xffffffff))
21028 count_exp = force_reg (mode, count_exp);
21031   /* Do the cheap promotion to allow better CSE across the
21032      main loop and epilogue (i.e. one load of the big constant in
21033      front of all the code).  */
21034 if (CONST_INT_P (val_exp))
21035 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
21036 desired_align, align);
21037 /* Ensure that alignment prologue won't copy past end of block. */
21038 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
21040 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
21041      /* The epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
21042         Make sure it is a power of 2.  */
21043 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
21045   /* To improve performance of small blocks, we jump around the VAL
21046      promoting code.  This means that if the promoted VAL is not constant,
21047      we might not use it in the epilogue and have to use the byte loop variant.  */
21049 if (epilogue_size_needed > 2 && !promoted_val)
21050 force_loopy_epilogue = true;
21053 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
21055      /* If the main algorithm works on QImode, no epilogue is needed.
21056         For small sizes just don't align anything.  */
21057 if (size_needed == 1)
21058 desired_align = align;
21065 label = gen_label_rtx ();
21066 emit_cmp_and_jump_insns (count_exp,
21067 GEN_INT (epilogue_size_needed),
21068 LTU, 0, counter_mode (count_exp), 1, label);
21069 if (expected_size == -1 || expected_size <= epilogue_size_needed)
21070 predict_jump (REG_BR_PROB_BASE * 60 / 100);
21072 predict_jump (REG_BR_PROB_BASE * 20 / 100);
21075 if (dynamic_check != -1)
21077 rtx hot_label = gen_label_rtx ();
21078 jump_around_label = gen_label_rtx ();
21079 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
21080 LEU, 0, counter_mode (count_exp), 1, hot_label);
21081 predict_jump (REG_BR_PROB_BASE * 90 / 100);
21082 set_storage_via_libcall (dst, count_exp, val_exp, false);
21083 emit_jump (jump_around_label);
21084 emit_label (hot_label);
21087 /* Step 2: Alignment prologue. */
21089   /* Do the expensive promotion once we have branched off the small blocks.  */
21091 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
21092 desired_align, align);
21093 gcc_assert (desired_align >= 1 && align >= 1);
21095 if (desired_align > align)
21097 if (align_bytes == 0)
21099	  /* Except for the first move in the epilogue, we no longer know the
21100	     constant offset in the aliasing info.  It doesn't seem worth the
21101	     pain to maintain it for the first move, so throw away the info early.  */
21103 dst = change_address (dst, BLKmode, destreg);
21104 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
21109 /* If we know how many bytes need to be stored before dst is
21110 sufficiently aligned, maintain aliasing info accurately. */
21111 dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
21112 desired_align, align_bytes);
21113 count_exp = plus_constant (count_exp, -align_bytes);
21114 count -= align_bytes;
21116 if (need_zero_guard
21117 && (count < (unsigned HOST_WIDE_INT) size_needed
21118 || (align_bytes == 0
21119 && count < ((unsigned HOST_WIDE_INT) size_needed
21120 + desired_align - align))))
21122       /* It is possible that we copied enough so the main loop will not execute.  */
21124 gcc_assert (size_needed > 1);
21125 if (label == NULL_RTX)
21126 label = gen_label_rtx ();
21127 emit_cmp_and_jump_insns (count_exp,
21128 GEN_INT (size_needed),
21129 LTU, 0, counter_mode (count_exp), 1, label);
21130 if (expected_size == -1
21131 || expected_size < (desired_align - align) / 2 + size_needed)
21132 predict_jump (REG_BR_PROB_BASE * 20 / 100);
21134 predict_jump (REG_BR_PROB_BASE * 60 / 100);
21137 if (label && size_needed == 1)
21139 emit_label (label);
21140 LABEL_NUSES (label) = 1;
21142 promoted_val = val_exp;
21143 epilogue_size_needed = 1;
21145 else if (label == NULL_RTX)
21146 epilogue_size_needed = size_needed;
21148 /* Step 3: Main loop. */
21154 gcc_unreachable ();
21156 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
21157 count_exp, QImode, 1, expected_size);
21160 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
21161 count_exp, Pmode, 1, expected_size);
21163 case unrolled_loop:
21164 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
21165 count_exp, Pmode, 4, expected_size);
21167 case rep_prefix_8_byte:
21168 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
21171 case rep_prefix_4_byte:
21172 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
21175 case rep_prefix_1_byte:
21176 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
21180   /* Properly adjust the offset of the dest memory for aliasing.  */
21181 if (CONST_INT_P (count_exp))
21182 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
21183 (count / size_needed) * size_needed);
21185 dst = change_address (dst, BLKmode, destreg);
21187 /* Step 4: Epilogue to copy the remaining bytes. */
21191   /* When the main loop is done, COUNT_EXP might hold the original count,
21192      while we want to set only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
21193      The epilogue code will actually set COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
21194      bytes.  Compensate if needed.  */
21196 if (size_needed < epilogue_size_needed)
21199 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
21200 GEN_INT (size_needed - 1), count_exp, 1,
21202 if (tmp != count_exp)
21203 emit_move_insn (count_exp, tmp);
21205 emit_label (label);
21206 LABEL_NUSES (label) = 1;
21209 if (count_exp != const0_rtx && epilogue_size_needed > 1)
21211 if (force_loopy_epilogue)
21212 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
21213 epilogue_size_needed);
21215 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
21216 epilogue_size_needed);
21218 if (jump_around_label)
21219 emit_label (jump_around_label);
21223 /* Expand the appropriate insns for doing strlen if not just doing repnz; scasb:
21226    out = result, initialized with the start address
21227    align_rtx = alignment of the address.
21228    scratch = scratch register, initialized with the start address when
21229    not aligned, otherwise undefined
21231 This is just the body. It needs the initializations mentioned above and
21232 some address computing at the end. These things are done in i386.md. */
21235 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
21239 rtx align_2_label = NULL_RTX;
21240 rtx align_3_label = NULL_RTX;
21241 rtx align_4_label = gen_label_rtx ();
21242 rtx end_0_label = gen_label_rtx ();
21244 rtx tmpreg = gen_reg_rtx (SImode);
21245 rtx scratch = gen_reg_rtx (SImode);
21249 if (CONST_INT_P (align_rtx))
21250 align = INTVAL (align_rtx);
21252 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
21254 /* Is there a known alignment and is it less than 4? */
21257 rtx scratch1 = gen_reg_rtx (Pmode);
21258 emit_move_insn (scratch1, out);
21259 /* Is there a known alignment and is it not 2? */
21262 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
21263 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
21265 /* Leave just the 3 lower bits. */
21266 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
21267 NULL_RTX, 0, OPTAB_WIDEN);
21269 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
21270 Pmode, 1, align_4_label);
21271 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
21272 Pmode, 1, align_2_label);
21273 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
21274 Pmode, 1, align_3_label);
21278	  /* Since the alignment is 2, we have to check 2 or 0 bytes;
21279	     check whether it is aligned to a 4-byte boundary.  */
21281 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
21282 NULL_RTX, 0, OPTAB_WIDEN);
21284 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
21285 Pmode, 1, align_4_label);
21288 mem = change_address (src, QImode, out);
21290 /* Now compare the bytes. */
21292   /* Compare the first n unaligned bytes on a byte-by-byte basis.  */
21293 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
21294 QImode, 1, end_0_label);
21296 /* Increment the address. */
21297 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
21299 /* Not needed with an alignment of 2 */
21302 emit_label (align_2_label);
21304 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
21307 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
21309 emit_label (align_3_label);
21312 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
21315 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
21318   /* Generate a loop to check 4 bytes at a time.  It is not a good idea to
21319      align this loop; it only bloats the program and does not help convergence.  */
21321 emit_label (align_4_label);
21323 mem = change_address (src, SImode, out);
21324 emit_move_insn (scratch, mem);
21325 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
21327   /* This formula yields a nonzero result iff one of the bytes is zero.
21328      This saves three branches inside the loop and many cycles.  */
21330 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
21331 emit_insn (gen_one_cmplsi2 (scratch, scratch));
21332 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
21333 emit_insn (gen_andsi3 (tmpreg, tmpreg,
21334 gen_int_mode (0x80808080, SImode)));
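  /* In plain C, the test computed above is
       (x - 0x01010101U) & ~x & 0x80808080U
     which is nonzero exactly when some byte of X is zero: e.g.
     0x40003000 gives a nonzero result, while 0x01010101 gives 0.  */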
21335 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
21340 rtx reg = gen_reg_rtx (SImode);
21341 rtx reg2 = gen_reg_rtx (Pmode);
21342 emit_move_insn (reg, tmpreg);
21343 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
21345 /* If zero is not in the first two bytes, move two bytes forward. */
21346 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
21347 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
21348 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
21349 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
21350 gen_rtx_IF_THEN_ELSE (SImode, tmp,
21353       /* Emit the lea manually to avoid clobbering the flags.  */
21354 emit_insn (gen_rtx_SET (SImode, reg2,
21355 gen_rtx_PLUS (Pmode, out, const2_rtx)));
21357 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
21358 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
21359 emit_insn (gen_rtx_SET (VOIDmode, out,
21360 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
21366 rtx end_2_label = gen_label_rtx ();
21367 /* Is zero in the first two bytes? */
21369 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
21370 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
21371 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
21372 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
21373 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
21375 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
21376 JUMP_LABEL (tmp) = end_2_label;
21378 /* Not in the first two. Move two bytes forward. */
21379 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
21380 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
21382 emit_label (end_2_label);
21386 /* Avoid branch in fixing the byte. */
21387 tmpreg = gen_lowpart (QImode, tmpreg);
21388 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
21389 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
21390 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
21391 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
21393 emit_label (end_0_label);
21396 /* Expand strlen. */
21399 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
21401 rtx addr, scratch1, scratch2, scratch3, scratch4;
21403   /* The generic case of the strlen expander is long.  Avoid expanding it
21404      unless TARGET_INLINE_ALL_STRINGOPS.  */
21406 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
21407 && !TARGET_INLINE_ALL_STRINGOPS
21408 && !optimize_insn_for_size_p ()
21409 && (!CONST_INT_P (align) || INTVAL (align) < 4))
21412 addr = force_reg (Pmode, XEXP (src, 0));
21413 scratch1 = gen_reg_rtx (Pmode);
21415 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
21416 && !optimize_insn_for_size_p ())
21418      /* It seems that some optimizers do not combine a call like
21419	   foo (strlen (bar), strlen (bar));
21420	 when the move and the subtraction are done here, whereas the length
21421	 is calculated just once when these instructions are emitted inside
21422	 output_strlen_unroll().  But since &bar[strlen (bar)] is often used,
21423	 and this way uses one fewer register for the lifetime of
21424	 output_strlen_unroll(), this is better.  */
21426 emit_move_insn (out, addr);
21428 ix86_expand_strlensi_unroll_1 (out, src, align);
21430 /* strlensi_unroll_1 returns the address of the zero at the end of
21431 the string, like memchr(), so compute the length by subtracting
21432 the start address. */
21433 emit_insn (ix86_gen_sub3 (out, out, addr));
21439 /* Can't use this if the user has appropriated eax, ecx, or edi. */
21440 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
21443 scratch2 = gen_reg_rtx (Pmode);
21444 scratch3 = gen_reg_rtx (Pmode);
21445 scratch4 = force_reg (Pmode, constm1_rtx);
21447 emit_move_insn (scratch3, addr);
21448 eoschar = force_reg (QImode, eoschar);
21450 src = replace_equiv_address_nv (src, scratch3);
21452 /* If .md starts supporting :P, this can be done in .md. */
21453 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
21454 scratch4), UNSPEC_SCAS);
21455 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
21456 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
21457 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
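      /* A worked example of the arithmetic above: repnz scasb starts with
	 scratch4 == -1 in the count register and decrements it once per
	 byte scanned, including the terminating zero.  For a string of
	 length L it scans L + 1 bytes, leaving -(L + 2); the one's
	 complement of that is L + 1, and adding -1 yields OUT == L.  */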
21462 /* For a given symbol (function), construct code to compute the address
21463    of its PLT entry in the large x86-64 PIC model.  */
21465 construct_plt_address (rtx symbol)
21467 rtx tmp = gen_reg_rtx (Pmode);
21468 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
21470 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
21471 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
21473 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
21474 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
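  /* In assembly terms the emitted sequence is roughly (register name
     illustrative):
       movabs $symbol@PLTOFF, %tmp
       add    %pic_base, %tmp
     i.e. the PLT entry address is the PLTOFF constant plus the PIC base
     held in pic_offset_table_rtx.  */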
21479 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
21481 rtx pop, bool sibcall)
21483 rtx use = NULL, call;
21485 if (pop == const0_rtx)
21487 gcc_assert (!TARGET_64BIT || !pop);
21489 if (TARGET_MACHO && !TARGET_64BIT)
21492 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
21493 fnaddr = machopic_indirect_call_target (fnaddr);
21498 /* Static functions and indirect calls don't need the pic register. */
21499 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
21500 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
21501 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
21502 use_reg (&use, pic_offset_table_rtx);
21505 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
21507 rtx al = gen_rtx_REG (QImode, AX_REG);
21508 emit_move_insn (al, callarg2);
21509 use_reg (&use, al);
21512 if (ix86_cmodel == CM_LARGE_PIC
21514 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
21515 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
21516 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
21518 ? !sibcall_insn_operand (XEXP (fnaddr, 0), Pmode)
21519 : !call_insn_operand (XEXP (fnaddr, 0), Pmode))
21521 fnaddr = XEXP (fnaddr, 0);
21522 if (GET_MODE (fnaddr) != Pmode)
21523 fnaddr = convert_to_mode (Pmode, fnaddr, 1);
21524 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (Pmode, fnaddr));
21527 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
21529 call = gen_rtx_SET (VOIDmode, retval, call);
21532 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
21533 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
21534 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
21536 if (TARGET_64BIT_MS_ABI
21537 && (!callarg2 || INTVAL (callarg2) != -2))
21539       /* We need to represent that the SI and DI registers are clobbered by SYSV calls.  */
21541 static int clobbered_registers[] = {
21542 XMM6_REG, XMM7_REG, XMM8_REG,
21543 XMM9_REG, XMM10_REG, XMM11_REG,
21544 XMM12_REG, XMM13_REG, XMM14_REG,
21545 XMM15_REG, SI_REG, DI_REG
21548 rtx vec[ARRAY_SIZE (clobbered_registers) + 2];
21549 rtx unspec = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
21550 UNSPEC_MS_TO_SYSV_CALL);
21554 for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
21555 vec[i + 2] = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
21558 (SSE_REGNO_P (clobbered_registers[i])
21560 clobbered_registers[i]));
21562 call = gen_rtx_PARALLEL (VOIDmode,
21563 gen_rtvec_v (ARRAY_SIZE (clobbered_registers)
21567 /* Add UNSPEC_CALL_NEEDS_VZEROUPPER decoration. */
21568 if (TARGET_VZEROUPPER)
21573 if (cfun->machine->callee_pass_avx256_p)
21575 if (cfun->machine->callee_return_avx256_p)
21576 avx256 = callee_return_pass_avx256;
21578 avx256 = callee_pass_avx256;
21580 else if (cfun->machine->callee_return_avx256_p)
21581 avx256 = callee_return_avx256;
21583 avx256 = call_no_avx256;
21585 if (reload_completed)
21586 emit_insn (gen_avx_vzeroupper (GEN_INT (avx256)));
21589 unspec = gen_rtx_UNSPEC (VOIDmode,
21590 gen_rtvec (1, GEN_INT (avx256)),
21591 UNSPEC_CALL_NEEDS_VZEROUPPER);
21592 call = gen_rtx_PARALLEL (VOIDmode,
21593 gen_rtvec (2, call, unspec));
21597 call = emit_call_insn (call);
21599 CALL_INSN_FUNCTION_USAGE (call) = use;
21605 ix86_split_call_vzeroupper (rtx insn, rtx vzeroupper)
21607 rtx call = XVECEXP (PATTERN (insn), 0, 0);
21608 emit_insn (gen_avx_vzeroupper (vzeroupper));
21609 emit_call_insn (call);
21612 /* Output the assembly for a call instruction. */
21615 ix86_output_call_insn (rtx insn, rtx call_op)
21617 bool direct_p = constant_call_address_operand (call_op, Pmode);
21618 bool seh_nop_p = false;
21621 if (SIBLING_CALL_P (insn))
21625 /* SEH epilogue detection requires the indirect branch case
21626 to include REX.W. */
21627 else if (TARGET_SEH)
21628 xasm = "rex.W jmp %A0";
21632 output_asm_insn (xasm, &call_op);
21636 /* SEH unwinding can require an extra nop to be emitted in several
21637 circumstances. Determine if we have one of those. */
21642 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
21644 /* If we get to another real insn, we don't need the nop. */
21648 /* If we get to the epilogue note, prevent a catch region from
21649 being adjacent to the standard epilogue sequence. If non-
21650 call-exceptions, we'll have done this during epilogue emission. */
21651 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
21652 && !flag_non_call_exceptions
21653 && !can_throw_internal (insn))
21660 /* If we didn't find a real insn following the call, prevent the
21661 unwinder from looking into the next function. */
21667 xasm = "call\t%P0";
21669 xasm = "call\t%A0";
21671 output_asm_insn (xasm, &call_op);
21679 /* Clear stack slot assignments remembered from previous functions.
21680    This is called from INIT_EXPANDERS once before RTL is emitted for each function.  */
21683 static struct machine_function *
21684 ix86_init_machine_status (void)
21686 struct machine_function *f;
21688 f = ggc_alloc_cleared_machine_function ();
21689 f->use_fast_prologue_epilogue_nregs = -1;
21690 f->tls_descriptor_call_expanded_p = 0;
21691 f->call_abi = ix86_abi;
21696 /* Return a MEM corresponding to a stack slot with mode MODE.
21697 Allocate a new slot if necessary.
21699 The RTL for a function can have several slots available: N is
21700 which slot to use. */
21703 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
21705 struct stack_local_entry *s;
21707 gcc_assert (n < MAX_386_STACK_LOCALS);
21709 /* Virtual slot is valid only before vregs are instantiated. */
21710 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
21712 for (s = ix86_stack_locals; s; s = s->next)
21713 if (s->mode == mode && s->n == n)
21714 return copy_rtx (s->rtl);
21716 s = ggc_alloc_stack_local_entry ();
21719 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
21721 s->next = ix86_stack_locals;
21722 ix86_stack_locals = s;
21726 /* Calculate the length of the memory address in the instruction
21727 encoding. Does not include the one-byte modrm, opcode, or prefix. */
21730 memory_address_length (rtx addr)
21732 struct ix86_address parts;
21733 rtx base, index, disp;
21737 if (GET_CODE (addr) == PRE_DEC
21738 || GET_CODE (addr) == POST_INC
21739 || GET_CODE (addr) == PRE_MODIFY
21740 || GET_CODE (addr) == POST_MODIFY)
21743 ok = ix86_decompose_address (addr, &parts);
21746 if (parts.base && GET_CODE (parts.base) == SUBREG)
21747 parts.base = SUBREG_REG (parts.base);
21748 if (parts.index && GET_CODE (parts.index) == SUBREG)
21749 parts.index = SUBREG_REG (parts.index);
21752 index = parts.index;
21757   /* Rule of thumb: - esp as the base always wants an index,
21758      - ebp as the base always wants a displacement,
21759      - r12 as the base always wants an index,
21760      - r13 as the base always wants a displacement.  */
21762 /* Register Indirect. */
21763 if (base && !index && !disp)
21765       /* esp (for its index) and ebp (for its displacement) need
21766	  the two-byte modrm form.  Similarly for r12 and r13 in 64-bit mode.  */
21769 && (addr == arg_pointer_rtx
21770 || addr == frame_pointer_rtx
21771 || REGNO (addr) == SP_REG
21772 || REGNO (addr) == BP_REG
21773 || REGNO (addr) == R12_REG
21774 || REGNO (addr) == R13_REG))
21778 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
21779 is not disp32, but disp32(%rip), so for disp32
21780 SIB byte is needed, unless print_operand_address
21781      optimizes it into disp32(%rip) or (%rip) is implied by UNSPEC.  */
21783 else if (disp && !base && !index)
21790 if (GET_CODE (disp) == CONST)
21791 symbol = XEXP (disp, 0);
21792 if (GET_CODE (symbol) == PLUS
21793 && CONST_INT_P (XEXP (symbol, 1)))
21794 symbol = XEXP (symbol, 0);
21796 if (GET_CODE (symbol) != LABEL_REF
21797 && (GET_CODE (symbol) != SYMBOL_REF
21798 || SYMBOL_REF_TLS_MODEL (symbol) != 0)
21799 && (GET_CODE (symbol) != UNSPEC
21800 || (XINT (symbol, 1) != UNSPEC_GOTPCREL
21801 && XINT (symbol, 1) != UNSPEC_PCREL
21802 && XINT (symbol, 1) != UNSPEC_GOTNTPOFF)))
21809 /* Find the length of the displacement constant. */
21812 if (base && satisfies_constraint_K (disp))
21817 /* ebp always wants a displacement. Similarly r13. */
21818 else if (base && REG_P (base)
21819 && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
21822 /* An index requires the two-byte modrm form.... */
21824 /* ...like esp (or r12), which always wants an index. */
21825 || base == arg_pointer_rtx
21826 || base == frame_pointer_rtx
21827 || (base && REG_P (base)
21828 && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
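/* Illustrative values this function returns (bytes beyond the one-byte
   modrm; assuming the usual encodings, not exhaustive):
     (%rax)         -> 0
     (%rsp)         -> 1  (SIB byte)
     8(%rbp)        -> 1  (disp8)
     1024(%rbx)     -> 4  (disp32)
     (%rax,%rbx,4)  -> 1  (SIB byte)
     sym(%rip)      -> 4  (disp32)  */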
21845 /* Compute the default value for the "length_immediate" attribute.  When SHORTFORM
21846    is set, expect that the insn has an 8-bit immediate alternative.  */
21848 ix86_attr_length_immediate_default (rtx insn, bool shortform)
21852 extract_insn_cached (insn);
21853 for (i = recog_data.n_operands - 1; i >= 0; --i)
21854 if (CONSTANT_P (recog_data.operand[i]))
21856 enum attr_mode mode = get_attr_mode (insn);
21859 if (shortform && CONST_INT_P (recog_data.operand[i]))
21861 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
21868 ival = trunc_int_for_mode (ival, HImode);
21871 ival = trunc_int_for_mode (ival, SImode);
21876 if (IN_RANGE (ival, -128, 127))
21893	      /* Immediates for DImode instructions are encoded as 32-bit sign-extended values.  */
21898 fatal_insn ("unknown insn mode", insn);
21903 /* Compute default value for "length_address" attribute. */
21905 ix86_attr_length_address_default (rtx insn)
21909 if (get_attr_type (insn) == TYPE_LEA)
21911 rtx set = PATTERN (insn), addr;
21913 if (GET_CODE (set) == PARALLEL)
21914 set = XVECEXP (set, 0, 0);
21916 gcc_assert (GET_CODE (set) == SET);
21918 addr = SET_SRC (set);
21919 if (TARGET_64BIT && get_attr_mode (insn) == MODE_SI)
21921 if (GET_CODE (addr) == ZERO_EXTEND)
21922 addr = XEXP (addr, 0);
21923 if (GET_CODE (addr) == SUBREG)
21924 addr = SUBREG_REG (addr);
21927 return memory_address_length (addr);
21930 extract_insn_cached (insn);
21931 for (i = recog_data.n_operands - 1; i >= 0; --i)
21932 if (MEM_P (recog_data.operand[i]))
21934 constrain_operands_cached (reload_completed);
21935 if (which_alternative != -1)
21937 const char *constraints = recog_data.constraints[i];
21938 int alt = which_alternative;
21940 while (*constraints == '=' || *constraints == '+')
21943 while (*constraints++ != ',')
21945 /* Skip ignored operands. */
21946 if (*constraints == 'X')
21949 return memory_address_length (XEXP (recog_data.operand[i], 0));
21954 /* Compute the default value for the "length_vex" attribute.  It includes
21955    the 2- or 3-byte VEX prefix and 1 opcode byte.  */
21958 ix86_attr_length_vex_default (rtx insn, bool has_0f_opcode, bool has_vex_w)
21962   /* Only the 0f opcode can use the 2-byte VEX prefix, and the VEX W bit
21963      needs the 3-byte VEX prefix.  */
21964 if (!has_0f_opcode || has_vex_w)
21967   /* We can always use the 2-byte VEX prefix in 32-bit mode.  */
21971 extract_insn_cached (insn);
21973 for (i = recog_data.n_operands - 1; i >= 0; --i)
21974 if (REG_P (recog_data.operand[i]))
21976 /* REX.W bit uses 3 byte VEX prefix. */
21977 if (GET_MODE (recog_data.operand[i]) == DImode
21978 && GENERAL_REG_P (recog_data.operand[i]))
21983 /* REX.X or REX.B bits use 3 byte VEX prefix. */
21984 if (MEM_P (recog_data.operand[i])
21985 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
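/* For example, vaddps %xmm1, %xmm2, %xmm3 fits the 2-byte (C5) VEX prefix,
   giving 2 + 1; a DImode general register operand (REX.W) or a memory
   operand using %r8-%r15 as base or index (REX.X/REX.B) forces the 3-byte
   (C4) form, giving 3 + 1.  */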
21992 /* Return the maximum number of instructions a CPU can issue.  */
21995 ix86_issue_rate (void)
21999 case PROCESSOR_PENTIUM:
22000 case PROCESSOR_ATOM:
22004 case PROCESSOR_PENTIUMPRO:
22005 case PROCESSOR_PENTIUM4:
22006 case PROCESSOR_CORE2_32:
22007 case PROCESSOR_CORE2_64:
22008 case PROCESSOR_COREI7_32:
22009 case PROCESSOR_COREI7_64:
22010 case PROCESSOR_ATHLON:
22012 case PROCESSOR_AMDFAM10:
22013 case PROCESSOR_NOCONA:
22014 case PROCESSOR_GENERIC32:
22015 case PROCESSOR_GENERIC64:
22016 case PROCESSOR_BDVER1:
22017 case PROCESSOR_BDVER2:
22018 case PROCESSOR_BTVER1:
22026 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads the flags set
22027    by DEP_INSN and nothing else set by DEP_INSN.  */
22030 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
22034 /* Simplify the test for uninteresting insns. */
22035 if (insn_type != TYPE_SETCC
22036 && insn_type != TYPE_ICMOV
22037 && insn_type != TYPE_FCMOV
22038 && insn_type != TYPE_IBR)
22041 if ((set = single_set (dep_insn)) != 0)
22043 set = SET_DEST (set);
22046 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
22047 && XVECLEN (PATTERN (dep_insn), 0) == 2
22048 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
22049 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
22051 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
22052       set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
22057 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
22060 /* This test is true if the dependent insn reads the flags but
22061 not any other potentially set register. */
22062 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
22065 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
22071 /* Return true iff USE_INSN has a memory address with operands set by SET_INSN.  */
22075 ix86_agi_dependent (rtx set_insn, rtx use_insn)
22078 extract_insn_cached (use_insn);
22079 for (i = recog_data.n_operands - 1; i >= 0; --i)
22080 if (MEM_P (recog_data.operand[i]))
22082 rtx addr = XEXP (recog_data.operand[i], 0);
22083 return modified_in_p (addr, set_insn) != 0;
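/* A classic AGI case on Pentium (illustrative):
     addl $4, %ebx
     movl %eax, (%ebx)
   The store's address depends on %ebx, which the immediately preceding
   insn modifies, so the address generation interlock costs an extra
   cycle; ix86_adjust_cost models this below.  */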
22089 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
22091 enum attr_type insn_type, dep_insn_type;
22092 enum attr_memory memory;
22094 int dep_insn_code_number;
22096 /* Anti and output dependencies have zero cost on all CPUs. */
22097 if (REG_NOTE_KIND (link) != 0)
22100 dep_insn_code_number = recog_memoized (dep_insn);
22102 /* If we can't recognize the insns, we can't really do anything. */
22103 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
22106 insn_type = get_attr_type (insn);
22107 dep_insn_type = get_attr_type (dep_insn);
22111 case PROCESSOR_PENTIUM:
22112 /* Address Generation Interlock adds a cycle of latency. */
22113 if (insn_type == TYPE_LEA)
22115 rtx addr = PATTERN (insn);
22117 if (GET_CODE (addr) == PARALLEL)
22118 addr = XVECEXP (addr, 0, 0);
22120 gcc_assert (GET_CODE (addr) == SET);
22122 addr = SET_SRC (addr);
22123 if (modified_in_p (addr, dep_insn))
22126 else if (ix86_agi_dependent (dep_insn, insn))
22129 /* ??? Compares pair with jump/setcc. */
22130 if (ix86_flags_dependent (insn, dep_insn, insn_type))
22133 /* Floating point stores require the value to be ready one cycle earlier. */
22134 if (insn_type == TYPE_FMOV
22135 && get_attr_memory (insn) == MEMORY_STORE
22136 && !ix86_agi_dependent (dep_insn, insn))
22140 case PROCESSOR_PENTIUMPRO:
22141 memory = get_attr_memory (insn);
22143 /* INT->FP conversion is expensive. */
22144 if (get_attr_fp_int_src (dep_insn))
22147 /* There is one cycle extra latency between an FP op and a store. */
22148 if (insn_type == TYPE_FMOV
22149 && (set = single_set (dep_insn)) != NULL_RTX
22150 && (set2 = single_set (insn)) != NULL_RTX
22151 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
22152 && MEM_P (SET_DEST (set2)))
22155 /* Show the ability of the reorder buffer to hide the latency of a load
22156 by executing it in parallel with the previous instruction when the
22157 previous instruction is not needed to compute the address. */
22158 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
22159 && !ix86_agi_dependent (dep_insn, insn))
22161 /* Claim moves to take one cycle, as the core can issue one load
22162 at a time and the next load can start a cycle later. */
22163 if (dep_insn_type == TYPE_IMOV
22164 || dep_insn_type == TYPE_FMOV)
22172 memory = get_attr_memory (insn);
22174 /* The esp dependency is resolved before the instruction is really
22175 finished. */
22176 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
22177 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
22180 /* INT->FP conversion is expensive. */
22181 if (get_attr_fp_int_src (dep_insn))
22184 /* Show the ability of the reorder buffer to hide the latency of a load
22185 by executing it in parallel with the previous instruction when the
22186 previous instruction is not needed to compute the address. */
22187 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
22188 && !ix86_agi_dependent (dep_insn, insn))
22190 /* Claim moves to take one cycle, as the core can issue one load
22191 at a time and the next load can start a cycle later. */
22192 if (dep_insn_type == TYPE_IMOV
22193 || dep_insn_type == TYPE_FMOV)
22202 case PROCESSOR_ATHLON:
22204 case PROCESSOR_AMDFAM10:
22205 case PROCESSOR_BDVER1:
22206 case PROCESSOR_BDVER2:
22207 case PROCESSOR_BTVER1:
22208 case PROCESSOR_ATOM:
22209 case PROCESSOR_GENERIC32:
22210 case PROCESSOR_GENERIC64:
22211 memory = get_attr_memory (insn);
22213 /* Show the ability of the reorder buffer to hide the latency of a load
22214 by executing it in parallel with the previous instruction when the
22215 previous instruction is not needed to compute the address. */
22216 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
22217 && !ix86_agi_dependent (dep_insn, insn))
22219 enum attr_unit unit = get_attr_unit (insn);
22222 /* Because of the difference between the length of the integer and
22223 floating unit pipeline preparation stages, the memory operands
22224 for floating point are cheaper.
22226 ??? For Athlon the difference is most probably 2. */
22227 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
22230 loadcost = TARGET_ATHLON ? 2 : 0;
22232 if (cost >= loadcost)
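/* Worked example of the flags-dependence case above (added commentary,
   assuming the usual cmp/jcc pairing):

     cmpl %eax, %ebx        <- dep_insn sets only FLAGS_REG
     je   .Llabel           <- insn (TYPE_IBR) reads only the flags

   ix86_flags_dependent returns true for this pair, so on Pentium the
   compare and the branch are modeled as pairing rather than executing
   serially.  For the Athlon-family load case just above, the elided
   tail presumably subtracts loadcost from the dependence cost,
   clamping at zero.  */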
22245 /* How many alternative schedules to try. This should be as wide as the
22246 scheduling freedom in the DFA, but no wider. Making this value too
22247 large results in extra work for the scheduler. */
22250 ia32_multipass_dfa_lookahead (void)
22254 case PROCESSOR_PENTIUM:
22257 case PROCESSOR_PENTIUMPRO:
22261 case PROCESSOR_CORE2_32:
22262 case PROCESSOR_CORE2_64:
22263 case PROCESSOR_COREI7_32:
22264 case PROCESSOR_COREI7_64:
22265 /* Generally, we want haifa-sched:max_issue() to look ahead as far as
22266 the number of instructions that can be executed in a cycle, i.e.,
22267 issue_rate. I wonder why tuning for many CPUs does not do this. */
22268 return ix86_issue_rate ();
22277 /* Model decoder of Core 2/i7.
22278 Below hooks for multipass scheduling (see haifa-sched.c:max_issue)
22279 track the instruction fetch block boundaries and make sure that long
22280 (9+ bytes) instructions are assigned to D0. */
22282 /* Maximum length of an insn that can be handled by
22283 a secondary decoder unit. '8' for Core 2/i7. */
22284 static int core2i7_secondary_decoder_max_insn_size;
22286 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
22287 '16' for Core 2/i7. */
22288 static int core2i7_ifetch_block_size;
22290 /* Maximum number of instructions decoder can handle per cycle.
22291 '6' for Core 2/i7. */
22292 static int core2i7_ifetch_block_max_insns;
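/* Illustrative note on the parameters above (added commentary): with the
   Core 2/i7 values 8/16/6, a 10-byte insn can only be issued as the
   first insn of a cycle (decoder D0, since 10 > 8); after five 3-byte
   insns (15 bytes), a further 2-byte insn would exceed the 16-byte
   ifetch block and is masked out of ready_try even though only 5 of
   the 6 decoder slots are used.  */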
22294 typedef struct ix86_first_cycle_multipass_data_ *
22295 ix86_first_cycle_multipass_data_t;
22296 typedef const struct ix86_first_cycle_multipass_data_ *
22297 const_ix86_first_cycle_multipass_data_t;
22299 /* A variable to store target state across calls to max_issue within
22300 one cycle. */
22301 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
22302 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
22304 /* Initialize DATA. */
22306 core2i7_first_cycle_multipass_init (void *_data)
22308 ix86_first_cycle_multipass_data_t data
22309 = (ix86_first_cycle_multipass_data_t) _data;
22311 data->ifetch_block_len = 0;
22312 data->ifetch_block_n_insns = 0;
22313 data->ready_try_change = NULL;
22314 data->ready_try_change_size = 0;
22317 /* Advancing the cycle; reset ifetch block counts. */
22319 core2i7_dfa_post_advance_cycle (void)
22321 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
22323 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
22325 data->ifetch_block_len = 0;
22326 data->ifetch_block_n_insns = 0;
22329 static int min_insn_size (rtx);
22331 /* Filter out insns from ready_try that the core will not be able to issue
22332 on the current cycle due to decoder restrictions. */
22334 core2i7_first_cycle_multipass_filter_ready_try
22335 (const_ix86_first_cycle_multipass_data_t data,
22336 char *ready_try, int n_ready, bool first_cycle_insn_p)
22343 if (ready_try[n_ready])
22346 insn = get_ready_element (n_ready);
22347 insn_size = min_insn_size (insn);
22349 if (/* If this insn is too long for a secondary decoder ... */
22350 (!first_cycle_insn_p
22351 && insn_size > core2i7_secondary_decoder_max_insn_size)
22352 /* ... or it would not fit into the ifetch block ... */
22353 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
22354 /* ... or the decoder is full already ... */
22355 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
22356 /* ... mask the insn out. */
22358 ready_try[n_ready] = 1;
22360 if (data->ready_try_change)
22361 SET_BIT (data->ready_try_change, n_ready);
22366 /* Prepare for a new round of multipass lookahead scheduling. */
22368 core2i7_first_cycle_multipass_begin (void *_data, char *ready_try, int n_ready,
22369 bool first_cycle_insn_p)
22371 ix86_first_cycle_multipass_data_t data
22372 = (ix86_first_cycle_multipass_data_t) _data;
22373 const_ix86_first_cycle_multipass_data_t prev_data
22374 = ix86_first_cycle_multipass_data;
22376 /* Restore the state from the end of the previous round. */
22377 data->ifetch_block_len = prev_data->ifetch_block_len;
22378 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
22380 /* Filter instructions that cannot be issued on the current cycle due to
22381 decoder restrictions. */
22382 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
22383 first_cycle_insn_p);
22386 /* INSN is being issued in the current solution. Account for its impact on
22387 the decoder model. */
22389 core2i7_first_cycle_multipass_issue (void *_data, char *ready_try, int n_ready,
22390 rtx insn, const void *_prev_data)
22392 ix86_first_cycle_multipass_data_t data
22393 = (ix86_first_cycle_multipass_data_t) _data;
22394 const_ix86_first_cycle_multipass_data_t prev_data
22395 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
22397 int insn_size = min_insn_size (insn);
22399 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
22400 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
22401 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
22402 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
22404 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
22405 if (!data->ready_try_change)
22407 data->ready_try_change = sbitmap_alloc (n_ready);
22408 data->ready_try_change_size = n_ready;
22410 else if (data->ready_try_change_size < n_ready)
22412 data->ready_try_change = sbitmap_resize (data->ready_try_change,
22414 data->ready_try_change_size = n_ready;
22416 sbitmap_zero (data->ready_try_change);
22418 /* Filter out insns from ready_try that the core will not be able to issue
22419 on the current cycle due to decoder restrictions. */
22420 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
22424 /* Revert the effect on ready_try. */
22426 core2i7_first_cycle_multipass_backtrack (const void *_data,
22428 int n_ready ATTRIBUTE_UNUSED)
22430 const_ix86_first_cycle_multipass_data_t data
22431 = (const_ix86_first_cycle_multipass_data_t) _data;
22432 unsigned int i = 0;
22433 sbitmap_iterator sbi;
22435 gcc_assert (sbitmap_last_set_bit (data->ready_try_change) < n_ready);
22436 EXECUTE_IF_SET_IN_SBITMAP (data->ready_try_change, 0, i, sbi)
22442 /* Save the result of multipass lookahead scheduling for the next round. */
22444 core2i7_first_cycle_multipass_end (const void *_data)
22446 const_ix86_first_cycle_multipass_data_t data
22447 = (const_ix86_first_cycle_multipass_data_t) _data;
22448 ix86_first_cycle_multipass_data_t next_data
22449 = ix86_first_cycle_multipass_data;
22453 next_data->ifetch_block_len = data->ifetch_block_len;
22454 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
22458 /* Deallocate target data. */
22460 core2i7_first_cycle_multipass_fini (void *_data)
22462 ix86_first_cycle_multipass_data_t data
22463 = (ix86_first_cycle_multipass_data_t) _data;
22465 if (data->ready_try_change)
22467 sbitmap_free (data->ready_try_change);
22468 data->ready_try_change = NULL;
22469 data->ready_try_change_size = 0;
22473 /* Prepare for scheduling pass. */
22475 ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
22476 int verbose ATTRIBUTE_UNUSED,
22477 int max_uid ATTRIBUTE_UNUSED)
22479 /* Install scheduling hooks for the current CPU. Some of these hooks are used
22480 in time-critical parts of the scheduler, so we only set them up when
22481 they are actually used. */
22484 case PROCESSOR_CORE2_32:
22485 case PROCESSOR_CORE2_64:
22486 case PROCESSOR_COREI7_32:
22487 case PROCESSOR_COREI7_64:
22488 targetm.sched.dfa_post_advance_cycle
22489 = core2i7_dfa_post_advance_cycle;
22490 targetm.sched.first_cycle_multipass_init
22491 = core2i7_first_cycle_multipass_init;
22492 targetm.sched.first_cycle_multipass_begin
22493 = core2i7_first_cycle_multipass_begin;
22494 targetm.sched.first_cycle_multipass_issue
22495 = core2i7_first_cycle_multipass_issue;
22496 targetm.sched.first_cycle_multipass_backtrack
22497 = core2i7_first_cycle_multipass_backtrack;
22498 targetm.sched.first_cycle_multipass_end
22499 = core2i7_first_cycle_multipass_end;
22500 targetm.sched.first_cycle_multipass_fini
22501 = core2i7_first_cycle_multipass_fini;
22503 /* Set decoder parameters. */
22504 core2i7_secondary_decoder_max_insn_size = 8;
22505 core2i7_ifetch_block_size = 16;
22506 core2i7_ifetch_block_max_insns = 6;
22510 targetm.sched.dfa_post_advance_cycle = NULL;
22511 targetm.sched.first_cycle_multipass_init = NULL;
22512 targetm.sched.first_cycle_multipass_begin = NULL;
22513 targetm.sched.first_cycle_multipass_issue = NULL;
22514 targetm.sched.first_cycle_multipass_backtrack = NULL;
22515 targetm.sched.first_cycle_multipass_end = NULL;
22516 targetm.sched.first_cycle_multipass_fini = NULL;
22522 /* Compute the alignment given to a constant that is being placed in memory.
22523 EXP is the constant and ALIGN is the alignment that the object would
22524 ordinarily have.
22525 The value of this function is used instead of that alignment to align
22526 the object. */
22529 ix86_constant_alignment (tree exp, int align)
22531 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
22532 || TREE_CODE (exp) == INTEGER_CST)
22534 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
22536 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
22539 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
22540 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
22541 return BITS_PER_WORD;
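/* Illustrative note (added commentary): a DFmode constant such as 1.0
   placed in memory is raised to 64-bit alignment, and a string constant
   whose TREE_STRING_LENGTH is at least 31 is raised to BITS_PER_WORD
   when not optimizing for size, so word-sized copies of it stay
   aligned.  */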
22546 /* Compute the alignment for a static variable.
22547 TYPE is the data type, and ALIGN is the alignment that
22548 the object would ordinarily have. The value of this function is used
22549 instead of that alignment to align the object. */
22552 ix86_data_alignment (tree type, int align)
22554 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
22556 if (AGGREGATE_TYPE_P (type)
22557 && TYPE_SIZE (type)
22558 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
22559 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
22560 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
22561 && align < max_align)
22564 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
22565 to a 16-byte boundary. */
22568 if (AGGREGATE_TYPE_P (type)
22569 && TYPE_SIZE (type)
22570 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
22571 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
22572 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
22576 if (TREE_CODE (type) == ARRAY_TYPE)
22578 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
22580 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
22583 else if (TREE_CODE (type) == COMPLEX_TYPE)
22586 if (TYPE_MODE (type) == DCmode && align < 64)
22588 if ((TYPE_MODE (type) == XCmode
22589 || TYPE_MODE (type) == TCmode) && align < 128)
22592 else if ((TREE_CODE (type) == RECORD_TYPE
22593 || TREE_CODE (type) == UNION_TYPE
22594 || TREE_CODE (type) == QUAL_UNION_TYPE)
22595 && TYPE_FIELDS (type))
22597 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
22599 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
22602 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
22603 || TREE_CODE (type) == INTEGER_TYPE)
22605 if (TYPE_MODE (type) == DFmode && align < 64)
22607 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
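/* Illustrative note (added commentary; TYPE_SIZE measures in bits):
   a static "double a[4]" (256 bits) is presumably raised to max_align
   (up to 256 bits here), and any aggregate of at least 128 bits is
   raised to 128-bit alignment per the x86-64 ABI rule cited above;
   scalar double data below 64-bit alignment is raised to 64.  */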
22614 /* Compute the alignment for a local variable or a stack slot. EXP is
22615 the data type or decl itself, MODE is the widest mode available and
22616 ALIGN is the alignment that the object would ordinarily have. The
22617 value of this macro is used instead of that alignment to align the
22618 object. */
22621 ix86_local_alignment (tree exp, enum machine_mode mode,
22622 unsigned int align)
22626 if (exp && DECL_P (exp))
22628 type = TREE_TYPE (exp);
22637 /* Don't do dynamic stack realignment for long long objects with
22638 -mpreferred-stack-boundary=2. */
22641 && ix86_preferred_stack_boundary < 64
22642 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
22643 && (!type || !TYPE_USER_ALIGN (type))
22644 && (!decl || !DECL_USER_ALIGN (decl)))
22647 /* If TYPE is NULL, we are allocating a stack slot for caller-save
22648 register in MODE. We will return the largest alignment of XF
22649 and DF. */
22652 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
22653 align = GET_MODE_ALIGNMENT (DFmode);
22657 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
22658 to a 16-byte boundary. The exact wording is:
22660 An array uses the same alignment as its elements, except that a local or
22661 global array variable of length at least 16 bytes or
22662 a C99 variable-length array variable always has alignment of at least 16 bytes.
22664 This was added to allow the use of aligned SSE instructions on arrays. The
22665 rule is meant for static storage (where the compiler cannot do the analysis
22666 by itself). We follow it for automatic variables only when convenient.
22667 We fully control everything in the compiled function, and functions from
22668 other units cannot rely on the alignment.
22670 Exclude the va_list type. It is the common case of a local array where
22671 we cannot benefit from the alignment. */
22672 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
22675 if (AGGREGATE_TYPE_P (type)
22676 && (va_list_type_node == NULL_TREE
22677 || (TYPE_MAIN_VARIANT (type)
22678 != TYPE_MAIN_VARIANT (va_list_type_node)))
22679 && TYPE_SIZE (type)
22680 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
22681 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
22682 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
22685 if (TREE_CODE (type) == ARRAY_TYPE)
22687 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
22689 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
22692 else if (TREE_CODE (type) == COMPLEX_TYPE)
22694 if (TYPE_MODE (type) == DCmode && align < 64)
22696 if ((TYPE_MODE (type) == XCmode
22697 || TYPE_MODE (type) == TCmode) && align < 128)
22700 else if ((TREE_CODE (type) == RECORD_TYPE
22701 || TREE_CODE (type) == UNION_TYPE
22702 || TREE_CODE (type) == QUAL_UNION_TYPE)
22703 && TYPE_FIELDS (type))
22705 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
22707 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
22710 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
22711 || TREE_CODE (type) == INTEGER_TYPE)
22714 if (TYPE_MODE (type) == DFmode && align < 64)
22716 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
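/* Illustrative note (added commentary): in a 64-bit function optimized
   for speed, a local aggregate such as "int buf[8]" that satisfies the
   size test above gets 128-bit alignment so SSE accesses to it can be
   aligned, while a va_list object -- also an array -- is deliberately
   excluded.  */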
22722 /* Compute the minimum required alignment for dynamic stack realignment
22723 purposes for a local variable, parameter or a stack slot. EXP is
22724 the data type or decl itself, MODE is its mode and ALIGN is the
22725 alignment that the object would ordinarily have. */
22728 ix86_minimum_alignment (tree exp, enum machine_mode mode,
22729 unsigned int align)
22733 if (exp && DECL_P (exp))
22735 type = TREE_TYPE (exp);
22744 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
22747 /* Don't do dynamic stack realignment for long long objects with
22748 -mpreferred-stack-boundary=2. */
22749 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
22750 && (!type || !TYPE_USER_ALIGN (type))
22751 && (!decl || !DECL_USER_ALIGN (decl)))
22757 /* Find a location for the static chain incoming to a nested function.
22758 This is a register, unless all free registers are used by arguments. */
22761 ix86_static_chain (const_tree fndecl, bool incoming_p)
22765 if (!DECL_STATIC_CHAIN (fndecl))
22770 /* We always use R10 in 64-bit mode. */
22778 /* By default in 32-bit mode we use ECX to pass the static chain. */
22781 fntype = TREE_TYPE (fndecl);
22782 ccvt = ix86_get_callcvt (fntype);
22783 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) != 0)
22785 /* Fastcall functions use ecx/edx for arguments, which leaves
22786 us with EAX for the static chain.
22787 Thiscall functions use ecx for arguments, which also
22788 leaves us with EAX for the static chain. */
22791 else if (ix86_function_regparm (fntype, fndecl) == 3)
22793 /* For regparm 3, we have no free call-clobbered registers in
22794 which to store the static chain. In order to implement this,
22795 we have the trampoline push the static chain to the stack.
22796 However, we can't push a value below the return address when
22797 we call the nested function directly, so we have to use an
22798 alternate entry point. For this we use ESI, and have the
22799 alternate entry point push ESI, so that things appear the
22800 same once we're executing the nested function. */
22803 if (fndecl == current_function_decl)
22804 ix86_static_chain_on_stack = true;
22805 return gen_frame_mem (SImode,
22806 plus_constant (arg_pointer_rtx, -8));
22812 return gen_rtx_REG (Pmode, regno);
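/* Summary of the cases above (added commentary):
     64-bit ABI            -> R10
     32-bit default        -> ECX
     fastcall/thiscall     -> EAX (ECX/EDX carry arguments)
     regparm (3)           -> stack slot at arg_pointer - 8, reached
                              through the ESI alternate entry point.  */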
22815 /* Emit RTL insns to initialize the variable parts of a trampoline.
22816 FNDECL is the decl of the target address; M_TRAMP is a MEM for
22817 the trampoline, and CHAIN_VALUE is an RTX for the static chain
22818 to be passed to the target function. */
22821 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
22827 fnaddr = XEXP (DECL_RTL (fndecl), 0);
22833 /* Load the function address into r11. Try to load the address using
22834 the shorter movl instead of movabs. We may want to support
22835 movq for kernel mode, but the kernel does not use trampolines at
22836 the moment. */
22837 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
22839 fnaddr = copy_to_mode_reg (DImode, fnaddr);
22841 mem = adjust_address (m_tramp, HImode, offset);
22842 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
22844 mem = adjust_address (m_tramp, SImode, offset + 2);
22845 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
22850 mem = adjust_address (m_tramp, HImode, offset);
22851 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
22853 mem = adjust_address (m_tramp, DImode, offset + 2);
22854 emit_move_insn (mem, fnaddr);
22858 /* Load static chain using movabs to r10. Use the
22859 shorter movl instead of movabs for x32. */
22871 mem = adjust_address (m_tramp, HImode, offset);
22872 emit_move_insn (mem, gen_int_mode (opcode, HImode));
22874 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
22875 emit_move_insn (mem, chain_value);
22878 /* Jump to r11; the last (unused) byte is a nop, only there to
22879 pad the write out to a single 32-bit store. */
22880 mem = adjust_address (m_tramp, SImode, offset);
22881 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
22888 /* Depending on the static chain location, either load a register
22889 with a constant, or push the constant to the stack. All of the
22890 instructions are the same size. */
22891 chain = ix86_static_chain (fndecl, true);
22894 switch (REGNO (chain))
22897 opcode = 0xb8; break;
22899 opcode = 0xb9; break;
22901 gcc_unreachable ();
22907 mem = adjust_address (m_tramp, QImode, offset);
22908 emit_move_insn (mem, gen_int_mode (opcode, QImode));
22910 mem = adjust_address (m_tramp, SImode, offset + 1);
22911 emit_move_insn (mem, chain_value);
22914 mem = adjust_address (m_tramp, QImode, offset);
22915 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
22917 mem = adjust_address (m_tramp, SImode, offset + 1);
22919 /* Compute offset from the end of the jmp to the target function.
22920 In the case in which the trampoline stores the static chain on
22921 the stack, we need to skip the first insn which pushes the
22922 (call-saved) register static chain; this push is 1 byte. */
22924 disp = expand_binop (SImode, sub_optab, fnaddr,
22925 plus_constant (XEXP (m_tramp, 0),
22926 offset - (MEM_P (chain) ? 1 : 0)),
22927 NULL_RTX, 1, OPTAB_DIRECT);
22928 emit_move_insn (mem, disp);
22931 gcc_assert (offset <= TRAMPOLINE_SIZE);
22933 #ifdef HAVE_ENABLE_EXECUTE_STACK
22934 #ifdef CHECK_EXECUTE_STACK_ENABLED
22935 if (CHECK_EXECUTE_STACK_ENABLED)
22937 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
22938 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
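/* Illustrative byte layout of the 64-bit trampoline built above (added
   commentary; this assumes the movabs forms rather than the shorter
   movl/x32 variants, and opcode 0xba49 for the store to r10):

     49 bb <8-byte fnaddr>   movabs $fnaddr, %r11
     49 ba <8-byte chain>    movabs $chain,  %r10
     49 ff e3 90             jmp *%r11; nop (padding)

   The HImode/SImode stores of 0xbb49 and 0x90e3ff49 above emit exactly
   these little-endian byte sequences.  */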
22942 /* The following file contains several enumerations and data structures
22943 built from the definitions in i386-builtin-types.def. */
22945 #include "i386-builtin-types.inc"
22947 /* Table for the ix86 builtin non-function types. */
22948 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
22950 /* Retrieve an element from the above table, building some of
22951 the types lazily. */
22954 ix86_get_builtin_type (enum ix86_builtin_type tcode)
22956 unsigned int index;
22959 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
22961 type = ix86_builtin_type_tab[(int) tcode];
22965 gcc_assert (tcode > IX86_BT_LAST_PRIM);
22966 if (tcode <= IX86_BT_LAST_VECT)
22968 enum machine_mode mode;
22970 index = tcode - IX86_BT_LAST_PRIM - 1;
22971 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
22972 mode = ix86_builtin_type_vect_mode[index];
22974 type = build_vector_type_for_mode (itype, mode);
22980 index = tcode - IX86_BT_LAST_VECT - 1;
22981 if (tcode <= IX86_BT_LAST_PTR)
22982 quals = TYPE_UNQUALIFIED;
22984 quals = TYPE_QUAL_CONST;
22986 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
22987 if (quals != TYPE_UNQUALIFIED)
22988 itype = build_qualified_type (itype, quals);
22990 type = build_pointer_type (itype);
22993 ix86_builtin_type_tab[(int) tcode] = type;
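/* Usage sketch (added commentary; enum names assumed from
   i386-builtin-types.inc): the first call to
   ix86_get_builtin_type (IX86_BT_V4SF) builds the V4SF vector type from
   its FLOAT element type via build_vector_type_for_mode and caches it
   in ix86_builtin_type_tab, so later lookups are a plain table read.  */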
22997 /* Table for the ix86 builtin function types. */
22998 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
23000 /* Retrieve an element from the above table, building some of
23001 the types lazily. */
23004 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
23008 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
23010 type = ix86_builtin_func_type_tab[(int) tcode];
23014 if (tcode <= IX86_BT_LAST_FUNC)
23016 unsigned start = ix86_builtin_func_start[(int) tcode];
23017 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
23018 tree rtype, atype, args = void_list_node;
23021 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
23022 for (i = after - 1; i > start; --i)
23024 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
23025 args = tree_cons (NULL, atype, args);
23028 type = build_function_type (rtype, args);
23032 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
23033 enum ix86_builtin_func_type icode;
23035 icode = ix86_builtin_func_alias_base[index];
23036 type = ix86_get_builtin_func_type (icode);
23039 ix86_builtin_func_type_tab[(int) tcode] = type;
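/* Usage sketch (added commentary; enum name assumed from the generated
   tables): for a code like V4SF_FTYPE_V4SF_V4SF, the loop above walks
   the argument slots from last to first, consing onto void_list_node,
   so build_function_type receives the V4SF return type together with
   the argument list (V4SF, V4SF) in source order.  */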
23044 /* Codes for all the SSE/MMX builtins. */
23047 IX86_BUILTIN_ADDPS,
23048 IX86_BUILTIN_ADDSS,
23049 IX86_BUILTIN_DIVPS,
23050 IX86_BUILTIN_DIVSS,
23051 IX86_BUILTIN_MULPS,
23052 IX86_BUILTIN_MULSS,
23053 IX86_BUILTIN_SUBPS,
23054 IX86_BUILTIN_SUBSS,
23056 IX86_BUILTIN_CMPEQPS,
23057 IX86_BUILTIN_CMPLTPS,
23058 IX86_BUILTIN_CMPLEPS,
23059 IX86_BUILTIN_CMPGTPS,
23060 IX86_BUILTIN_CMPGEPS,
23061 IX86_BUILTIN_CMPNEQPS,
23062 IX86_BUILTIN_CMPNLTPS,
23063 IX86_BUILTIN_CMPNLEPS,
23064 IX86_BUILTIN_CMPNGTPS,
23065 IX86_BUILTIN_CMPNGEPS,
23066 IX86_BUILTIN_CMPORDPS,
23067 IX86_BUILTIN_CMPUNORDPS,
23068 IX86_BUILTIN_CMPEQSS,
23069 IX86_BUILTIN_CMPLTSS,
23070 IX86_BUILTIN_CMPLESS,
23071 IX86_BUILTIN_CMPNEQSS,
23072 IX86_BUILTIN_CMPNLTSS,
23073 IX86_BUILTIN_CMPNLESS,
23074 IX86_BUILTIN_CMPNGTSS,
23075 IX86_BUILTIN_CMPNGESS,
23076 IX86_BUILTIN_CMPORDSS,
23077 IX86_BUILTIN_CMPUNORDSS,
23079 IX86_BUILTIN_COMIEQSS,
23080 IX86_BUILTIN_COMILTSS,
23081 IX86_BUILTIN_COMILESS,
23082 IX86_BUILTIN_COMIGTSS,
23083 IX86_BUILTIN_COMIGESS,
23084 IX86_BUILTIN_COMINEQSS,
23085 IX86_BUILTIN_UCOMIEQSS,
23086 IX86_BUILTIN_UCOMILTSS,
23087 IX86_BUILTIN_UCOMILESS,
23088 IX86_BUILTIN_UCOMIGTSS,
23089 IX86_BUILTIN_UCOMIGESS,
23090 IX86_BUILTIN_UCOMINEQSS,
23092 IX86_BUILTIN_CVTPI2PS,
23093 IX86_BUILTIN_CVTPS2PI,
23094 IX86_BUILTIN_CVTSI2SS,
23095 IX86_BUILTIN_CVTSI642SS,
23096 IX86_BUILTIN_CVTSS2SI,
23097 IX86_BUILTIN_CVTSS2SI64,
23098 IX86_BUILTIN_CVTTPS2PI,
23099 IX86_BUILTIN_CVTTSS2SI,
23100 IX86_BUILTIN_CVTTSS2SI64,
23102 IX86_BUILTIN_MAXPS,
23103 IX86_BUILTIN_MAXSS,
23104 IX86_BUILTIN_MINPS,
23105 IX86_BUILTIN_MINSS,
23107 IX86_BUILTIN_LOADUPS,
23108 IX86_BUILTIN_STOREUPS,
23109 IX86_BUILTIN_MOVSS,
23111 IX86_BUILTIN_MOVHLPS,
23112 IX86_BUILTIN_MOVLHPS,
23113 IX86_BUILTIN_LOADHPS,
23114 IX86_BUILTIN_LOADLPS,
23115 IX86_BUILTIN_STOREHPS,
23116 IX86_BUILTIN_STORELPS,
23118 IX86_BUILTIN_MASKMOVQ,
23119 IX86_BUILTIN_MOVMSKPS,
23120 IX86_BUILTIN_PMOVMSKB,
23122 IX86_BUILTIN_MOVNTPS,
23123 IX86_BUILTIN_MOVNTQ,
23125 IX86_BUILTIN_LOADDQU,
23126 IX86_BUILTIN_STOREDQU,
23128 IX86_BUILTIN_PACKSSWB,
23129 IX86_BUILTIN_PACKSSDW,
23130 IX86_BUILTIN_PACKUSWB,
23132 IX86_BUILTIN_PADDB,
23133 IX86_BUILTIN_PADDW,
23134 IX86_BUILTIN_PADDD,
23135 IX86_BUILTIN_PADDQ,
23136 IX86_BUILTIN_PADDSB,
23137 IX86_BUILTIN_PADDSW,
23138 IX86_BUILTIN_PADDUSB,
23139 IX86_BUILTIN_PADDUSW,
23140 IX86_BUILTIN_PSUBB,
23141 IX86_BUILTIN_PSUBW,
23142 IX86_BUILTIN_PSUBD,
23143 IX86_BUILTIN_PSUBQ,
23144 IX86_BUILTIN_PSUBSB,
23145 IX86_BUILTIN_PSUBSW,
23146 IX86_BUILTIN_PSUBUSB,
23147 IX86_BUILTIN_PSUBUSW,
23150 IX86_BUILTIN_PANDN,
23154 IX86_BUILTIN_PAVGB,
23155 IX86_BUILTIN_PAVGW,
23157 IX86_BUILTIN_PCMPEQB,
23158 IX86_BUILTIN_PCMPEQW,
23159 IX86_BUILTIN_PCMPEQD,
23160 IX86_BUILTIN_PCMPGTB,
23161 IX86_BUILTIN_PCMPGTW,
23162 IX86_BUILTIN_PCMPGTD,
23164 IX86_BUILTIN_PMADDWD,
23166 IX86_BUILTIN_PMAXSW,
23167 IX86_BUILTIN_PMAXUB,
23168 IX86_BUILTIN_PMINSW,
23169 IX86_BUILTIN_PMINUB,
23171 IX86_BUILTIN_PMULHUW,
23172 IX86_BUILTIN_PMULHW,
23173 IX86_BUILTIN_PMULLW,
23175 IX86_BUILTIN_PSADBW,
23176 IX86_BUILTIN_PSHUFW,
23178 IX86_BUILTIN_PSLLW,
23179 IX86_BUILTIN_PSLLD,
23180 IX86_BUILTIN_PSLLQ,
23181 IX86_BUILTIN_PSRAW,
23182 IX86_BUILTIN_PSRAD,
23183 IX86_BUILTIN_PSRLW,
23184 IX86_BUILTIN_PSRLD,
23185 IX86_BUILTIN_PSRLQ,
23186 IX86_BUILTIN_PSLLWI,
23187 IX86_BUILTIN_PSLLDI,
23188 IX86_BUILTIN_PSLLQI,
23189 IX86_BUILTIN_PSRAWI,
23190 IX86_BUILTIN_PSRADI,
23191 IX86_BUILTIN_PSRLWI,
23192 IX86_BUILTIN_PSRLDI,
23193 IX86_BUILTIN_PSRLQI,
23195 IX86_BUILTIN_PUNPCKHBW,
23196 IX86_BUILTIN_PUNPCKHWD,
23197 IX86_BUILTIN_PUNPCKHDQ,
23198 IX86_BUILTIN_PUNPCKLBW,
23199 IX86_BUILTIN_PUNPCKLWD,
23200 IX86_BUILTIN_PUNPCKLDQ,
23202 IX86_BUILTIN_SHUFPS,
23204 IX86_BUILTIN_RCPPS,
23205 IX86_BUILTIN_RCPSS,
23206 IX86_BUILTIN_RSQRTPS,
23207 IX86_BUILTIN_RSQRTPS_NR,
23208 IX86_BUILTIN_RSQRTSS,
23209 IX86_BUILTIN_RSQRTF,
23210 IX86_BUILTIN_SQRTPS,
23211 IX86_BUILTIN_SQRTPS_NR,
23212 IX86_BUILTIN_SQRTSS,
23214 IX86_BUILTIN_UNPCKHPS,
23215 IX86_BUILTIN_UNPCKLPS,
23217 IX86_BUILTIN_ANDPS,
23218 IX86_BUILTIN_ANDNPS,
23220 IX86_BUILTIN_XORPS,
23223 IX86_BUILTIN_LDMXCSR,
23224 IX86_BUILTIN_STMXCSR,
23225 IX86_BUILTIN_SFENCE,
23227 /* 3DNow! Original */
23228 IX86_BUILTIN_FEMMS,
23229 IX86_BUILTIN_PAVGUSB,
23230 IX86_BUILTIN_PF2ID,
23231 IX86_BUILTIN_PFACC,
23232 IX86_BUILTIN_PFADD,
23233 IX86_BUILTIN_PFCMPEQ,
23234 IX86_BUILTIN_PFCMPGE,
23235 IX86_BUILTIN_PFCMPGT,
23236 IX86_BUILTIN_PFMAX,
23237 IX86_BUILTIN_PFMIN,
23238 IX86_BUILTIN_PFMUL,
23239 IX86_BUILTIN_PFRCP,
23240 IX86_BUILTIN_PFRCPIT1,
23241 IX86_BUILTIN_PFRCPIT2,
23242 IX86_BUILTIN_PFRSQIT1,
23243 IX86_BUILTIN_PFRSQRT,
23244 IX86_BUILTIN_PFSUB,
23245 IX86_BUILTIN_PFSUBR,
23246 IX86_BUILTIN_PI2FD,
23247 IX86_BUILTIN_PMULHRW,
23249 /* 3DNow! Athlon Extensions */
23250 IX86_BUILTIN_PF2IW,
23251 IX86_BUILTIN_PFNACC,
23252 IX86_BUILTIN_PFPNACC,
23253 IX86_BUILTIN_PI2FW,
23254 IX86_BUILTIN_PSWAPDSI,
23255 IX86_BUILTIN_PSWAPDSF,
23258 IX86_BUILTIN_ADDPD,
23259 IX86_BUILTIN_ADDSD,
23260 IX86_BUILTIN_DIVPD,
23261 IX86_BUILTIN_DIVSD,
23262 IX86_BUILTIN_MULPD,
23263 IX86_BUILTIN_MULSD,
23264 IX86_BUILTIN_SUBPD,
23265 IX86_BUILTIN_SUBSD,
23267 IX86_BUILTIN_CMPEQPD,
23268 IX86_BUILTIN_CMPLTPD,
23269 IX86_BUILTIN_CMPLEPD,
23270 IX86_BUILTIN_CMPGTPD,
23271 IX86_BUILTIN_CMPGEPD,
23272 IX86_BUILTIN_CMPNEQPD,
23273 IX86_BUILTIN_CMPNLTPD,
23274 IX86_BUILTIN_CMPNLEPD,
23275 IX86_BUILTIN_CMPNGTPD,
23276 IX86_BUILTIN_CMPNGEPD,
23277 IX86_BUILTIN_CMPORDPD,
23278 IX86_BUILTIN_CMPUNORDPD,
23279 IX86_BUILTIN_CMPEQSD,
23280 IX86_BUILTIN_CMPLTSD,
23281 IX86_BUILTIN_CMPLESD,
23282 IX86_BUILTIN_CMPNEQSD,
23283 IX86_BUILTIN_CMPNLTSD,
23284 IX86_BUILTIN_CMPNLESD,
23285 IX86_BUILTIN_CMPORDSD,
23286 IX86_BUILTIN_CMPUNORDSD,
23288 IX86_BUILTIN_COMIEQSD,
23289 IX86_BUILTIN_COMILTSD,
23290 IX86_BUILTIN_COMILESD,
23291 IX86_BUILTIN_COMIGTSD,
23292 IX86_BUILTIN_COMIGESD,
23293 IX86_BUILTIN_COMINEQSD,
23294 IX86_BUILTIN_UCOMIEQSD,
23295 IX86_BUILTIN_UCOMILTSD,
23296 IX86_BUILTIN_UCOMILESD,
23297 IX86_BUILTIN_UCOMIGTSD,
23298 IX86_BUILTIN_UCOMIGESD,
23299 IX86_BUILTIN_UCOMINEQSD,
23301 IX86_BUILTIN_MAXPD,
23302 IX86_BUILTIN_MAXSD,
23303 IX86_BUILTIN_MINPD,
23304 IX86_BUILTIN_MINSD,
23306 IX86_BUILTIN_ANDPD,
23307 IX86_BUILTIN_ANDNPD,
23309 IX86_BUILTIN_XORPD,
23311 IX86_BUILTIN_SQRTPD,
23312 IX86_BUILTIN_SQRTSD,
23314 IX86_BUILTIN_UNPCKHPD,
23315 IX86_BUILTIN_UNPCKLPD,
23317 IX86_BUILTIN_SHUFPD,
23319 IX86_BUILTIN_LOADUPD,
23320 IX86_BUILTIN_STOREUPD,
23321 IX86_BUILTIN_MOVSD,
23323 IX86_BUILTIN_LOADHPD,
23324 IX86_BUILTIN_LOADLPD,
23326 IX86_BUILTIN_CVTDQ2PD,
23327 IX86_BUILTIN_CVTDQ2PS,
23329 IX86_BUILTIN_CVTPD2DQ,
23330 IX86_BUILTIN_CVTPD2PI,
23331 IX86_BUILTIN_CVTPD2PS,
23332 IX86_BUILTIN_CVTTPD2DQ,
23333 IX86_BUILTIN_CVTTPD2PI,
23335 IX86_BUILTIN_CVTPI2PD,
23336 IX86_BUILTIN_CVTSI2SD,
23337 IX86_BUILTIN_CVTSI642SD,
23339 IX86_BUILTIN_CVTSD2SI,
23340 IX86_BUILTIN_CVTSD2SI64,
23341 IX86_BUILTIN_CVTSD2SS,
23342 IX86_BUILTIN_CVTSS2SD,
23343 IX86_BUILTIN_CVTTSD2SI,
23344 IX86_BUILTIN_CVTTSD2SI64,
23346 IX86_BUILTIN_CVTPS2DQ,
23347 IX86_BUILTIN_CVTPS2PD,
23348 IX86_BUILTIN_CVTTPS2DQ,
23350 IX86_BUILTIN_MOVNTI,
23351 IX86_BUILTIN_MOVNTPD,
23352 IX86_BUILTIN_MOVNTDQ,
23354 IX86_BUILTIN_MOVQ128,
23357 IX86_BUILTIN_MASKMOVDQU,
23358 IX86_BUILTIN_MOVMSKPD,
23359 IX86_BUILTIN_PMOVMSKB128,
23361 IX86_BUILTIN_PACKSSWB128,
23362 IX86_BUILTIN_PACKSSDW128,
23363 IX86_BUILTIN_PACKUSWB128,
23365 IX86_BUILTIN_PADDB128,
23366 IX86_BUILTIN_PADDW128,
23367 IX86_BUILTIN_PADDD128,
23368 IX86_BUILTIN_PADDQ128,
23369 IX86_BUILTIN_PADDSB128,
23370 IX86_BUILTIN_PADDSW128,
23371 IX86_BUILTIN_PADDUSB128,
23372 IX86_BUILTIN_PADDUSW128,
23373 IX86_BUILTIN_PSUBB128,
23374 IX86_BUILTIN_PSUBW128,
23375 IX86_BUILTIN_PSUBD128,
23376 IX86_BUILTIN_PSUBQ128,
23377 IX86_BUILTIN_PSUBSB128,
23378 IX86_BUILTIN_PSUBSW128,
23379 IX86_BUILTIN_PSUBUSB128,
23380 IX86_BUILTIN_PSUBUSW128,
23382 IX86_BUILTIN_PAND128,
23383 IX86_BUILTIN_PANDN128,
23384 IX86_BUILTIN_POR128,
23385 IX86_BUILTIN_PXOR128,
23387 IX86_BUILTIN_PAVGB128,
23388 IX86_BUILTIN_PAVGW128,
23390 IX86_BUILTIN_PCMPEQB128,
23391 IX86_BUILTIN_PCMPEQW128,
23392 IX86_BUILTIN_PCMPEQD128,
23393 IX86_BUILTIN_PCMPGTB128,
23394 IX86_BUILTIN_PCMPGTW128,
23395 IX86_BUILTIN_PCMPGTD128,
23397 IX86_BUILTIN_PMADDWD128,
23399 IX86_BUILTIN_PMAXSW128,
23400 IX86_BUILTIN_PMAXUB128,
23401 IX86_BUILTIN_PMINSW128,
23402 IX86_BUILTIN_PMINUB128,
23404 IX86_BUILTIN_PMULUDQ,
23405 IX86_BUILTIN_PMULUDQ128,
23406 IX86_BUILTIN_PMULHUW128,
23407 IX86_BUILTIN_PMULHW128,
23408 IX86_BUILTIN_PMULLW128,
23410 IX86_BUILTIN_PSADBW128,
23411 IX86_BUILTIN_PSHUFHW,
23412 IX86_BUILTIN_PSHUFLW,
23413 IX86_BUILTIN_PSHUFD,
23415 IX86_BUILTIN_PSLLDQI128,
23416 IX86_BUILTIN_PSLLWI128,
23417 IX86_BUILTIN_PSLLDI128,
23418 IX86_BUILTIN_PSLLQI128,
23419 IX86_BUILTIN_PSRAWI128,
23420 IX86_BUILTIN_PSRADI128,
23421 IX86_BUILTIN_PSRLDQI128,
23422 IX86_BUILTIN_PSRLWI128,
23423 IX86_BUILTIN_PSRLDI128,
23424 IX86_BUILTIN_PSRLQI128,
23426 IX86_BUILTIN_PSLLDQ128,
23427 IX86_BUILTIN_PSLLW128,
23428 IX86_BUILTIN_PSLLD128,
23429 IX86_BUILTIN_PSLLQ128,
23430 IX86_BUILTIN_PSRAW128,
23431 IX86_BUILTIN_PSRAD128,
23432 IX86_BUILTIN_PSRLW128,
23433 IX86_BUILTIN_PSRLD128,
23434 IX86_BUILTIN_PSRLQ128,
23436 IX86_BUILTIN_PUNPCKHBW128,
23437 IX86_BUILTIN_PUNPCKHWD128,
23438 IX86_BUILTIN_PUNPCKHDQ128,
23439 IX86_BUILTIN_PUNPCKHQDQ128,
23440 IX86_BUILTIN_PUNPCKLBW128,
23441 IX86_BUILTIN_PUNPCKLWD128,
23442 IX86_BUILTIN_PUNPCKLDQ128,
23443 IX86_BUILTIN_PUNPCKLQDQ128,
23445 IX86_BUILTIN_CLFLUSH,
23446 IX86_BUILTIN_MFENCE,
23447 IX86_BUILTIN_LFENCE,
23448 IX86_BUILTIN_PAUSE,
23450 IX86_BUILTIN_BSRSI,
23451 IX86_BUILTIN_BSRDI,
23452 IX86_BUILTIN_RDPMC,
23453 IX86_BUILTIN_RDTSC,
23454 IX86_BUILTIN_RDTSCP,
23455 IX86_BUILTIN_ROLQI,
23456 IX86_BUILTIN_ROLHI,
23457 IX86_BUILTIN_RORQI,
23458 IX86_BUILTIN_RORHI,
23461 IX86_BUILTIN_ADDSUBPS,
23462 IX86_BUILTIN_HADDPS,
23463 IX86_BUILTIN_HSUBPS,
23464 IX86_BUILTIN_MOVSHDUP,
23465 IX86_BUILTIN_MOVSLDUP,
23466 IX86_BUILTIN_ADDSUBPD,
23467 IX86_BUILTIN_HADDPD,
23468 IX86_BUILTIN_HSUBPD,
23469 IX86_BUILTIN_LDDQU,
23471 IX86_BUILTIN_MONITOR,
23472 IX86_BUILTIN_MWAIT,
23475 IX86_BUILTIN_PHADDW,
23476 IX86_BUILTIN_PHADDD,
23477 IX86_BUILTIN_PHADDSW,
23478 IX86_BUILTIN_PHSUBW,
23479 IX86_BUILTIN_PHSUBD,
23480 IX86_BUILTIN_PHSUBSW,
23481 IX86_BUILTIN_PMADDUBSW,
23482 IX86_BUILTIN_PMULHRSW,
23483 IX86_BUILTIN_PSHUFB,
23484 IX86_BUILTIN_PSIGNB,
23485 IX86_BUILTIN_PSIGNW,
23486 IX86_BUILTIN_PSIGND,
23487 IX86_BUILTIN_PALIGNR,
23488 IX86_BUILTIN_PABSB,
23489 IX86_BUILTIN_PABSW,
23490 IX86_BUILTIN_PABSD,
23492 IX86_BUILTIN_PHADDW128,
23493 IX86_BUILTIN_PHADDD128,
23494 IX86_BUILTIN_PHADDSW128,
23495 IX86_BUILTIN_PHSUBW128,
23496 IX86_BUILTIN_PHSUBD128,
23497 IX86_BUILTIN_PHSUBSW128,
23498 IX86_BUILTIN_PMADDUBSW128,
23499 IX86_BUILTIN_PMULHRSW128,
23500 IX86_BUILTIN_PSHUFB128,
23501 IX86_BUILTIN_PSIGNB128,
23502 IX86_BUILTIN_PSIGNW128,
23503 IX86_BUILTIN_PSIGND128,
23504 IX86_BUILTIN_PALIGNR128,
23505 IX86_BUILTIN_PABSB128,
23506 IX86_BUILTIN_PABSW128,
23507 IX86_BUILTIN_PABSD128,
23509 /* AMDFAM10 - SSE4A New Instructions. */
23510 IX86_BUILTIN_MOVNTSD,
23511 IX86_BUILTIN_MOVNTSS,
23512 IX86_BUILTIN_EXTRQI,
23513 IX86_BUILTIN_EXTRQ,
23514 IX86_BUILTIN_INSERTQI,
23515 IX86_BUILTIN_INSERTQ,
23518 IX86_BUILTIN_BLENDPD,
23519 IX86_BUILTIN_BLENDPS,
23520 IX86_BUILTIN_BLENDVPD,
23521 IX86_BUILTIN_BLENDVPS,
23522 IX86_BUILTIN_PBLENDVB128,
23523 IX86_BUILTIN_PBLENDW128,
23528 IX86_BUILTIN_INSERTPS128,
23530 IX86_BUILTIN_MOVNTDQA,
23531 IX86_BUILTIN_MPSADBW128,
23532 IX86_BUILTIN_PACKUSDW128,
23533 IX86_BUILTIN_PCMPEQQ,
23534 IX86_BUILTIN_PHMINPOSUW128,
23536 IX86_BUILTIN_PMAXSB128,
23537 IX86_BUILTIN_PMAXSD128,
23538 IX86_BUILTIN_PMAXUD128,
23539 IX86_BUILTIN_PMAXUW128,
23541 IX86_BUILTIN_PMINSB128,
23542 IX86_BUILTIN_PMINSD128,
23543 IX86_BUILTIN_PMINUD128,
23544 IX86_BUILTIN_PMINUW128,
23546 IX86_BUILTIN_PMOVSXBW128,
23547 IX86_BUILTIN_PMOVSXBD128,
23548 IX86_BUILTIN_PMOVSXBQ128,
23549 IX86_BUILTIN_PMOVSXWD128,
23550 IX86_BUILTIN_PMOVSXWQ128,
23551 IX86_BUILTIN_PMOVSXDQ128,
23553 IX86_BUILTIN_PMOVZXBW128,
23554 IX86_BUILTIN_PMOVZXBD128,
23555 IX86_BUILTIN_PMOVZXBQ128,
23556 IX86_BUILTIN_PMOVZXWD128,
23557 IX86_BUILTIN_PMOVZXWQ128,
23558 IX86_BUILTIN_PMOVZXDQ128,
23560 IX86_BUILTIN_PMULDQ128,
23561 IX86_BUILTIN_PMULLD128,
23563 IX86_BUILTIN_ROUNDPD,
23564 IX86_BUILTIN_ROUNDPS,
23565 IX86_BUILTIN_ROUNDSD,
23566 IX86_BUILTIN_ROUNDSS,
23568 IX86_BUILTIN_FLOORPD,
23569 IX86_BUILTIN_CEILPD,
23570 IX86_BUILTIN_TRUNCPD,
23571 IX86_BUILTIN_RINTPD,
23572 IX86_BUILTIN_FLOORPS,
23573 IX86_BUILTIN_CEILPS,
23574 IX86_BUILTIN_TRUNCPS,
23575 IX86_BUILTIN_RINTPS,
23577 IX86_BUILTIN_PTESTZ,
23578 IX86_BUILTIN_PTESTC,
23579 IX86_BUILTIN_PTESTNZC,
23581 IX86_BUILTIN_VEC_INIT_V2SI,
23582 IX86_BUILTIN_VEC_INIT_V4HI,
23583 IX86_BUILTIN_VEC_INIT_V8QI,
23584 IX86_BUILTIN_VEC_EXT_V2DF,
23585 IX86_BUILTIN_VEC_EXT_V2DI,
23586 IX86_BUILTIN_VEC_EXT_V4SF,
23587 IX86_BUILTIN_VEC_EXT_V4SI,
23588 IX86_BUILTIN_VEC_EXT_V8HI,
23589 IX86_BUILTIN_VEC_EXT_V2SI,
23590 IX86_BUILTIN_VEC_EXT_V4HI,
23591 IX86_BUILTIN_VEC_EXT_V16QI,
23592 IX86_BUILTIN_VEC_SET_V2DI,
23593 IX86_BUILTIN_VEC_SET_V4SF,
23594 IX86_BUILTIN_VEC_SET_V4SI,
23595 IX86_BUILTIN_VEC_SET_V8HI,
23596 IX86_BUILTIN_VEC_SET_V4HI,
23597 IX86_BUILTIN_VEC_SET_V16QI,
23599 IX86_BUILTIN_VEC_PACK_SFIX,
23602 IX86_BUILTIN_CRC32QI,
23603 IX86_BUILTIN_CRC32HI,
23604 IX86_BUILTIN_CRC32SI,
23605 IX86_BUILTIN_CRC32DI,
23607 IX86_BUILTIN_PCMPESTRI128,
23608 IX86_BUILTIN_PCMPESTRM128,
23609 IX86_BUILTIN_PCMPESTRA128,
23610 IX86_BUILTIN_PCMPESTRC128,
23611 IX86_BUILTIN_PCMPESTRO128,
23612 IX86_BUILTIN_PCMPESTRS128,
23613 IX86_BUILTIN_PCMPESTRZ128,
23614 IX86_BUILTIN_PCMPISTRI128,
23615 IX86_BUILTIN_PCMPISTRM128,
23616 IX86_BUILTIN_PCMPISTRA128,
23617 IX86_BUILTIN_PCMPISTRC128,
23618 IX86_BUILTIN_PCMPISTRO128,
23619 IX86_BUILTIN_PCMPISTRS128,
23620 IX86_BUILTIN_PCMPISTRZ128,
23622 IX86_BUILTIN_PCMPGTQ,
23624 /* AES instructions */
23625 IX86_BUILTIN_AESENC128,
23626 IX86_BUILTIN_AESENCLAST128,
23627 IX86_BUILTIN_AESDEC128,
23628 IX86_BUILTIN_AESDECLAST128,
23629 IX86_BUILTIN_AESIMC128,
23630 IX86_BUILTIN_AESKEYGENASSIST128,
23632 /* PCLMUL instruction */
23633 IX86_BUILTIN_PCLMULQDQ128,
23636 IX86_BUILTIN_ADDPD256,
23637 IX86_BUILTIN_ADDPS256,
23638 IX86_BUILTIN_ADDSUBPD256,
23639 IX86_BUILTIN_ADDSUBPS256,
23640 IX86_BUILTIN_ANDPD256,
23641 IX86_BUILTIN_ANDPS256,
23642 IX86_BUILTIN_ANDNPD256,
23643 IX86_BUILTIN_ANDNPS256,
23644 IX86_BUILTIN_BLENDPD256,
23645 IX86_BUILTIN_BLENDPS256,
23646 IX86_BUILTIN_BLENDVPD256,
23647 IX86_BUILTIN_BLENDVPS256,
23648 IX86_BUILTIN_DIVPD256,
23649 IX86_BUILTIN_DIVPS256,
23650 IX86_BUILTIN_DPPS256,
23651 IX86_BUILTIN_HADDPD256,
23652 IX86_BUILTIN_HADDPS256,
23653 IX86_BUILTIN_HSUBPD256,
23654 IX86_BUILTIN_HSUBPS256,
23655 IX86_BUILTIN_MAXPD256,
23656 IX86_BUILTIN_MAXPS256,
23657 IX86_BUILTIN_MINPD256,
23658 IX86_BUILTIN_MINPS256,
23659 IX86_BUILTIN_MULPD256,
23660 IX86_BUILTIN_MULPS256,
23661 IX86_BUILTIN_ORPD256,
23662 IX86_BUILTIN_ORPS256,
23663 IX86_BUILTIN_SHUFPD256,
23664 IX86_BUILTIN_SHUFPS256,
23665 IX86_BUILTIN_SUBPD256,
23666 IX86_BUILTIN_SUBPS256,
23667 IX86_BUILTIN_XORPD256,
23668 IX86_BUILTIN_XORPS256,
23669 IX86_BUILTIN_CMPSD,
23670 IX86_BUILTIN_CMPSS,
23671 IX86_BUILTIN_CMPPD,
23672 IX86_BUILTIN_CMPPS,
23673 IX86_BUILTIN_CMPPD256,
23674 IX86_BUILTIN_CMPPS256,
23675 IX86_BUILTIN_CVTDQ2PD256,
23676 IX86_BUILTIN_CVTDQ2PS256,
23677 IX86_BUILTIN_CVTPD2PS256,
23678 IX86_BUILTIN_CVTPS2DQ256,
23679 IX86_BUILTIN_CVTPS2PD256,
23680 IX86_BUILTIN_CVTTPD2DQ256,
23681 IX86_BUILTIN_CVTPD2DQ256,
23682 IX86_BUILTIN_CVTTPS2DQ256,
23683 IX86_BUILTIN_EXTRACTF128PD256,
23684 IX86_BUILTIN_EXTRACTF128PS256,
23685 IX86_BUILTIN_EXTRACTF128SI256,
23686 IX86_BUILTIN_VZEROALL,
23687 IX86_BUILTIN_VZEROUPPER,
23688 IX86_BUILTIN_VPERMILVARPD,
23689 IX86_BUILTIN_VPERMILVARPS,
23690 IX86_BUILTIN_VPERMILVARPD256,
23691 IX86_BUILTIN_VPERMILVARPS256,
23692 IX86_BUILTIN_VPERMILPD,
23693 IX86_BUILTIN_VPERMILPS,
23694 IX86_BUILTIN_VPERMILPD256,
23695 IX86_BUILTIN_VPERMILPS256,
23696 IX86_BUILTIN_VPERMIL2PD,
23697 IX86_BUILTIN_VPERMIL2PS,
23698 IX86_BUILTIN_VPERMIL2PD256,
23699 IX86_BUILTIN_VPERMIL2PS256,
23700 IX86_BUILTIN_VPERM2F128PD256,
23701 IX86_BUILTIN_VPERM2F128PS256,
23702 IX86_BUILTIN_VPERM2F128SI256,
23703 IX86_BUILTIN_VBROADCASTSS,
23704 IX86_BUILTIN_VBROADCASTSD256,
23705 IX86_BUILTIN_VBROADCASTSS256,
23706 IX86_BUILTIN_VBROADCASTPD256,
23707 IX86_BUILTIN_VBROADCASTPS256,
23708 IX86_BUILTIN_VINSERTF128PD256,
23709 IX86_BUILTIN_VINSERTF128PS256,
23710 IX86_BUILTIN_VINSERTF128SI256,
23711 IX86_BUILTIN_LOADUPD256,
23712 IX86_BUILTIN_LOADUPS256,
23713 IX86_BUILTIN_STOREUPD256,
23714 IX86_BUILTIN_STOREUPS256,
23715 IX86_BUILTIN_LDDQU256,
23716 IX86_BUILTIN_MOVNTDQ256,
23717 IX86_BUILTIN_MOVNTPD256,
23718 IX86_BUILTIN_MOVNTPS256,
23719 IX86_BUILTIN_LOADDQU256,
23720 IX86_BUILTIN_STOREDQU256,
23721 IX86_BUILTIN_MASKLOADPD,
23722 IX86_BUILTIN_MASKLOADPS,
23723 IX86_BUILTIN_MASKSTOREPD,
23724 IX86_BUILTIN_MASKSTOREPS,
23725 IX86_BUILTIN_MASKLOADPD256,
23726 IX86_BUILTIN_MASKLOADPS256,
23727 IX86_BUILTIN_MASKSTOREPD256,
23728 IX86_BUILTIN_MASKSTOREPS256,
23729 IX86_BUILTIN_MOVSHDUP256,
23730 IX86_BUILTIN_MOVSLDUP256,
23731 IX86_BUILTIN_MOVDDUP256,
23733 IX86_BUILTIN_SQRTPD256,
23734 IX86_BUILTIN_SQRTPS256,
23735 IX86_BUILTIN_SQRTPS_NR256,
23736 IX86_BUILTIN_RSQRTPS256,
23737 IX86_BUILTIN_RSQRTPS_NR256,
23739 IX86_BUILTIN_RCPPS256,
23741 IX86_BUILTIN_ROUNDPD256,
23742 IX86_BUILTIN_ROUNDPS256,
23744 IX86_BUILTIN_FLOORPD256,
23745 IX86_BUILTIN_CEILPD256,
23746 IX86_BUILTIN_TRUNCPD256,
23747 IX86_BUILTIN_RINTPD256,
23748 IX86_BUILTIN_FLOORPS256,
23749 IX86_BUILTIN_CEILPS256,
23750 IX86_BUILTIN_TRUNCPS256,
23751 IX86_BUILTIN_RINTPS256,
23753 IX86_BUILTIN_UNPCKHPD256,
23754 IX86_BUILTIN_UNPCKLPD256,
23755 IX86_BUILTIN_UNPCKHPS256,
23756 IX86_BUILTIN_UNPCKLPS256,
23758 IX86_BUILTIN_SI256_SI,
23759 IX86_BUILTIN_PS256_PS,
23760 IX86_BUILTIN_PD256_PD,
23761 IX86_BUILTIN_SI_SI256,
23762 IX86_BUILTIN_PS_PS256,
23763 IX86_BUILTIN_PD_PD256,
23765 IX86_BUILTIN_VTESTZPD,
23766 IX86_BUILTIN_VTESTCPD,
23767 IX86_BUILTIN_VTESTNZCPD,
23768 IX86_BUILTIN_VTESTZPS,
23769 IX86_BUILTIN_VTESTCPS,
23770 IX86_BUILTIN_VTESTNZCPS,
23771 IX86_BUILTIN_VTESTZPD256,
23772 IX86_BUILTIN_VTESTCPD256,
23773 IX86_BUILTIN_VTESTNZCPD256,
23774 IX86_BUILTIN_VTESTZPS256,
23775 IX86_BUILTIN_VTESTCPS256,
23776 IX86_BUILTIN_VTESTNZCPS256,
23777 IX86_BUILTIN_PTESTZ256,
23778 IX86_BUILTIN_PTESTC256,
23779 IX86_BUILTIN_PTESTNZC256,
23781 IX86_BUILTIN_MOVMSKPD256,
23782 IX86_BUILTIN_MOVMSKPS256,
23784 /* TFmode support builtins. */
23786 IX86_BUILTIN_HUGE_VALQ,
23787 IX86_BUILTIN_FABSQ,
23788 IX86_BUILTIN_COPYSIGNQ,
23790 /* Vectorizer support builtins. */
23791 IX86_BUILTIN_CPYSGNPS,
23792 IX86_BUILTIN_CPYSGNPD,
23793 IX86_BUILTIN_CPYSGNPS256,
23794 IX86_BUILTIN_CPYSGNPD256,
23796 IX86_BUILTIN_CVTUDQ2PS,
23798 IX86_BUILTIN_VEC_PERM_V2DF,
23799 IX86_BUILTIN_VEC_PERM_V4SF,
23800 IX86_BUILTIN_VEC_PERM_V2DI,
23801 IX86_BUILTIN_VEC_PERM_V4SI,
23802 IX86_BUILTIN_VEC_PERM_V8HI,
23803 IX86_BUILTIN_VEC_PERM_V16QI,
23804 IX86_BUILTIN_VEC_PERM_V2DI_U,
23805 IX86_BUILTIN_VEC_PERM_V4SI_U,
23806 IX86_BUILTIN_VEC_PERM_V8HI_U,
23807 IX86_BUILTIN_VEC_PERM_V16QI_U,
23808 IX86_BUILTIN_VEC_PERM_V4DF,
23809 IX86_BUILTIN_VEC_PERM_V8SF,
23811 /* FMA4 and XOP instructions. */
23812 IX86_BUILTIN_VFMADDSS,
23813 IX86_BUILTIN_VFMADDSD,
23814 IX86_BUILTIN_VFMADDPS,
23815 IX86_BUILTIN_VFMADDPD,
23816 IX86_BUILTIN_VFMADDPS256,
23817 IX86_BUILTIN_VFMADDPD256,
23818 IX86_BUILTIN_VFMADDSUBPS,
23819 IX86_BUILTIN_VFMADDSUBPD,
23820 IX86_BUILTIN_VFMADDSUBPS256,
23821 IX86_BUILTIN_VFMADDSUBPD256,
23823 IX86_BUILTIN_VPCMOV,
23824 IX86_BUILTIN_VPCMOV_V2DI,
23825 IX86_BUILTIN_VPCMOV_V4SI,
23826 IX86_BUILTIN_VPCMOV_V8HI,
23827 IX86_BUILTIN_VPCMOV_V16QI,
23828 IX86_BUILTIN_VPCMOV_V4SF,
23829 IX86_BUILTIN_VPCMOV_V2DF,
23830 IX86_BUILTIN_VPCMOV256,
23831 IX86_BUILTIN_VPCMOV_V4DI256,
23832 IX86_BUILTIN_VPCMOV_V8SI256,
23833 IX86_BUILTIN_VPCMOV_V16HI256,
23834 IX86_BUILTIN_VPCMOV_V32QI256,
23835 IX86_BUILTIN_VPCMOV_V8SF256,
23836 IX86_BUILTIN_VPCMOV_V4DF256,
23838 IX86_BUILTIN_VPPERM,
23840 IX86_BUILTIN_VPMACSSWW,
23841 IX86_BUILTIN_VPMACSWW,
23842 IX86_BUILTIN_VPMACSSWD,
23843 IX86_BUILTIN_VPMACSWD,
23844 IX86_BUILTIN_VPMACSSDD,
23845 IX86_BUILTIN_VPMACSDD,
23846 IX86_BUILTIN_VPMACSSDQL,
23847 IX86_BUILTIN_VPMACSSDQH,
23848 IX86_BUILTIN_VPMACSDQL,
23849 IX86_BUILTIN_VPMACSDQH,
23850 IX86_BUILTIN_VPMADCSSWD,
23851 IX86_BUILTIN_VPMADCSWD,
23853 IX86_BUILTIN_VPHADDBW,
23854 IX86_BUILTIN_VPHADDBD,
23855 IX86_BUILTIN_VPHADDBQ,
23856 IX86_BUILTIN_VPHADDWD,
23857 IX86_BUILTIN_VPHADDWQ,
23858 IX86_BUILTIN_VPHADDDQ,
23859 IX86_BUILTIN_VPHADDUBW,
23860 IX86_BUILTIN_VPHADDUBD,
23861 IX86_BUILTIN_VPHADDUBQ,
23862 IX86_BUILTIN_VPHADDUWD,
23863 IX86_BUILTIN_VPHADDUWQ,
23864 IX86_BUILTIN_VPHADDUDQ,
23865 IX86_BUILTIN_VPHSUBBW,
23866 IX86_BUILTIN_VPHSUBWD,
23867 IX86_BUILTIN_VPHSUBDQ,
23869 IX86_BUILTIN_VPROTB,
23870 IX86_BUILTIN_VPROTW,
23871 IX86_BUILTIN_VPROTD,
23872 IX86_BUILTIN_VPROTQ,
23873 IX86_BUILTIN_VPROTB_IMM,
23874 IX86_BUILTIN_VPROTW_IMM,
23875 IX86_BUILTIN_VPROTD_IMM,
23876 IX86_BUILTIN_VPROTQ_IMM,
23878 IX86_BUILTIN_VPSHLB,
23879 IX86_BUILTIN_VPSHLW,
23880 IX86_BUILTIN_VPSHLD,
23881 IX86_BUILTIN_VPSHLQ,
23882 IX86_BUILTIN_VPSHAB,
23883 IX86_BUILTIN_VPSHAW,
23884 IX86_BUILTIN_VPSHAD,
23885 IX86_BUILTIN_VPSHAQ,
23887 IX86_BUILTIN_VFRCZSS,
23888 IX86_BUILTIN_VFRCZSD,
23889 IX86_BUILTIN_VFRCZPS,
23890 IX86_BUILTIN_VFRCZPD,
23891 IX86_BUILTIN_VFRCZPS256,
23892 IX86_BUILTIN_VFRCZPD256,
23894 IX86_BUILTIN_VPCOMEQUB,
23895 IX86_BUILTIN_VPCOMNEUB,
23896 IX86_BUILTIN_VPCOMLTUB,
23897 IX86_BUILTIN_VPCOMLEUB,
23898 IX86_BUILTIN_VPCOMGTUB,
23899 IX86_BUILTIN_VPCOMGEUB,
23900 IX86_BUILTIN_VPCOMFALSEUB,
23901 IX86_BUILTIN_VPCOMTRUEUB,
23903 IX86_BUILTIN_VPCOMEQUW,
23904 IX86_BUILTIN_VPCOMNEUW,
23905 IX86_BUILTIN_VPCOMLTUW,
23906 IX86_BUILTIN_VPCOMLEUW,
23907 IX86_BUILTIN_VPCOMGTUW,
23908 IX86_BUILTIN_VPCOMGEUW,
23909 IX86_BUILTIN_VPCOMFALSEUW,
23910 IX86_BUILTIN_VPCOMTRUEUW,
23912 IX86_BUILTIN_VPCOMEQUD,
23913 IX86_BUILTIN_VPCOMNEUD,
23914 IX86_BUILTIN_VPCOMLTUD,
23915 IX86_BUILTIN_VPCOMLEUD,
23916 IX86_BUILTIN_VPCOMGTUD,
23917 IX86_BUILTIN_VPCOMGEUD,
23918 IX86_BUILTIN_VPCOMFALSEUD,
23919 IX86_BUILTIN_VPCOMTRUEUD,
23921 IX86_BUILTIN_VPCOMEQUQ,
23922 IX86_BUILTIN_VPCOMNEUQ,
23923 IX86_BUILTIN_VPCOMLTUQ,
23924 IX86_BUILTIN_VPCOMLEUQ,
23925 IX86_BUILTIN_VPCOMGTUQ,
23926 IX86_BUILTIN_VPCOMGEUQ,
23927 IX86_BUILTIN_VPCOMFALSEUQ,
23928 IX86_BUILTIN_VPCOMTRUEUQ,
23930 IX86_BUILTIN_VPCOMEQB,
23931 IX86_BUILTIN_VPCOMNEB,
23932 IX86_BUILTIN_VPCOMLTB,
23933 IX86_BUILTIN_VPCOMLEB,
23934 IX86_BUILTIN_VPCOMGTB,
23935 IX86_BUILTIN_VPCOMGEB,
23936 IX86_BUILTIN_VPCOMFALSEB,
23937 IX86_BUILTIN_VPCOMTRUEB,
23939 IX86_BUILTIN_VPCOMEQW,
23940 IX86_BUILTIN_VPCOMNEW,
23941 IX86_BUILTIN_VPCOMLTW,
23942 IX86_BUILTIN_VPCOMLEW,
23943 IX86_BUILTIN_VPCOMGTW,
23944 IX86_BUILTIN_VPCOMGEW,
23945 IX86_BUILTIN_VPCOMFALSEW,
23946 IX86_BUILTIN_VPCOMTRUEW,
23948 IX86_BUILTIN_VPCOMEQD,
23949 IX86_BUILTIN_VPCOMNED,
23950 IX86_BUILTIN_VPCOMLTD,
23951 IX86_BUILTIN_VPCOMLED,
23952 IX86_BUILTIN_VPCOMGTD,
23953 IX86_BUILTIN_VPCOMGED,
23954 IX86_BUILTIN_VPCOMFALSED,
23955 IX86_BUILTIN_VPCOMTRUED,
23957 IX86_BUILTIN_VPCOMEQQ,
23958 IX86_BUILTIN_VPCOMNEQ,
23959 IX86_BUILTIN_VPCOMLTQ,
23960 IX86_BUILTIN_VPCOMLEQ,
23961 IX86_BUILTIN_VPCOMGTQ,
23962 IX86_BUILTIN_VPCOMGEQ,
23963 IX86_BUILTIN_VPCOMFALSEQ,
23964 IX86_BUILTIN_VPCOMTRUEQ,
23966 /* LWP instructions. */
23967 IX86_BUILTIN_LLWPCB,
23968 IX86_BUILTIN_SLWPCB,
23969 IX86_BUILTIN_LWPVAL32,
23970 IX86_BUILTIN_LWPVAL64,
23971 IX86_BUILTIN_LWPINS32,
23972 IX86_BUILTIN_LWPINS64,
23976 /* BMI instructions. */
23977 IX86_BUILTIN_BEXTR32,
23978 IX86_BUILTIN_BEXTR64,
23981 /* TBM instructions. */
23982 IX86_BUILTIN_BEXTRI32,
23983 IX86_BUILTIN_BEXTRI64,
23986 /* FSGSBASE instructions. */
23987 IX86_BUILTIN_RDFSBASE32,
23988 IX86_BUILTIN_RDFSBASE64,
23989 IX86_BUILTIN_RDGSBASE32,
23990 IX86_BUILTIN_RDGSBASE64,
23991 IX86_BUILTIN_WRFSBASE32,
23992 IX86_BUILTIN_WRFSBASE64,
23993 IX86_BUILTIN_WRGSBASE32,
23994 IX86_BUILTIN_WRGSBASE64,
23996 /* RDRND instructions. */
23997 IX86_BUILTIN_RDRAND16_STEP,
23998 IX86_BUILTIN_RDRAND32_STEP,
23999 IX86_BUILTIN_RDRAND64_STEP,
24001 /* F16C instructions. */
24002 IX86_BUILTIN_CVTPH2PS,
24003 IX86_BUILTIN_CVTPH2PS256,
24004 IX86_BUILTIN_CVTPS2PH,
24005 IX86_BUILTIN_CVTPS2PH256,
24007 /* CFString built-in for Darwin. */
24008 IX86_BUILTIN_CFSTRING,
24013 /* Table for the ix86 builtin decls. */
24014 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
24016 /* Table of all of the builtin functions that are possible with different ISA's
24017 but are waiting to be built until a function is declared to use that
24018 ISA. */
24019 struct builtin_isa {
24020 const char *name; /* function name */
24021 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
24022 int isa; /* isa_flags this builtin is defined for */
24023 bool const_p; /* true if the declaration is constant */
24024 bool set_and_not_built_p;
24027 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
24030 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save MASK,
24031 the set of isa_flags this builtin requires, in the ix86_builtins_isa array.
24032 Store the function decl in the ix86_builtins array. Return the function
24033 decl, or NULL_TREE if the builtin was not added.
24035 If the front end has a special hook for builtin functions, delay adding
24036 builtin functions that aren't in the current ISA until the ISA is changed
24037 with function specific optimization. Doing so can save about 300K for the
24038 default compiler. When the builtin is expanded, check at that time whether
24039 it is valid.
24041 If the front end doesn't have a special hook, record all builtins, even if
24042 a builtin's instruction set isn't in the current ISA, in case the user uses
24043 function specific options for a different ISA, so that we don't get scope
24044 errors if a builtin is added in the middle of a function scope. */
24047 def_builtin (int mask, const char *name, enum ix86_builtin_func_type tcode,
24048 enum ix86_builtins code)
24050 tree decl = NULL_TREE;
24052 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
24054 ix86_builtins_isa[(int) code].isa = mask;
24056 mask &= ~OPTION_MASK_ISA_64BIT;
24058 || (mask & ix86_isa_flags) != 0
24059 || (lang_hooks.builtin_function
24060 == lang_hooks.builtin_function_ext_scope))
24063 tree type = ix86_get_builtin_func_type (tcode);
24064 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
24066 ix86_builtins[(int) code] = decl;
24067 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
24071 ix86_builtins[(int) code] = NULL_TREE;
24072 ix86_builtins_isa[(int) code].tcode = tcode;
24073 ix86_builtins_isa[(int) code].name = name;
24074 ix86_builtins_isa[(int) code].const_p = false;
24075 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
24082 /* Like def_builtin, but also marks the function decl "const". */
24085 def_builtin_const (int mask, const char *name,
24086 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
24088 tree decl = def_builtin (mask, name, tcode, code);
24090 TREE_READONLY (decl) = 1;
24092 ix86_builtins_isa[(int) code].const_p = true;
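/* Hypothetical usage sketch (added commentary; the real registrations
   come from the bdesc tables and init routines below):

     def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_addps",
                        V4SF_FTYPE_V4SF_V4SF, IX86_BUILTIN_ADDPS);

   This records the SSE addps builtin, marks its decl TREE_READONLY, and
   defers the actual declaration while SSE is not in the current ISA.  */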
24097 /* Add any new builtin functions for a given ISA that may not have been
24098 declared. This saves a bit of space compared to adding all of the
24099 declarations to the tree, even if we didn't use them. */
24102 ix86_add_new_builtins (int isa)
24106 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
24108 if ((ix86_builtins_isa[i].isa & isa) != 0
24109 && ix86_builtins_isa[i].set_and_not_built_p)
24113 /* Don't define the builtin again. */
24114 ix86_builtins_isa[i].set_and_not_built_p = false;
24116 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
24117 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
24118 type, i, BUILT_IN_MD, NULL,
24121 ix86_builtins[i] = decl;
24122 if (ix86_builtins_isa[i].const_p)
24123 TREE_READONLY (decl) = 1;
24128 /* Bits for builtin_description.flag. */
24130 /* Set when we don't support the comparison natively, and should
24131 swap_comparison in order to support it. */
24132 #define BUILTIN_DESC_SWAP_OPERANDS 1
24134 struct builtin_description
24136 const unsigned int mask;
24137 const enum insn_code icode;
24138 const char *const name;
24139 const enum ix86_builtins code;
24140 const enum rtx_code comparison;
24144 static const struct builtin_description bdesc_comi[] =
24146 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
24147 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
24148 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
24149 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
24150 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
24151 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
24152 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
24153 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
24154 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
24155 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
24156 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
24157 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
24158 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
24159 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
24160 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
24161 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
24162 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
24163 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
24164 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
24165 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
24166 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
24167 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
24168 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
24169 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
24170 };
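/* For reference (editor's sketch): each bdesc_comi entry backs one of
   the scalar compare intrinsics; xmmintrin.h wraps the builtin roughly
   like this:

       extern __inline int
       _mm_comieq_ss (__m128 __A, __m128 __B)
       {
         return __builtin_ia32_comieq ((__v4sf) __A, (__v4sf) __B);
       }

   The rtx_code column (UNEQ, UNLT, ...) tells the expander which
   condition to test on the comis/ucomis flag result.  */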
24172 static const struct builtin_description bdesc_pcmpestr[] =
24173 {
24174 /* SSE4.2 */
24175 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
24176 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
24177 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
24178 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
24179 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
24180 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
24181 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
24182 };
24184 static const struct builtin_description bdesc_pcmpistr[] =
24185 {
24186 /* SSE4.2 */
24187 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
24188 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
24189 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
24190 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
24191 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
24192 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
24193 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
24194 };
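/* Editor's note: for the pcmpestr/pcmpistr entries the last column is
   not a function-type code but a CC mode naming which flag of the
   string-compare result the builtin returns; entries with 0 return the
   index or mask itself.  E.g. __builtin_ia32_pcmpistria128 tests the
   CCAmode ("above") condition, surfaced in smmintrin.h as:

       int all_valid = _mm_cmpistra (x, y, 0x0c);
*/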
24196 /* Special builtins with variable number of arguments. */
24197 static const struct builtin_description bdesc_special_args[] =
24198 {
24199 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtsc, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
24200 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtscp, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
24201 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
24203 /* MMX */
24204 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
24206 /* 3DNow! */
24207 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
24209 /* SSE */
24210 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
24211 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
24212 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
24214 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
24215 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
24216 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
24217 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
24219 /* SSE or 3DNow!A */
24220 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
24221 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
24223 /* SSE2 */
24224 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
24225 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
24226 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
24227 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
24228 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
24229 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
24230 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
24231 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
24232 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
24234 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
24235 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
24237 /* SSE3 */
24238 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
24240 /* SSE4.1 */
24241 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
24243 /* SSE4A */
24244 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
24245 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
24247 /* AVX */
24248 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
24249 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
24251 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
24252 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
24253 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
24254 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
24255 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
24257 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
24258 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
24259 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
24260 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
24261 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
24262 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
24263 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
24265 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
24266 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
24267 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
24269 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
24270 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
24271 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
24272 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
24273 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
24274 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
24275 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
24276 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
24278 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
24279 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
24280 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
24281 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
24282 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
24283 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
24285 /* FSGSBASE */
24286 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
24287 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
24288 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
24289 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
24290 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
24291 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
24292 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
24293 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
24294 };
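/* Editor's note: "special" entries may read or write memory or return a
   value through a pointer, so unlike bdesc_args their operands are not
   all forced into registers by the generic expander.  The last column
   (UINT64_FTYPE_VOID etc.) selects the prototype constructed by
   ix86_get_builtin_func_type.  Sketch of use for the first entry:

       unsigned long long t0 = __builtin_ia32_rdtsc ();
       unsigned long long t1 = __builtin_ia32_rdtsc ();
       unsigned long long cycles = t1 - t0;
*/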
24296 /* Builtins with variable number of arguments. */
24297 static const struct builtin_description bdesc_args[] =
24298 {
24299 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
24300 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
24301 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdpmc, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
24302 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
24303 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
24304 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
24305 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
24307 /* MMX */
24308 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24309 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24310 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24311 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24312 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24313 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24315 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24316 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24317 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24318 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24319 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24320 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24321 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24322 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24324 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24325 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24327 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24328 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24329 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24330 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24332 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24333 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24334 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24335 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24336 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24337 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24339 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24340 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24341 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24342 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24343 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24344 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24346 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
24347 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
24348 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
24350 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
24352 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
24353 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
24354 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
24355 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
24356 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
24357 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
24359 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
24360 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
24361 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
24362 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
24363 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
24364 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
24366 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
24367 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
24368 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
24369 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
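/* Editor's note on the shift entries above: the ..._SI_COUNT forms take
   the shift count as an ordinary integer, while the ..._V4HI_COUNT and
   similar forms take it in an MMX register, mirroring the immediate and
   register forms of the psll/psrl/psra instructions.  Sketch:

       __v4hi r1 = __builtin_ia32_psllwi (a, 3);
       __v4hi r2 = __builtin_ia32_psllw (a, count_vec);
*/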
24371 /* 3DNow! */
24372 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
24373 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
24374 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
24375 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
24377 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24378 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24379 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24380 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
24381 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
24382 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
24383 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24384 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24385 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24386 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24387 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24388 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24389 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24390 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24391 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24393 /* 3DNow!A */
24394 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
24395 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
24396 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
24397 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
24398 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24399 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24401 /* SSE */
24402 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
24403 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
24404 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
24405 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
24406 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
24407 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
24408 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
24409 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
24410 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
24411 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
24412 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
24413 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
24415 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
24417 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24418 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24419 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24420 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24421 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24422 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24423 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24424 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24426 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
24427 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
24428 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
24429 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
24430 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
24431 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
24432 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
24433 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
24434 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
24435 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
24436 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
24437 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
24438 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
24439 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
24440 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
24441 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
24442 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
24443 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
24444 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
24445 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
24446 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
24447 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
24449 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24450 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24451 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24452 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24454 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24455 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24456 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24457 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24459 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24461 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24462 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24463 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24464 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24465 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24467 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
24468 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
24469 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
24471 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
24473 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
24474 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
24475 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
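/* Editor's note: the ..._VEC_MERGE suffix marks scalar operations that
   compute only the low element and merge the upper elements of the
   input back into the result, matching sqrtss/rsqrtss/rcpss.  Sketch:

       __v4sf r = __builtin_ia32_sqrtss (v);
       r[0] is sqrtf (v[0]); r[1..3] are v[1..3] unchanged.
*/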
24477 /* SSE MMX or 3DNow!A */
24478 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24479 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24480 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24482 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24483 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24484 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24485 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24487 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
24488 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
24490 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
24492 /* SSE2 */
24493 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
24495 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2df", IX86_BUILTIN_VEC_PERM_V2DF, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI },
24496 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4sf", IX86_BUILTIN_VEC_PERM_V4SF, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI },
24497 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di", IX86_BUILTIN_VEC_PERM_V2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI },
24498 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si", IX86_BUILTIN_VEC_PERM_V4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
24499 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi", IX86_BUILTIN_VEC_PERM_V8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI },
24500 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi", IX86_BUILTIN_VEC_PERM_V16QI, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
24501 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di_u", IX86_BUILTIN_VEC_PERM_V2DI_U, UNKNOWN, (int) V2UDI_FTYPE_V2UDI_V2UDI_V2UDI },
24502 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si_u", IX86_BUILTIN_VEC_PERM_V4SI_U, UNKNOWN, (int) V4USI_FTYPE_V4USI_V4USI_V4USI },
24503 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi_u", IX86_BUILTIN_VEC_PERM_V8HI_U, UNKNOWN, (int) V8UHI_FTYPE_V8UHI_V8UHI_V8UHI },
24504 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi_u", IX86_BUILTIN_VEC_PERM_V16QI_U, UNKNOWN, (int) V16UQI_FTYPE_V16UQI_V16UQI_V16UQI },
24505 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4df", IX86_BUILTIN_VEC_PERM_V4DF, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI },
24506 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8sf", IX86_BUILTIN_VEC_PERM_V8SF, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI },
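/* Editor's note: the CODE_FOR_nothing rows above have no fixed insn
   pattern; the vec_perm builtins are instead lowered by a target
   expander (along the lines of ix86_expand_vec_perm_builtin in this
   file) that picks a shuffle sequence based on the selector vector.  */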
24508 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
24509 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
24510 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
24511 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
24512 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
24513 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtudq2ps, "__builtin_ia32_cvtudq2ps", IX86_BUILTIN_CVTUDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
24515 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
24516 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
24517 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
24518 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
24519 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
24521 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
24523 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
24524 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
24525 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
24526 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
24528 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
24529 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
24530 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
24532 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24533 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24534 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24535 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24536 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24537 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24538 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24539 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24541 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
24542 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
24543 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
24544 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
24545 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
24546 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
24547 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
24548 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
24549 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
24550 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
24551 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
24552 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
24553 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
24554 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
24555 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
24556 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
24557 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
24558 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
24559 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
24560 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
24562 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24563 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24564 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24565 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24567 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24568 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24569 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24570 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24572 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24574 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24575 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24576 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24578 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
24580 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24581 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24582 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24583 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24584 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24585 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24586 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24587 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24589 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24590 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24591 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24592 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24593 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24594 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24595 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24596 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24598 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24599 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24601 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24602 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24603 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24604 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24606 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24607 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24609 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24610 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24611 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24612 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24613 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24614 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24616 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24617 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24618 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24619 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24621 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24622 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24623 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24624 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24625 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24626 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24627 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24628 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24630 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
24631 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
24632 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
24634 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24635 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
24637 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
24638 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
24640 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
24642 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
24643 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
24644 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
24645 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
24647 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
24648 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
24649 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
24650 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
24651 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
24652 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
24653 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
24655 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
24656 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
24657 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
24658 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
24659 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
24660 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
24661 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
24663 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
24664 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
24665 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
24666 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },

  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },

  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
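
  /* The floor/ceil/trunc/rint entries above all map to the same
     roundpd/roundps patterns; the comparison field is reused to carry the
     ROUND_* constant that becomes the rounding-mode immediate.  Likewise
     the ptest entries reuse it as a flags condition: EQ tests ZF (ptestz),
     LTU tests CF (ptestc), and GTU tests both clear (ptestnzc).  */
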
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },

  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },

  { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },

  { OPTION_MASK_ISA_ABM, CODE_FOR_clzhi2_abm, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },

  { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
  { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },

  { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },

  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
};

/* FMA4 and XOP.  */
#define MULTI_ARG_4_DF2_DI_I	V2DF_FTYPE_V2DF_V2DF_V2DI_INT
#define MULTI_ARG_4_DF2_DI_I1	V4DF_FTYPE_V4DF_V4DF_V4DI_INT
#define MULTI_ARG_4_SF2_SI_I	V4SF_FTYPE_V4SF_V4SF_V4SI_INT
#define MULTI_ARG_4_SF2_SI_I1	V8SF_FTYPE_V8SF_V8SF_V8SI_INT
#define MULTI_ARG_3_SF		V4SF_FTYPE_V4SF_V4SF_V4SF
#define MULTI_ARG_3_DF		V2DF_FTYPE_V2DF_V2DF_V2DF
#define MULTI_ARG_3_SF2		V8SF_FTYPE_V8SF_V8SF_V8SF
#define MULTI_ARG_3_DF2		V4DF_FTYPE_V4DF_V4DF_V4DF
#define MULTI_ARG_3_DI		V2DI_FTYPE_V2DI_V2DI_V2DI
#define MULTI_ARG_3_SI		V4SI_FTYPE_V4SI_V4SI_V4SI
#define MULTI_ARG_3_SI_DI	V4SI_FTYPE_V4SI_V4SI_V2DI
#define MULTI_ARG_3_HI		V8HI_FTYPE_V8HI_V8HI_V8HI
#define MULTI_ARG_3_HI_SI	V8HI_FTYPE_V8HI_V8HI_V4SI
#define MULTI_ARG_3_QI		V16QI_FTYPE_V16QI_V16QI_V16QI
#define MULTI_ARG_3_DI2		V4DI_FTYPE_V4DI_V4DI_V4DI
#define MULTI_ARG_3_SI2		V8SI_FTYPE_V8SI_V8SI_V8SI
#define MULTI_ARG_3_HI2		V16HI_FTYPE_V16HI_V16HI_V16HI
#define MULTI_ARG_3_QI2		V32QI_FTYPE_V32QI_V32QI_V32QI
#define MULTI_ARG_2_SF		V4SF_FTYPE_V4SF_V4SF
#define MULTI_ARG_2_DF		V2DF_FTYPE_V2DF_V2DF
#define MULTI_ARG_2_DI		V2DI_FTYPE_V2DI_V2DI
#define MULTI_ARG_2_SI		V4SI_FTYPE_V4SI_V4SI
#define MULTI_ARG_2_HI		V8HI_FTYPE_V8HI_V8HI
#define MULTI_ARG_2_QI		V16QI_FTYPE_V16QI_V16QI
#define MULTI_ARG_2_DI_IMM	V2DI_FTYPE_V2DI_SI
#define MULTI_ARG_2_SI_IMM	V4SI_FTYPE_V4SI_SI
#define MULTI_ARG_2_HI_IMM	V8HI_FTYPE_V8HI_SI
#define MULTI_ARG_2_QI_IMM	V16QI_FTYPE_V16QI_SI
#define MULTI_ARG_2_DI_CMP	V2DI_FTYPE_V2DI_V2DI_CMP
#define MULTI_ARG_2_SI_CMP	V4SI_FTYPE_V4SI_V4SI_CMP
#define MULTI_ARG_2_HI_CMP	V8HI_FTYPE_V8HI_V8HI_CMP
#define MULTI_ARG_2_QI_CMP	V16QI_FTYPE_V16QI_V16QI_CMP
#define MULTI_ARG_2_SF_TF	V4SF_FTYPE_V4SF_V4SF_TF
#define MULTI_ARG_2_DF_TF	V2DF_FTYPE_V2DF_V2DF_TF
#define MULTI_ARG_2_DI_TF	V2DI_FTYPE_V2DI_V2DI_TF
#define MULTI_ARG_2_SI_TF	V4SI_FTYPE_V4SI_V4SI_TF
#define MULTI_ARG_2_HI_TF	V8HI_FTYPE_V8HI_V8HI_TF
#define MULTI_ARG_2_QI_TF	V16QI_FTYPE_V16QI_V16QI_TF
#define MULTI_ARG_1_SF		V4SF_FTYPE_V4SF
#define MULTI_ARG_1_DF		V2DF_FTYPE_V2DF
#define MULTI_ARG_1_SF2		V8SF_FTYPE_V8SF
#define MULTI_ARG_1_DF2		V4DF_FTYPE_V4DF
#define MULTI_ARG_1_DI		V2DI_FTYPE_V2DI
#define MULTI_ARG_1_SI		V4SI_FTYPE_V4SI
#define MULTI_ARG_1_HI		V8HI_FTYPE_V8HI
#define MULTI_ARG_1_QI		V16QI_FTYPE_V16QI
#define MULTI_ARG_1_SI_DI	V2DI_FTYPE_V4SI
#define MULTI_ARG_1_HI_DI	V2DI_FTYPE_V8HI
#define MULTI_ARG_1_HI_SI	V4SI_FTYPE_V8HI
#define MULTI_ARG_1_QI_DI	V2DI_FTYPE_V16QI
#define MULTI_ARG_1_QI_SI	V4SI_FTYPE_V16QI
#define MULTI_ARG_1_QI_HI	V8HI_FTYPE_V16QI
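
/* The MULTI_ARG_<n>_<type> names above are shorthand for the generic
   ix86_builtin_func_type enumerators: <n> is the operand count, <type>
   the element kind (SF/DF float, QI/HI/SI/DI integer), and a trailing 2
   selects the 256-bit variant.  MULTI_ARG_3_SF, for example, is a
   three-operand V4SF function returning V4SF.  */
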
static const struct builtin_description bdesc_multi_arg[] =
{
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
    "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
    UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
    "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
    UNKNOWN, (int)MULTI_ARG_3_DF },

  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
    "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
    UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
    "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
    UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
    "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
    UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
    "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
    UNKNOWN, (int)MULTI_ARG_3_DF2 },

  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
    "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
    UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
    "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
    UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
    "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
    UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
    "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
    UNKNOWN, (int)MULTI_ARG_3_DF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
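
  /* Note that each "not equal" comparison above is entered twice
     (e.g. vpcomneb and vpcomneqb) under the same IX86_BUILTIN code, so
     that both intrinsic spellings resolve to the same builtin.  */
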
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub", IX86_BUILTIN_VPCOMFALSEUB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw", IX86_BUILTIN_VPCOMFALSEUW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud", IX86_BUILTIN_VPCOMFALSEUD, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq", IX86_BUILTIN_VPCOMFALSEUQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
25205 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
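/* Illustrative note (not from the original source): each entry in the
   table above ties a builtin name to an insn pattern plus an rtx
   comparison code or PCOM_* marker.  Under -mxop, for example,

     __v4si lt = __builtin_ia32_vpcomltud (a, b);

   goes through ix86_expand_multi_arg_builtin with sub_code LTU and
   yields all-ones in each element where the unsigned compare holds.  */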
25209 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
25210 in the current target ISA to allow the user to compile particular modules
25211 with different target specific options that differ from the command line options.  */
25214 ix86_init_mmx_sse_builtins (void)
25216 const struct builtin_description * d;
25217 enum ix86_builtin_func_type ftype;
25220 /* Add all special builtins with a variable number of operands. */
25221 for (i = 0, d = bdesc_special_args;
25222 i < ARRAY_SIZE (bdesc_special_args);
25228 ftype = (enum ix86_builtin_func_type) d->flag;
25229 def_builtin (d->mask, d->name, ftype, d->code);
25232 /* Add all builtins with a variable number of operands. */
25233 for (i = 0, d = bdesc_args;
25234 i < ARRAY_SIZE (bdesc_args);
25240 ftype = (enum ix86_builtin_func_type) d->flag;
25241 def_builtin_const (d->mask, d->name, ftype, d->code);
25244 /* pcmpestr[im] insns. */
25245 for (i = 0, d = bdesc_pcmpestr;
25246 i < ARRAY_SIZE (bdesc_pcmpestr);
25249 if (d->code == IX86_BUILTIN_PCMPESTRM128)
25250 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
25252 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
25253 def_builtin_const (d->mask, d->name, ftype, d->code);
25256 /* pcmpistr[im] insns. */
25257 for (i = 0, d = bdesc_pcmpistr;
25258 i < ARRAY_SIZE (bdesc_pcmpistr);
25261 if (d->code == IX86_BUILTIN_PCMPISTRM128)
25262 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
25264 ftype = INT_FTYPE_V16QI_V16QI_INT;
25265 def_builtin_const (d->mask, d->name, ftype, d->code);
25268 /* comi/ucomi insns. */
25269 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
25271 if (d->mask == OPTION_MASK_ISA_SSE2)
25272 ftype = INT_FTYPE_V2DF_V2DF;
25274 ftype = INT_FTYPE_V4SF_V4SF;
25275 def_builtin_const (d->mask, d->name, ftype, d->code);
25279 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
25280 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
25281 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
25282 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
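/* Illustrative use of the two builtins just defined; they underlie
   _mm_getcsr and _mm_setcsr in xmmintrin.h:

     unsigned int csr = __builtin_ia32_stmxcsr ();
     __builtin_ia32_ldmxcsr (csr | 0x8040);

   The second line sets FTZ (bit 15) and DAZ (bit 6) in MXCSR.  */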
25284 /* SSE or 3DNow!A */
25285 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
25286 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
25287 IX86_BUILTIN_MASKMOVQ);
25290 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
25291 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
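/* For reference, emmintrin.h's _mm_maskmoveu_si128 is a thin wrapper:

     __builtin_ia32_maskmovdqu ((__v16qi) __A, (__v16qi) __B, __C);

   storing the bytes of __A whose mask byte in __B has its top bit set.  */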
25293 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
25294 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
25295 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
25296 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
25299 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
25300 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
25301 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
25302 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
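/* Illustrative sketch: with -msse3 these map straight to MONITOR/MWAIT,
   e.g. an idle wait on a flag:

     __builtin_ia32_monitor (&flag, 0, 0);
     if (!flag)
       __builtin_ia32_mwait (0, 0);

   Both extension/hint arguments are currently zero.  */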
25305 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
25306 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
25307 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
25308 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
25309 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
25310 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
25311 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
25312 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
25313 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
25314 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
25315 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
25316 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
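/* Illustrative sketch: one AES encryption round, exactly as
   wmmintrin.h's _mm_aesenc_si128 expands:

     state = (__m128i) __builtin_ia32_aesenc128 ((__v2di) state,
                                                 (__v2di) round_key);  */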
25319 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
25320 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
25323 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
25324 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
25325 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
25326 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
25327 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
25328 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
25329 IX86_BUILTIN_RDRAND64_STEP);
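/* Illustrative sketch: the *_step builtins store a random value through
   their pointer argument and return the carry flag, so callers retry:

     unsigned int r;
     while (!__builtin_ia32_rdrand32_step (&r))
       ;  */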
25331 /* MMX access to the vec_init patterns. */
25332 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
25333 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
25335 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
25336 V4HI_FTYPE_HI_HI_HI_HI,
25337 IX86_BUILTIN_VEC_INIT_V4HI);
25339 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
25340 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
25341 IX86_BUILTIN_VEC_INIT_V8QI);
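/* For reference, mmintrin.h builds MMX vectors via these builtins, e.g.
   _mm_set_pi32 (__i1, __i0) expands to

     (__m64) __builtin_ia32_vec_init_v2si (__i0, __i1);

   so MMX register use stays under the compiler's control.  */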
25343 /* Access to the vec_extract patterns. */
25344 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
25345 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
25346 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
25347 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
25348 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
25349 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
25350 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
25351 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
25352 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
25353 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
25355 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
25356 "__builtin_ia32_vec_ext_v4hi",
25357 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
25359 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
25360 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
25362 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
25363 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
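/* For reference, xmmintrin.h's _mm_cvtss_f32 is simply

     __builtin_ia32_vec_ext_v4sf ((__v4sf) __A, 0);

   extracting element 0 of the vector as a scalar float.  */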
25365 /* Access to the vec_set patterns. */
25366 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
25367 "__builtin_ia32_vec_set_v2di",
25368 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
25370 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
25371 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
25373 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
25374 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
25376 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
25377 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
25379 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
25380 "__builtin_ia32_vec_set_v4hi",
25381 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
25383 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
25384 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
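/* For reference, emmintrin.h's _mm_insert_epi16 maps onto one of these:

     (__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi) __A, __D, __N);

   where __N selects which 16-bit element to replace.  */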
25386 /* Add the FMA4/XOP multi-arg builtin instructions. */
25387 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
25392 ftype = (enum ix86_builtin_func_type) d->flag;
25393 def_builtin_const (d->mask, d->name, ftype, d->code);
25397 /* Internal method for ix86_init_builtins. */
25400 ix86_init_builtins_va_builtins_abi (void)
25402 tree ms_va_ref, sysv_va_ref;
25403 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
25404 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
25405 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
25406 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
25410 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
25411 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
25412 ms_va_ref = build_reference_type (ms_va_list_type_node);
25414 sysv_va_ref = build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
25417 fnvoid_va_end_ms = build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
25418 fnvoid_va_start_ms =
25419 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
25420 fnvoid_va_end_sysv =
25421 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
25422 fnvoid_va_start_sysv =
25423 build_varargs_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
25425 fnvoid_va_copy_ms =
25426 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node, NULL_TREE);
25428 fnvoid_va_copy_sysv =
25429 build_function_type_list (void_type_node, sysv_va_ref,
25430 sysv_va_ref, NULL_TREE);
25432 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
25433 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
25434 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
25435 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
25436 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
25437 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
25438 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
25439 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
25440 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
25441 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
25442 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
25443 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
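/* Illustrative sketch (hypothetical user code): an ms_abi varargs
   function on x86-64 consumes the builtins registered above:

     int __attribute__ ((ms_abi))
     sum (int n, ...)
     {
       __builtin_ms_va_list ap;
       int i, s = 0;
       __builtin_ms_va_start (ap, n);
       for (i = 0; i < n; i++)
         s += __builtin_va_arg (ap, int);
       __builtin_ms_va_end (ap);
       return s;
     }  */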
25447 ix86_init_builtin_types (void)
25449 tree float128_type_node, float80_type_node;
25451 /* The __float80 type. */
25452 float80_type_node = long_double_type_node;
25453 if (TYPE_MODE (float80_type_node) != XFmode)
25455 /* The __float80 type. */
25456 float80_type_node = make_node (REAL_TYPE);
25458 TYPE_PRECISION (float80_type_node) = 80;
25459 layout_type (float80_type_node);
25461 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
25463 /* The __float128 type. */
25464 float128_type_node = make_node (REAL_TYPE);
25465 TYPE_PRECISION (float128_type_node) = 128;
25466 layout_type (float128_type_node);
25467 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
25469 /* This macro is built by i386-builtin-types.awk. */
25470 DEFINE_BUILTIN_PRIMITIVE_TYPES;
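/* Illustrative sketch: once these types are registered, user code can
   write, independently of the target's long double:

     __float80  e = 1.0L;   XFmode, 80-bit extended precision
     __float128 q = 1.0Q;   TFmode; the Q suffix is a GCC extension  */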
25474 ix86_init_builtins (void)
25478 ix86_init_builtin_types ();
25480 /* TFmode support builtins. */
25481 def_builtin_const (0, "__builtin_infq",
25482 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
25483 def_builtin_const (0, "__builtin_huge_valq",
25484 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
25486 /* We will expand them to a normal call if SSE2 isn't available, since
25487 they are used by libgcc. */
25488 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
25489 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
25490 BUILT_IN_MD, "__fabstf2", NULL_TREE);
25491 TREE_READONLY (t) = 1;
25492 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
25494 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
25495 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
25496 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
25497 TREE_READONLY (t) = 1;
25498 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
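/* Illustrative sketch: the TFmode builtins are directly usable, e.g.

     __float128 m = __builtin_copysignq (__builtin_fabsq (x), y);

   and, per the comment above, fall back to the libgcc routines
   __fabstf2 and __copysigntf3 when SSE2 is unavailable.  */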
25500 ix86_init_mmx_sse_builtins ();
25503 ix86_init_builtins_va_builtins_abi ();
25505 #ifdef SUBTARGET_INIT_BUILTINS
25506 SUBTARGET_INIT_BUILTINS;
25510 /* Return the ix86 builtin for CODE. */
25513 ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
25515 if (code >= IX86_BUILTIN_MAX)
25516 return error_mark_node;
25518 return ix86_builtins[code];
25521 /* Errors in the source file can cause expand_expr to return const0_rtx
25522 where we expect a vector. To avoid crashing, use one of the vector
25523 clear instructions. */
25525 safe_vector_operand (rtx x, enum machine_mode mode)
25527 if (x == const0_rtx)
25528 x = CONST0_RTX (mode);
  return x;
}
25532 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
25535 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
25538 tree arg0 = CALL_EXPR_ARG (exp, 0);
25539 tree arg1 = CALL_EXPR_ARG (exp, 1);
25540 rtx op0 = expand_normal (arg0);
25541 rtx op1 = expand_normal (arg1);
25542 enum machine_mode tmode = insn_data[icode].operand[0].mode;
25543 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
25544 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
25546 if (VECTOR_MODE_P (mode0))
25547 op0 = safe_vector_operand (op0, mode0);
25548 if (VECTOR_MODE_P (mode1))
25549 op1 = safe_vector_operand (op1, mode1);
25551 if (optimize || !target
25552 || GET_MODE (target) != tmode
25553 || !insn_data[icode].operand[0].predicate (target, tmode))
25554 target = gen_reg_rtx (tmode);
25556 if (GET_MODE (op1) == SImode && mode1 == TImode)
25558 rtx x = gen_reg_rtx (V4SImode);
25559 emit_insn (gen_sse2_loadd (x, op1));
25560 op1 = gen_lowpart (TImode, x);
25563 if (!insn_data[icode].operand[1].predicate (op0, mode0))
25564 op0 = copy_to_mode_reg (mode0, op0);
25565 if (!insn_data[icode].operand[2].predicate (op1, mode1))
25566 op1 = copy_to_mode_reg (mode1, op1);
25568 pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
25577 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
25580 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
25581 enum ix86_builtin_func_type m_type,
25582 enum rtx_code sub_code)
25587 bool comparison_p = false;
25589 bool last_arg_constant = false;
25590 int num_memory = 0;
25593 enum machine_mode mode;
25596 enum machine_mode tmode = insn_data[icode].operand[0].mode;
25600 case MULTI_ARG_4_DF2_DI_I:
25601 case MULTI_ARG_4_DF2_DI_I1:
25602 case MULTI_ARG_4_SF2_SI_I:
25603 case MULTI_ARG_4_SF2_SI_I1:
25605 last_arg_constant = true;
25608 case MULTI_ARG_3_SF:
25609 case MULTI_ARG_3_DF:
25610 case MULTI_ARG_3_SF2:
25611 case MULTI_ARG_3_DF2:
25612 case MULTI_ARG_3_DI:
25613 case MULTI_ARG_3_SI:
25614 case MULTI_ARG_3_SI_DI:
25615 case MULTI_ARG_3_HI:
25616 case MULTI_ARG_3_HI_SI:
25617 case MULTI_ARG_3_QI:
25618 case MULTI_ARG_3_DI2:
25619 case MULTI_ARG_3_SI2:
25620 case MULTI_ARG_3_HI2:
25621 case MULTI_ARG_3_QI2:
25625 case MULTI_ARG_2_SF:
25626 case MULTI_ARG_2_DF:
25627 case MULTI_ARG_2_DI:
25628 case MULTI_ARG_2_SI:
25629 case MULTI_ARG_2_HI:
25630 case MULTI_ARG_2_QI:
25634 case MULTI_ARG_2_DI_IMM:
25635 case MULTI_ARG_2_SI_IMM:
25636 case MULTI_ARG_2_HI_IMM:
25637 case MULTI_ARG_2_QI_IMM:
25639 last_arg_constant = true;
25642 case MULTI_ARG_1_SF:
25643 case MULTI_ARG_1_DF:
25644 case MULTI_ARG_1_SF2:
25645 case MULTI_ARG_1_DF2:
25646 case MULTI_ARG_1_DI:
25647 case MULTI_ARG_1_SI:
25648 case MULTI_ARG_1_HI:
25649 case MULTI_ARG_1_QI:
25650 case MULTI_ARG_1_SI_DI:
25651 case MULTI_ARG_1_HI_DI:
25652 case MULTI_ARG_1_HI_SI:
25653 case MULTI_ARG_1_QI_DI:
25654 case MULTI_ARG_1_QI_SI:
25655 case MULTI_ARG_1_QI_HI:
25659 case MULTI_ARG_2_DI_CMP:
25660 case MULTI_ARG_2_SI_CMP:
25661 case MULTI_ARG_2_HI_CMP:
25662 case MULTI_ARG_2_QI_CMP:
25664 comparison_p = true;
25667 case MULTI_ARG_2_SF_TF:
25668 case MULTI_ARG_2_DF_TF:
25669 case MULTI_ARG_2_DI_TF:
25670 case MULTI_ARG_2_SI_TF:
25671 case MULTI_ARG_2_HI_TF:
25672 case MULTI_ARG_2_QI_TF:
25678 gcc_unreachable ();
25681 if (optimize || !target
25682 || GET_MODE (target) != tmode
25683 || !insn_data[icode].operand[0].predicate (target, tmode))
25684 target = gen_reg_rtx (tmode);
25686 gcc_assert (nargs <= 4);
25688 for (i = 0; i < nargs; i++)
25690 tree arg = CALL_EXPR_ARG (exp, i);
25691 rtx op = expand_normal (arg);
25692 int adjust = (comparison_p) ? 1 : 0;
25693 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
25695 if (last_arg_constant && i == nargs - 1)
25697 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
25699 enum insn_code new_icode = icode;
25702 case CODE_FOR_xop_vpermil2v2df3:
25703 case CODE_FOR_xop_vpermil2v4sf3:
25704 case CODE_FOR_xop_vpermil2v4df3:
25705 case CODE_FOR_xop_vpermil2v8sf3:
25706 error ("the last argument must be a 2-bit immediate");
25707 return gen_reg_rtx (tmode);
25708 case CODE_FOR_xop_rotlv2di3:
25709 new_icode = CODE_FOR_rotlv2di3;
25711 case CODE_FOR_xop_rotlv4si3:
25712 new_icode = CODE_FOR_rotlv4si3;
25714 case CODE_FOR_xop_rotlv8hi3:
25715 new_icode = CODE_FOR_rotlv8hi3;
25717 case CODE_FOR_xop_rotlv16qi3:
25718 new_icode = CODE_FOR_rotlv16qi3;
25720 if (CONST_INT_P (op))
25722 int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
25723 op = GEN_INT (INTVAL (op) & mask);
25724 gcc_checking_assert
25725 (insn_data[icode].operand[i + 1].predicate (op, mode));
25729 gcc_checking_assert
25731 && insn_data[new_icode].operand[0].mode == tmode
25732 && insn_data[new_icode].operand[1].mode == tmode
25733 && insn_data[new_icode].operand[2].mode == mode
25734 && insn_data[new_icode].operand[0].predicate
25735 == insn_data[icode].operand[0].predicate
25736 && insn_data[new_icode].operand[1].predicate
25737 == insn_data[icode].operand[1].predicate);
25743 gcc_unreachable ();
25750 if (VECTOR_MODE_P (mode))
25751 op = safe_vector_operand (op, mode);
25753 /* If we aren't optimizing, only allow one memory operand to be generated.  */
25755 if (memory_operand (op, mode))
  num_memory++;
25758 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
if (optimize
25761 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
  || num_memory > 1)
25763 op = force_reg (mode, op);
25767 args[i].mode = mode;
25773 pat = GEN_FCN (icode) (target, args[0].op);
25778 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
25779 GEN_INT ((int)sub_code));
25780 else if (! comparison_p)
25781 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
25784 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
25788 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
25793 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
25797 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
25801 gcc_unreachable ();
25811 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
25812 insns with vec_merge. */
25815 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
25819 tree arg0 = CALL_EXPR_ARG (exp, 0);
25820 rtx op1, op0 = expand_normal (arg0);
25821 enum machine_mode tmode = insn_data[icode].operand[0].mode;
25822 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
25824 if (optimize || !target
25825 || GET_MODE (target) != tmode
25826 || !insn_data[icode].operand[0].predicate (target, tmode))
25827 target = gen_reg_rtx (tmode);
25829 if (VECTOR_MODE_P (mode0))
25830 op0 = safe_vector_operand (op0, mode0);
25832 if ((optimize && !register_operand (op0, mode0))
25833 || !insn_data[icode].operand[1].predicate (op0, mode0))
25834 op0 = copy_to_mode_reg (mode0, op0);
25837 if (!insn_data[icode].operand[2].predicate (op1, mode0))
25838 op1 = copy_to_mode_reg (mode0, op1);
25840 pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
25847 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
25850 ix86_expand_sse_compare (const struct builtin_description *d,
25851 tree exp, rtx target, bool swap)
25854 tree arg0 = CALL_EXPR_ARG (exp, 0);
25855 tree arg1 = CALL_EXPR_ARG (exp, 1);
25856 rtx op0 = expand_normal (arg0);
25857 rtx op1 = expand_normal (arg1);
25859 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
25860 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
25861 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
25862 enum rtx_code comparison = d->comparison;
25864 if (VECTOR_MODE_P (mode0))
25865 op0 = safe_vector_operand (op0, mode0);
25866 if (VECTOR_MODE_P (mode1))
25867 op1 = safe_vector_operand (op1, mode1);
25869 /* Swap operands if we have a comparison that isn't available in
25873 rtx tmp = gen_reg_rtx (mode1);
25874 emit_move_insn (tmp, op1);
25879 if (optimize || !target
25880 || GET_MODE (target) != tmode
25881 || !insn_data[d->icode].operand[0].predicate (target, tmode))
25882 target = gen_reg_rtx (tmode);
25884 if ((optimize && !register_operand (op0, mode0))
25885 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
25886 op0 = copy_to_mode_reg (mode0, op0);
25887 if ((optimize && !register_operand (op1, mode1))
25888 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
25889 op1 = copy_to_mode_reg (mode1, op1);
25891 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
25892 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
25899 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
25902 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
25906 tree arg0 = CALL_EXPR_ARG (exp, 0);
25907 tree arg1 = CALL_EXPR_ARG (exp, 1);
25908 rtx op0 = expand_normal (arg0);
25909 rtx op1 = expand_normal (arg1);
25910 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
25911 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
25912 enum rtx_code comparison = d->comparison;
25914 if (VECTOR_MODE_P (mode0))
25915 op0 = safe_vector_operand (op0, mode0);
25916 if (VECTOR_MODE_P (mode1))
25917 op1 = safe_vector_operand (op1, mode1);
25919 /* Swap operands if we have a comparison that isn't available in
25921 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
25928 target = gen_reg_rtx (SImode);
25929 emit_move_insn (target, const0_rtx);
25930 target = gen_rtx_SUBREG (QImode, target, 0);
25932 if ((optimize && !register_operand (op0, mode0))
25933 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
25934 op0 = copy_to_mode_reg (mode0, op0);
25935 if ((optimize && !register_operand (op1, mode1))
25936 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
25937 op1 = copy_to_mode_reg (mode1, op1);
25939 pat = GEN_FCN (d->icode) (op0, op1);
25943 emit_insn (gen_rtx_SET (VOIDmode,
25944 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
25945 gen_rtx_fmt_ee (comparison, QImode, SET_DEST (pat), const0_rtx)));
25949 return SUBREG_REG (target);
25952 /* Subroutine of ix86_expand_args_builtin to take care of round insns. */
25955 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
25959 tree arg0 = CALL_EXPR_ARG (exp, 0);
25960 rtx op1, op0 = expand_normal (arg0);
25961 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
25962 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
25964 if (optimize || target == 0
25965 || GET_MODE (target) != tmode
25966 || !insn_data[d->icode].operand[0].predicate (target, tmode))
25967 target = gen_reg_rtx (tmode);
25969 if (VECTOR_MODE_P (mode0))
25970 op0 = safe_vector_operand (op0, mode0);
25972 if ((optimize && !register_operand (op0, mode0))
25973 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
25974 op0 = copy_to_mode_reg (mode0, op0);
25976 op1 = GEN_INT (d->comparison);
25978 pat = GEN_FCN (d->icode) (target, op0, op1);
25985 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
25988 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
25992 tree arg0 = CALL_EXPR_ARG (exp, 0);
25993 tree arg1 = CALL_EXPR_ARG (exp, 1);
25994 rtx op0 = expand_normal (arg0);
25995 rtx op1 = expand_normal (arg1);
25996 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
25997 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
25998 enum rtx_code comparison = d->comparison;
26000 if (VECTOR_MODE_P (mode0))
26001 op0 = safe_vector_operand (op0, mode0);
26002 if (VECTOR_MODE_P (mode1))
26003 op1 = safe_vector_operand (op1, mode1);
26005 target = gen_reg_rtx (SImode);
26006 emit_move_insn (target, const0_rtx);
26007 target = gen_rtx_SUBREG (QImode, target, 0);
26009 if ((optimize && !register_operand (op0, mode0))
26010 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
26011 op0 = copy_to_mode_reg (mode0, op0);
26012 if ((optimize && !register_operand (op1, mode1))
26013 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
26014 op1 = copy_to_mode_reg (mode1, op1);
26016 pat = GEN_FCN (d->icode) (op0, op1);
26020 emit_insn (gen_rtx_SET (VOIDmode,
26021 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
26022 gen_rtx_fmt_ee (comparison, QImode, SET_DEST (pat), const0_rtx)));
26026 return SUBREG_REG (target);
26029 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
26032 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
26033 tree exp, rtx target)
26036 tree arg0 = CALL_EXPR_ARG (exp, 0);
26037 tree arg1 = CALL_EXPR_ARG (exp, 1);
26038 tree arg2 = CALL_EXPR_ARG (exp, 2);
26039 tree arg3 = CALL_EXPR_ARG (exp, 3);
26040 tree arg4 = CALL_EXPR_ARG (exp, 4);
26041 rtx scratch0, scratch1;
26042 rtx op0 = expand_normal (arg0);
26043 rtx op1 = expand_normal (arg1);
26044 rtx op2 = expand_normal (arg2);
26045 rtx op3 = expand_normal (arg3);
26046 rtx op4 = expand_normal (arg4);
26047 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
26049 tmode0 = insn_data[d->icode].operand[0].mode;
26050 tmode1 = insn_data[d->icode].operand[1].mode;
26051 modev2 = insn_data[d->icode].operand[2].mode;
26052 modei3 = insn_data[d->icode].operand[3].mode;
26053 modev4 = insn_data[d->icode].operand[4].mode;
26054 modei5 = insn_data[d->icode].operand[5].mode;
26055 modeimm = insn_data[d->icode].operand[6].mode;
26057 if (VECTOR_MODE_P (modev2))
26058 op0 = safe_vector_operand (op0, modev2);
26059 if (VECTOR_MODE_P (modev4))
26060 op2 = safe_vector_operand (op2, modev4);
26062 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
26063 op0 = copy_to_mode_reg (modev2, op0);
26064 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
26065 op1 = copy_to_mode_reg (modei3, op1);
26066 if ((optimize && !register_operand (op2, modev4))
26067 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
26068 op2 = copy_to_mode_reg (modev4, op2);
26069 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
26070 op3 = copy_to_mode_reg (modei5, op3);
26072 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
26074 error ("the fifth argument must be an 8-bit immediate");
26078 if (d->code == IX86_BUILTIN_PCMPESTRI128)
26080 if (optimize || !target
26081 || GET_MODE (target) != tmode0
26082 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
26083 target = gen_reg_rtx (tmode0);
26085 scratch1 = gen_reg_rtx (tmode1);
26087 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
26089 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
26091 if (optimize || !target
26092 || GET_MODE (target) != tmode1
26093 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
26094 target = gen_reg_rtx (tmode1);
26096 scratch0 = gen_reg_rtx (tmode0);
26098 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
26102 gcc_assert (d->flag);
26104 scratch0 = gen_reg_rtx (tmode0);
26105 scratch1 = gen_reg_rtx (tmode1);
26107 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
26117 target = gen_reg_rtx (SImode);
26118 emit_move_insn (target, const0_rtx);
26119 target = gen_rtx_SUBREG (QImode, target, 0);
26122 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
26123 gen_rtx_fmt_ee (EQ, QImode,
26124 gen_rtx_REG ((enum machine_mode) d->flag, FLAGS_REG), const0_rtx)));
26127 return SUBREG_REG (target);
26134 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
26137 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
26138 tree exp, rtx target)
26141 tree arg0 = CALL_EXPR_ARG (exp, 0);
26142 tree arg1 = CALL_EXPR_ARG (exp, 1);
26143 tree arg2 = CALL_EXPR_ARG (exp, 2);
26144 rtx scratch0, scratch1;
26145 rtx op0 = expand_normal (arg0);
26146 rtx op1 = expand_normal (arg1);
26147 rtx op2 = expand_normal (arg2);
26148 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
26150 tmode0 = insn_data[d->icode].operand[0].mode;
26151 tmode1 = insn_data[d->icode].operand[1].mode;
26152 modev2 = insn_data[d->icode].operand[2].mode;
26153 modev3 = insn_data[d->icode].operand[3].mode;
26154 modeimm = insn_data[d->icode].operand[4].mode;
26156 if (VECTOR_MODE_P (modev2))
26157 op0 = safe_vector_operand (op0, modev2);
26158 if (VECTOR_MODE_P (modev3))
26159 op1 = safe_vector_operand (op1, modev3);
26161 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
26162 op0 = copy_to_mode_reg (modev2, op0);
26163 if ((optimize && !register_operand (op1, modev3))
26164 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
26165 op1 = copy_to_mode_reg (modev3, op1);
26167 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
26169 error ("the third argument must be an 8-bit immediate");
26173 if (d->code == IX86_BUILTIN_PCMPISTRI128)
26175 if (optimize || !target
26176 || GET_MODE (target) != tmode0
26177 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
26178 target = gen_reg_rtx (tmode0);
26180 scratch1 = gen_reg_rtx (tmode1);
26182 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
26184 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
26186 if (optimize || !target
26187 || GET_MODE (target) != tmode1
26188 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
26189 target = gen_reg_rtx (tmode1);
26191 scratch0 = gen_reg_rtx (tmode0);
26193 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
26197 gcc_assert (d->flag);
26199 scratch0 = gen_reg_rtx (tmode0);
26200 scratch1 = gen_reg_rtx (tmode1);
26202 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
26212 target = gen_reg_rtx (SImode);
26213 emit_move_insn (target, const0_rtx);
26214 target = gen_rtx_SUBREG (QImode, target, 0);
26217 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
26218 gen_rtx_fmt_ee (EQ, QImode,
26219 gen_rtx_REG ((enum machine_mode) d->flag, FLAGS_REG), const0_rtx)));
26222 return SUBREG_REG (target);
26228 /* Subroutine of ix86_expand_builtin to take care of insns with a
26229 variable number of operands. */
26232 ix86_expand_args_builtin (const struct builtin_description *d,
26233 tree exp, rtx target)
26235 rtx pat, real_target;
26236 unsigned int i, nargs;
26237 unsigned int nargs_constant = 0;
26238 int num_memory = 0;
26242 enum machine_mode mode;
26244 bool last_arg_count = false;
26245 enum insn_code icode = d->icode;
26246 const struct insn_data_d *insn_p = &insn_data[icode];
26247 enum machine_mode tmode = insn_p->operand[0].mode;
26248 enum machine_mode rmode = VOIDmode;
26250 enum rtx_code comparison = d->comparison;
26252 switch ((enum ix86_builtin_func_type) d->flag)
26254 case V2DF_FTYPE_V2DF_ROUND:
26255 case V4DF_FTYPE_V4DF_ROUND:
26256 case V4SF_FTYPE_V4SF_ROUND:
26257 case V8SF_FTYPE_V8SF_ROUND:
26258 return ix86_expand_sse_round (d, exp, target);
26259 case INT_FTYPE_V8SF_V8SF_PTEST:
26260 case INT_FTYPE_V4DI_V4DI_PTEST:
26261 case INT_FTYPE_V4DF_V4DF_PTEST:
26262 case INT_FTYPE_V4SF_V4SF_PTEST:
26263 case INT_FTYPE_V2DI_V2DI_PTEST:
26264 case INT_FTYPE_V2DF_V2DF_PTEST:
26265 return ix86_expand_sse_ptest (d, exp, target);
26266 case FLOAT128_FTYPE_FLOAT128:
26267 case FLOAT_FTYPE_FLOAT:
26268 case INT_FTYPE_INT:
26269 case UINT64_FTYPE_INT:
26270 case UINT16_FTYPE_UINT16:
26271 case INT64_FTYPE_INT64:
26272 case INT64_FTYPE_V4SF:
26273 case INT64_FTYPE_V2DF:
26274 case INT_FTYPE_V16QI:
26275 case INT_FTYPE_V8QI:
26276 case INT_FTYPE_V8SF:
26277 case INT_FTYPE_V4DF:
26278 case INT_FTYPE_V4SF:
26279 case INT_FTYPE_V2DF:
26280 case V16QI_FTYPE_V16QI:
26281 case V8SI_FTYPE_V8SF:
26282 case V8SI_FTYPE_V4SI:
26283 case V8HI_FTYPE_V8HI:
26284 case V8HI_FTYPE_V16QI:
26285 case V8QI_FTYPE_V8QI:
26286 case V8SF_FTYPE_V8SF:
26287 case V8SF_FTYPE_V8SI:
26288 case V8SF_FTYPE_V4SF:
26289 case V8SF_FTYPE_V8HI:
26290 case V4SI_FTYPE_V4SI:
26291 case V4SI_FTYPE_V16QI:
26292 case V4SI_FTYPE_V4SF:
26293 case V4SI_FTYPE_V8SI:
26294 case V4SI_FTYPE_V8HI:
26295 case V4SI_FTYPE_V4DF:
26296 case V4SI_FTYPE_V2DF:
26297 case V4HI_FTYPE_V4HI:
26298 case V4DF_FTYPE_V4DF:
26299 case V4DF_FTYPE_V4SI:
26300 case V4DF_FTYPE_V4SF:
26301 case V4DF_FTYPE_V2DF:
26302 case V4SF_FTYPE_V4SF:
26303 case V4SF_FTYPE_V4SI:
26304 case V4SF_FTYPE_V8SF:
26305 case V4SF_FTYPE_V4DF:
26306 case V4SF_FTYPE_V8HI:
26307 case V4SF_FTYPE_V2DF:
26308 case V2DI_FTYPE_V2DI:
26309 case V2DI_FTYPE_V16QI:
26310 case V2DI_FTYPE_V8HI:
26311 case V2DI_FTYPE_V4SI:
26312 case V2DF_FTYPE_V2DF:
26313 case V2DF_FTYPE_V4SI:
26314 case V2DF_FTYPE_V4DF:
26315 case V2DF_FTYPE_V4SF:
26316 case V2DF_FTYPE_V2SI:
26317 case V2SI_FTYPE_V2SI:
26318 case V2SI_FTYPE_V4SF:
26319 case V2SI_FTYPE_V2SF:
26320 case V2SI_FTYPE_V2DF:
26321 case V2SF_FTYPE_V2SF:
26322 case V2SF_FTYPE_V2SI:
26325 case V4SF_FTYPE_V4SF_VEC_MERGE:
26326 case V2DF_FTYPE_V2DF_VEC_MERGE:
26327 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
26328 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
26329 case V16QI_FTYPE_V16QI_V16QI:
26330 case V16QI_FTYPE_V8HI_V8HI:
26331 case V8QI_FTYPE_V8QI_V8QI:
26332 case V8QI_FTYPE_V4HI_V4HI:
26333 case V8HI_FTYPE_V8HI_V8HI:
26334 case V8HI_FTYPE_V16QI_V16QI:
26335 case V8HI_FTYPE_V4SI_V4SI:
26336 case V8SF_FTYPE_V8SF_V8SF:
26337 case V8SF_FTYPE_V8SF_V8SI:
26338 case V4SI_FTYPE_V4SI_V4SI:
26339 case V4SI_FTYPE_V8HI_V8HI:
26340 case V4SI_FTYPE_V4SF_V4SF:
26341 case V4SI_FTYPE_V2DF_V2DF:
26342 case V4HI_FTYPE_V4HI_V4HI:
26343 case V4HI_FTYPE_V8QI_V8QI:
26344 case V4HI_FTYPE_V2SI_V2SI:
26345 case V4DF_FTYPE_V4DF_V4DF:
26346 case V4DF_FTYPE_V4DF_V4DI:
26347 case V4SF_FTYPE_V4SF_V4SF:
26348 case V4SF_FTYPE_V4SF_V4SI:
26349 case V4SF_FTYPE_V4SF_V2SI:
26350 case V4SF_FTYPE_V4SF_V2DF:
26351 case V4SF_FTYPE_V4SF_DI:
26352 case V4SF_FTYPE_V4SF_SI:
26353 case V2DI_FTYPE_V2DI_V2DI:
26354 case V2DI_FTYPE_V16QI_V16QI:
26355 case V2DI_FTYPE_V4SI_V4SI:
26356 case V2DI_FTYPE_V2DI_V16QI:
26357 case V2DI_FTYPE_V2DF_V2DF:
26358 case V2SI_FTYPE_V2SI_V2SI:
26359 case V2SI_FTYPE_V4HI_V4HI:
26360 case V2SI_FTYPE_V2SF_V2SF:
26361 case V2DF_FTYPE_V2DF_V2DF:
26362 case V2DF_FTYPE_V2DF_V4SF:
26363 case V2DF_FTYPE_V2DF_V2DI:
26364 case V2DF_FTYPE_V2DF_DI:
26365 case V2DF_FTYPE_V2DF_SI:
26366 case V2SF_FTYPE_V2SF_V2SF:
26367 case V1DI_FTYPE_V1DI_V1DI:
26368 case V1DI_FTYPE_V8QI_V8QI:
26369 case V1DI_FTYPE_V2SI_V2SI:
26370 if (comparison == UNKNOWN)
26371 return ix86_expand_binop_builtin (icode, exp, target);
26374 case V4SF_FTYPE_V4SF_V4SF_SWAP:
26375 case V2DF_FTYPE_V2DF_V2DF_SWAP:
26376 gcc_assert (comparison != UNKNOWN);
26380 case V8HI_FTYPE_V8HI_V8HI_COUNT:
26381 case V8HI_FTYPE_V8HI_SI_COUNT:
26382 case V4SI_FTYPE_V4SI_V4SI_COUNT:
26383 case V4SI_FTYPE_V4SI_SI_COUNT:
26384 case V4HI_FTYPE_V4HI_V4HI_COUNT:
26385 case V4HI_FTYPE_V4HI_SI_COUNT:
26386 case V2DI_FTYPE_V2DI_V2DI_COUNT:
26387 case V2DI_FTYPE_V2DI_SI_COUNT:
26388 case V2SI_FTYPE_V2SI_V2SI_COUNT:
26389 case V2SI_FTYPE_V2SI_SI_COUNT:
26390 case V1DI_FTYPE_V1DI_V1DI_COUNT:
26391 case V1DI_FTYPE_V1DI_SI_COUNT:
26393 last_arg_count = true;
26395 case UINT64_FTYPE_UINT64_UINT64:
26396 case UINT_FTYPE_UINT_UINT:
26397 case UINT_FTYPE_UINT_USHORT:
26398 case UINT_FTYPE_UINT_UCHAR:
26399 case UINT16_FTYPE_UINT16_INT:
26400 case UINT8_FTYPE_UINT8_INT:
26403 case V2DI_FTYPE_V2DI_INT_CONVERT:
26406 nargs_constant = 1;
26408 case V8HI_FTYPE_V8HI_INT:
26409 case V8HI_FTYPE_V8SF_INT:
26410 case V8HI_FTYPE_V4SF_INT:
26411 case V8SF_FTYPE_V8SF_INT:
26412 case V4SI_FTYPE_V4SI_INT:
26413 case V4SI_FTYPE_V8SI_INT:
26414 case V4HI_FTYPE_V4HI_INT:
26415 case V4DF_FTYPE_V4DF_INT:
26416 case V4SF_FTYPE_V4SF_INT:
26417 case V4SF_FTYPE_V8SF_INT:
26418 case V2DI_FTYPE_V2DI_INT:
26419 case V2DF_FTYPE_V2DF_INT:
26420 case V2DF_FTYPE_V4DF_INT:
26422 nargs_constant = 1;
26424 case V16QI_FTYPE_V16QI_V16QI_V16QI:
26425 case V8SF_FTYPE_V8SF_V8SF_V8SF:
26426 case V4DF_FTYPE_V4DF_V4DF_V4DF:
26427 case V4SF_FTYPE_V4SF_V4SF_V4SF:
26428 case V2DF_FTYPE_V2DF_V2DF_V2DF:
26431 case V16QI_FTYPE_V16QI_V16QI_INT:
26432 case V8HI_FTYPE_V8HI_V8HI_INT:
26433 case V8SI_FTYPE_V8SI_V8SI_INT:
26434 case V8SI_FTYPE_V8SI_V4SI_INT:
26435 case V8SF_FTYPE_V8SF_V8SF_INT:
26436 case V8SF_FTYPE_V8SF_V4SF_INT:
26437 case V4SI_FTYPE_V4SI_V4SI_INT:
26438 case V4DF_FTYPE_V4DF_V4DF_INT:
26439 case V4DF_FTYPE_V4DF_V2DF_INT:
26440 case V4SF_FTYPE_V4SF_V4SF_INT:
26441 case V2DI_FTYPE_V2DI_V2DI_INT:
26442 case V2DF_FTYPE_V2DF_V2DF_INT:
26444 nargs_constant = 1;
26446 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
26449 nargs_constant = 1;
26451 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
26454 nargs_constant = 1;
26456 case V2DI_FTYPE_V2DI_UINT_UINT:
26458 nargs_constant = 2;
26460 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
26461 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
26462 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
26463 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
26465 nargs_constant = 1;
26467 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
26469 nargs_constant = 2;
26472 gcc_unreachable ();
26475 gcc_assert (nargs <= ARRAY_SIZE (args));
26477 if (comparison != UNKNOWN)
26479 gcc_assert (nargs == 2);
26480 return ix86_expand_sse_compare (d, exp, target, swap);
26483 if (rmode == VOIDmode || rmode == tmode)
26487 || GET_MODE (target) != tmode
26488 || !insn_p->operand[0].predicate (target, tmode))
26489 target = gen_reg_rtx (tmode);
26490 real_target = target;
26494 target = gen_reg_rtx (rmode);
26495 real_target = simplify_gen_subreg (tmode, target, rmode, 0);
26498 for (i = 0; i < nargs; i++)
26500 tree arg = CALL_EXPR_ARG (exp, i);
26501 rtx op = expand_normal (arg);
26502 enum machine_mode mode = insn_p->operand[i + 1].mode;
26503 bool match = insn_p->operand[i + 1].predicate (op, mode);
26505 if (last_arg_count && (i + 1) == nargs)
26507 /* SIMD shift insns take either an 8-bit immediate or a
26508 register as the count. But builtin functions take int as
26509 the count. If the count doesn't match, we put it in a register. */
26512 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
26513 if (!insn_p->operand[i + 1].predicate (op, mode))
26514 op = copy_to_reg (op);
26517 else if ((nargs - i) <= nargs_constant)
26522 case CODE_FOR_sse4_1_roundpd:
26523 case CODE_FOR_sse4_1_roundps:
26524 case CODE_FOR_sse4_1_roundsd:
26525 case CODE_FOR_sse4_1_roundss:
26526 case CODE_FOR_sse4_1_blendps:
26527 case CODE_FOR_avx_blendpd256:
26528 case CODE_FOR_avx_vpermilv4df:
26529 case CODE_FOR_avx_roundpd256:
26530 case CODE_FOR_avx_roundps256:
26531 error ("the last argument must be a 4-bit immediate");
26534 case CODE_FOR_sse4_1_blendpd:
26535 case CODE_FOR_avx_vpermilv2df:
26536 case CODE_FOR_xop_vpermil2v2df3:
26537 case CODE_FOR_xop_vpermil2v4sf3:
26538 case CODE_FOR_xop_vpermil2v4df3:
26539 case CODE_FOR_xop_vpermil2v8sf3:
26540 error ("the last argument must be a 2-bit immediate");
26543 case CODE_FOR_avx_vextractf128v4df:
26544 case CODE_FOR_avx_vextractf128v8sf:
26545 case CODE_FOR_avx_vextractf128v8si:
26546 case CODE_FOR_avx_vinsertf128v4df:
26547 case CODE_FOR_avx_vinsertf128v8sf:
26548 case CODE_FOR_avx_vinsertf128v8si:
26549 error ("the last argument must be a 1-bit immediate");
26552 case CODE_FOR_avx_vmcmpv2df3:
26553 case CODE_FOR_avx_vmcmpv4sf3:
26554 case CODE_FOR_avx_cmpv2df3:
26555 case CODE_FOR_avx_cmpv4sf3:
26556 case CODE_FOR_avx_cmpv4df3:
26557 case CODE_FOR_avx_cmpv8sf3:
26558 error ("the last argument must be a 5-bit immediate");
26562 switch (nargs_constant)
26565 if ((nargs - i) == nargs_constant)
26567 error ("the next to last argument must be an 8-bit immediate");
26571 error ("the last argument must be an 8-bit immediate");
26574 gcc_unreachable ();
26581 if (VECTOR_MODE_P (mode))
26582 op = safe_vector_operand (op, mode);
26584 /* If we aren't optimizing, only allow one memory operand to be generated.  */
26586 if (memory_operand (op, mode))
  num_memory++;
26589 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
  {
26591 if (optimize || !match || num_memory > 1)
26592 op = copy_to_mode_reg (mode, op);
  }
else
  {
26596 op = copy_to_reg (op);
26597 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
  }
26602 args[i].mode = mode;
26608 pat = GEN_FCN (icode) (real_target, args[0].op);
26611 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
26614 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
26618 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
26619 args[2].op, args[3].op);
26622 gcc_unreachable ();
26632 /* Subroutine of ix86_expand_builtin to take care of special insns
26633 with a variable number of operands. */
26636 ix86_expand_special_args_builtin (const struct builtin_description *d,
26637 tree exp, rtx target)
26641 unsigned int i, nargs, arg_adjust, memory;
26645 enum machine_mode mode;
26647 enum insn_code icode = d->icode;
26648 bool last_arg_constant = false;
26649 const struct insn_data_d *insn_p = &insn_data[icode];
26650 enum machine_mode tmode = insn_p->operand[0].mode;
26651 enum { load, store } klass;
26653 switch ((enum ix86_builtin_func_type) d->flag)
26655 case VOID_FTYPE_VOID:
26656 if (icode == CODE_FOR_avx_vzeroupper)
26657 target = GEN_INT (vzeroupper_intrinsic);
26658 emit_insn (GEN_FCN (icode) (target));
26660 case VOID_FTYPE_UINT64:
26661 case VOID_FTYPE_UNSIGNED:
26667 case UINT64_FTYPE_VOID:
26668 case UNSIGNED_FTYPE_VOID:
26673 case UINT64_FTYPE_PUNSIGNED:
26674 case V2DI_FTYPE_PV2DI:
26675 case V32QI_FTYPE_PCCHAR:
26676 case V16QI_FTYPE_PCCHAR:
26677 case V8SF_FTYPE_PCV4SF:
26678 case V8SF_FTYPE_PCFLOAT:
26679 case V4SF_FTYPE_PCFLOAT:
26680 case V4DF_FTYPE_PCV2DF:
26681 case V4DF_FTYPE_PCDOUBLE:
26682 case V2DF_FTYPE_PCDOUBLE:
26683 case VOID_FTYPE_PVOID:
26688 case VOID_FTYPE_PV2SF_V4SF:
26689 case VOID_FTYPE_PV4DI_V4DI:
26690 case VOID_FTYPE_PV2DI_V2DI:
26691 case VOID_FTYPE_PCHAR_V32QI:
26692 case VOID_FTYPE_PCHAR_V16QI:
26693 case VOID_FTYPE_PFLOAT_V8SF:
26694 case VOID_FTYPE_PFLOAT_V4SF:
26695 case VOID_FTYPE_PDOUBLE_V4DF:
26696 case VOID_FTYPE_PDOUBLE_V2DF:
26697 case VOID_FTYPE_PULONGLONG_ULONGLONG:
26698 case VOID_FTYPE_PINT_INT:
26701 /* Reserve memory operand for target. */
26702 memory = ARRAY_SIZE (args);
26704 case V4SF_FTYPE_V4SF_PCV2SF:
26705 case V2DF_FTYPE_V2DF_PCDOUBLE:
26710 case V8SF_FTYPE_PCV8SF_V8SI:
26711 case V4DF_FTYPE_PCV4DF_V4DI:
26712 case V4SF_FTYPE_PCV4SF_V4SI:
26713 case V2DF_FTYPE_PCV2DF_V2DI:
26718 case VOID_FTYPE_PV8SF_V8SI_V8SF:
26719 case VOID_FTYPE_PV4DF_V4DI_V4DF:
26720 case VOID_FTYPE_PV4SF_V4SI_V4SF:
26721 case VOID_FTYPE_PV2DF_V2DI_V2DF:
26724 /* Reserve memory operand for target. */
26725 memory = ARRAY_SIZE (args);
26727 case VOID_FTYPE_UINT_UINT_UINT:
26728 case VOID_FTYPE_UINT64_UINT_UINT:
26729 case UCHAR_FTYPE_UINT_UINT_UINT:
26730 case UCHAR_FTYPE_UINT64_UINT_UINT:
26733 memory = ARRAY_SIZE (args);
26734 last_arg_constant = true;
26737 gcc_unreachable ();
26740 gcc_assert (nargs <= ARRAY_SIZE (args));
26742 if (klass == store)
26744 arg = CALL_EXPR_ARG (exp, 0);
26745 op = expand_normal (arg);
26746 gcc_assert (target == 0);
26749 if (GET_MODE (op) != Pmode)
26750 op = convert_to_mode (Pmode, op, 1);
26751 target = gen_rtx_MEM (tmode, force_reg (Pmode, op));
26754 target = force_reg (tmode, op);
26762 || GET_MODE (target) != tmode
26763 || !insn_p->operand[0].predicate (target, tmode))
26764 target = gen_reg_rtx (tmode);
26767 for (i = 0; i < nargs; i++)
26769 enum machine_mode mode = insn_p->operand[i + 1].mode;
26772 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
26773 op = expand_normal (arg);
26774 match = insn_p->operand[i + 1].predicate (op, mode);
26776 if (last_arg_constant && (i + 1) == nargs)
26780 if (icode == CODE_FOR_lwp_lwpvalsi3
26781 || icode == CODE_FOR_lwp_lwpinssi3
26782 || icode == CODE_FOR_lwp_lwpvaldi3
26783 || icode == CODE_FOR_lwp_lwpinsdi3)
26784 error ("the last argument must be a 32-bit immediate");
26786 error ("the last argument must be an 8-bit immediate");
26794 /* This must be the memory operand. */
26795 if (GET_MODE (op) != Pmode)
26796 op = convert_to_mode (Pmode, op, 1);
26797 op = gen_rtx_MEM (mode, force_reg (Pmode, op));
26798 gcc_assert (GET_MODE (op) == mode
26799 || GET_MODE (op) == VOIDmode);
26803 /* This must be a register. */
26804 if (VECTOR_MODE_P (mode))
26805 op = safe_vector_operand (op, mode);
26807 gcc_assert (GET_MODE (op) == mode
26808 || GET_MODE (op) == VOIDmode);
26809 op = copy_to_mode_reg (mode, op);
26814 args[i].mode = mode;
26820 pat = GEN_FCN (icode) (target);
26823 pat = GEN_FCN (icode) (target, args[0].op);
26826 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
26829 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
26832 gcc_unreachable ();
26838 return klass == store ? 0 : target;
26841 /* Return the integer constant in ARG. Constrain it to be in the range
26842 of the subparts of VEC_TYPE; issue an error if not. */
26845 get_element_number (tree vec_type, tree arg)
26847 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
26849 if (!host_integerp (arg, 1)
26850 || (elt = tree_low_cst (arg, 1), elt > max))
26852 error ("selector must be an integer constant in the range 0..%wi", max);
26859 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
26860 ix86_expand_vector_init. We DO have language-level syntax for this, in
26861 the form of (type){ init-list }. Except that since we can't place emms
26862 instructions from inside the compiler, we can't allow the use of MMX
26863 registers unless the user explicitly asks for it. So we do *not* define
26864 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
26865 we have builtins invoked by mmintrin.h that give us license to emit
26866 these sorts of instructions. */
26869 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
26871 enum machine_mode tmode = TYPE_MODE (type);
26872 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
26873 int i, n_elt = GET_MODE_NUNITS (tmode);
26874 rtvec v = rtvec_alloc (n_elt);
26876 gcc_assert (VECTOR_MODE_P (tmode));
26877 gcc_assert (call_expr_nargs (exp) == n_elt);
26879 for (i = 0; i < n_elt; ++i)
26881 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
26882 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
26885 if (!target || !register_operand (target, tmode))
26886 target = gen_reg_rtx (tmode);
26888 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
26892 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
26893 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
26894 had a language-level syntax for referencing vector elements. */
26897 ix86_expand_vec_ext_builtin (tree exp, rtx target)
26899 enum machine_mode tmode, mode0;
26904 arg0 = CALL_EXPR_ARG (exp, 0);
26905 arg1 = CALL_EXPR_ARG (exp, 1);
26907 op0 = expand_normal (arg0);
26908 elt = get_element_number (TREE_TYPE (arg0), arg1);
26910 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
26911 mode0 = TYPE_MODE (TREE_TYPE (arg0));
26912 gcc_assert (VECTOR_MODE_P (mode0));
26914 op0 = force_reg (mode0, op0);
26916 if (optimize || !target || !register_operand (target, tmode))
26917 target = gen_reg_rtx (tmode);
26919 ix86_expand_vector_extract (true, target, op0, elt);
26924 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
26925 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
26926 a language-level syntax for referencing vector elements. */
26929 ix86_expand_vec_set_builtin (tree exp)
26931 enum machine_mode tmode, mode1;
26932 tree arg0, arg1, arg2;
26934 rtx op0, op1, target;
26936 arg0 = CALL_EXPR_ARG (exp, 0);
26937 arg1 = CALL_EXPR_ARG (exp, 1);
26938 arg2 = CALL_EXPR_ARG (exp, 2);
26940 tmode = TYPE_MODE (TREE_TYPE (arg0));
26941 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
26942 gcc_assert (VECTOR_MODE_P (tmode));
26944 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
26945 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
26946 elt = get_element_number (TREE_TYPE (arg0), arg2);
26948 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
26949 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
26951 op0 = force_reg (tmode, op0);
26952 op1 = force_reg (mode1, op1);
26954 /* OP0 is the source of these builtin functions and shouldn't be
26955 modified. Create a copy, use it, and return it as the target. */
26956 target = gen_reg_rtx (tmode);
26957 emit_move_insn (target, op0);
26958 ix86_expand_vector_set (true, target, op1, elt);
  return target;
}
26963 /* Expand an expression EXP that calls a built-in function,
26964 with result going to TARGET if that's convenient
26965 (and in mode MODE if that's convenient).
26966 SUBTARGET may be used as the target for computing one of EXP's operands.
26967 IGNORE is nonzero if the value is to be ignored. */
26970 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
26971 enum machine_mode mode ATTRIBUTE_UNUSED,
26972 int ignore ATTRIBUTE_UNUSED)
26974 const struct builtin_description *d;
26976 enum insn_code icode;
26977 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
26978 tree arg0, arg1, arg2;
26979 rtx op0, op1, op2, pat;
26980 enum machine_mode mode0, mode1, mode2;
26981 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
26983 /* Determine whether the builtin function is available under the current ISA.
26984 Originally the builtin was not created if it wasn't applicable to the
26985 current ISA based on the command line switches. With function specific
26986 options, we need to check in the context of the function making the call
26987 whether it is supported. */
26988 if (ix86_builtins_isa[fcode].isa
26989 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
26991 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
26992 NULL, (enum fpmath_unit) 0, false);
26995 error ("%qE needs unknown isa option", fndecl);
26998 gcc_assert (opts != NULL);
26999 error ("%qE needs isa option %s", fndecl, opts);
27007 case IX86_BUILTIN_MASKMOVQ:
27008 case IX86_BUILTIN_MASKMOVDQU:
27009 icode = (fcode == IX86_BUILTIN_MASKMOVQ
27010 ? CODE_FOR_mmx_maskmovq
27011 : CODE_FOR_sse2_maskmovdqu);
27012 /* Note the arg order is different from the operand order. */
27013 arg1 = CALL_EXPR_ARG (exp, 0);
27014 arg2 = CALL_EXPR_ARG (exp, 1);
27015 arg0 = CALL_EXPR_ARG (exp, 2);
27016 op0 = expand_normal (arg0);
27017 op1 = expand_normal (arg1);
27018 op2 = expand_normal (arg2);
27019 mode0 = insn_data[icode].operand[0].mode;
27020 mode1 = insn_data[icode].operand[1].mode;
27021 mode2 = insn_data[icode].operand[2].mode;
27023 if (GET_MODE (op0) != Pmode)
27024 op0 = convert_to_mode (Pmode, op0, 1);
27025 op0 = gen_rtx_MEM (mode1, force_reg (Pmode, op0));
27027 if (!insn_data[icode].operand[0].predicate (op0, mode0))
27028 op0 = copy_to_mode_reg (mode0, op0);
27029 if (!insn_data[icode].operand[1].predicate (op1, mode1))
27030 op1 = copy_to_mode_reg (mode1, op1);
27031 if (!insn_data[icode].operand[2].predicate (op2, mode2))
27032 op2 = copy_to_mode_reg (mode2, op2);
27033 pat = GEN_FCN (icode) (op0, op1, op2);
27039 case IX86_BUILTIN_LDMXCSR:
27040 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
27041 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
27042 emit_move_insn (target, op0);
27043 emit_insn (gen_sse_ldmxcsr (target));
27046 case IX86_BUILTIN_STMXCSR:
27047 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
27048 emit_insn (gen_sse_stmxcsr (target));
27049 return copy_to_mode_reg (SImode, target);
27051 case IX86_BUILTIN_CLFLUSH:
27052 arg0 = CALL_EXPR_ARG (exp, 0);
27053 op0 = expand_normal (arg0);
27054 icode = CODE_FOR_sse2_clflush;
27055 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
27057 if (GET_MODE (op0) != Pmode)
27058 op0 = convert_to_mode (Pmode, op0, 1);
27059 op0 = force_reg (Pmode, op0);
27062 emit_insn (gen_sse2_clflush (op0));
27065 case IX86_BUILTIN_MONITOR:
27066 arg0 = CALL_EXPR_ARG (exp, 0);
27067 arg1 = CALL_EXPR_ARG (exp, 1);
27068 arg2 = CALL_EXPR_ARG (exp, 2);
27069 op0 = expand_normal (arg0);
27070 op1 = expand_normal (arg1);
27071 op2 = expand_normal (arg2);
27074 if (GET_MODE (op0) != Pmode)
27075 op0 = convert_to_mode (Pmode, op0, 1);
27076 op0 = force_reg (Pmode, op0);
27079 op1 = copy_to_mode_reg (SImode, op1);
27081 op2 = copy_to_mode_reg (SImode, op2);
27082 emit_insn (ix86_gen_monitor (op0, op1, op2));
27085 case IX86_BUILTIN_MWAIT:
27086 arg0 = CALL_EXPR_ARG (exp, 0);
27087 arg1 = CALL_EXPR_ARG (exp, 1);
27088 op0 = expand_normal (arg0);
27089 op1 = expand_normal (arg1);
27091 op0 = copy_to_mode_reg (SImode, op0);
27093 op1 = copy_to_mode_reg (SImode, op1);
27094 emit_insn (gen_sse3_mwait (op0, op1));
27097 case IX86_BUILTIN_VEC_INIT_V2SI:
27098 case IX86_BUILTIN_VEC_INIT_V4HI:
27099 case IX86_BUILTIN_VEC_INIT_V8QI:
27100 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
27102 case IX86_BUILTIN_VEC_EXT_V2DF:
27103 case IX86_BUILTIN_VEC_EXT_V2DI:
27104 case IX86_BUILTIN_VEC_EXT_V4SF:
27105 case IX86_BUILTIN_VEC_EXT_V4SI:
27106 case IX86_BUILTIN_VEC_EXT_V8HI:
27107 case IX86_BUILTIN_VEC_EXT_V2SI:
27108 case IX86_BUILTIN_VEC_EXT_V4HI:
27109 case IX86_BUILTIN_VEC_EXT_V16QI:
27110 return ix86_expand_vec_ext_builtin (exp, target);
27112 case IX86_BUILTIN_VEC_SET_V2DI:
27113 case IX86_BUILTIN_VEC_SET_V4SF:
27114 case IX86_BUILTIN_VEC_SET_V4SI:
27115 case IX86_BUILTIN_VEC_SET_V8HI:
27116 case IX86_BUILTIN_VEC_SET_V4HI:
27117 case IX86_BUILTIN_VEC_SET_V16QI:
27118 return ix86_expand_vec_set_builtin (exp);
27120 case IX86_BUILTIN_VEC_PERM_V2DF:
27121 case IX86_BUILTIN_VEC_PERM_V4SF:
27122 case IX86_BUILTIN_VEC_PERM_V2DI:
27123 case IX86_BUILTIN_VEC_PERM_V4SI:
27124 case IX86_BUILTIN_VEC_PERM_V8HI:
27125 case IX86_BUILTIN_VEC_PERM_V16QI:
27126 case IX86_BUILTIN_VEC_PERM_V2DI_U:
27127 case IX86_BUILTIN_VEC_PERM_V4SI_U:
27128 case IX86_BUILTIN_VEC_PERM_V8HI_U:
27129 case IX86_BUILTIN_VEC_PERM_V16QI_U:
27130 case IX86_BUILTIN_VEC_PERM_V4DF:
27131 case IX86_BUILTIN_VEC_PERM_V8SF:
27132 return ix86_expand_vec_perm_builtin (exp);
27134 case IX86_BUILTIN_INFQ:
27135 case IX86_BUILTIN_HUGE_VALQ:
27137 REAL_VALUE_TYPE inf;
27141 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
27143 tmp = validize_mem (force_const_mem (mode, tmp));
27146 target = gen_reg_rtx (mode);
27148 emit_move_insn (target, tmp);
27152 case IX86_BUILTIN_LLWPCB:
27153 arg0 = CALL_EXPR_ARG (exp, 0);
27154 op0 = expand_normal (arg0);
27155 icode = CODE_FOR_lwp_llwpcb;
27156 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
27158 if (GET_MODE (op0) != Pmode)
27159 op0 = convert_to_mode (Pmode, op0, 1);
27160 op0 = force_reg (Pmode, op0);
27162 emit_insn (gen_lwp_llwpcb (op0));
27165 case IX86_BUILTIN_SLWPCB:
27166 icode = CODE_FOR_lwp_slwpcb;
27168 || !insn_data[icode].operand[0].predicate (target, Pmode))
27169 target = gen_reg_rtx (Pmode);
27170 emit_insn (gen_lwp_slwpcb (target));
27173 case IX86_BUILTIN_BEXTRI32:
27174 case IX86_BUILTIN_BEXTRI64:
27175 arg0 = CALL_EXPR_ARG (exp, 0);
27176 arg1 = CALL_EXPR_ARG (exp, 1);
27177 op0 = expand_normal (arg0);
27178 op1 = expand_normal (arg1);
27179 icode = (fcode == IX86_BUILTIN_BEXTRI32
27180 ? CODE_FOR_tbm_bextri_si
27181 : CODE_FOR_tbm_bextri_di);
27182 if (!CONST_INT_P (op1))
27184 error ("last argument must be an immediate");
27189 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
27190 unsigned char lsb_index = INTVAL (op1) & 0xFF;
27191 op1 = GEN_INT (length);
27192 op2 = GEN_INT (lsb_index);
27193 pat = GEN_FCN (icode) (target, op0, op1, op2);
27199 case IX86_BUILTIN_RDRAND16_STEP:
27200 icode = CODE_FOR_rdrandhi_1;
27204 case IX86_BUILTIN_RDRAND32_STEP:
27205 icode = CODE_FOR_rdrandsi_1;
27209 case IX86_BUILTIN_RDRAND64_STEP:
27210 icode = CODE_FOR_rdranddi_1;
27214 op0 = gen_reg_rtx (mode0);
27215 emit_insn (GEN_FCN (icode) (op0));
27217 arg0 = CALL_EXPR_ARG (exp, 0);
27218 op1 = expand_normal (arg0);
27219 if (!address_operand (op1, VOIDmode))
27221 op1 = convert_memory_address (Pmode, op1);
27222 op1 = copy_addr_to_reg (op1);
27224 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
27226 op1 = gen_reg_rtx (SImode);
27227 emit_move_insn (op1, CONST1_RTX (SImode));
27229 /* Emit SImode conditional move. */
27230 if (mode0 == HImode)
27232 op2 = gen_reg_rtx (SImode);
27233 emit_insn (gen_zero_extendhisi2 (op2, op0));
27235 else if (mode0 == SImode)
27238 op2 = gen_rtx_SUBREG (SImode, op0, 0);
27241 target = gen_reg_rtx (SImode);
27243 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
27245 emit_insn (gen_rtx_SET (VOIDmode, target,
27246 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
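/* Illustrative sketch (not part of GCC sources): the expansion above
   implements the *_step builtins' contract in terms of the carry flag.
   On failure RDRAND leaves zero in its destination, which is why the
   zero-extended destination can double as the "failure" arm of the
   conditional move.  Plain-C model, with hw_rdrand a hypothetical
   stand-in for the instruction:

static int
rdrand_step_model (unsigned int *dest, int (*hw_rdrand) (unsigned int *))
{
  unsigned int val = 0;
  int carry = hw_rdrand (&val);  // CF=1: success; CF=0: val is 0

  *dest = val;                   // store through the pointer argument
  return carry ? 1 : (int) val;  // 1 on success, 0 on failure
}  */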
27253 for (i = 0, d = bdesc_special_args;
27254 i < ARRAY_SIZE (bdesc_special_args);
27256 if (d->code == fcode)
27257 return ix86_expand_special_args_builtin (d, exp, target);
27259 for (i = 0, d = bdesc_args;
27260 i < ARRAY_SIZE (bdesc_args);
27262 if (d->code == fcode)
27265 case IX86_BUILTIN_FABSQ:
27266 case IX86_BUILTIN_COPYSIGNQ:
27268 /* Emit a normal call if SSE2 isn't available. */
27269 return expand_call (exp, target, ignore);
27271 return ix86_expand_args_builtin (d, exp, target);
27274 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
27275 if (d->code == fcode)
27276 return ix86_expand_sse_comi (d, exp, target);
27278 for (i = 0, d = bdesc_pcmpestr;
27279 i < ARRAY_SIZE (bdesc_pcmpestr);
27281 if (d->code == fcode)
27282 return ix86_expand_sse_pcmpestr (d, exp, target);
27284 for (i = 0, d = bdesc_pcmpistr;
27285 i < ARRAY_SIZE (bdesc_pcmpistr);
27287 if (d->code == fcode)
27288 return ix86_expand_sse_pcmpistr (d, exp, target);
27290 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
27291 if (d->code == fcode)
27292 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
27293 (enum ix86_builtin_func_type)
27294 d->flag, d->comparison);
27296 gcc_unreachable ();
27299 /* Returns a function decl for a vectorized version of the builtin function
27300 with builtin function code FN and the result vector type TYPE, or NULL_TREE
27301 if it is not available. */
27304 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
27307 enum machine_mode in_mode, out_mode;
27309 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
27311 if (TREE_CODE (type_out) != VECTOR_TYPE
27312 || TREE_CODE (type_in) != VECTOR_TYPE
27313 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
27316 out_mode = TYPE_MODE (TREE_TYPE (type_out));
27317 out_n = TYPE_VECTOR_SUBPARTS (type_out);
27318 in_mode = TYPE_MODE (TREE_TYPE (type_in));
27319 in_n = TYPE_VECTOR_SUBPARTS (type_in);
27323 case BUILT_IN_SQRT:
27324 if (out_mode == DFmode && in_mode == DFmode)
27326 if (out_n == 2 && in_n == 2)
27327 return ix86_builtins[IX86_BUILTIN_SQRTPD];
27328 else if (out_n == 4 && in_n == 4)
27329 return ix86_builtins[IX86_BUILTIN_SQRTPD256];
27333 case BUILT_IN_SQRTF:
27334 if (out_mode == SFmode && in_mode == SFmode)
27336 if (out_n == 4 && in_n == 4)
27337 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
27338 else if (out_n == 8 && in_n == 8)
27339 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR256];
27343 case BUILT_IN_LRINT:
27344 if (out_mode == SImode && out_n == 4
27345 && in_mode == DFmode && in_n == 2)
27346 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
27349 case BUILT_IN_LRINTF:
27350 if (out_mode == SImode && in_mode == SFmode)
27352 if (out_n == 4 && in_n == 4)
27353 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
27354 else if (out_n == 8 && in_n == 8)
27355 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ256];
27359 case BUILT_IN_COPYSIGN:
27360 if (out_mode == DFmode && in_mode == DFmode)
27362 if (out_n == 2 && in_n == 2)
27363 return ix86_builtins[IX86_BUILTIN_CPYSGNPD];
27364 else if (out_n == 4 && in_n == 4)
27365 return ix86_builtins[IX86_BUILTIN_CPYSGNPD256];
27369 case BUILT_IN_COPYSIGNF:
27370 if (out_mode == SFmode && in_mode == SFmode)
27372 if (out_n == 4 && in_n == 4)
27373 return ix86_builtins[IX86_BUILTIN_CPYSGNPS];
27374 else if (out_n == 8 && in_n == 8)
27375 return ix86_builtins[IX86_BUILTIN_CPYSGNPS256];
27379 case BUILT_IN_FLOOR:
27380 /* The round insn does not trap on denormals. */
27381 if (flag_trapping_math || !TARGET_ROUND)
27384 if (out_mode == DFmode && in_mode == DFmode)
27386 if (out_n == 2 && in_n == 2)
27387 return ix86_builtins[IX86_BUILTIN_FLOORPD];
27388 else if (out_n == 4 && in_n == 4)
27389 return ix86_builtins[IX86_BUILTIN_FLOORPD256];
27393 case BUILT_IN_FLOORF:
27394 /* The round insn does not trap on denormals. */
27395 if (flag_trapping_math || !TARGET_ROUND)
27398 if (out_mode == SFmode && in_mode == SFmode)
27400 if (out_n == 4 && in_n == 4)
27401 return ix86_builtins[IX86_BUILTIN_FLOORPS];
27402 else if (out_n == 8 && in_n == 8)
27403 return ix86_builtins[IX86_BUILTIN_FLOORPS256];
27407 case BUILT_IN_CEIL:
27408 /* The round insn does not trap on denormals. */
27409 if (flag_trapping_math || !TARGET_ROUND)
27412 if (out_mode == DFmode && in_mode == DFmode)
27414 if (out_n == 2 && in_n == 2)
27415 return ix86_builtins[IX86_BUILTIN_CEILPD];
27416 else if (out_n == 4 && in_n == 4)
27417 return ix86_builtins[IX86_BUILTIN_CEILPD256];
27421 case BUILT_IN_CEILF:
27422 /* The round insn does not trap on denormals. */
27423 if (flag_trapping_math || !TARGET_ROUND)
27426 if (out_mode == SFmode && in_mode == SFmode)
27428 if (out_n == 4 && in_n == 4)
27429 return ix86_builtins[IX86_BUILTIN_CEILPS];
27430 else if (out_n == 8 && in_n == 8)
27431 return ix86_builtins[IX86_BUILTIN_CEILPS256];
27435 case BUILT_IN_TRUNC:
27436 /* The round insn does not trap on denormals. */
27437 if (flag_trapping_math || !TARGET_ROUND)
27440 if (out_mode == DFmode && in_mode == DFmode)
27442 if (out_n == 2 && in_n == 2)
27443 return ix86_builtins[IX86_BUILTIN_TRUNCPD];
27444 else if (out_n == 4 && in_n == 4)
27445 return ix86_builtins[IX86_BUILTIN_TRUNCPD256];
27449 case BUILT_IN_TRUNCF:
27450 /* The round insn does not trap on denormals. */
27451 if (flag_trapping_math || !TARGET_ROUND)
27454 if (out_mode == SFmode && in_mode == SFmode)
27456 if (out_n == 4 && in_n == 4)
27457 return ix86_builtins[IX86_BUILTIN_TRUNCPS];
27458 else if (out_n == 8 && in_n == 8)
27459 return ix86_builtins[IX86_BUILTIN_TRUNCPS256];
27463 case BUILT_IN_RINT:
27464 /* The round insn does not trap on denormals. */
27465 if (flag_trapping_math || !TARGET_ROUND)
27468 if (out_mode == DFmode && in_mode == DFmode)
27470 if (out_n == 2 && in_n == 2)
27471 return ix86_builtins[IX86_BUILTIN_RINTPD];
27472 else if (out_n == 4 && in_n == 4)
27473 return ix86_builtins[IX86_BUILTIN_RINTPD256];
27477 case BUILT_IN_RINTF:
27478 /* The round insn does not trap on denormals. */
27479 if (flag_trapping_math || !TARGET_ROUND)
27482 if (out_mode == SFmode && in_mode == SFmode)
27484 if (out_n == 4 && in_n == 4)
27485 return ix86_builtins[IX86_BUILTIN_RINTPS];
27486 else if (out_n == 8 && in_n == 8)
27487 return ix86_builtins[IX86_BUILTIN_RINTPS256];
27492 if (out_mode == DFmode && in_mode == DFmode)
27494 if (out_n == 2 && in_n == 2)
27495 return ix86_builtins[IX86_BUILTIN_VFMADDPD];
27496 if (out_n == 4 && in_n == 4)
27497 return ix86_builtins[IX86_BUILTIN_VFMADDPD256];
27501 case BUILT_IN_FMAF:
27502 if (out_mode == SFmode && in_mode == SFmode)
27504 if (out_n == 4 && in_n == 4)
27505 return ix86_builtins[IX86_BUILTIN_VFMADDPS];
27506 if (out_n == 8 && in_n == 8)
27507 return ix86_builtins[IX86_BUILTIN_VFMADDPS256];
27515 /* Dispatch to a handler for a vectorization library. */
27516 if (ix86_veclib_handler)
27517 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
27523 /* Handler for an SVML-style interface to
27524 a library with vectorized intrinsics. */
27527 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
27530 tree fntype, new_fndecl, args;
27533 enum machine_mode el_mode, in_mode;
27536 /* The SVML is suitable for unsafe math only. */
27537 if (!flag_unsafe_math_optimizations)
27540 el_mode = TYPE_MODE (TREE_TYPE (type_out));
27541 n = TYPE_VECTOR_SUBPARTS (type_out);
27542 in_mode = TYPE_MODE (TREE_TYPE (type_in));
27543 in_n = TYPE_VECTOR_SUBPARTS (type_in);
27544 if (el_mode != in_mode
27552 case BUILT_IN_LOG10:
27554 case BUILT_IN_TANH:
27556 case BUILT_IN_ATAN:
27557 case BUILT_IN_ATAN2:
27558 case BUILT_IN_ATANH:
27559 case BUILT_IN_CBRT:
27560 case BUILT_IN_SINH:
27562 case BUILT_IN_ASINH:
27563 case BUILT_IN_ASIN:
27564 case BUILT_IN_COSH:
27566 case BUILT_IN_ACOSH:
27567 case BUILT_IN_ACOS:
27568 if (el_mode != DFmode || n != 2)
27572 case BUILT_IN_EXPF:
27573 case BUILT_IN_LOGF:
27574 case BUILT_IN_LOG10F:
27575 case BUILT_IN_POWF:
27576 case BUILT_IN_TANHF:
27577 case BUILT_IN_TANF:
27578 case BUILT_IN_ATANF:
27579 case BUILT_IN_ATAN2F:
27580 case BUILT_IN_ATANHF:
27581 case BUILT_IN_CBRTF:
27582 case BUILT_IN_SINHF:
27583 case BUILT_IN_SINF:
27584 case BUILT_IN_ASINHF:
27585 case BUILT_IN_ASINF:
27586 case BUILT_IN_COSHF:
27587 case BUILT_IN_COSF:
27588 case BUILT_IN_ACOSHF:
27589 case BUILT_IN_ACOSF:
27590 if (el_mode != SFmode || n != 4)
27598 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
27600 if (fn == BUILT_IN_LOGF)
27601 strcpy (name, "vmlsLn4");
27602 else if (fn == BUILT_IN_LOG)
27603 strcpy (name, "vmldLn2");
27606 sprintf (name, "vmls%s", bname+10);
27607 name[strlen (name)-1] = '4';
27610 sprintf (name, "vmld%s2", bname+10);
27612 /* Convert to uppercase. */
27616 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
27617 args = TREE_CHAIN (args))
27621 fntype = build_function_type_list (type_out, type_in, NULL);
27623 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
27625 /* Build a function declaration for the vectorized function. */
27626 new_fndecl = build_decl (BUILTINS_LOCATION,
27627 FUNCTION_DECL, get_identifier (name), fntype);
27628 TREE_PUBLIC (new_fndecl) = 1;
27629 DECL_EXTERNAL (new_fndecl) = 1;
27630 DECL_IS_NOVOPS (new_fndecl) = 1;
27631 TREE_READONLY (new_fndecl) = 1;
27636 /* Handler for an ACML-style interface to
27637 a library with vectorized intrinsics. */
27640 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
27642 char name[20] = "__vr.._";
27643 tree fntype, new_fndecl, args;
27646 enum machine_mode el_mode, in_mode;
27649 /* The ACML is 64-bit only, and is suitable for unsafe math only, as
27650 it does not correctly implement parts of IEEE (such as denormals)
27651 to the required precision. */
27653 || !flag_unsafe_math_optimizations)
27656 el_mode = TYPE_MODE (TREE_TYPE (type_out));
27657 n = TYPE_VECTOR_SUBPARTS (type_out);
27658 in_mode = TYPE_MODE (TREE_TYPE (type_in));
27659 in_n = TYPE_VECTOR_SUBPARTS (type_in);
27660 if (el_mode != in_mode
27670 case BUILT_IN_LOG2:
27671 case BUILT_IN_LOG10:
27674 if (el_mode != DFmode
27679 case BUILT_IN_SINF:
27680 case BUILT_IN_COSF:
27681 case BUILT_IN_EXPF:
27682 case BUILT_IN_POWF:
27683 case BUILT_IN_LOGF:
27684 case BUILT_IN_LOG2F:
27685 case BUILT_IN_LOG10F:
27688 if (el_mode != SFmode
27697 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
27698 sprintf (name + 7, "%s", bname+10);
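/* Illustrative sketch (not part of GCC sources): the two dots of the
   "__vr.._" template are filled (in lines elided here) with the element
   kind and the 128-bit lane count before the scalar name is appended.
   Standalone model:

#include <stdio.h>

static void
acml_mangle (const char *scalar, int is_float, char name[32])
{
  sprintf (name, "__vr%c%c_%s",
           is_float ? 's' : 'd',  // element kind
           is_float ? '4' : '2',  // lanes per 128-bit vector
           scalar);
}

   acml_mangle ("sin", 0, buf) yields "__vrd2_sin";
   acml_mangle ("sinf", 1, buf) yields "__vrs4_sinf".  */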
27701 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
27702 args = TREE_CHAIN (args))
27706 fntype = build_function_type_list (type_out, type_in, NULL);
27708 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
27710 /* Build a function declaration for the vectorized function. */
27711 new_fndecl = build_decl (BUILTINS_LOCATION,
27712 FUNCTION_DECL, get_identifier (name), fntype);
27713 TREE_PUBLIC (new_fndecl) = 1;
27714 DECL_EXTERNAL (new_fndecl) = 1;
27715 DECL_IS_NOVOPS (new_fndecl) = 1;
27716 TREE_READONLY (new_fndecl) = 1;
27722 /* Returns a decl of a function that implements conversion of an integer vector
27723 into a floating-point vector, or vice-versa. DEST_TYPE and SRC_TYPE
27724 are the types involved when converting according to CODE.
27725 Return NULL_TREE if it is not available. */
27728 ix86_vectorize_builtin_conversion (unsigned int code,
27729 tree dest_type, tree src_type)
27737 switch (TYPE_MODE (src_type))
27740 switch (TYPE_MODE (dest_type))
27743 return (TYPE_UNSIGNED (src_type)
27744 ? ix86_builtins[IX86_BUILTIN_CVTUDQ2PS]
27745 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS]);
27747 return (TYPE_UNSIGNED (src_type)
27749 : ix86_builtins[IX86_BUILTIN_CVTDQ2PD256]);
27755 switch (TYPE_MODE (dest_type))
27758 return (TYPE_UNSIGNED (src_type)
27760 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS256]);
27769 case FIX_TRUNC_EXPR:
27770 switch (TYPE_MODE (dest_type))
27773 switch (TYPE_MODE (src_type))
27776 return (TYPE_UNSIGNED (dest_type)
27778 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ]);
27780 return (TYPE_UNSIGNED (dest_type)
27782 : ix86_builtins[IX86_BUILTIN_CVTTPD2DQ256]);
27789 switch (TYPE_MODE (src_type))
27792 return (TYPE_UNSIGNED (dest_type)
27794 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ256]);
27811 /* Returns a decl for a target-specific builtin that implements the
27812 reciprocal of the function, or NULL_TREE if not available. */
27815 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
27816 bool sqrt ATTRIBUTE_UNUSED)
27818 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
27819 && flag_finite_math_only && !flag_trapping_math
27820 && flag_unsafe_math_optimizations))
27824 /* Machine dependent builtins. */
27827 /* Vectorized version of sqrt to rsqrt conversion. */
27828 case IX86_BUILTIN_SQRTPS_NR:
27829 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
27831 case IX86_BUILTIN_SQRTPS_NR256:
27832 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR256];
27838 /* Normal builtins. */
27841 /* Sqrt to rsqrt conversion. */
27842 case BUILT_IN_SQRTF:
27843 return ix86_builtins[IX86_BUILTIN_RSQRTF];
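/* Illustrative sketch (not part of GCC sources): the "_NR" builtins
   returned above pair the low-precision RSQRTPS estimate with one
   Newton-Raphson refinement step, which is why the conversion is gated
   on the unsafe-math flags checked at the top of this function.  Scalar
   model, with rsqrt_estimate a hypothetical stand-in for RSQRTPS:

static float
rsqrt_nr_model (float a, float (*rsqrt_estimate) (float))
{
  float x = rsqrt_estimate (a);          // ~12 bits of precision
  return x * (1.5f - 0.5f * a * x * x);  // one N-R step: ~23 bits
}  */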
27850 /* Helper for avx_vpermilps256_operand et al. This is also used by
27851 the expansion functions to turn the parallel back into a mask.
27852 The return value is 0 for no match and the imm8+1 for a match. */
27855 avx_vpermilp_parallel (rtx par, enum machine_mode mode)
27857 unsigned i, nelt = GET_MODE_NUNITS (mode);
27859 unsigned char ipar[8];
27861 if (XVECLEN (par, 0) != (int) nelt)
27864 /* Validate that all of the elements are constants, and not totally
27865 out of range. Copy the data into an integral array to make the
27866 subsequent checks easier. */
27867 for (i = 0; i < nelt; ++i)
27869 rtx er = XVECEXP (par, 0, i);
27870 unsigned HOST_WIDE_INT ei;
27872 if (!CONST_INT_P (er))
27883 /* In the 256-bit DFmode case, we can only move elements within a 128-bit lane. */
27885 for (i = 0; i < 2; ++i)
27889 mask |= ipar[i] << i;
27891 for (i = 2; i < 4; ++i)
27895 mask |= (ipar[i] - 2) << i;
27900 /* In the 256-bit SFmode case, we have full freedom of movement
27901 within the low 128-bit lane, but the high 128-bit lane must
27902 mirror the exact same pattern. */
27903 for (i = 0; i < 4; ++i)
27904 if (ipar[i] + 4 != ipar[i + 4])
27911 /* In the 128-bit case, we have full freedom in the placement of
27912 the elements from the source operand. */
27913 for (i = 0; i < nelt; ++i)
27914 mask |= ipar[i] << (i * (nelt / 2));
27918 gcc_unreachable ();
27921 /* Make sure success has a non-zero value by adding one. */
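/* Illustrative sketch (not part of GCC sources): a standalone model of
   the V4SF arm above.  Selector i of the permute occupies bits
   [2i+1:2i] of the vpermilps imm8, and the helper returns imm8 + 1 so
   that 0 can mean "no match", matching the convention documented above:

static int
vpermilps_imm8 (const unsigned char sel[4])
{
  unsigned int imm = 0;
  int i;

  for (i = 0; i < 4; ++i)
    {
      if (sel[i] > 3)                      // out of range: no match
        return 0;
      imm |= (unsigned int) sel[i] << (i * 2);
    }
  return (int) imm + 1;                    // success is imm8 + 1
}

   The identity permute {0, 1, 2, 3} encodes as 0xE4 (+1 => 0xE5).  */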
27925 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
27926 the expansion functions to turn the parallel back into a mask.
27927 The return value is 0 for no match and the imm8+1 for a match. */
27930 avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
27932 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
27934 unsigned char ipar[8];
27936 if (XVECLEN (par, 0) != (int) nelt)
27939 /* Validate that all of the elements are constants, and not totally
27940 out of range. Copy the data into an integral array to make the
27941 subsequent checks easier. */
27942 for (i = 0; i < nelt; ++i)
27944 rtx er = XVECEXP (par, 0, i);
27945 unsigned HOST_WIDE_INT ei;
27947 if (!CONST_INT_P (er))
27950 if (ei >= 2 * nelt)
27955 /* Validate that each half of the permute selects a contiguous half of a source operand. */
27956 for (i = 0; i < nelt2 - 1; ++i)
27957 if (ipar[i] + 1 != ipar[i + 1])
27959 for (i = nelt2; i < nelt - 1; ++i)
27960 if (ipar[i] + 1 != ipar[i + 1])
27963 /* Reconstruct the mask. */
27964 for (i = 0; i < 2; ++i)
27966 unsigned e = ipar[i * nelt2];
27970 mask |= e << (i * 4);
27973 /* Make sure success has a non-zero value by adding one. */
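/* Illustrative sketch (not part of GCC sources): a standalone model of
   the reconstruction above, specialized to V4DF (nelt = 4, nelt2 = 2).
   Element indices 0..3 name operand 0, 4..7 name operand 1; each output
   half must be an aligned input half, whose number lands in one nibble
   of the vperm2f128 imm8:

static int
vperm2f128_imm8_v4df (const unsigned char sel[4])
{
  unsigned int mask = 0;
  int i;

  for (i = 0; i < 4; i += 2)
    if (sel[i] > 7 || sel[i] % 2 || sel[i] + 1 != sel[i + 1])
      return 0;                            // not an aligned half

  for (i = 0; i < 2; ++i)
    mask |= (unsigned int) (sel[i * 2] / 2) << (i * 4);

  return (int) mask + 1;                   // success is imm8 + 1
}

   Taking the low half of operand 0 and the low half of operand 1,
   {0, 1, 4, 5}, encodes as 0x20 (+1 => 0x21).  */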
27978 /* Store OPERAND to the memory after reload is completed. This means
27979 that we can't easily use assign_stack_local. */
27981 ix86_force_to_memory (enum machine_mode mode, rtx operand)
27985 gcc_assert (reload_completed);
27986 if (ix86_using_red_zone ())
27988 result = gen_rtx_MEM (mode,
27989 gen_rtx_PLUS (Pmode,
27991 GEN_INT (-RED_ZONE_SIZE)));
27992 emit_move_insn (result, operand);
27994 else if (TARGET_64BIT)
28000 operand = gen_lowpart (DImode, operand);
28004 gen_rtx_SET (VOIDmode,
28005 gen_rtx_MEM (DImode,
28006 gen_rtx_PRE_DEC (DImode,
28007 stack_pointer_rtx)),
28011 gcc_unreachable ();
28013 result = gen_rtx_MEM (mode, stack_pointer_rtx);
28022 split_double_mode (mode, &operand, 1, operands, operands + 1);
28024 gen_rtx_SET (VOIDmode,
28025 gen_rtx_MEM (SImode,
28026 gen_rtx_PRE_DEC (Pmode,
28027 stack_pointer_rtx)),
28030 gen_rtx_SET (VOIDmode,
28031 gen_rtx_MEM (SImode,
28032 gen_rtx_PRE_DEC (Pmode,
28033 stack_pointer_rtx)),
28038 /* Store HImodes as SImodes. */
28039 operand = gen_lowpart (SImode, operand);
28043 gen_rtx_SET (VOIDmode,
28044 gen_rtx_MEM (GET_MODE (operand),
28045 gen_rtx_PRE_DEC (SImode,
28046 stack_pointer_rtx)),
28050 gcc_unreachable ();
28052 result = gen_rtx_MEM (mode, stack_pointer_rtx);
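/* Illustrative sketch (not part of GCC sources): outside the red zone
   the operand is pushed via a PRE_DEC of the stack pointer and the
   returned MEM aliases the new stack top; ix86_free_from_memory below
   then releases the slot.  For a 64-bit operand the emitted sequence
   amounts to roughly

       pushq   <operand>        # the PRE_DEC store above
       ...     0(%rsp)          # the returned memory operand
       leaq    8(%rsp), %rsp    # ix86_free_from_memory

   whereas with a red zone the store goes to -128(%rsp) and nothing
   needs to be deallocated.  */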
28057 /* Free the operand from memory. */
28059 ix86_free_from_memory (enum machine_mode mode)
28061 if (!ix86_using_red_zone ())
28065 if (mode == DImode || TARGET_64BIT)
28069 /* Use LEA to deallocate stack space. In peephole2 it will be converted
28070 to a pop or add instruction if registers are available. */
28071 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
28072 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
28077 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
28079 Put float CONST_DOUBLE in the constant pool instead of fp regs.
28080 QImode must go into class Q_REGS.
28081 Narrow ALL_REGS to GENERAL_REGS. This allows movsf and
28082 movdf to do mem-to-mem moves through integer regs. */
28085 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
28087 enum machine_mode mode = GET_MODE (x);
28089 /* We're only allowed to return a subclass of CLASS. Many of the
28090 following checks fail for NO_REGS, so eliminate that early. */
28091 if (regclass == NO_REGS)
28094 /* All classes can load zeros. */
28095 if (x == CONST0_RTX (mode))
28098 /* Force constants into memory if we are loading a (nonzero) constant into
28099 an MMX or SSE register. This is because there are no MMX/SSE instructions
28100 to load from a constant. */
28102 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
28105 /* Prefer SSE regs only if we can use them for math. */
28106 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
28107 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
28109 /* Floating-point constants need more complex checks. */
28110 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
28112 /* General regs can load everything. */
28113 if (reg_class_subset_p (regclass, GENERAL_REGS))
28116 /* Floats can load 0 and 1 plus some others. Note that we eliminated
28117 zero above. We only want to wind up preferring 80387 registers if
28118 we plan on doing computation with them. */
28120 && standard_80387_constant_p (x) > 0)
28122 /* Limit class to non-sse. */
28123 if (regclass == FLOAT_SSE_REGS)
28125 if (regclass == FP_TOP_SSE_REGS)
28127 if (regclass == FP_SECOND_SSE_REGS)
28128 return FP_SECOND_REG;
28129 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
28136 /* Generally when we see PLUS here, it's the function invariant
28137 (plus soft-fp const_int), which can only be computed into general
28139 if (GET_CODE (x) == PLUS)
28140 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
28142 /* QImode constants are easy to load, but non-constant QImode data
28143 must go into Q_REGS. */
28144 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
28146 if (reg_class_subset_p (regclass, Q_REGS))
28148 if (reg_class_subset_p (Q_REGS, regclass))
28156 /* Discourage putting floating-point values in SSE registers unless
28157 SSE math is being used, and likewise for the 387 registers. */
28159 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
28161 enum machine_mode mode = GET_MODE (x);
28163 /* Restrict the output reload class to the register bank that we are doing
28164 math on. If we would rather not return a subset of CLASS, reject this
28165 alternative: if reload cannot honor it, it will still use its own choice. */
28167 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
28168 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
28170 if (X87_FLOAT_MODE_P (mode))
28172 if (regclass == FP_TOP_SSE_REGS)
28174 else if (regclass == FP_SECOND_SSE_REGS)
28175 return FP_SECOND_REG;
28177 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
28184 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
28185 enum machine_mode mode,
28186 secondary_reload_info *sri ATTRIBUTE_UNUSED)
28188 /* QImode spills from non-QI registers require an
28189 intermediate register on 32bit targets. */
28191 && !in_p && mode == QImode
28192 && (rclass == GENERAL_REGS
28193 || rclass == LEGACY_REGS
28194 || rclass == INDEX_REGS))
28203 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
28204 regno = true_regnum (x);
28206 /* Return Q_REGS if the operand is in memory. */
28211 /* This condition handles the corner case where an expression involving
28212 pointers gets vectorized. We're trying to use the address of a
28213 stack slot as a vector initializer.
28215 (set (reg:V2DI 74 [ vect_cst_.2 ])
28216 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
28218 Eventually frame gets turned into sp+offset like this:
28220 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
28221 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
28222 (const_int 392 [0x188]))))
28224 That later gets turned into:
28226 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
28227 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
28228 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
28230 We'll have the following reload recorded:
28232 Reload 0: reload_in (DI) =
28233 (plus:DI (reg/f:DI 7 sp)
28234 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
28235 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
28236 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
28237 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
28238 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
28239 reload_reg_rtx: (reg:V2DI 22 xmm1)
28241 That isn't going to work, since SSE instructions can't handle scalar
28242 additions. Returning GENERAL_REGS forces the addition into an integer
28243 register, and reload can handle subsequent reloads without problems. */
28245 if (in_p && GET_CODE (x) == PLUS
28246 && SSE_CLASS_P (rclass)
28247 && SCALAR_INT_MODE_P (mode))
28248 return GENERAL_REGS;
28253 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
28256 ix86_class_likely_spilled_p (reg_class_t rclass)
28267 case SSE_FIRST_REG:
28269 case FP_SECOND_REG:
28279 /* If we are copying between general and FP registers, we need a memory
28280 location. The same is true for SSE and MMX registers.
28282 To keep register_move_cost fast, an inline variant is provided.
28284 The macro can't work reliably when one of the CLASSES is a class containing
28285 registers from multiple units (SSE, MMX, integer). We avoid this by never
28286 combining those units in a single alternative in the machine description.
28287 Ensure that this constraint holds to avoid surprises.
28289 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
28290 enforce these sanity checks. */
28293 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
28294 enum machine_mode mode, int strict)
28296 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
28297 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
28298 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
28299 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
28300 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
28301 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
28303 gcc_assert (!strict);
28307 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
28310 /* ??? This is a lie. We do have moves between mmx/general, and for
28311 mmx/sse2. But by saying we need secondary memory we discourage the
28312 register allocator from using the mmx registers unless needed. */
28313 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
28316 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
28318 /* SSE1 doesn't have any direct moves from other classes. */
28322 /* If the target says that inter-unit moves are more expensive
28323 than moving through memory, then don't generate them. */
28324 if (!TARGET_INTER_UNIT_MOVES)
28327 /* Between SSE and general, we have moves no larger than word size. */
28328 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
28336 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
28337 enum machine_mode mode, int strict)
28339 return inline_secondary_memory_needed (class1, class2, mode, strict);
28342 /* Implement the TARGET_CLASS_MAX_NREGS hook.
28344 On the 80386, this is the size of MODE in words,
28345 except in the FP regs, where a single reg is always enough. */
28347 static unsigned char
28348 ix86_class_max_nregs (reg_class_t rclass, enum machine_mode mode)
28350 if (MAYBE_INTEGER_CLASS_P (rclass))
28352 if (mode == XFmode)
28353 return (TARGET_64BIT ? 2 : 3);
28354 else if (mode == XCmode)
28355 return (TARGET_64BIT ? 4 : 6);
28357 return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
28361 if (COMPLEX_MODE_P (mode))
28368 /* Return true if the registers in CLASS cannot represent the change from
28369 modes FROM to TO. */
28372 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
28373 enum reg_class regclass)
28378 /* x87 registers can't do subreg at all, as all values are reformatted
28379 to extended precision. */
28380 if (MAYBE_FLOAT_CLASS_P (regclass))
28383 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
28385 /* Vector registers do not support QI or HImode loads. If we don't
28386 disallow a change to these modes, reload will assume it's ok to
28387 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
28388 the vec_dupv4hi pattern. */
28389 if (GET_MODE_SIZE (from) < 4)
28392 /* Vector registers do not support subreg with nonzero offsets, which
28393 are otherwise valid for integer registers. Since we can't see
28394 whether we have a nonzero offset from here, prohibit all
28395 nonparadoxical subregs changing size. */
28396 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
28403 /* Return the cost of moving data of mode M between a
28404 register and memory. A value of 2 is the default; this cost is
28405 relative to those in `REGISTER_MOVE_COST'.
28407 This function is used extensively by register_move_cost, which is used to
28408 build tables at startup, so make it inline.
28409 When IN is 2, return maximum of in and out move cost.
28411 If moving between registers and memory is more expensive than
28412 between two registers, you should define this macro to express the relative cost.
28415 Model also the increased cost of moving QImode registers in non Q_REGS classes. */
28419 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
28423 if (FLOAT_CLASS_P (regclass))
28441 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
28442 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
28444 if (SSE_CLASS_P (regclass))
28447 switch (GET_MODE_SIZE (mode))
28462 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
28463 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
28465 if (MMX_CLASS_P (regclass))
28468 switch (GET_MODE_SIZE (mode))
28480 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
28481 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
28483 switch (GET_MODE_SIZE (mode))
28486 if (Q_CLASS_P (regclass) || TARGET_64BIT)
28489 return ix86_cost->int_store[0];
28490 if (TARGET_PARTIAL_REG_DEPENDENCY
28491 && optimize_function_for_speed_p (cfun))
28492 cost = ix86_cost->movzbl_load;
28494 cost = ix86_cost->int_load[0];
28496 return MAX (cost, ix86_cost->int_store[0]);
28502 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
28504 return ix86_cost->movzbl_load;
28506 return ix86_cost->int_store[0] + 4;
28511 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
28512 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
28514 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
28515 if (mode == TFmode)
28518 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
28520 cost = ix86_cost->int_load[2];
28522 cost = ix86_cost->int_store[2];
28523 return (cost * (((int) GET_MODE_SIZE (mode)
28524 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
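/* Illustrative sketch (not part of GCC sources): the scaling above is a
   ceiling division of the mode size into word-sized moves:

static int
n_word_moves (int mode_size, int units_per_word)
{
  return (mode_size + units_per_word - 1) / units_per_word;
}

   E.g. n_word_moves (8, 4) == 2: a DImode value takes two int moves on
   a 32-bit target.  */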
28529 ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
28532 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
28536 /* Return the cost of moving data from a register in class CLASS1 to
28537 one in class CLASS2.
28539 It is not required that the cost always equal 2 when FROM is the same as TO;
28540 on some machines it is expensive to move between registers if they are not
28541 general registers. */
28544 ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
28545 reg_class_t class2_i)
28547 enum reg_class class1 = (enum reg_class) class1_i;
28548 enum reg_class class2 = (enum reg_class) class2_i;
28550 /* In case we require secondary memory, compute cost of the store followed
28551 by load. In order to avoid bad register allocation choices, we need
28552 this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
28554 if (inline_secondary_memory_needed (class1, class2, mode, 0))
28558 cost += inline_memory_move_cost (mode, class1, 2);
28559 cost += inline_memory_move_cost (mode, class2, 2);
28561 /* When copying from a general purpose register we may emit multiple
28562 stores followed by a single load, causing a memory size mismatch stall.
28563 Count this as an arbitrarily high cost of 20. */
28564 if (targetm.class_max_nregs (class1, mode)
28565 > targetm.class_max_nregs (class2, mode))
28568 /* In the case of FP/MMX moves, the registers actually overlap, and we
28569 have to switch modes in order to treat them differently. */
28570 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
28571 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
28577 /* Moves between SSE/MMX and integer unit are expensive. */
28578 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
28579 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
28581 /* ??? By keeping returned value relatively high, we limit the number
28582 of moves between integer and MMX/SSE registers for all targets.
28583 Additionally, a high value prevents problems with x86_modes_tieable_p(),
28584 where integer modes in MMX/SSE registers are not tieable
28585 because of missing QImode and HImode moves to, from or between
28586 MMX/SSE registers. */
28587 return MAX (8, ix86_cost->mmxsse_to_integer);
28589 if (MAYBE_FLOAT_CLASS_P (class1))
28590 return ix86_cost->fp_move;
28591 if (MAYBE_SSE_CLASS_P (class1))
28592 return ix86_cost->sse_move;
28593 if (MAYBE_MMX_CLASS_P (class1))
28594 return ix86_cost->mmx_move;
28598 /* Return TRUE if hard register REGNO can hold a value of machine-mode
28602 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
28604 /* Only the flags register can hold CCmode values, and it can hold nothing else. */
28605 if (CC_REGNO_P (regno))
28606 return GET_MODE_CLASS (mode) == MODE_CC;
28607 if (GET_MODE_CLASS (mode) == MODE_CC
28608 || GET_MODE_CLASS (mode) == MODE_RANDOM
28609 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
28611 if (FP_REGNO_P (regno))
28612 return VALID_FP_MODE_P (mode);
28613 if (SSE_REGNO_P (regno))
28615 /* We implement the move patterns for all vector modes into and
28616 out of SSE registers, even when no operation instructions
28617 are available. OImode move is available only when AVX is
28619 return ((TARGET_AVX && mode == OImode)
28620 || VALID_AVX256_REG_MODE (mode)
28621 || VALID_SSE_REG_MODE (mode)
28622 || VALID_SSE2_REG_MODE (mode)
28623 || VALID_MMX_REG_MODE (mode)
28624 || VALID_MMX_REG_MODE_3DNOW (mode));
28626 if (MMX_REGNO_P (regno))
28628 /* We implement the move patterns for 3DNOW modes even in MMX mode,
28629 so if the register is available at all, then we can move data of
28630 the given mode into or out of it. */
28631 return (VALID_MMX_REG_MODE (mode)
28632 || VALID_MMX_REG_MODE_3DNOW (mode));
28635 if (mode == QImode)
28637 /* Take care with QImode values: they can be in non-QI regs,
28638 but then they do cause partial register stalls. */
28639 if (regno <= BX_REG || TARGET_64BIT)
28641 if (!TARGET_PARTIAL_REG_STALL)
28643 return !can_create_pseudo_p ();
28645 /* We handle both integers and floats in the general purpose registers. */
28646 else if (VALID_INT_MODE_P (mode))
28648 else if (VALID_FP_MODE_P (mode))
28650 else if (VALID_DFP_MODE_P (mode))
28652 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
28653 on to use that value in smaller contexts, this can easily force a
28654 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
28655 supporting DImode, allow it. */
28656 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
28662 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
28663 tieable integer mode. */
28666 ix86_tieable_integer_mode_p (enum machine_mode mode)
28675 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
28678 return TARGET_64BIT;
28685 /* Return true if MODE1 is accessible in a register that can hold MODE2
28686 without copying. That is, all register classes that can hold MODE2
28687 can also hold MODE1. */
28690 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
28692 if (mode1 == mode2)
28695 if (ix86_tieable_integer_mode_p (mode1)
28696 && ix86_tieable_integer_mode_p (mode2))
28699 /* MODE2 being XFmode implies fp stack or general regs, which means we
28700 can tie any smaller floating point modes to it. Note that we do not
28701 tie this with TFmode. */
28702 if (mode2 == XFmode)
28703 return mode1 == SFmode || mode1 == DFmode;
28705 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
28706 that we can tie it with SFmode. */
28707 if (mode2 == DFmode)
28708 return mode1 == SFmode;
28710 /* If MODE2 is only appropriate for an SSE register, then tie with
28711 any other mode acceptable to SSE registers. */
28712 if (GET_MODE_SIZE (mode2) == 16
28713 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
28714 return (GET_MODE_SIZE (mode1) == 16
28715 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
28717 /* If MODE2 is appropriate for an MMX register, then tie
28718 with any other mode acceptable to MMX registers. */
28719 if (GET_MODE_SIZE (mode2) == 8
28720 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
28721 return (GET_MODE_SIZE (mode1) == 8
28722 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
28727 /* Compute a (partial) cost for rtx X. Return true if the complete
28728 cost has been computed, and false if subexpressions should be
28729 scanned. In either case, *TOTAL contains the cost result. */
28732 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed)
28734 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
28735 enum machine_mode mode = GET_MODE (x);
28736 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
28744 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
28746 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
28748 else if (flag_pic && SYMBOLIC_CONST (x)
28750 || (GET_CODE (x) != LABEL_REF
28751 && (GET_CODE (x) != SYMBOL_REF
28752 || !SYMBOL_REF_LOCAL_P (x)))))
28759 if (mode == VOIDmode)
28762 switch (standard_80387_constant_p (x))
28767 default: /* Other constants */
28772 /* Start with (MEM (SYMBOL_REF)), since that's where
28773 it'll probably end up. Add a penalty for size. */
28774 *total = (COSTS_N_INSNS (1)
28775 + (flag_pic != 0 && !TARGET_64BIT)
28776 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
28782 /* Zero extension is often completely free on x86_64, so make
28783 it as cheap as possible. */
28784 if (TARGET_64BIT && mode == DImode
28785 && GET_MODE (XEXP (x, 0)) == SImode)
28787 else if (TARGET_ZERO_EXTEND_WITH_AND)
28788 *total = cost->add;
28790 *total = cost->movzx;
28794 *total = cost->movsx;
28798 if (CONST_INT_P (XEXP (x, 1))
28799 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
28801 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
28804 *total = cost->add;
28807 if ((value == 2 || value == 3)
28808 && cost->lea <= cost->shift_const)
28810 *total = cost->lea;
28820 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
28822 if (CONST_INT_P (XEXP (x, 1)))
28824 if (INTVAL (XEXP (x, 1)) > 32)
28825 *total = cost->shift_const + COSTS_N_INSNS (2);
28827 *total = cost->shift_const * 2;
28831 if (GET_CODE (XEXP (x, 1)) == AND)
28832 *total = cost->shift_var * 2;
28834 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
28839 if (CONST_INT_P (XEXP (x, 1)))
28840 *total = cost->shift_const;
28842 *total = cost->shift_var;
28850 gcc_assert (FLOAT_MODE_P (mode));
28851 gcc_assert (TARGET_FMA || TARGET_FMA4);
28853 /* ??? SSE scalar/vector cost should be used here. */
28854 /* ??? Bald assumption that fma has the same cost as fmul. */
28855 *total = cost->fmul;
28856 *total += rtx_cost (XEXP (x, 1), FMA, speed);
28858 /* Negation in op0 or op2 is free: FMS, FNMA, FNMS. */
28860 if (GET_CODE (sub) == NEG)
28861 sub = XEXP (sub, 0);
28862 *total += rtx_cost (sub, FMA, speed);
28865 if (GET_CODE (sub) == NEG)
28866 sub = XEXP (sub, 0);
28867 *total += rtx_cost (sub, FMA, speed);
28872 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
28874 /* ??? SSE scalar cost should be used here. */
28875 *total = cost->fmul;
28878 else if (X87_FLOAT_MODE_P (mode))
28880 *total = cost->fmul;
28883 else if (FLOAT_MODE_P (mode))
28885 /* ??? SSE vector cost should be used here. */
28886 *total = cost->fmul;
28891 rtx op0 = XEXP (x, 0);
28892 rtx op1 = XEXP (x, 1);
28894 if (CONST_INT_P (XEXP (x, 1)))
28896 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
28897 for (nbits = 0; value != 0; value &= value - 1)
28901 /* This is arbitrary. */
28904 /* Compute costs correctly for widening multiplication. */
28905 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
28906 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
28907 == GET_MODE_SIZE (mode))
28909 int is_mulwiden = 0;
28910 enum machine_mode inner_mode = GET_MODE (op0);
28912 if (GET_CODE (op0) == GET_CODE (op1))
28913 is_mulwiden = 1, op1 = XEXP (op1, 0);
28914 else if (CONST_INT_P (op1))
28916 if (GET_CODE (op0) == SIGN_EXTEND)
28917 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
28920 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
28924 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
28927 *total = (cost->mult_init[MODE_INDEX (mode)]
28928 + nbits * cost->mult_bit
28929 + rtx_cost (op0, outer_code, speed) + rtx_cost (op1, outer_code, speed));
28938 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
28939 /* ??? SSE cost should be used here. */
28940 *total = cost->fdiv;
28941 else if (X87_FLOAT_MODE_P (mode))
28942 *total = cost->fdiv;
28943 else if (FLOAT_MODE_P (mode))
28944 /* ??? SSE vector cost should be used here. */
28945 *total = cost->fdiv;
28947 *total = cost->divide[MODE_INDEX (mode)];
28951 if (GET_MODE_CLASS (mode) == MODE_INT
28952 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
28954 if (GET_CODE (XEXP (x, 0)) == PLUS
28955 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
28956 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
28957 && CONSTANT_P (XEXP (x, 1)))
28959 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
28960 if (val == 2 || val == 4 || val == 8)
28962 *total = cost->lea;
28963 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
28964 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
28965 outer_code, speed);
28966 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
28970 else if (GET_CODE (XEXP (x, 0)) == MULT
28971 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
28973 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
28974 if (val == 2 || val == 4 || val == 8)
28976 *total = cost->lea;
28977 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
28978 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
28982 else if (GET_CODE (XEXP (x, 0)) == PLUS)
28984 *total = cost->lea;
28985 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
28986 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
28987 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
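/* Illustrative sketch (not part of GCC sources): the three arms above
   match the address shapes that a single LEA computes.  In C terms the
   insn evaluates, without touching the flags:

static long
lea_model (long base, long index, int scale, long disp)
{
  // scale must be 1, 2, 4 or 8 -- the multiplier values accepted above
  return base + index * scale + disp;
}  */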
28994 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
28996 /* ??? SSE cost should be used here. */
28997 *total = cost->fadd;
29000 else if (X87_FLOAT_MODE_P (mode))
29002 *total = cost->fadd;
29005 else if (FLOAT_MODE_P (mode))
29007 /* ??? SSE vector cost should be used here. */
29008 *total = cost->fadd;
29016 if (!TARGET_64BIT && mode == DImode)
29018 *total = (cost->add * 2
29019 + (rtx_cost (XEXP (x, 0), outer_code, speed)
29020 << (GET_MODE (XEXP (x, 0)) != DImode))
29021 + (rtx_cost (XEXP (x, 1), outer_code, speed)
29022 << (GET_MODE (XEXP (x, 1)) != DImode)));
29028 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
29030 /* ??? SSE cost should be used here. */
29031 *total = cost->fchs;
29034 else if (X87_FLOAT_MODE_P (mode))
29036 *total = cost->fchs;
29039 else if (FLOAT_MODE_P (mode))
29041 /* ??? SSE vector cost should be used here. */
29042 *total = cost->fchs;
29048 if (!TARGET_64BIT && mode == DImode)
29049 *total = cost->add * 2;
29051 *total = cost->add;
29055 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
29056 && XEXP (XEXP (x, 0), 1) == const1_rtx
29057 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
29058 && XEXP (x, 1) == const0_rtx)
29060 /* This kind of construct is implemented using test[bwl].
29061 Treat it as if we had an AND. */
29062 *total = (cost->add
29063 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed)
29064 + rtx_cost (const1_rtx, outer_code, speed));
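/* Illustrative sketch (not part of GCC sources): the RTL matched above,
   (compare (zero_extract X 1 N) 0), is a single-bit test.  In C terms it
   is the condition below, emitted as test[bwl] with a one-bit immediate
   mask:

static int
bit_test_model (unsigned int x, unsigned int n)
{
  return (x & (1u << n)) != 0;  // testl $(1 << n), x
}  */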
29070 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
29075 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
29076 /* ??? SSE cost should be used here. */
29077 *total = cost->fabs;
29078 else if (X87_FLOAT_MODE_P (mode))
29079 *total = cost->fabs;
29080 else if (FLOAT_MODE_P (mode))
29081 /* ??? SSE vector cost should be used here. */
29082 *total = cost->fabs;
29086 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
29087 /* ??? SSE cost should be used here. */
29088 *total = cost->fsqrt;
29089 else if (X87_FLOAT_MODE_P (mode))
29090 *total = cost->fsqrt;
29091 else if (FLOAT_MODE_P (mode))
29092 /* ??? SSE vector cost should be used here. */
29093 *total = cost->fsqrt;
29097 if (XINT (x, 1) == UNSPEC_TP)
29104 case VEC_DUPLICATE:
29105 /* ??? Assume all of these vector manipulation patterns are
29106 recognizable, in which case they all pretty much have the same cost. */
29108 *total = COSTS_N_INSNS (1);
29118 static int current_machopic_label_num;
29120 /* Given a symbol name and its associated stub, write out the
29121 definition of the stub. */
29124 machopic_output_stub (FILE *file, const char *symb, const char *stub)
29126 unsigned int length;
29127 char *binder_name, *symbol_name, lazy_ptr_name[32];
29128 int label = ++current_machopic_label_num;
29130 /* For 64-bit we shouldn't get here. */
29131 gcc_assert (!TARGET_64BIT);
29133 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
29134 symb = targetm.strip_name_encoding (symb);
29136 length = strlen (stub);
29137 binder_name = XALLOCAVEC (char, length + 32);
29138 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
29140 length = strlen (symb);
29141 symbol_name = XALLOCAVEC (char, length + 32);
29142 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
29144 sprintf (lazy_ptr_name, "L%d$lz", label);
29146 if (MACHOPIC_ATT_STUB)
29147 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
29148 else if (MACHOPIC_PURE)
29149 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
29151 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
29153 fprintf (file, "%s:\n", stub);
29154 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
29156 if (MACHOPIC_ATT_STUB)
29158 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
29160 else if (MACHOPIC_PURE)
29163 /* 25-byte PIC stub using "CALL get_pc_thunk". */
29164 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
29165 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
29166 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
29167 label, lazy_ptr_name, label);
29168 fprintf (file, "\tjmp\t*%%ecx\n");
29171 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
29173 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
29174 it needs no stub-binding-helper. */
29175 if (MACHOPIC_ATT_STUB)
29178 fprintf (file, "%s:\n", binder_name);
29182 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
29183 fprintf (file, "\tpushl\t%%ecx\n");
29186 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
29188 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
29190 /* N.B. Keep the correspondence of these
29191 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
29192 old-pic/new-pic/non-pic stubs; altering this will break
29193 compatibility with existing dylibs. */
29196 /* 25-byte PIC stub using "CALL get_pc_thunk". */
29197 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
29200 /* 16-byte -mdynamic-no-pic stub. */
29201 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
29203 fprintf (file, "%s:\n", lazy_ptr_name);
29204 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
29205 fprintf (file, ASM_LONG "%s\n", binder_name);
29207 #endif /* TARGET_MACHO */
29209 /* Order the registers for register allocator. */
29212 x86_order_regs_for_local_alloc (void)
29217 /* First allocate the local general purpose registers. */
29218 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
29219 if (GENERAL_REGNO_P (i) && call_used_regs[i])
29220 reg_alloc_order [pos++] = i;
29222 /* Global general purpose registers. */
29223 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
29224 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
29225 reg_alloc_order [pos++] = i;
29227 /* x87 registers come first in case we are doing FP math using them. */
29229 if (!TARGET_SSE_MATH)
29230 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
29231 reg_alloc_order [pos++] = i;
29233 /* SSE registers. */
29234 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
29235 reg_alloc_order [pos++] = i;
29236 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
29237 reg_alloc_order [pos++] = i;
29239 /* x87 registers. */
29240 if (TARGET_SSE_MATH)
29241 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
29242 reg_alloc_order [pos++] = i;
29244 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
29245 reg_alloc_order [pos++] = i;
29247 /* Initialize the rest of the array, as we do not allocate some registers at all. */
29249 while (pos < FIRST_PSEUDO_REGISTER)
29250 reg_alloc_order [pos++] = 0;
29253 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
29254 in struct attribute_spec.handler. */
29256 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
29258 int flags ATTRIBUTE_UNUSED,
29259 bool *no_add_attrs)
29261 if (TREE_CODE (*node) != FUNCTION_TYPE
29262 && TREE_CODE (*node) != METHOD_TYPE
29263 && TREE_CODE (*node) != FIELD_DECL
29264 && TREE_CODE (*node) != TYPE_DECL)
29266 warning (OPT_Wattributes, "%qE attribute only applies to functions",
29268 *no_add_attrs = true;
29273 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
29275 *no_add_attrs = true;
29278 if (is_attribute_p ("callee_pop_aggregate_return", name))
29282 cst = TREE_VALUE (args);
29283 if (TREE_CODE (cst) != INTEGER_CST)
29285 warning (OPT_Wattributes,
29286 "%qE attribute requires an integer constant argument",
29288 *no_add_attrs = true;
29290 else if (compare_tree_int (cst, 0) != 0
29291 && compare_tree_int (cst, 1) != 0)
29293 warning (OPT_Wattributes,
29294 "argument to %qE attribute is neither zero, nor one",
29296 *no_add_attrs = true;
29305 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
29306 struct attribute_spec.handler. */
29308 ix86_handle_abi_attribute (tree *node, tree name,
29309 tree args ATTRIBUTE_UNUSED,
29310 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
29312 if (TREE_CODE (*node) != FUNCTION_TYPE
29313 && TREE_CODE (*node) != METHOD_TYPE
29314 && TREE_CODE (*node) != FIELD_DECL
29315 && TREE_CODE (*node) != TYPE_DECL)
29317 warning (OPT_Wattributes, "%qE attribute only applies to functions",
29319 *no_add_attrs = true;
29324 warning (OPT_Wattributes, "%qE attribute only available for 64-bit",
29326 *no_add_attrs = true;
29330 /* Can combine regparm with all attributes but fastcall. */
29331 if (is_attribute_p ("ms_abi", name))
29333 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
29335 error ("ms_abi and sysv_abi attributes are not compatible");
29340 else if (is_attribute_p ("sysv_abi", name))
29342 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
29344 error ("ms_abi and sysv_abi attributes are not compatible");
29353 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
29354 struct attribute_spec.handler. */
29356 ix86_handle_struct_attribute (tree *node, tree name,
29357 tree args ATTRIBUTE_UNUSED,
29358 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
29361 if (DECL_P (*node))
29363 if (TREE_CODE (*node) == TYPE_DECL)
29364 type = &TREE_TYPE (*node);
29369 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
29370 || TREE_CODE (*type) == UNION_TYPE)))
29372 warning (OPT_Wattributes, "%qE attribute ignored",
29374 *no_add_attrs = true;
29377 else if ((is_attribute_p ("ms_struct", name)
29378 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
29379 || ((is_attribute_p ("gcc_struct", name)
29380 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
29382 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
29384 *no_add_attrs = true;
29391 ix86_handle_fndecl_attribute (tree *node, tree name,
29392 tree args ATTRIBUTE_UNUSED,
29393 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
29395 if (TREE_CODE (*node) != FUNCTION_DECL)
29397 warning (OPT_Wattributes, "%qE attribute only applies to functions",
29399 *no_add_attrs = true;
29405 ix86_ms_bitfield_layout_p (const_tree record_type)
29407 return ((TARGET_MS_BITFIELD_LAYOUT
29408 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
29409 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
29412 /* Returns an expression indicating where the this parameter is
29413 located on entry to the FUNCTION. */
29416 x86_this_parameter (tree function)
29418 tree type = TREE_TYPE (function);
29419 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
29424 const int *parm_regs;
29426 if (ix86_function_type_abi (type) == MS_ABI)
29427 parm_regs = x86_64_ms_abi_int_parameter_registers;
29429 parm_regs = x86_64_int_parameter_registers;
29430 return gen_rtx_REG (DImode, parm_regs[aggr]);
29433 nregs = ix86_function_regparm (type, function);
29435 if (nregs > 0 && !stdarg_p (type))
29438 unsigned int ccvt = ix86_get_callcvt (type);
29440 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
29441 regno = aggr ? DX_REG : CX_REG;
29442 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
29446 return gen_rtx_MEM (SImode,
29447 plus_constant (stack_pointer_rtx, 4));
29456 return gen_rtx_MEM (SImode,
29457 plus_constant (stack_pointer_rtx, 4));
29460 return gen_rtx_REG (SImode, regno);
29463 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
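/* Illustrative sketch (not part of GCC sources): the fastcall arm and
   the final stack-slot return above, summarized as a plain-C table
   (the regparm and thiscall arms are elided here):

static const char *
this_location_model (int fastcall, int aggr)
{
  if (fastcall)
    return aggr ? "%edx" : "%ecx";      // aggr return pointer takes %ecx
  return aggr ? "8(%esp)" : "4(%esp)";  // just below the return address
}  */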
29466 /* Determine whether x86_output_mi_thunk can succeed. */
29469 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
29470 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
29471 HOST_WIDE_INT vcall_offset, const_tree function)
29473 /* 64-bit can handle anything. */
29477 /* For 32-bit, everything's fine if we have one free register. */
29478 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
29481 /* Need a free register for vcall_offset. */
29485 /* Need a free register for GOT references. */
29486 if (flag_pic && !targetm.binds_local_p (function))
29489 /* Otherwise ok. */
29493 /* Output the assembler code for a thunk function. THUNK_DECL is the
29494 declaration for the thunk function itself, FUNCTION is the decl for
29495 the target function. DELTA is an immediate constant offset to be
29496 added to THIS. If VCALL_OFFSET is nonzero, the word at
29497 *(*this + vcall_offset) should be added to THIS. */
29500 x86_output_mi_thunk (FILE *file,
29501 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
29502 HOST_WIDE_INT vcall_offset, tree function)
29504 rtx this_param = x86_this_parameter (function);
29505 rtx this_reg, tmp, fnaddr;
29507 emit_note (NOTE_INSN_PROLOGUE_END);
29509 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
29510 pull it in now and let DELTA benefit. */
29511 if (REG_P (this_param))
29512 this_reg = this_param;
29513 else if (vcall_offset)
29515 /* Put the this parameter into %eax. */
29516 this_reg = gen_rtx_REG (Pmode, AX_REG);
29517 emit_move_insn (this_reg, this_param);
29520 this_reg = NULL_RTX;
29522 /* Adjust the this parameter by a fixed constant. */
29525 rtx delta_rtx = GEN_INT (delta);
29526 rtx delta_dst = this_reg ? this_reg : this_param;
29530 if (!x86_64_general_operand (delta_rtx, Pmode))
29532 tmp = gen_rtx_REG (Pmode, R10_REG);
29533 emit_move_insn (tmp, delta_rtx);
29538 emit_insn (ix86_gen_add3 (delta_dst, delta_dst, delta_rtx));
29541 /* Adjust the this parameter by a value stored in the vtable. */
29544 rtx vcall_addr, vcall_mem, this_mem;
29545 unsigned int tmp_regno;
29548 tmp_regno = R10_REG;
29551 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
29552 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) != 0)
29553 tmp_regno = AX_REG;
29555 tmp_regno = CX_REG;
29557 tmp = gen_rtx_REG (Pmode, tmp_regno);
29559 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
29560 if (Pmode != ptr_mode)
29561 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
29562 emit_move_insn (tmp, this_mem);
29564 /* Adjust the this parameter. */
29565 vcall_addr = plus_constant (tmp, vcall_offset);
29567 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
29569 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
29570 emit_move_insn (tmp2, GEN_INT (vcall_offset));
29571 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
29574 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
29575 if (Pmode != ptr_mode)
29576 emit_insn (gen_addsi_1_zext (this_reg,
29577 gen_rtx_REG (ptr_mode,
29581 emit_insn (ix86_gen_add3 (this_reg, this_reg, vcall_mem));
29584 /* If necessary, drop THIS back to its stack slot. */
29585 if (this_reg && this_reg != this_param)
29586 emit_move_insn (this_param, this_reg);
29588 fnaddr = XEXP (DECL_RTL (function), 0);
29591 if (!flag_pic || targetm.binds_local_p (function)
29592 || cfun->machine->call_abi == MS_ABI)
29596 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
29597 tmp = gen_rtx_CONST (Pmode, tmp);
29598 fnaddr = gen_rtx_MEM (Pmode, tmp);
29603 if (!flag_pic || targetm.binds_local_p (function))
29606 else if (TARGET_MACHO)
29608 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
29609 fnaddr = XEXP (fnaddr, 0);
29611 #endif /* TARGET_MACHO */
29614 tmp = gen_rtx_REG (Pmode, CX_REG);
29615 output_set_got (tmp, NULL_RTX);
29617 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
29618 fnaddr = gen_rtx_PLUS (Pmode, fnaddr, tmp);
29619 fnaddr = gen_rtx_MEM (Pmode, fnaddr);
29623 /* Our sibling call patterns do not allow memories, because we have no
29624 predicate that can distinguish between frame and non-frame memory.
29625 For our purposes here, we can get away with (ab)using a jump pattern,
29626 because we're going to do no optimization. */
29627 if (MEM_P (fnaddr))
29628 emit_jump_insn (gen_indirect_jump (fnaddr));
29631 tmp = gen_rtx_MEM (QImode, fnaddr);
29632 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
29633 tmp = emit_call_insn (tmp);
29634 SIBLING_CALL_P (tmp) = 1;
29638 /* Emit just enough of rest_of_compilation to get the insns emitted.
29639 Note that use_thunk calls assemble_start_function et al. */
29640 tmp = get_insns ();
29641 insn_locators_alloc ();
29642 shorten_branches (tmp);
29643 final_start_function (tmp, file, 1);
29644 final (tmp, file, 1);
29645 final_end_function ();
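/* Illustrative sketch (not part of GCC sources): what the thunk emitted
   above computes before the tail call, in plain C:

#include <stdint.h>

static void *
thunk_this_adjust_model (void *this_ptr, intptr_t delta,
                         intptr_t vcall_offset)
{
  char *p = (char *) this_ptr + delta;          // fixed DELTA adjustment

  if (vcall_offset)
    {
      char *vtbl = *(char **) p;                 // load the vtable
      p += *(intptr_t *) (vtbl + vcall_offset);  // *(*this + vcall_offset)
    }
  return p;                                      // then jump to FUNCTION
}  */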
29649 x86_file_start (void)
29651 default_file_start ();
29653 darwin_file_start ();
29655 if (X86_FILE_START_VERSION_DIRECTIVE)
29656 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
29657 if (X86_FILE_START_FLTUSED)
29658 fputs ("\t.global\t__fltused\n", asm_out_file);
29659 if (ix86_asm_dialect == ASM_INTEL)
29660 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
29664 x86_field_alignment (tree field, int computed)
29666 enum machine_mode mode;
29667 tree type = TREE_TYPE (field);
29669 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
29671 mode = TYPE_MODE (strip_array_types (type));
29672 if (mode == DFmode || mode == DCmode
29673 || GET_MODE_CLASS (mode) == MODE_INT
29674 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
29675 return MIN (32, computed);
29679 /* Output assembler code to FILE to increment profiler label # LABELNO
29680 for profiling a function entry. */
29682 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
29684 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
29689 #ifndef NO_PROFILE_COUNTERS
29690 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
29693 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
29694 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
29696 fprintf (file, "\tcall\t%s\n", mcount_name);
29700 #ifndef NO_PROFILE_COUNTERS
29701 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
29704 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
29708 #ifndef NO_PROFILE_COUNTERS
29709 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
29712 fprintf (file, "\tcall\t%s\n", mcount_name);
29716 /* We don't have exact information about the insn sizes, but we may assume
29717 quite safely that we are informed about all 1 byte insns and memory
29718 address sizes.  This is enough to eliminate unnecessary padding in the vast majority of cases. */
29722 min_insn_size (rtx insn)
29726 if (!INSN_P (insn) || !active_insn_p (insn))
29729 /* Discard alignments we've emitted, and jump instructions. */
29730 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
29731 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
29733 if (JUMP_TABLE_DATA_P (insn))
29736 /* Important case - calls are always 5 bytes.
29737 It is common to have many calls in a row. */
29739 && symbolic_reference_mentioned_p (PATTERN (insn))
29740 && !SIBLING_CALL_P (insn))
29742 len = get_attr_length (insn);
29746 /* For normal instructions we rely on get_attr_length being exact,
29747 with a few exceptions. */
29748 if (!JUMP_P (insn))
29750 enum attr_type type = get_attr_type (insn);
29755 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
29756 || asm_noperands (PATTERN (insn)) >= 0)
29763 /* Otherwise trust get_attr_length. */
29767 l = get_attr_length_address (insn);
29768 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
29777 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
29779 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in a 16-byte window. */
29783 ix86_avoid_jump_mispredicts (void)
29785 rtx insn, start = get_insns ();
29786 int nbytes = 0, njumps = 0;
29789 /* Look for all minimal intervals of instructions containing 4 jumps.
29790 The intervals are bounded by START and INSN.  NBYTES is the total
29791 size of the instructions in the interval, including INSN but not
29792 including START.  When NBYTES is smaller than 16, it is possible
29793 that the end of START and the end of INSN fall in the same 16-byte page.
29795 The smallest offset in the page at which INSN can start is the case
29796 where START ends at offset 0; INSN then starts at offset NBYTES - sizeof (INSN).
29797 We emit a p2align to a 16-byte window with max skip 15 - NBYTES + sizeof (INSN). */
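/* For example, if the fourth jump closes an interval with NBYTES = 10
   and INSN itself is 2 bytes long, we emit a p2align to 16 with max
   skip 15 - 10 + 2 = 7 in front of INSN.  */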
29799 for (insn = start; insn; insn = NEXT_INSN (insn))
29803 if (LABEL_P (insn))
29805 int align = label_to_alignment (insn);
29806 int max_skip = label_to_max_skip (insn);
29810 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
29811 already in the current 16 byte page, because otherwise
29812 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
29813 bytes to reach 16 byte boundary. */
29815 || (align <= 3 && max_skip != (1 << align) - 1))
29818 fprintf (dump_file, "Label %i with max_skip %i\n",
29819 INSN_UID (insn), max_skip);
29822 while (nbytes + max_skip >= 16)
29824 start = NEXT_INSN (start);
29825 if ((JUMP_P (start)
29826 && GET_CODE (PATTERN (start)) != ADDR_VEC
29827 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
29829 njumps--, isjump = 1;
29832 nbytes -= min_insn_size (start);
29838 min_size = min_insn_size (insn);
29839 nbytes += min_size;
29841 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
29842 INSN_UID (insn), min_size);
29844 && GET_CODE (PATTERN (insn)) != ADDR_VEC
29845 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
29853 start = NEXT_INSN (start);
29854 if ((JUMP_P (start)
29855 && GET_CODE (PATTERN (start)) != ADDR_VEC
29856 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
29858 njumps--, isjump = 1;
29861 nbytes -= min_insn_size (start);
29863 gcc_assert (njumps >= 0);
29865 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
29866 INSN_UID (start), INSN_UID (insn), nbytes);
29868 if (njumps == 3 && isjump && nbytes < 16)
29870 int padsize = 15 - nbytes + min_insn_size (insn);
29873 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
29874 INSN_UID (insn), padsize);
29875 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
29881 /* AMD Athlon works faster
29882 when RET is not the destination of a conditional jump or directly
29883 preceded by another jump instruction.  We avoid the penalty by
29884 inserting a NOP just before such RET instructions. */
29886 ix86_pad_returns (void)
29891 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
29893 basic_block bb = e->src;
29894 rtx ret = BB_END (bb);
29896 bool replace = false;
29898 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
29899 || optimize_bb_for_size_p (bb))
29901 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
29902 if (active_insn_p (prev) || LABEL_P (prev))
29904 if (prev && LABEL_P (prev))
29909 FOR_EACH_EDGE (e, ei, bb->preds)
29910 if (EDGE_FREQUENCY (e) && e->src->index >= 0
29911 && !(e->flags & EDGE_FALLTHRU))
29916 prev = prev_active_insn (ret);
29918 && ((JUMP_P (prev) && any_condjump_p (prev))
29921 /* Empty functions get a branch mispredict even when
29922 the jump destination is not visible to us. */
29923 if (!prev && !optimize_function_for_size_p (cfun))
29928 emit_jump_insn_before (gen_return_internal_long (), ret);
29934 /* Count the minimum number of instructions in BB. Return 4 if the
29935 number of instructions >= 4. */
29938 ix86_count_insn_bb (basic_block bb)
29941 int insn_count = 0;
29943 /* Count number of instructions in this block. Return 4 if the number
29944 of instructions >= 4. */
29945 FOR_BB_INSNS (bb, insn)
29947 /* This only happens in exit blocks. */
29949 && GET_CODE (PATTERN (insn)) == RETURN)
29952 if (NONDEBUG_INSN_P (insn)
29953 && GET_CODE (PATTERN (insn)) != USE
29954 && GET_CODE (PATTERN (insn)) != CLOBBER)
29957 if (insn_count >= 4)
29966 /* Count the minimum number of instructions in a code path through BB.
29967 Return 4 if the number of instructions >= 4. */
29970 ix86_count_insn (basic_block bb)
29974 int min_prev_count;
29976 /* Only bother counting instructions along paths with no
29977 more than 2 basic blocks between entry and exit. Given
29978 that BB has an edge to exit, determine if a predecessor
29979 of BB has an edge from entry. If so, compute the number
29980 of instructions in the predecessor block. If there
29981 happen to be multiple such blocks, compute the minimum. */
29982 min_prev_count = 4;
29983 FOR_EACH_EDGE (e, ei, bb->preds)
29986 edge_iterator prev_ei;
29988 if (e->src == ENTRY_BLOCK_PTR)
29990 min_prev_count = 0;
29993 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
29995 if (prev_e->src == ENTRY_BLOCK_PTR)
29997 int count = ix86_count_insn_bb (e->src);
29998 if (count < min_prev_count)
29999 min_prev_count = count;
30005 if (min_prev_count < 4)
30006 min_prev_count += ix86_count_insn_bb (bb);
30008 return min_prev_count;
30011 /* Pad a short function to 4 instructions. */
30014 ix86_pad_short_function (void)
30019 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
30021 rtx ret = BB_END (e->src);
30022 if (JUMP_P (ret) && GET_CODE (PATTERN (ret)) == RETURN)
30024 int insn_count = ix86_count_insn (e->src);
30026 /* Pad short function. */
30027 if (insn_count < 4)
30031 /* Find epilogue. */
30034 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
30035 insn = PREV_INSN (insn);
30040 /* Two NOPs count as one instruction. */
30041 insn_count = 2 * (4 - insn_count);
30042 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
30048 /* Implement machine specific optimizations.  We implement padding of returns
30049 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
30053 /* We are freeing block_for_insn in the toplev to keep compatibility
30054 with old MDEP_REORGS that are not CFG based. Recompute it now. */
30055 compute_bb_for_insn ();
30057 /* Run the vzeroupper optimization if needed. */
30058 if (TARGET_VZEROUPPER)
30059 move_or_delete_vzeroupper ();
30061 if (optimize && optimize_function_for_speed_p (cfun))
30063 if (TARGET_PAD_SHORT_FUNCTION)
30064 ix86_pad_short_function ();
30065 else if (TARGET_PAD_RETURNS)
30066 ix86_pad_returns ();
30067 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
30068 if (TARGET_FOUR_JUMP_LIMIT)
30069 ix86_avoid_jump_mispredicts ();
30074 /* Return nonzero when a QImode register that must be represented via a REX prefix is used. */
30077 x86_extended_QIreg_mentioned_p (rtx insn)
30080 extract_insn_cached (insn);
30081 for (i = 0; i < recog_data.n_operands; i++)
30082 if (REG_P (recog_data.operand[i])
30083 && REGNO (recog_data.operand[i]) > BX_REG)
30088 /* Return nonzero when P points to a register encoded via a REX prefix.
30089 Called via for_each_rtx. */
30091 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
30093 unsigned int regno;
30096 regno = REGNO (*p);
30097 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
30100 /* Return true when INSN mentions a register that must be encoded using a REX prefix. */
30103 x86_extended_reg_mentioned_p (rtx insn)
30105 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
30106 extended_reg_mentioned_1, NULL);
30109 /* If profitable, negate (without causing overflow) integer constant
30110 of mode MODE at location LOC. Return true in this case. */
30112 x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
30116 if (!CONST_INT_P (*loc))
30122 /* DImode x86_64 constants must fit in 32 bits. */
30123 gcc_assert (x86_64_immediate_operand (*loc, mode));
30134 gcc_unreachable ();
30137 /* Avoid overflows. */
30138 if (mode_signbit_p (mode, *loc))
30141 val = INTVAL (*loc);
30143 /* Make things pretty: use `subl $4,%eax' rather than `addl $-4,%eax'.
30144 Exception: -128 encodes smaller than 128, so swap the sign and the operation. */
30145 if ((val < 0 && val != -128)
30148 *loc = GEN_INT (-val);
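/* Example of the encoding win (illustrative): `addl $128,%eax' needs a
   32-bit immediate, while the negated `subl $-128,%eax' fits the
   sign-extended 8-bit immediate form; conversely -128 itself already
   fits in 8 bits, so it is left alone.  */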
30155 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
30156 optabs would emit if we didn't have TFmode patterns. */
30159 x86_emit_floatuns (rtx operands[2])
30161 rtx neglab, donelab, i0, i1, f0, in, out;
30162 enum machine_mode mode, inmode;
30164 inmode = GET_MODE (operands[1]);
30165 gcc_assert (inmode == SImode || inmode == DImode);
30168 in = force_reg (inmode, operands[1]);
30169 mode = GET_MODE (out);
30170 neglab = gen_label_rtx ();
30171 donelab = gen_label_rtx ();
30172 f0 = gen_reg_rtx (mode);
30174 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
30176 expand_float (out, in, 0);
30178 emit_jump_insn (gen_jump (donelab));
30181 emit_label (neglab);
30183 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
30185 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
30187 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
30189 expand_float (f0, i0, 0);
30191 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
30193 emit_label (donelab);
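/* A scalar sketch (illustrative only, not GCC code) of the sequence
   emitted above for a DImode input, assuming <stdint.h>: when the
   sign bit is set, halve the value with the low bit folded back in
   (so the final rounding stays correct), convert, then double.  */
static float
sketch_floatuns (uint64_t u)
{
  if ((int64_t) u >= 0)
    return (float) (int64_t) u;       /* fast path: plain signed convert */
  uint64_t i0 = (u >> 1) | (u & 1);   /* i0 = (u >> 1) | (u & 1) */
  float f0 = (float) (int64_t) i0;
  return f0 + f0;                     /* out = f0 + f0 */
}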
30196 /* AVX does not support 32-byte integer vector operations,
30197 thus the longest vector we are faced with is V16QImode. */
30198 #define MAX_VECT_LEN 16
30200 struct expand_vec_perm_d
30202 rtx target, op0, op1;
30203 unsigned char perm[MAX_VECT_LEN];
30204 enum machine_mode vmode;
30205 unsigned char nelt;
30209 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
30210 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
30212 /* Get a vector mode of the same size as the original but with elements
30213 twice as wide. This is only guaranteed to apply to integral vectors. */
30215 static inline enum machine_mode
30216 get_mode_wider_vector (enum machine_mode o)
30218 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
30219 enum machine_mode n = GET_MODE_WIDER_MODE (o);
30220 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
30221 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
30225 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
30226 with all elements equal to VAR. Return true if successful. */
30229 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
30230 rtx target, rtx val)
30253 /* First attempt to recognize VAL as-is. */
30254 dup = gen_rtx_VEC_DUPLICATE (mode, val);
30255 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
30256 if (recog_memoized (insn) < 0)
30259 /* If that fails, force VAL into a register. */
30262 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
30263 seq = get_insns ();
30266 emit_insn_before (seq, insn);
30268 ok = recog_memoized (insn) >= 0;
30277 if (TARGET_SSE || TARGET_3DNOW_A)
30281 val = gen_lowpart (SImode, val);
30282 x = gen_rtx_TRUNCATE (HImode, val);
30283 x = gen_rtx_VEC_DUPLICATE (mode, x);
30284 emit_insn (gen_rtx_SET (VOIDmode, target, x));
30297 struct expand_vec_perm_d dperm;
30301 memset (&dperm, 0, sizeof (dperm));
30302 dperm.target = target;
30303 dperm.vmode = mode;
30304 dperm.nelt = GET_MODE_NUNITS (mode);
30305 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
30307 /* Extend to SImode using a paradoxical SUBREG. */
30308 tmp1 = gen_reg_rtx (SImode);
30309 emit_move_insn (tmp1, gen_lowpart (SImode, val));
30311 /* Insert the SImode value as low element of a V4SImode vector. */
30312 tmp2 = gen_lowpart (V4SImode, dperm.op0);
30313 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
30315 ok = (expand_vec_perm_1 (&dperm)
30316 || expand_vec_perm_broadcast_1 (&dperm));
30328 /* Replicate the value once into the next wider mode and recurse. */
30330 enum machine_mode smode, wsmode, wvmode;
30333 smode = GET_MODE_INNER (mode);
30334 wvmode = get_mode_wider_vector (mode);
30335 wsmode = GET_MODE_INNER (wvmode);
30337 val = convert_modes (wsmode, smode, val, true);
30338 x = expand_simple_binop (wsmode, ASHIFT, val,
30339 GEN_INT (GET_MODE_BITSIZE (smode)),
30340 NULL_RTX, 1, OPTAB_LIB_WIDEN);
30341 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
30343 x = gen_lowpart (wvmode, target);
30344 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
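/* E.g. to broadcast a QImode value V this forms the HImode value
   (V << 8) | V and recursively broadcasts that in the wider
   vector mode.  */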
30352 enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
30353 rtx x = gen_reg_rtx (hvmode);
30355 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
30358 x = gen_rtx_VEC_CONCAT (mode, x, x);
30359 emit_insn (gen_rtx_SET (VOIDmode, target, x));
30368 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
30369 whose ONE_VAR element is VAR, and other elements are zero. Return true
30373 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
30374 rtx target, rtx var, int one_var)
30376 enum machine_mode vsimode;
30379 bool use_vector_set = false;
30384 /* For SSE4.1, we normally use vector set. But if the second
30385 element is zero and inter-unit moves are OK, we use movq
30387 use_vector_set = (TARGET_64BIT
30389 && !(TARGET_INTER_UNIT_MOVES
30395 use_vector_set = TARGET_SSE4_1;
30398 use_vector_set = TARGET_SSE2;
30401 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
30408 use_vector_set = TARGET_AVX;
30411 /* Use ix86_expand_vector_set in 64bit mode only. */
30412 use_vector_set = TARGET_AVX && TARGET_64BIT;
30418 if (use_vector_set)
30420 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
30421 var = force_reg (GET_MODE_INNER (mode), var);
30422 ix86_expand_vector_set (mmx_ok, target, var, one_var);
30438 var = force_reg (GET_MODE_INNER (mode), var);
30439 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
30440 emit_insn (gen_rtx_SET (VOIDmode, target, x));
30445 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
30446 new_target = gen_reg_rtx (mode);
30448 new_target = target;
30449 var = force_reg (GET_MODE_INNER (mode), var);
30450 x = gen_rtx_VEC_DUPLICATE (mode, var);
30451 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
30452 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
30455 /* We need to shuffle the value to the correct position, so
30456 create a new pseudo to store the intermediate result. */
30458 /* With SSE2, we can use the integer shuffle insns. */
30459 if (mode != V4SFmode && TARGET_SSE2)
30461 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
30463 GEN_INT (one_var == 1 ? 0 : 1),
30464 GEN_INT (one_var == 2 ? 0 : 1),
30465 GEN_INT (one_var == 3 ? 0 : 1)));
30466 if (target != new_target)
30467 emit_move_insn (target, new_target);
30471 /* Otherwise convert the intermediate result to V4SFmode and
30472 use the SSE1 shuffle instructions. */
30473 if (mode != V4SFmode)
30475 tmp = gen_reg_rtx (V4SFmode);
30476 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
30481 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
30483 GEN_INT (one_var == 1 ? 0 : 1),
30484 GEN_INT (one_var == 2 ? 0+4 : 1+4),
30485 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
30487 if (mode != V4SFmode)
30488 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
30489 else if (tmp != target)
30490 emit_move_insn (target, tmp);
30492 else if (target != new_target)
30493 emit_move_insn (target, new_target);
30498 vsimode = V4SImode;
30504 vsimode = V2SImode;
30510 /* Zero extend the variable element to SImode and recurse. */
30511 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
30513 x = gen_reg_rtx (vsimode);
30514 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
30516 gcc_unreachable ();
30518 emit_move_insn (target, gen_lowpart (mode, x));
30526 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
30527 consisting of the values in VALS. It is known that all elements
30528 except ONE_VAR are constants. Return true if successful. */
30531 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
30532 rtx target, rtx vals, int one_var)
30534 rtx var = XVECEXP (vals, 0, one_var);
30535 enum machine_mode wmode;
30538 const_vec = copy_rtx (vals);
30539 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
30540 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
30548 /* For the two element vectors, it's just as easy to use
30549 the general case. */
30553 /* Use ix86_expand_vector_set in 64bit mode only. */
30576 /* There's no way to set one QImode entry easily. Combine
30577 the variable value with its adjacent constant value, and
30578 promote to an HImode set. */
30579 x = XVECEXP (vals, 0, one_var ^ 1);
30582 var = convert_modes (HImode, QImode, var, true);
30583 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
30584 NULL_RTX, 1, OPTAB_LIB_WIDEN);
30585 x = GEN_INT (INTVAL (x) & 0xff);
30589 var = convert_modes (HImode, QImode, var, true);
30590 x = gen_int_mode (INTVAL (x) << 8, HImode);
30592 if (x != const0_rtx)
30593 var = expand_simple_binop (HImode, IOR, var, x, var,
30594 1, OPTAB_LIB_WIDEN);
30596 x = gen_reg_rtx (wmode);
30597 emit_move_insn (x, gen_lowpart (wmode, const_vec));
30598 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
30600 emit_move_insn (target, gen_lowpart (mode, x));
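/* E.g. (little endian) a variable byte V at element 2 whose constant
   neighbor C sits at element 3 is fused into the HImode value
   (C << 8) | V and inserted as HImode element 2 >> 1 == 1.  */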
30607 emit_move_insn (target, const_vec);
30608 ix86_expand_vector_set (mmx_ok, target, var, one_var);
30612 /* A subroutine of ix86_expand_vector_init_general. Use vector
30613 concatenate to handle the most general case: all values variable,
30614 and none identical. */
30617 ix86_expand_vector_init_concat (enum machine_mode mode,
30618 rtx target, rtx *ops, int n)
30620 enum machine_mode cmode, hmode = VOIDmode;
30621 rtx first[8], second[4];
30661 gcc_unreachable ();
30664 if (!register_operand (ops[1], cmode))
30665 ops[1] = force_reg (cmode, ops[1]);
30666 if (!register_operand (ops[0], cmode))
30667 ops[0] = force_reg (cmode, ops[0]);
30668 emit_insn (gen_rtx_SET (VOIDmode, target,
30669 gen_rtx_VEC_CONCAT (mode, ops[0],
30689 gcc_unreachable ();
30705 gcc_unreachable ();
30710 /* FIXME: We process inputs backward to help RA. PR 36222. */
30713 for (; i > 0; i -= 2, j--)
30715 first[j] = gen_reg_rtx (cmode);
30716 v = gen_rtvec (2, ops[i - 1], ops[i]);
30717 ix86_expand_vector_init (false, first[j],
30718 gen_rtx_PARALLEL (cmode, v));
30724 gcc_assert (hmode != VOIDmode);
30725 for (i = j = 0; i < n; i += 2, j++)
30727 second[j] = gen_reg_rtx (hmode);
30728 ix86_expand_vector_init_concat (hmode, second [j],
30732 ix86_expand_vector_init_concat (mode, target, second, n);
30735 ix86_expand_vector_init_concat (mode, target, first, n);
30739 gcc_unreachable ();
30743 /* A subroutine of ix86_expand_vector_init_general. Use vector
30744 interleave to handle the most general case: all values variable,
30745 and none identical. */
30748 ix86_expand_vector_init_interleave (enum machine_mode mode,
30749 rtx target, rtx *ops, int n)
30751 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
30754 rtx (*gen_load_even) (rtx, rtx, rtx);
30755 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
30756 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
30761 gen_load_even = gen_vec_setv8hi;
30762 gen_interleave_first_low = gen_vec_interleave_lowv4si;
30763 gen_interleave_second_low = gen_vec_interleave_lowv2di;
30764 inner_mode = HImode;
30765 first_imode = V4SImode;
30766 second_imode = V2DImode;
30767 third_imode = VOIDmode;
30770 gen_load_even = gen_vec_setv16qi;
30771 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
30772 gen_interleave_second_low = gen_vec_interleave_lowv4si;
30773 inner_mode = QImode;
30774 first_imode = V8HImode;
30775 second_imode = V4SImode;
30776 third_imode = V2DImode;
30779 gcc_unreachable ();
30782 for (i = 0; i < n; i++)
30784 /* Extend the odd element to SImode using a paradoxical SUBREG. */
30785 op0 = gen_reg_rtx (SImode);
30786 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
30788 /* Insert the SImode value as low element of V4SImode vector. */
30789 op1 = gen_reg_rtx (V4SImode);
30790 op0 = gen_rtx_VEC_MERGE (V4SImode,
30791 gen_rtx_VEC_DUPLICATE (V4SImode,
30793 CONST0_RTX (V4SImode),
30795 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
30797 /* Cast the V4SImode vector back to a vector in the original mode. */
30798 op0 = gen_reg_rtx (mode);
30799 emit_move_insn (op0, gen_lowpart (mode, op1));
30801 /* Load even elements into the second position. */
30802 emit_insn (gen_load_even (op0,
30803 force_reg (inner_mode,
30807 /* Cast vector to FIRST_IMODE vector. */
30808 ops[i] = gen_reg_rtx (first_imode);
30809 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
30812 /* Interleave low FIRST_IMODE vectors. */
30813 for (i = j = 0; i < n; i += 2, j++)
30815 op0 = gen_reg_rtx (first_imode);
30816 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
30818 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
30819 ops[j] = gen_reg_rtx (second_imode);
30820 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
30823 /* Interleave low SECOND_IMODE vectors. */
30824 switch (second_imode)
30827 for (i = j = 0; i < n / 2; i += 2, j++)
30829 op0 = gen_reg_rtx (second_imode);
30830 emit_insn (gen_interleave_second_low (op0, ops[i],
30833 /* Cast the SECOND_IMODE vector to the THIRD_IMODE vector. */
30835 ops[j] = gen_reg_rtx (third_imode);
30836 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
30838 second_imode = V2DImode;
30839 gen_interleave_second_low = gen_vec_interleave_lowv2di;
30843 op0 = gen_reg_rtx (second_imode);
30844 emit_insn (gen_interleave_second_low (op0, ops[0],
30847 /* Cast the SECOND_IMODE vector back to a vector in the original mode. */
30849 emit_insn (gen_rtx_SET (VOIDmode, target,
30850 gen_lowpart (mode, op0)));
30854 gcc_unreachable ();
30858 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
30859 all values variable, and none identical. */
30862 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
30863 rtx target, rtx vals)
30865 rtx ops[32], op0, op1;
30866 enum machine_mode half_mode = VOIDmode;
30873 if (!mmx_ok && !TARGET_SSE)
30885 n = GET_MODE_NUNITS (mode);
30886 for (i = 0; i < n; i++)
30887 ops[i] = XVECEXP (vals, 0, i);
30888 ix86_expand_vector_init_concat (mode, target, ops, n);
30892 half_mode = V16QImode;
30896 half_mode = V8HImode;
30900 n = GET_MODE_NUNITS (mode);
30901 for (i = 0; i < n; i++)
30902 ops[i] = XVECEXP (vals, 0, i);
30903 op0 = gen_reg_rtx (half_mode);
30904 op1 = gen_reg_rtx (half_mode);
30905 ix86_expand_vector_init_interleave (half_mode, op0, ops,
30907 ix86_expand_vector_init_interleave (half_mode, op1,
30908 &ops [n >> 1], n >> 2);
30909 emit_insn (gen_rtx_SET (VOIDmode, target,
30910 gen_rtx_VEC_CONCAT (mode, op0, op1)));
30914 if (!TARGET_SSE4_1)
30922 /* Don't use ix86_expand_vector_init_interleave if we can't
30923 move from GPR to SSE register directly. */
30924 if (!TARGET_INTER_UNIT_MOVES)
30927 n = GET_MODE_NUNITS (mode);
30928 for (i = 0; i < n; i++)
30929 ops[i] = XVECEXP (vals, 0, i);
30930 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
30938 gcc_unreachable ();
30942 int i, j, n_elts, n_words, n_elt_per_word;
30943 enum machine_mode inner_mode;
30944 rtx words[4], shift;
30946 inner_mode = GET_MODE_INNER (mode);
30947 n_elts = GET_MODE_NUNITS (mode);
30948 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
30949 n_elt_per_word = n_elts / n_words;
30950 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
30952 for (i = 0; i < n_words; ++i)
30954 rtx word = NULL_RTX;
30956 for (j = 0; j < n_elt_per_word; ++j)
30958 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
30959 elt = convert_modes (word_mode, inner_mode, elt, true);
30965 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
30966 word, 1, OPTAB_LIB_WIDEN);
30967 word = expand_simple_binop (word_mode, IOR, word, elt,
30968 word, 1, OPTAB_LIB_WIDEN);
30976 emit_move_insn (target, gen_lowpart (mode, words[0]));
30977 else if (n_words == 2)
30979 rtx tmp = gen_reg_rtx (mode);
30980 emit_clobber (tmp);
30981 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
30982 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
30983 emit_move_insn (target, tmp);
30985 else if (n_words == 4)
30987 rtx tmp = gen_reg_rtx (V4SImode);
30988 gcc_assert (word_mode == SImode);
30989 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
30990 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
30991 emit_move_insn (target, gen_lowpart (mode, tmp));
30994 gcc_unreachable ();
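/* E.g. on a 32-bit target a V8HImode vector is assembled as four
   SImode words, each word IORing together two shifted HImode
   elements; the four words then go through the V4SImode path
   above.  */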
30998 /* Initialize vector TARGET via VALS. Suppress the use of MMX
30999 instructions unless MMX_OK is true. */
31002 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
31004 enum machine_mode mode = GET_MODE (target);
31005 enum machine_mode inner_mode = GET_MODE_INNER (mode);
31006 int n_elts = GET_MODE_NUNITS (mode);
31007 int n_var = 0, one_var = -1;
31008 bool all_same = true, all_const_zero = true;
31012 for (i = 0; i < n_elts; ++i)
31014 x = XVECEXP (vals, 0, i);
31015 if (!(CONST_INT_P (x)
31016 || GET_CODE (x) == CONST_DOUBLE
31017 || GET_CODE (x) == CONST_FIXED))
31018 n_var++, one_var = i;
31019 else if (x != CONST0_RTX (inner_mode))
31020 all_const_zero = false;
31021 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
31025 /* Constants are best loaded from the constant pool. */
31028 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
31032 /* If all values are identical, broadcast the value. */
31034 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
31035 XVECEXP (vals, 0, 0)))
31038 /* Values where only one field is non-constant are best loaded from
31039 the pool and overwritten via move later. */
31043 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
31044 XVECEXP (vals, 0, one_var),
31048 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
31052 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
31056 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
31058 enum machine_mode mode = GET_MODE (target);
31059 enum machine_mode inner_mode = GET_MODE_INNER (mode);
31060 enum machine_mode half_mode;
31061 bool use_vec_merge = false;
31063 static rtx (*gen_extract[6][2]) (rtx, rtx)
31065 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
31066 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
31067 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
31068 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
31069 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
31070 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
31072 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
31074 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
31075 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
31076 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
31077 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
31078 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
31079 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
31089 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
31090 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
31092 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
31094 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
31095 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
31101 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
31105 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
31106 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
31108 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
31110 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
31111 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
31118 /* For the two element vectors, we implement a VEC_CONCAT with
31119 the extraction of the other element. */
31121 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
31122 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
31125 op0 = val, op1 = tmp;
31127 op0 = tmp, op1 = val;
31129 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
31130 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
31135 use_vec_merge = TARGET_SSE4_1;
31142 use_vec_merge = true;
31146 /* tmp = target = A B C D */
31147 tmp = copy_to_reg (target);
31148 /* target = A A B B */
31149 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
31150 /* target = X A B B */
31151 ix86_expand_vector_set (false, target, val, 0);
31152 /* target = A X C D */
31153 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
31154 const1_rtx, const0_rtx,
31155 GEN_INT (2+4), GEN_INT (3+4)));
31159 /* tmp = target = A B C D */
31160 tmp = copy_to_reg (target);
31161 /* tmp = X B C D */
31162 ix86_expand_vector_set (false, tmp, val, 0);
31163 /* target = A B X D */
31164 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
31165 const0_rtx, const1_rtx,
31166 GEN_INT (0+4), GEN_INT (3+4)));
31170 /* tmp = target = A B C D */
31171 tmp = copy_to_reg (target);
31172 /* tmp = X B C D */
31173 ix86_expand_vector_set (false, tmp, val, 0);
31174 /* target = A B X D */
31175 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
31176 const0_rtx, const1_rtx,
31177 GEN_INT (2+4), GEN_INT (0+4)));
31181 gcc_unreachable ();
31186 use_vec_merge = TARGET_SSE4_1;
31190 /* Element 0 handled by vec_merge below. */
31193 use_vec_merge = true;
31199 /* With SSE2, use integer shuffles to swap element 0 and ELT,
31200 store into element 0, then shuffle them back. */
31204 order[0] = GEN_INT (elt);
31205 order[1] = const1_rtx;
31206 order[2] = const2_rtx;
31207 order[3] = GEN_INT (3);
31208 order[elt] = const0_rtx;
31210 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
31211 order[1], order[2], order[3]));
31213 ix86_expand_vector_set (false, target, val, 0);
31215 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
31216 order[1], order[2], order[3]));
31220 /* For SSE1, we have to reuse the V4SF code. */
31221 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
31222 gen_lowpart (SFmode, val), elt);
31227 use_vec_merge = TARGET_SSE2;
31230 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
31234 use_vec_merge = TARGET_SSE4_1;
31241 half_mode = V16QImode;
31247 half_mode = V8HImode;
31253 half_mode = V4SImode;
31259 half_mode = V2DImode;
31265 half_mode = V4SFmode;
31271 half_mode = V2DFmode;
31277 /* Compute offset. */
31281 gcc_assert (i <= 1);
31283 /* Extract the half. */
31284 tmp = gen_reg_rtx (half_mode);
31285 emit_insn (gen_extract[j][i] (tmp, target));
31287 /* Put val in tmp at elt. */
31288 ix86_expand_vector_set (false, tmp, val, elt);
31291 emit_insn (gen_insert[j][i] (target, target, tmp));
31300 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
31301 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
31302 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
31306 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
31308 emit_move_insn (mem, target);
31310 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
31311 emit_move_insn (tmp, val);
31313 emit_move_insn (target, mem);
31318 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
31320 enum machine_mode mode = GET_MODE (vec);
31321 enum machine_mode inner_mode = GET_MODE_INNER (mode);
31322 bool use_vec_extr = false;
31335 use_vec_extr = true;
31339 use_vec_extr = TARGET_SSE4_1;
31351 tmp = gen_reg_rtx (mode);
31352 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
31353 GEN_INT (elt), GEN_INT (elt),
31354 GEN_INT (elt+4), GEN_INT (elt+4)));
31358 tmp = gen_reg_rtx (mode);
31359 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
31363 gcc_unreachable ();
31366 use_vec_extr = true;
31371 use_vec_extr = TARGET_SSE4_1;
31385 tmp = gen_reg_rtx (mode);
31386 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
31387 GEN_INT (elt), GEN_INT (elt),
31388 GEN_INT (elt), GEN_INT (elt)));
31392 tmp = gen_reg_rtx (mode);
31393 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
31397 gcc_unreachable ();
31400 use_vec_extr = true;
31405 /* For SSE1, we have to reuse the V4SF code. */
31406 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
31407 gen_lowpart (V4SFmode, vec), elt);
31413 use_vec_extr = TARGET_SSE2;
31416 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
31420 use_vec_extr = TARGET_SSE4_1;
31424 /* ??? Could extract the appropriate HImode element and shift. */
31431 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
31432 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
31434 /* Let the rtl optimizers know about the zero extension performed. */
31435 if (inner_mode == QImode || inner_mode == HImode)
31437 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
31438 target = gen_lowpart (SImode, target);
31441 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
31445 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
31447 emit_move_insn (mem, vec);
31449 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
31450 emit_move_insn (target, tmp);
31454 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
31455 pattern to reduce; DEST is the destination; IN is the input vector. */
31458 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
31460 rtx tmp1, tmp2, tmp3;
31462 tmp1 = gen_reg_rtx (V4SFmode);
31463 tmp2 = gen_reg_rtx (V4SFmode);
31464 tmp3 = gen_reg_rtx (V4SFmode);
31466 emit_insn (gen_sse_movhlps (tmp1, in, in));
31467 emit_insn (fn (tmp2, tmp1, in));
31469 emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
31470 const1_rtx, const1_rtx,
31471 GEN_INT (1+4), GEN_INT (1+4)));
31472 emit_insn (fn (dest, tmp2, tmp3));
31475 /* Target hook for scalar_mode_supported_p. */
31477 ix86_scalar_mode_supported_p (enum machine_mode mode)
31479 if (DECIMAL_FLOAT_MODE_P (mode))
31480 return default_decimal_float_supported_p ();
31481 else if (mode == TFmode)
31484 return default_scalar_mode_supported_p (mode);
31487 /* Implements target hook vector_mode_supported_p. */
31489 ix86_vector_mode_supported_p (enum machine_mode mode)
31491 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
31493 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
31495 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
31497 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
31499 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
31504 /* Target hook for c_mode_for_suffix. */
31505 static enum machine_mode
31506 ix86_c_mode_for_suffix (char suffix)
31516 /* Worker function for TARGET_MD_ASM_CLOBBERS.
31518 We do this in the new i386 backend to maintain source compatibility
31519 with the old cc0-based compiler. */
31522 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
31523 tree inputs ATTRIBUTE_UNUSED,
31526 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
31528 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
31533 /* Implements target vector targetm.asm.encode_section_info. */
31535 static void ATTRIBUTE_UNUSED
31536 ix86_encode_section_info (tree decl, rtx rtl, int first)
31538 default_encode_section_info (decl, rtl, first);
31540 if (TREE_CODE (decl) == VAR_DECL
31541 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
31542 && ix86_in_large_data_p (decl))
31543 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
31546 /* Worker function for REVERSE_CONDITION. */
31549 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
31551 return (mode != CCFPmode && mode != CCFPUmode
31552 ? reverse_condition (code)
31553 : reverse_condition_maybe_unordered (code));
31556 /* Output code to perform an x87 FP register move, from OPERANDS[1] to OPERANDS[0]. */
31560 output_387_reg_move (rtx insn, rtx *operands)
31562 if (REG_P (operands[0]))
31564 if (REG_P (operands[1])
31565 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
31567 if (REGNO (operands[0]) == FIRST_STACK_REG)
31568 return output_387_ffreep (operands, 0);
31569 return "fstp\t%y0";
31571 if (STACK_TOP_P (operands[0]))
31572 return "fld%Z1\t%y1";
31575 else if (MEM_P (operands[0]))
31577 gcc_assert (REG_P (operands[1]));
31578 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
31579 return "fstp%Z0\t%y0";
31582 /* There is no non-popping store to memory for XFmode.
31583 So if we need one, follow the store with a load. */
31584 if (GET_MODE (operands[0]) == XFmode)
31585 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
31587 return "fst%Z0\t%y0";
31594 /* Output code to perform a conditional jump to LABEL, if the C2 flag in
31595 the FP status register is set. */
31598 ix86_emit_fp_unordered_jump (rtx label)
31600 rtx reg = gen_reg_rtx (HImode);
31603 emit_insn (gen_x86_fnstsw_1 (reg));
31605 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
31607 emit_insn (gen_x86_sahf_1 (reg));
31609 temp = gen_rtx_REG (CCmode, FLAGS_REG);
31610 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
31614 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
31616 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
31617 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
31620 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
31621 gen_rtx_LABEL_REF (VOIDmode, label),
31623 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
31625 emit_jump_insn (temp);
31626 predict_jump (REG_BR_PROB_BASE * 10 / 100);
31629 /* Output code to perform a log1p XFmode calculation. */
31631 void ix86_emit_i387_log1p (rtx op0, rtx op1)
31633 rtx label1 = gen_label_rtx ();
31634 rtx label2 = gen_label_rtx ();
31636 rtx tmp = gen_reg_rtx (XFmode);
31637 rtx tmp2 = gen_reg_rtx (XFmode);
31640 emit_insn (gen_absxf2 (tmp, op1));
31641 test = gen_rtx_GE (VOIDmode, tmp,
31642 CONST_DOUBLE_FROM_REAL_VALUE (
31643 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
31645 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
31647 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
31648 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
31649 emit_jump (label2);
31651 emit_label (label1);
31652 emit_move_insn (tmp, CONST1_RTX (XFmode));
31653 emit_insn (gen_addxf3 (tmp, op1, tmp));
31654 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
31655 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
31657 emit_label (label2);
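/* The 0.29289... threshold above is 1 - sqrt(2)/2, the bound within
   which fyl2xp1 is specified to operate; below it log2(1+x) is
   computed without explicitly forming 1+x, which would lose precision
   for tiny x, and above it 1+x is formed and plain fyl2x is used.  */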
31660 /* Output code to perform a Newton-Raphson approximation of a single precision
31661 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
31663 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
31665 rtx x0, x1, e0, e1;
31667 x0 = gen_reg_rtx (mode);
31668 e0 = gen_reg_rtx (mode);
31669 e1 = gen_reg_rtx (mode);
31670 x1 = gen_reg_rtx (mode);
31672 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp(b))) */
31674 /* x0 = rcp(b) estimate */
31675 emit_insn (gen_rtx_SET (VOIDmode, x0,
31676 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
31679 emit_insn (gen_rtx_SET (VOIDmode, e0,
31680 gen_rtx_MULT (mode, x0, b)));
31683 emit_insn (gen_rtx_SET (VOIDmode, e0,
31684 gen_rtx_MULT (mode, x0, e0)));
31687 emit_insn (gen_rtx_SET (VOIDmode, e1,
31688 gen_rtx_PLUS (mode, x0, x0)));
31691 emit_insn (gen_rtx_SET (VOIDmode, x1,
31692 gen_rtx_MINUS (mode, e1, e0)));
31695 emit_insn (gen_rtx_SET (VOIDmode, res,
31696 gen_rtx_MULT (mode, a, x1)));
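/* A scalar sketch (illustrative only, not GCC code) of the refinement
   above; RCP_B stands for the rcpss hardware estimate of 1/b.  */
static float
sketch_swdivsf (float a, float b, float rcp_b)
{
  float e0 = rcp_b * b;       /* e0 = x0 * b */
  e0 = rcp_b * e0;            /* e0 = b * x0 * x0 */
  float e1 = rcp_b + rcp_b;   /* e1 = 2 * x0 */
  float x1 = e1 - e0;         /* x1 = 2*x0 - b*x0^2, one Newton-Raphson step */
  return a * x1;              /* a / b ~= a * x1 */
}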
31699 /* Output code to perform a Newton-Raphson approximation of a
31700 single precision floating point [reciprocal] square root. */
31702 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
31705 rtx x0, e0, e1, e2, e3, mthree, mhalf;
31708 x0 = gen_reg_rtx (mode);
31709 e0 = gen_reg_rtx (mode);
31710 e1 = gen_reg_rtx (mode);
31711 e2 = gen_reg_rtx (mode);
31712 e3 = gen_reg_rtx (mode);
31714 real_from_integer (&r, VOIDmode, -3, -1, 0);
31715 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
31717 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
31718 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
31720 if (VECTOR_MODE_P (mode))
31722 mthree = ix86_build_const_vector (mode, true, mthree);
31723 mhalf = ix86_build_const_vector (mode, true, mhalf);
31726 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
31727 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
31729 /* x0 = rsqrt(a) estimate */
31730 emit_insn (gen_rtx_SET (VOIDmode, x0,
31731 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
31734 /* If a == 0.0, filter out the infinity to prevent NaN for sqrt(0.0). */
31739 zero = gen_reg_rtx (mode);
31740 mask = gen_reg_rtx (mode);
31742 zero = force_reg (mode, CONST0_RTX(mode));
31743 emit_insn (gen_rtx_SET (VOIDmode, mask,
31744 gen_rtx_NE (mode, zero, a)));
31746 emit_insn (gen_rtx_SET (VOIDmode, x0,
31747 gen_rtx_AND (mode, x0, mask)));
31751 emit_insn (gen_rtx_SET (VOIDmode, e0,
31752 gen_rtx_MULT (mode, x0, a)));
31754 emit_insn (gen_rtx_SET (VOIDmode, e1,
31755 gen_rtx_MULT (mode, e0, x0)));
31758 mthree = force_reg (mode, mthree);
31759 emit_insn (gen_rtx_SET (VOIDmode, e2,
31760 gen_rtx_PLUS (mode, e1, mthree)));
31762 mhalf = force_reg (mode, mhalf);
31764 /* e3 = -.5 * x0 */
31765 emit_insn (gen_rtx_SET (VOIDmode, e3,
31766 gen_rtx_MULT (mode, x0, mhalf)));
31768 /* e3 = -.5 * e0 */
31769 emit_insn (gen_rtx_SET (VOIDmode, e3,
31770 gen_rtx_MULT (mode, e0, mhalf)));
31771 /* ret = e2 * e3 */
31772 emit_insn (gen_rtx_SET (VOIDmode, res,
31773 gen_rtx_MULT (mode, e2, e3)));
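/* A scalar sketch (illustrative only, not GCC code) of the sequence
   above; X0 stands for the rsqrtss hardware estimate of 1/sqrt(a)
   and RECIP selects rsqrt over sqrt.  */
static float
sketch_swsqrtsf (float a, float x0, int recip)
{
  float e0 = x0 * a;                      /* e0 = a * x0 */
  float e1 = e0 * x0;                     /* e1 = a * x0 * x0 */
  float e2 = e1 - 3.0f;                   /* e2 = a * x0^2 - 3 */
  float e3 = (recip ? x0 : e0) * -0.5f;   /* e3 = -.5*x0 or -.5*a*x0 */
  return e2 * e3;                         /* one Newton-Raphson step */
}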
31776 #ifdef TARGET_SOLARIS
31777 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
31780 i386_solaris_elf_named_section (const char *name, unsigned int flags,
31783 /* With Binutils 2.15, the "@unwind" marker must be specified on
31784 every occurrence of the ".eh_frame" section, not just the first one. */
31787 && strcmp (name, ".eh_frame") == 0)
31789 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
31790 flags & SECTION_WRITE ? "aw" : "a");
31795 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
31797 solaris_elf_asm_comdat_section (name, flags, decl);
31802 default_elf_asm_named_section (name, flags, decl);
31804 #endif /* TARGET_SOLARIS */
31806 /* Return the mangling of TYPE if it is an extended fundamental type. */
31808 static const char *
31809 ix86_mangle_type (const_tree type)
31811 type = TYPE_MAIN_VARIANT (type);
31813 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
31814 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
31817 switch (TYPE_MODE (type))
31820 /* __float128 is "g". */
31823 /* "long double" or __float80 is "e". */
31830 /* For 32-bit code we can save PIC register setup by using
31831 __stack_chk_fail_local hidden function instead of calling
31832 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
31833 register, so it is better to call __stack_chk_fail directly. */
31835 static tree ATTRIBUTE_UNUSED
31836 ix86_stack_protect_fail (void)
31838 return TARGET_64BIT
31839 ? default_external_stack_protect_fail ()
31840 : default_hidden_stack_protect_fail ();
31843 /* Select a format to encode pointers in exception handling data. CODE
31844 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
31845 true if the symbol may be affected by dynamic relocations.
31847 ??? All x86 object file formats are capable of representing this.
31848 After all, the relocation needed is the same as for the call insn.
31849 Whether or not a particular assembler allows us to enter such, I
31850 guess we'll have to see. */
31852 asm_preferred_eh_data_format (int code, int global)
31856 int type = DW_EH_PE_sdata8;
31858 || ix86_cmodel == CM_SMALL_PIC
31859 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
31860 type = DW_EH_PE_sdata4;
31861 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
31863 if (ix86_cmodel == CM_SMALL
31864 || (ix86_cmodel == CM_MEDIUM && code))
31865 return DW_EH_PE_udata4;
31866 return DW_EH_PE_absptr;
31869 /* Expand copysign from SIGN to the positive value ABS_VALUE
31870 storing in RESULT. If MASK is non-null, it shall be a mask to mask out the sign bit. */
31873 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
31875 enum machine_mode mode = GET_MODE (sign);
31876 rtx sgn = gen_reg_rtx (mode);
31877 if (mask == NULL_RTX)
31879 enum machine_mode vmode;
31881 if (mode == SFmode)
31883 else if (mode == DFmode)
31888 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
31889 if (!VECTOR_MODE_P (mode))
31891 /* We need to generate a scalar mode mask in this case. */
31892 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
31893 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
31894 mask = gen_reg_rtx (mode);
31895 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
31899 mask = gen_rtx_NOT (mode, mask);
31900 emit_insn (gen_rtx_SET (VOIDmode, sgn,
31901 gen_rtx_AND (mode, mask, sign)));
31902 emit_insn (gen_rtx_SET (VOIDmode, result,
31903 gen_rtx_IOR (mode, abs_value, sgn)));
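/* A scalar DFmode sketch (illustrative only, not GCC code) of the
   mask arithmetic above, assuming <stdint.h> and <string.h>: OR the
   sign bit of SIGN into the known-positive ABS_VALUE.  */
static double
sketch_copysign_to_positive (double abs_value, double sign)
{
  uint64_t av, s;
  memcpy (&av, &abs_value, sizeof av);
  memcpy (&s, &sign, sizeof s);
  av |= s & UINT64_C (0x8000000000000000);   /* copy only the sign bit */
  memcpy (&abs_value, &av, sizeof abs_value);
  return abs_value;
}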
31906 /* Expand fabs (OP0) and return a new rtx that holds the result. The
31907 mask for masking out the sign-bit is stored in *SMASK, if that is non-null. */
31910 ix86_expand_sse_fabs (rtx op0, rtx *smask)
31912 enum machine_mode vmode, mode = GET_MODE (op0);
31915 xa = gen_reg_rtx (mode);
31916 if (mode == SFmode)
31918 else if (mode == DFmode)
31922 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
31923 if (!VECTOR_MODE_P (mode))
31925 /* We need to generate a scalar mode mask in this case. */
31926 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
31927 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
31928 mask = gen_reg_rtx (mode);
31929 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
31931 emit_insn (gen_rtx_SET (VOIDmode, xa,
31932 gen_rtx_AND (mode, op0, mask)));
31940 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
31941 swapping the operands if SWAP_OPERANDS is true. The expanded
31942 code is a forward jump to a newly created label in case the
31943 comparison is true. The generated label rtx is returned. */
31945 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
31946 bool swap_operands)
31957 label = gen_label_rtx ();
31958 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
31959 emit_insn (gen_rtx_SET (VOIDmode, tmp,
31960 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
31961 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
31962 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
31963 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
31964 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
31965 JUMP_LABEL (tmp) = label;
31970 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
31971 using comparison code CODE. Operands are swapped for the comparison if
31972 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
31974 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
31975 bool swap_operands)
31977 rtx (*insn)(rtx, rtx, rtx, rtx);
31978 enum machine_mode mode = GET_MODE (op0);
31979 rtx mask = gen_reg_rtx (mode);
31988 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
31990 emit_insn (insn (mask, op0, op1,
31991 gen_rtx_fmt_ee (code, mode, op0, op1)));
31995 /* Generate and return a rtx of mode MODE for 2**n where n is the number
31996 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
31998 ix86_gen_TWO52 (enum machine_mode mode)
32000 REAL_VALUE_TYPE TWO52r;
32003 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
32004 TWO52 = const_double_from_real_value (TWO52r, mode);
32005 TWO52 = force_reg (mode, TWO52);
32010 /* Expand SSE sequence for computing lround from OP1 storing
32013 ix86_expand_lround (rtx op0, rtx op1)
32015 /* C code for the stuff we're doing below:
32016 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
32019 enum machine_mode mode = GET_MODE (op1);
32020 const struct real_format *fmt;
32021 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
32024 /* load nextafter (0.5, 0.0) */
32025 fmt = REAL_MODE_FORMAT (mode);
32026 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
32027 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
32029 /* adj = copysign (0.5, op1) */
32030 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
32031 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
32033 /* adj = op1 + adj */
32034 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
32036 /* op0 = (imode)adj */
32037 expand_fix (op0, adj, 0);
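/* A scalar sketch (illustrative only, not GCC code) of the sequence
   above, assuming <math.h>: adding copysign (nextafter (0.5, 0.0), x)
   before the truncating conversion gives round-half-away-from-zero
   without mis-rounding values just below 0.5.  */
static long
sketch_lround (double x)
{
  double adj = copysign (nextafter (0.5, 0.0), x);
  return (long) (x + adj);   /* truncating convert, as cvttsd2si[q] */
}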
32040 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1 storing into OPERAND0. */
32043 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
32045 /* C code for the stuff we're doing below (for do_floor):
32047 xi -= (double)xi > op1 ? 1 : 0;
32050 enum machine_mode fmode = GET_MODE (op1);
32051 enum machine_mode imode = GET_MODE (op0);
32052 rtx ireg, freg, label, tmp;
32054 /* reg = (long)op1 */
32055 ireg = gen_reg_rtx (imode);
32056 expand_fix (ireg, op1, 0);
32058 /* freg = (double)reg */
32059 freg = gen_reg_rtx (fmode);
32060 expand_float (freg, ireg, 0);
32062 /* ireg = (freg > op1) ? ireg - 1 : ireg */
32063 label = ix86_expand_sse_compare_and_jump (UNLE,
32064 freg, op1, !do_floor);
32065 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
32066 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
32067 emit_move_insn (ireg, tmp);
32069 emit_label (label);
32070 LABEL_NUSES (label) = 1;
32072 emit_move_insn (op0, ireg);
32075 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
32076 result in OPERAND0. */
32078 ix86_expand_rint (rtx operand0, rtx operand1)
32080 /* C code for the stuff we're doing below:
32081 xa = fabs (operand1);
32082 if (!isless (xa, 2**52))
32084 xa = xa + 2**52 - 2**52;
32085 return copysign (xa, operand1);
32087 enum machine_mode mode = GET_MODE (operand0);
32088 rtx res, xa, label, TWO52, mask;
32090 res = gen_reg_rtx (mode);
32091 emit_move_insn (res, operand1);
32093 /* xa = abs (operand1) */
32094 xa = ix86_expand_sse_fabs (res, &mask);
32096 /* if (!isless (xa, TWO52)) goto label; */
32097 TWO52 = ix86_gen_TWO52 (mode);
32098 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32100 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
32101 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
32103 ix86_sse_copysign_to_positive (res, xa, res, mask);
32105 emit_label (label);
32106 LABEL_NUSES (label) = 1;
32108 emit_move_insn (operand0, res);
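/* A scalar DFmode sketch (illustrative only, not GCC code) of the
   TWO52 trick above, assuming <math.h> and round-to-nearest mode:
   adding and subtracting 2**52 shifts the fraction bits out, rounding
   |x| to an integer; values >= 2**52 are already integral.  */
static double
sketch_rint (double x)
{
  const double two52 = 4503599627370496.0;   /* 2**52 */
  double xa = fabs (x);
  if (!(xa < two52))                         /* !isless: large or NaN */
    return x;
  xa = (xa + two52) - two52;
  return copysign (xa, x);                   /* restore the sign */
}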
32111 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
32114 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
32116 /* C code for the stuff we expand below.
32117 double xa = fabs (x), x2;
32118 if (!isless (xa, TWO52))
32120 xa = xa + TWO52 - TWO52;
32121 x2 = copysign (xa, x);
32130 enum machine_mode mode = GET_MODE (operand0);
32131 rtx xa, TWO52, tmp, label, one, res, mask;
32133 TWO52 = ix86_gen_TWO52 (mode);
32135 /* Temporary for holding the result, initialized to the input
32136 operand to ease control flow. */
32137 res = gen_reg_rtx (mode);
32138 emit_move_insn (res, operand1);
32140 /* xa = abs (operand1) */
32141 xa = ix86_expand_sse_fabs (res, &mask);
32143 /* if (!isless (xa, TWO52)) goto label; */
32144 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32146 /* xa = xa + TWO52 - TWO52; */
32147 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
32148 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
32150 /* xa = copysign (xa, operand1) */
32151 ix86_sse_copysign_to_positive (xa, xa, res, mask);
32153 /* generate 1.0 or -1.0 */
32154 one = force_reg (mode,
32155 const_double_from_real_value (do_floor
32156 ? dconst1 : dconstm1, mode));
32158 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
32159 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
32160 emit_insn (gen_rtx_SET (VOIDmode, tmp,
32161 gen_rtx_AND (mode, one, tmp)));
32162 /* We always need to subtract here to preserve signed zero. */
32163 tmp = expand_simple_binop (mode, MINUS,
32164 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
32165 emit_move_insn (res, tmp);
32167 emit_label (label);
32168 LABEL_NUSES (label) = 1;
32170 emit_move_insn (operand0, res);
32173 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
32176 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
32178 /* C code for the stuff we expand below.
32179 double xa = fabs (x), x2;
32180 if (!isless (xa, TWO52))
32182 x2 = (double)(long)x;
32189 if (HONOR_SIGNED_ZEROS (mode))
32190 return copysign (x2, x);
32193 enum machine_mode mode = GET_MODE (operand0);
32194 rtx xa, xi, TWO52, tmp, label, one, res, mask;
32196 TWO52 = ix86_gen_TWO52 (mode);
32198 /* Temporary for holding the result, initialized to the input
32199 operand to ease control flow. */
32200 res = gen_reg_rtx (mode);
32201 emit_move_insn (res, operand1);
32203 /* xa = abs (operand1) */
32204 xa = ix86_expand_sse_fabs (res, &mask);
32206 /* if (!isless (xa, TWO52)) goto label; */
32207 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32209 /* xa = (double)(long)x */
32210 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
32211 expand_fix (xi, res, 0);
32212 expand_float (xa, xi, 0);
32215 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
32217 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
32218 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
32219 emit_insn (gen_rtx_SET (VOIDmode, tmp,
32220 gen_rtx_AND (mode, one, tmp)));
32221 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
32222 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
32223 emit_move_insn (res, tmp);
32225 if (HONOR_SIGNED_ZEROS (mode))
32226 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
32228 emit_label (label);
32229 LABEL_NUSES (label) = 1;
32231 emit_move_insn (operand0, res);
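/* A scalar DFmode sketch (illustrative only, not GCC code) of the
   compensation above, assuming <math.h> and a 64-bit long; the
   HONOR_SIGNED_ZEROS copysign step is omitted for brevity.  */
static double
sketch_floorceil (double x, int do_floor)
{
  const double two52 = 4503599627370496.0;
  if (!(fabs (x) < two52))
    return x;
  double x2 = (double) (long) x;   /* truncate toward zero */
  if (do_floor)
    x2 -= (x2 > x);                /* truncation went up: step down */
  else
    x2 += (x2 < x);                /* truncation went down: step up */
  return x2;
}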
32234 /* Expand SSE sequence for computing round from OPERAND1 storing
32235 into OPERAND0.  The sequence works without relying on DImode truncation
32236 via cvttsd2siq, which is only available on 64-bit targets. */
32238 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
32240 /* C code for the stuff we expand below.
32241 double xa = fabs (x), xa2, x2;
32242 if (!isless (xa, TWO52))
32244 Using the absolute value and copying back the sign makes
32245 -0.0 -> -0.0 correct.
32246 xa2 = xa + TWO52 - TWO52;
32251 else if (dxa > 0.5)
32253 x2 = copysign (xa2, x);
32256 enum machine_mode mode = GET_MODE (operand0);
32257 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
32259 TWO52 = ix86_gen_TWO52 (mode);
32261 /* Temporary for holding the result, initialized to the input
32262 operand to ease control flow. */
32263 res = gen_reg_rtx (mode);
32264 emit_move_insn (res, operand1);
32266 /* xa = abs (operand1) */
32267 xa = ix86_expand_sse_fabs (res, &mask);
32269 /* if (!isless (xa, TWO52)) goto label; */
32270 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32272 /* xa2 = xa + TWO52 - TWO52; */
32273 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
32274 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
32276 /* dxa = xa2 - xa; */
32277 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
32279 /* generate 0.5, 1.0 and -0.5 */
32280 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
32281 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
32282 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
32286 tmp = gen_reg_rtx (mode);
32287 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
32288 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
32289 emit_insn (gen_rtx_SET (VOIDmode, tmp,
32290 gen_rtx_AND (mode, one, tmp)));
32291 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
32292 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
32293 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
32294 emit_insn (gen_rtx_SET (VOIDmode, tmp,
32295 gen_rtx_AND (mode, one, tmp)));
32296 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
32298 /* res = copysign (xa2, operand1) */
32299 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
32301 emit_label (label);
32302 LABEL_NUSES (label) = 1;
32304 emit_move_insn (operand0, res);
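/* For illustration: the dxa adjustments above turn round-to-nearest-even
   into round-half-away-from-zero.  For x = 2.5 the TWO52 trick gives
   xa2 = 2.0 (ties to even), so dxa = -0.5, the UNGE (mhalf, dxa) test
   holds and 1.0 is added back, giving round (2.5) == 3.0.  For x = 2.3,
   dxa = -0.3 and neither test holds, leaving 2.0.  */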
32307 /* Expand SSE sequence for computing trunc from OPERAND1 storing into OPERAND0. */
32310 ix86_expand_trunc (rtx operand0, rtx operand1)
32312 /* C code for SSE variant we expand below.
32313 double xa = fabs (x), x2;
32314 if (!isless (xa, TWO52)) return x;
32316 x2 = (double)(long)x;
32317 if (HONOR_SIGNED_ZEROS (mode))
32318 return copysign (x2, x); return x2; */
32321 enum machine_mode mode = GET_MODE (operand0);
32322 rtx xa, xi, TWO52, label, res, mask;
32324 TWO52 = ix86_gen_TWO52 (mode);
32326 /* Temporary for holding the result, initialized to the input
32327 operand to ease control flow. */
32328 res = gen_reg_rtx (mode);
32329 emit_move_insn (res, operand1);
32331 /* xa = abs (operand1) */
32332 xa = ix86_expand_sse_fabs (res, &mask);
32334 /* if (!isless (xa, TWO52)) goto label; */
32335 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32337 /* x = (double)(long)x */
32338 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
32339 expand_fix (xi, res, 0);
32340 expand_float (res, xi, 0);
32342 if (HONOR_SIGNED_ZEROS (mode))
32343 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
32345 emit_label (label);
32346 LABEL_NUSES (label) = 1;
32348 emit_move_insn (operand0, res);
32351 /* Expand SSE sequence for computing trunc from OPERAND1 storing into OPERAND0. */
32354 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
32356 enum machine_mode mode = GET_MODE (operand0);
32357 rtx xa, mask, TWO52, label, one, res, smask, tmp;
32359 /* C code for SSE variant we expand below.
32360 double xa = fabs (x), x2;
32361 if (!isless (xa, TWO52)) return x;
32363 xa2 = xa + TWO52 - TWO52;
32367 if (xa2 > xa) xa2 -= 1.0; x2 = copysign (xa2, x); return x2; */
32371 TWO52 = ix86_gen_TWO52 (mode);
32373 /* Temporary for holding the result, initialized to the input
32374 operand to ease control flow. */
32375 res = gen_reg_rtx (mode);
32376 emit_move_insn (res, operand1);
32378 /* xa = abs (operand1) */
32379 xa = ix86_expand_sse_fabs (res, &smask);
32381 /* if (!isless (xa, TWO52)) goto label; */
32382 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32384 /* res = xa + TWO52 - TWO52; */
32385 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
32386 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
32387 emit_move_insn (res, tmp);
32390 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
32392 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
32393 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
32394 emit_insn (gen_rtx_SET (VOIDmode, mask,
32395 gen_rtx_AND (mode, mask, one)));
32396 tmp = expand_simple_binop (mode, MINUS,
32397 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
32398 emit_move_insn (res, tmp);
32400 /* res = copysign (res, operand1) */
32401 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
32403 emit_label (label);
32404 LABEL_NUSES (label) = 1;
32406 emit_move_insn (operand0, res);
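/* For illustration: since the TWO52 trick rounds to nearest, truncation
   only ever needs a downward fixup.  For x = 3.7, res = 4.0 after the
   add/subtract and 4.0 > 3.7, so 1.0 is subtracted to give 3.0; for
   x = 3.2, res = 3.0 already and nothing is subtracted.  The final
   copysign restores the sign, so trunc (-3.7) becomes -3.0.  */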
32409 /* Expand SSE sequence for computing round from OPERAND1 storing into OPERAND0. */
32412 ix86_expand_round (rtx operand0, rtx operand1)
32414 /* C code for the stuff we're doing below:
32415 double xa = fabs (x);
32416 if (!isless (xa, TWO52)) return x;
32418 xa = (double)(long)(xa + nextafter (0.5, 0.0));
32419 return copysign (xa, x); */
32421 enum machine_mode mode = GET_MODE (operand0);
32422 rtx res, TWO52, xa, label, xi, half, mask;
32423 const struct real_format *fmt;
32424 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
32426 /* Temporary for holding the result, initialized to the input
32427 operand to ease control flow. */
32428 res = gen_reg_rtx (mode);
32429 emit_move_insn (res, operand1);
32431 TWO52 = ix86_gen_TWO52 (mode);
32432 xa = ix86_expand_sse_fabs (res, &mask);
32433 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32435 /* load nextafter (0.5, 0.0) */
32436 fmt = REAL_MODE_FORMAT (mode);
32437 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
32438 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
32440 /* xa = xa + 0.5 */
32441 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
32442 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
32444 /* xa = (double)(int64_t)xa */
32445 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
32446 expand_fix (xi, xa, 0);
32447 expand_float (xa, xi, 0);
32449 /* res = copysign (xa, operand1) */
32450 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
32452 emit_label (label);
32453 LABEL_NUSES (label) = 1;
32455 emit_move_insn (operand0, res);
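/* For illustration, assuming DFmode (fmt->p == 53): pred_half is
   0.5 - 0x1p-54, the largest double below 0.5.  Using it instead of 0.5
   keeps values just below one half from rounding up: for
   xa == 0.5 - 0x1p-54, xa + 0.5 would round up to 1.0 and truncate to
   1.0, while xa + pred_half stays below 1.0 and truncates to 0.0, the
   correct round () result.  */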
32459 /* Table of valid machine attributes. */
32460 static const struct attribute_spec ix86_attribute_table[] =
32462 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
32463 affects_type_identity } */
32464 /* Stdcall attribute says callee is responsible for popping arguments
32465 if they are not variable. */
32466 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
32468 /* Fastcall attribute says callee is responsible for popping arguments
32469 if they are not variable. */
32470 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
32472 /* Thiscall attribute says callee is responsible for popping arguments
32473 if they are not variable. */
32474 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
32476 /* Cdecl attribute says the callee is a normal C declaration */
32477 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
32479 /* Regparm attribute specifies how many integer arguments are to be
32480 passed in registers. */
32481 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
32483 /* Sseregparm attribute says we are using x86_64 calling conventions
32484 for FP arguments. */
32485 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
32487 /* force_align_arg_pointer says this function realigns the stack at entry. */
32488 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
32489 false, true, true, ix86_handle_cconv_attribute, false },
32490 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
32491 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
32492 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
32493 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
32496 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
32498 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
32500 #ifdef SUBTARGET_ATTRIBUTE_TABLE
32501 SUBTARGET_ATTRIBUTE_TABLE,
32503 /* ms_abi and sysv_abi calling convention function attributes. */
32504 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
32505 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
32506 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
32508 { "callee_pop_aggregate_return", 1, 1, false, true, true,
32509 ix86_handle_callee_pop_aggregate_return, true },
32511 { NULL, 0, 0, false, false, false, NULL, false }
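/* For illustration: a declaration such as
     int __attribute__ ((fastcall)) f (int a, int b);
   is routed through ix86_handle_cconv_attribute, which rejects invalid
   combinations such as fastcall together with stdcall or regparm.  */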
32514 /* Implement targetm.vectorize.builtin_vectorization_cost. */
32516 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
32517 tree vectype ATTRIBUTE_UNUSED,
32518 int misalign ATTRIBUTE_UNUSED)
32520 switch (type_of_cost)
32523 return ix86_cost->scalar_stmt_cost;
32526 return ix86_cost->scalar_load_cost;
32529 return ix86_cost->scalar_store_cost;
32532 return ix86_cost->vec_stmt_cost;
32535 return ix86_cost->vec_align_load_cost;
32538 return ix86_cost->vec_store_cost;
32540 case vec_to_scalar:
32541 return ix86_cost->vec_to_scalar_cost;
32543 case scalar_to_vec:
32544 return ix86_cost->scalar_to_vec_cost;
32546 case unaligned_load:
32547 case unaligned_store:
32548 return ix86_cost->vec_unalign_load_cost;
32550 case cond_branch_taken:
32551 return ix86_cost->cond_taken_branch_cost;
32553 case cond_branch_not_taken:
32554 return ix86_cost->cond_not_taken_branch_cost;
32560 gcc_unreachable ();
32565 /* Implement targetm.vectorize.builtin_vec_perm. */
32568 ix86_vectorize_builtin_vec_perm (tree vec_type, tree *mask_type)
32570 tree itype = TREE_TYPE (vec_type);
32571 bool u = TYPE_UNSIGNED (itype);
32572 enum machine_mode vmode = TYPE_MODE (vec_type);
32573 enum ix86_builtins fcode;
32574 bool ok = TARGET_SSE2;
32580 fcode = IX86_BUILTIN_VEC_PERM_V4DF;
32583 fcode = IX86_BUILTIN_VEC_PERM_V2DF;
32585 itype = ix86_get_builtin_type (IX86_BT_DI);
32590 fcode = IX86_BUILTIN_VEC_PERM_V8SF;
32594 fcode = IX86_BUILTIN_VEC_PERM_V4SF;
32596 itype = ix86_get_builtin_type (IX86_BT_SI);
32600 fcode = u ? IX86_BUILTIN_VEC_PERM_V2DI_U : IX86_BUILTIN_VEC_PERM_V2DI;
32603 fcode = u ? IX86_BUILTIN_VEC_PERM_V4SI_U : IX86_BUILTIN_VEC_PERM_V4SI;
32606 fcode = u ? IX86_BUILTIN_VEC_PERM_V8HI_U : IX86_BUILTIN_VEC_PERM_V8HI;
32609 fcode = u ? IX86_BUILTIN_VEC_PERM_V16QI_U : IX86_BUILTIN_VEC_PERM_V16QI;
32619 *mask_type = itype;
32620 return ix86_builtins[(int) fcode];
32623 /* Return a vector mode with twice as many elements as VMODE. */
32624 /* ??? Consider moving this to a table generated by genmodes.c. */
32626 static enum machine_mode
32627 doublesize_vector_mode (enum machine_mode vmode)
32631 case V2SFmode: return V4SFmode;
32632 case V1DImode: return V2DImode;
32633 case V2SImode: return V4SImode;
32634 case V4HImode: return V8HImode;
32635 case V8QImode: return V16QImode;
32637 case V2DFmode: return V4DFmode;
32638 case V4SFmode: return V8SFmode;
32639 case V2DImode: return V4DImode;
32640 case V4SImode: return V8SImode;
32641 case V8HImode: return V16HImode;
32642 case V16QImode: return V32QImode;
32644 case V4DFmode: return V8DFmode;
32645 case V8SFmode: return V16SFmode;
32646 case V4DImode: return V8DImode;
32647 case V8SImode: return V16SImode;
32648 case V16HImode: return V32HImode;
32649 case V32QImode: return V64QImode;
32652 gcc_unreachable ();
32656 /* Construct (set target (vec_select op0 (parallel perm))) and
32657 return true if that's a valid instruction in the active ISA. */
32660 expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
32662 rtx rperm[MAX_VECT_LEN], x;
32665 for (i = 0; i < nelt; ++i)
32666 rperm[i] = GEN_INT (perm[i]);
32668 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
32669 x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
32670 x = gen_rtx_SET (VOIDmode, target, x);
32673 if (recog_memoized (x) < 0)
32681 /* Similar, but generate a vec_concat from op0 and op1 as well. */
32684 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
32685 const unsigned char *perm, unsigned nelt)
32687 enum machine_mode v2mode;
32690 v2mode = doublesize_vector_mode (GET_MODE (op0));
32691 x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
32692 return expand_vselect (target, x, perm, nelt);
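/* For illustration: for a V4SFmode target and perm = { 0, 4, 1, 5 },
   the above builds
     (set target (vec_select:V4SF (vec_concat:V8SF op0 op1)
                                  (parallel [0 4 1 5])))
   which recog_memoized accepts as the unpcklps pattern.  */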
32695 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
32696 in terms of blendp[sd] / pblendw / pblendvb. */
32699 expand_vec_perm_blend (struct expand_vec_perm_d *d)
32701 enum machine_mode vmode = d->vmode;
32702 unsigned i, mask, nelt = d->nelt;
32703 rtx target, op0, op1, x;
32705 if (!TARGET_SSE4_1 || d->op0 == d->op1)
32707 if (!(GET_MODE_SIZE (vmode) == 16 || vmode == V4DFmode || vmode == V8SFmode))
32710 /* This is a blend, not a permute. Elements must stay in their
32711 respective lanes. */
32712 for (i = 0; i < nelt; ++i)
32714 unsigned e = d->perm[i];
32715 if (!(e == i || e == i + nelt))
32722 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
32723 decision should be extracted elsewhere, so that we only try that
32724 sequence once all budget==3 options have been tried. */
32726 /* For bytes, see if bytes move in pairs so we can use pblendw with
32727 an immediate argument, rather than pblendvb with a vector argument. */
32728 if (vmode == V16QImode)
32730 bool pblendw_ok = true;
32731 for (i = 0; i < 16 && pblendw_ok; i += 2)
32732 pblendw_ok = (d->perm[i] + 1 == d->perm[i + 1]);
32736 rtx rperm[16], vperm;
32738 for (i = 0; i < nelt; ++i)
32739 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
32741 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
32742 vperm = force_reg (V16QImode, vperm);
32744 emit_insn (gen_sse4_1_pblendvb (d->target, d->op0, d->op1, vperm));
32749 target = d->target;
32761 for (i = 0; i < nelt; ++i)
32762 mask |= (d->perm[i] >= nelt) << i;
32766 for (i = 0; i < 2; ++i)
32767 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
32771 for (i = 0; i < 4; ++i)
32772 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
32776 for (i = 0; i < 8; ++i)
32777 mask |= (d->perm[i * 2] >= 16) << i;
32781 target = gen_lowpart (vmode, target);
32782 op0 = gen_lowpart (vmode, op0);
32783 op1 = gen_lowpart (vmode, op1);
32787 gcc_unreachable ();
32790 /* This matches five different patterns with the different modes. */
32791 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
32792 x = gen_rtx_SET (VOIDmode, target, x);
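/* For illustration: for V8HImode with perm = { 0, 9, 2, 11, 4, 13, 6, 15 },
   elements 1, 3, 5 and 7 come from op1, so the loop above forms
   mask == 0xaa and the vec_merge matches pblendw $0xaa.  */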
32798 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
32799 in terms of the variable form of vpermilps.
32801 Note that we will have already failed the immediate input vpermilps,
32802 which requires that the high and low part shuffle be identical; the
32803 variable form doesn't require that. */
32806 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
32808 rtx rperm[8], vperm;
32811 if (!TARGET_AVX || d->vmode != V8SFmode || d->op0 != d->op1)
32814 /* We can only permute within the 128-bit lane. */
32815 for (i = 0; i < 8; ++i)
32817 unsigned e = d->perm[i];
32818 if (i < 4 ? e >= 4 : e < 4)
32825 for (i = 0; i < 8; ++i)
32827 unsigned e = d->perm[i];
32829 /* Within each 128-bit lane, the elements of op0 are numbered
32830 from 0 and the elements of op1 are numbered from 4. */
32836 rperm[i] = GEN_INT (e);
32839 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
32840 vperm = force_reg (V8SImode, vperm);
32841 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
32846 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
32847 in terms of pshufb or vpperm. */
32850 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
32852 unsigned i, nelt, eltsz;
32853 rtx rperm[16], vperm, target, op0, op1;
32855 if (!(d->op0 == d->op1 ? TARGET_SSSE3 : TARGET_XOP))
32857 if (GET_MODE_SIZE (d->vmode) != 16)
32864 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
32866 for (i = 0; i < nelt; ++i)
32868 unsigned j, e = d->perm[i];
32869 for (j = 0; j < eltsz; ++j)
32870 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
32873 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
32874 vperm = force_reg (V16QImode, vperm);
32876 target = gen_lowpart (V16QImode, d->target);
32877 op0 = gen_lowpart (V16QImode, d->op0);
32878 if (d->op0 == d->op1)
32879 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
32882 op1 = gen_lowpart (V16QImode, d->op1);
32883 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
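/* For illustration: a one-operand V4SImode permutation { 2, 3, 0, 1 }
   has eltsz == 4, so the control vector built above is
     { 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 }
   i.e. a pshufb that swaps the two 64-bit halves.  */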
32889 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
32890 in a single instruction. */
32893 expand_vec_perm_1 (struct expand_vec_perm_d *d)
32895 unsigned i, nelt = d->nelt;
32896 unsigned char perm2[MAX_VECT_LEN];
32898 /* Check plain VEC_SELECT first, because AVX has instructions that could
32899 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
32900 input where SEL+CONCAT may not. */
32901 if (d->op0 == d->op1)
32903 int mask = nelt - 1;
32905 for (i = 0; i < nelt; i++)
32906 perm2[i] = d->perm[i] & mask;
32908 if (expand_vselect (d->target, d->op0, perm2, nelt))
32911 /* There are plenty of patterns in sse.md that are written for
32912 SEL+CONCAT and are not replicated for a single op. Perhaps
32913 that should be changed, to avoid the nastiness here. */
32915 /* Recognize interleave style patterns, which means incrementing
32916 every other permutation operand. */
32917 for (i = 0; i < nelt; i += 2)
32919 perm2[i] = d->perm[i] & mask;
32920 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
32922 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
32925 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
32928 for (i = 0; i < nelt; i += 4)
32930 perm2[i + 0] = d->perm[i + 0] & mask;
32931 perm2[i + 1] = d->perm[i + 1] & mask;
32932 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
32933 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
32936 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
32941 /* Finally, try the fully general two operand permute. */
32942 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
32945 /* Recognize interleave style patterns with reversed operands. */
32946 if (d->op0 != d->op1)
32948 for (i = 0; i < nelt; ++i)
32950 unsigned e = d->perm[i];
32958 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
32962 /* Try the SSE4.1 blend variable merge instructions. */
32963 if (expand_vec_perm_blend (d))
32966 /* Try one of the AVX vpermil variable permutations. */
32967 if (expand_vec_perm_vpermil (d))
32970 /* Try the SSSE3 pshufb or XOP vpperm variable permutation. */
32971 if (expand_vec_perm_pshufb (d))
32977 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
32978 in terms of a pair of pshuflw + pshufhw instructions. */
32981 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
32983 unsigned char perm2[MAX_VECT_LEN];
32987 if (d->vmode != V8HImode || d->op0 != d->op1)
32990 /* The two permutations only operate in 64-bit lanes. */
32991 for (i = 0; i < 4; ++i)
32992 if (d->perm[i] >= 4)
32994 for (i = 4; i < 8; ++i)
32995 if (d->perm[i] < 4)
33001 /* Emit the pshuflw. */
33002 memcpy (perm2, d->perm, 4);
33003 for (i = 4; i < 8; ++i)
33005 ok = expand_vselect (d->target, d->op0, perm2, 8);
33008 /* Emit the pshufhw. */
33009 memcpy (perm2 + 4, d->perm + 4, 4);
33010 for (i = 0; i < 4; ++i)
33012 ok = expand_vselect (d->target, d->target, perm2, 8);
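/* For illustration: for perm = { 3, 2, 1, 0, 7, 6, 5, 4 }, the first
   expand_vselect emits pshuflw with { 3, 2, 1, 0, 4, 5, 6, 7 } (high
   half untouched) and the second emits pshufhw with
   { 0, 1, 2, 3, 7, 6, 5, 4 }.  */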
33018 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
33019 the permutation using the SSSE3 palignr instruction. This succeeds
33020 when all of the elements in PERM fit within one vector and we merely
33021 need to shift them down so that a single vector permutation has a
33022 chance to succeed. */
33025 expand_vec_perm_palignr (struct expand_vec_perm_d *d)
33027 unsigned i, nelt = d->nelt;
33032 /* Even with AVX, palignr only operates on 128-bit vectors. */
33033 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
33036 min = nelt, max = 0;
33037 for (i = 0; i < nelt; ++i)
33039 unsigned e = d->perm[i];
33045 if (min == 0 || max - min >= nelt)
33048 /* Given that we have SSSE3, we know we'll be able to implement the
33049 single operand permutation after the palignr with pshufb. */
33053 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
33054 emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode, d->target),
33055 gen_lowpart (TImode, d->op1),
33056 gen_lowpart (TImode, d->op0), shift));
33058 d->op0 = d->op1 = d->target;
33061 for (i = 0; i < nelt; ++i)
33063 unsigned e = d->perm[i] - min;
33069 /* Test for the degenerate case where the alignment by itself
33070 produces the desired permutation. */
33074 ok = expand_vec_perm_1 (d);
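/* For illustration: for V8HImode with perm = { 3, 4, 5, 6, 7, 8, 9, 10 },
   min == 3 and the palignr shifts the op1:op0 pair down by 3 * 16 bits;
   the remaining selector is then the identity, the degenerate case
   tested above.  */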
33080 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
33081 a two vector permutation into a single vector permutation by using
33082 an interleave operation to merge the vectors. */
33085 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
33087 struct expand_vec_perm_d dremap, dfinal;
33088 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
33089 unsigned contents, h1, h2, h3, h4;
33090 unsigned char remap[2 * MAX_VECT_LEN];
33094 if (d->op0 == d->op1)
33097 /* The 256-bit unpck[lh]p[sd] instructions only operate within the 128-bit
33098 lanes. We can use similar techniques with the vperm2f128 instruction,
33099 but it requires slightly different logic. */
33100 if (GET_MODE_SIZE (d->vmode) != 16)
33103 /* Examine from whence the elements come. */
33105 for (i = 0; i < nelt; ++i)
33106 contents |= 1u << d->perm[i];
33108 /* Split the two input vectors into 4 halves. */
33109 h1 = (1u << nelt2) - 1;
33114 memset (remap, 0xff, sizeof (remap));
33117 /* If the elements are all from the low halves, use interleave low;
33118 similarly for interleave high. If the elements are from mis-matched
33119 halves, we can use shufps for V4SF/V4SI or do a DImode shuffle. */
33120 if ((contents & (h1 | h3)) == contents)
33122 for (i = 0; i < nelt2; ++i)
33125 remap[i + nelt] = i * 2 + 1;
33126 dremap.perm[i * 2] = i;
33127 dremap.perm[i * 2 + 1] = i + nelt;
33130 else if ((contents & (h2 | h4)) == contents)
33132 for (i = 0; i < nelt2; ++i)
33134 remap[i + nelt2] = i * 2;
33135 remap[i + nelt + nelt2] = i * 2 + 1;
33136 dremap.perm[i * 2] = i + nelt2;
33137 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
33140 else if ((contents & (h1 | h4)) == contents)
33142 for (i = 0; i < nelt2; ++i)
33145 remap[i + nelt + nelt2] = i + nelt2;
33146 dremap.perm[i] = i;
33147 dremap.perm[i + nelt2] = i + nelt + nelt2;
33151 dremap.vmode = V2DImode;
33153 dremap.perm[0] = 0;
33154 dremap.perm[1] = 3;
33157 else if ((contents & (h2 | h3)) == contents)
33159 for (i = 0; i < nelt2; ++i)
33161 remap[i + nelt2] = i;
33162 remap[i + nelt] = i + nelt2;
33163 dremap.perm[i] = i + nelt2;
33164 dremap.perm[i + nelt2] = i + nelt;
33168 dremap.vmode = V2DImode;
33170 dremap.perm[0] = 1;
33171 dremap.perm[1] = 2;
33177 /* Use the remapping array set up above to move the elements from their
33178 swizzled locations into their final destinations. */
33180 for (i = 0; i < nelt; ++i)
33182 unsigned e = remap[d->perm[i]];
33183 gcc_assert (e < nelt);
33184 dfinal.perm[i] = e;
33186 dfinal.op0 = gen_reg_rtx (dfinal.vmode);
33187 dfinal.op1 = dfinal.op0;
33188 dremap.target = dfinal.op0;
33190 /* Test if the final remap can be done with a single insn. For V4SFmode or
33191 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
33193 ok = expand_vec_perm_1 (&dfinal);
33194 seq = get_insns ();
33200 if (dremap.vmode != dfinal.vmode)
33202 dremap.target = gen_lowpart (dremap.vmode, dremap.target);
33203 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
33204 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
33207 ok = expand_vec_perm_1 (&dremap);
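/* For illustration: for V4SFmode with perm = { 5, 1, 4, 0 }, all
   elements come from the two low halves, so dremap becomes the
   interleave-low { 0, 4, 1, 5 } (unpcklps) and the remap array turns
   the original selector into the one-operand shuffle { 3, 2, 1, 0 },
   which expand_vec_perm_1 handles with a single shufps.  */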
33214 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
33215 permutation with two pshufb insns and an ior. We should have already
33216 failed all two-instruction sequences. */
33219 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
33221 rtx rperm[2][16], vperm, l, h, op, m128;
33222 unsigned int i, nelt, eltsz;
33224 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
33226 gcc_assert (d->op0 != d->op1);
33229 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
33231 /* Generate two permutation masks. If the required element is within
33232 the given vector it is shuffled into the proper lane. If the required
33233 element is in the other vector, force a zero into the lane by setting
33234 bit 7 in the permutation mask. */
33235 m128 = GEN_INT (-128);
33236 for (i = 0; i < nelt; ++i)
33238 unsigned j, e = d->perm[i];
33239 unsigned which = (e >= nelt);
33243 for (j = 0; j < eltsz; ++j)
33245 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
33246 rperm[1-which][i*eltsz + j] = m128;
33250 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
33251 vperm = force_reg (V16QImode, vperm);
33253 l = gen_reg_rtx (V16QImode);
33254 op = gen_lowpart (V16QImode, d->op0);
33255 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
33257 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
33258 vperm = force_reg (V16QImode, vperm);
33260 h = gen_reg_rtx (V16QImode);
33261 op = gen_lowpart (V16QImode, d->op1);
33262 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
33264 op = gen_lowpart (V16QImode, d->target);
33265 emit_insn (gen_iorv16qi3 (op, l, h));
33270 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
33271 and extract-odd permutations. */
33274 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
33281 t1 = gen_reg_rtx (V4DFmode);
33282 t2 = gen_reg_rtx (V4DFmode);
33284 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
33285 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
33286 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
33288 /* Now an unpck[lh]pd will produce the result required. */
33290 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
33292 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
33298 int mask = odd ? 0xdd : 0x88;
33300 t1 = gen_reg_rtx (V8SFmode);
33301 t2 = gen_reg_rtx (V8SFmode);
33302 t3 = gen_reg_rtx (V8SFmode);
33304 /* Shuffle within the 128-bit lanes to produce:
33305 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
33306 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
33309 /* Shuffle the lanes around to produce:
33310 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
33311 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
33314 /* Shuffle within the 128-bit lanes to produce:
33315 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
33316 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
33318 /* Shuffle within the 128-bit lanes to produce:
33319 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
33320 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
33322 /* Shuffle the lanes around to produce:
33323 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
33324 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
33333 /* These are always directly implementable by expand_vec_perm_1. */
33334 gcc_unreachable ();
33338 return expand_vec_perm_pshufb2 (d);
33341 /* We need 2*log2(N)-1 operations to achieve odd/even
33342 with interleave. */
33343 t1 = gen_reg_rtx (V8HImode);
33344 t2 = gen_reg_rtx (V8HImode);
33345 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
33346 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
33347 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
33348 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
33350 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
33352 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
33359 return expand_vec_perm_pshufb2 (d);
33362 t1 = gen_reg_rtx (V16QImode);
33363 t2 = gen_reg_rtx (V16QImode);
33364 t3 = gen_reg_rtx (V16QImode);
33365 emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
33366 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
33367 emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
33368 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
33369 emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
33370 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
33372 t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
33374 t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
33380 gcc_unreachable ();
33386 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
33387 extract-even and extract-odd permutations. */
33390 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
33392 unsigned i, odd, nelt = d->nelt;
33395 if (odd != 0 && odd != 1)
33398 for (i = 1; i < nelt; ++i)
33399 if (d->perm[i] != 2 * i + odd)
33402 return expand_vec_perm_even_odd_1 (d, odd);
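/* For illustration: the test above accepts selectors of the form
   { odd, 2 + odd, 4 + odd, ... }, e.g. { 1, 3, 5, 7 } for a V4SImode
   extract-odd of the two concatenated operands.  */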
33405 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
33406 permutations. We assume that expand_vec_perm_1 has already failed. */
33409 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
33411 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
33412 enum machine_mode vmode = d->vmode;
33413 unsigned char perm2[4];
33421 /* These are special-cased in sse.md so that we can optionally
33422 use the vbroadcast instruction. They expand to two insns
33423 if the input happens to be in a register. */
33424 gcc_unreachable ();
33430 /* These are always implementable using standard shuffle patterns. */
33431 gcc_unreachable ();
33435 /* These can be implemented via interleave. We save one insn by
33436 stopping once we have promoted to V4SImode and then using pshufd. */
33439 optab otab = vec_interleave_low_optab;
33443 otab = vec_interleave_high_optab;
33448 op0 = expand_binop (vmode, otab, op0, op0, NULL, 0, OPTAB_DIRECT);
33449 vmode = get_mode_wider_vector (vmode);
33450 op0 = gen_lowpart (vmode, op0);
33452 while (vmode != V4SImode);
33454 memset (perm2, elt, 4);
33455 ok = expand_vselect (gen_lowpart (V4SImode, d->target), op0, perm2, 4);
33460 gcc_unreachable ();
33464 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
33465 broadcast permutations. */
33468 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
33470 unsigned i, elt, nelt = d->nelt;
33472 if (d->op0 != d->op1)
33476 for (i = 1; i < nelt; ++i)
33477 if (d->perm[i] != elt)
33480 return expand_vec_perm_broadcast_1 (d);
33483 /* The guts of ix86_expand_vec_perm_builtin, also used by the ok hook.
33484 With all of the interface bits taken care of, perform the expansion
33485 in D and return true on success. */
33488 ix86_expand_vec_perm_builtin_1 (struct expand_vec_perm_d *d)
33490 /* Try a single instruction expansion. */
33491 if (expand_vec_perm_1 (d))
33494 /* Try sequences of two instructions. */
33496 if (expand_vec_perm_pshuflw_pshufhw (d))
33499 if (expand_vec_perm_palignr (d))
33502 if (expand_vec_perm_interleave2 (d))
33505 if (expand_vec_perm_broadcast (d))
33508 /* Try sequences of three instructions. */
33510 if (expand_vec_perm_pshufb2 (d))
33513 /* ??? Look for narrow permutations whose element orderings would
33514 allow the promotion to a wider mode. */
33516 /* ??? Look for sequences of interleave or a wider permute that place
33517 the data into the correct lanes for a half-vector shuffle like
33518 pshuf[lh]w or vpermilps. */
33520 /* ??? Look for sequences of interleave that produce the desired results.
33521 The combinatorics of punpck[lh] get pretty ugly... */
33523 if (expand_vec_perm_even_odd (d))
33529 /* Extract the values from the vector CST into the permutation array in D.
33530 Return 0 on error, 1 if all values from the permutation come from the
33531 first vector, 2 if all values from the second vector, and 3 otherwise. */
33534 extract_vec_perm_cst (struct expand_vec_perm_d *d, tree cst)
33536 tree list = TREE_VECTOR_CST_ELTS (cst);
33537 unsigned i, nelt = d->nelt;
33540 for (i = 0; i < nelt; ++i, list = TREE_CHAIN (list))
33542 unsigned HOST_WIDE_INT e;
33544 if (!host_integerp (TREE_VALUE (list), 1))
33546 e = tree_low_cst (TREE_VALUE (list), 1);
33550 ret |= (e < nelt ? 1 : 2);
33553 gcc_assert (list == NULL);
33555 /* For all elements from second vector, fold the elements to first. */
33557 for (i = 0; i < nelt; ++i)
33558 d->perm[i] -= nelt;
33564 ix86_expand_vec_perm_builtin (tree exp)
33566 struct expand_vec_perm_d d;
33567 tree arg0, arg1, arg2;
33569 arg0 = CALL_EXPR_ARG (exp, 0);
33570 arg1 = CALL_EXPR_ARG (exp, 1);
33571 arg2 = CALL_EXPR_ARG (exp, 2);
33573 d.vmode = TYPE_MODE (TREE_TYPE (arg0));
33574 d.nelt = GET_MODE_NUNITS (d.vmode);
33575 d.testing_p = false;
33576 gcc_assert (VECTOR_MODE_P (d.vmode));
33578 if (TREE_CODE (arg2) != VECTOR_CST)
33580 error_at (EXPR_LOCATION (exp),
33581 "vector permutation requires vector constant");
33585 switch (extract_vec_perm_cst (&d, arg2))
33591 error_at (EXPR_LOCATION (exp), "invalid vector permutation constant");
33595 if (!operand_equal_p (arg0, arg1, 0))
33597 d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
33598 d.op0 = force_reg (d.vmode, d.op0);
33599 d.op1 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
33600 d.op1 = force_reg (d.vmode, d.op1);
33604 /* The elements of PERM do not suggest that only the first operand
33605 is used, but both operands are identical. Allow easier matching
33606 of the permutation by folding the permutation into the single input vector. */
33609 unsigned i, nelt = d.nelt;
33610 for (i = 0; i < nelt; ++i)
33611 if (d.perm[i] >= nelt)
33617 d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
33618 d.op0 = force_reg (d.vmode, d.op0);
33623 d.op0 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
33624 d.op0 = force_reg (d.vmode, d.op0);
33629 d.target = gen_reg_rtx (d.vmode);
33630 if (ix86_expand_vec_perm_builtin_1 (&d))
33633 /* For compiler generated permutations, we should never get here, because
33634 the compiler should also be checking the ok hook. But since this is a
33635 builtin the user has access to, don't abort. */
33639 sorry ("vector permutation (%d %d)", d.perm[0], d.perm[1]);
33642 sorry ("vector permutation (%d %d %d %d)",
33643 d.perm[0], d.perm[1], d.perm[2], d.perm[3]);
33646 sorry ("vector permutation (%d %d %d %d %d %d %d %d)",
33647 d.perm[0], d.perm[1], d.perm[2], d.perm[3],
33648 d.perm[4], d.perm[5], d.perm[6], d.perm[7]);
33651 sorry ("vector permutation "
33652 "(%d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d)",
33653 d.perm[0], d.perm[1], d.perm[2], d.perm[3],
33654 d.perm[4], d.perm[5], d.perm[6], d.perm[7],
33655 d.perm[8], d.perm[9], d.perm[10], d.perm[11],
33656 d.perm[12], d.perm[13], d.perm[14], d.perm[15]);
33659 gcc_unreachable ();
33662 return CONST0_RTX (d.vmode);
33665 /* Implement targetm.vectorize.builtin_vec_perm_ok. */
33668 ix86_vectorize_builtin_vec_perm_ok (tree vec_type, tree mask)
33670 struct expand_vec_perm_d d;
33674 d.vmode = TYPE_MODE (vec_type);
33675 d.nelt = GET_MODE_NUNITS (d.vmode);
33676 d.testing_p = true;
33678 /* Given sufficient ISA support we can just return true here
33679 for selected vector modes. */
33680 if (GET_MODE_SIZE (d.vmode) == 16)
33682 /* All implementable with a single vpperm insn. */
33685 /* All implementable with 2 pshufb + 1 ior. */
33688 /* All implementable with shufpd or unpck[lh]pd. */
33693 vec_mask = extract_vec_perm_cst (&d, mask);
33695 /* This hook cannot be called in response to something that the
33696 user does (unlike the builtin expander) so we shouldn't ever see
33697 an error generated from the extract. */
33698 gcc_assert (vec_mask > 0 && vec_mask <= 3);
33699 one_vec = (vec_mask != 3);
33701 /* Implementable with shufps or pshufd. */
33702 if (one_vec && (d.vmode == V4SFmode || d.vmode == V4SImode))
33705 /* Otherwise we have to go through the motions and see if we can
33706 figure out how to generate the requested permutation. */
33707 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
33708 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
33710 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
33713 ret = ix86_expand_vec_perm_builtin_1 (&d);
33720 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
33722 struct expand_vec_perm_d d;
33728 d.vmode = GET_MODE (targ);
33729 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
33730 d.testing_p = false;
33732 for (i = 0; i < nelt; ++i)
33733 d.perm[i] = i * 2 + odd;
33735 /* We'll either be able to implement the permutation directly... */
33736 if (expand_vec_perm_1 (&d))
33739 /* ... or we use the special-case patterns. */
33740 expand_vec_perm_even_odd_1 (&d, odd);
33743 /* Expand an insert into a vector register through pinsr insn.
33744 Return true if successful. */
33747 ix86_expand_pinsr (rtx *operands)
33749 rtx dst = operands[0];
33750 rtx src = operands[3];
33752 unsigned int size = INTVAL (operands[1]);
33753 unsigned int pos = INTVAL (operands[2]);
33755 if (GET_CODE (dst) == SUBREG)
33757 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
33758 dst = SUBREG_REG (dst);
33761 if (GET_CODE (src) == SUBREG)
33762 src = SUBREG_REG (src);
33764 switch (GET_MODE (dst))
33771 enum machine_mode srcmode, dstmode;
33772 rtx (*pinsr)(rtx, rtx, rtx, rtx);
33774 srcmode = mode_for_size (size, MODE_INT, 0);
33779 if (!TARGET_SSE4_1)
33781 dstmode = V16QImode;
33782 pinsr = gen_sse4_1_pinsrb;
33788 dstmode = V8HImode;
33789 pinsr = gen_sse2_pinsrw;
33793 if (!TARGET_SSE4_1)
33795 dstmode = V4SImode;
33796 pinsr = gen_sse4_1_pinsrd;
33800 gcc_assert (TARGET_64BIT);
33801 if (!TARGET_SSE4_1)
33803 dstmode = V2DImode;
33804 pinsr = gen_sse4_1_pinsrq;
33811 dst = gen_lowpart (dstmode, dst);
33812 src = gen_lowpart (srcmode, src);
33816 emit_insn (pinsr (dst, dst, src, GEN_INT (1 << pos)));
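/* For illustration: by the time of the emit above, POS must have been
   reduced from a bit offset to a lane index (that reduction is not
   visible here); a 16-bit insert at bit offset 32 then passes
   GEN_INT (1 << 2), the vec_merge mask selecting lane 2 of the V8HI
   destination.  */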
33825 /* This function returns the calling-ABI-specific va_list type node.
33826 It returns the FNDECL-specific va_list type.
33829 ix86_fn_abi_va_list (tree fndecl)
33832 return va_list_type_node;
33833 gcc_assert (fndecl != NULL_TREE);
33835 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
33836 return ms_va_list_type_node;
33838 return sysv_va_list_type_node;
33841 /* Returns the canonical va_list type specified by TYPE. If there
33842 is no valid TYPE provided, it returns NULL_TREE. */
33845 ix86_canonical_va_list_type (tree type)
33849 /* Resolve references and pointers to va_list type. */
33850 if (TREE_CODE (type) == MEM_REF)
33851 type = TREE_TYPE (type);
33852 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
33853 type = TREE_TYPE (type);
33854 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
33855 type = TREE_TYPE (type);
33857 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
33859 wtype = va_list_type_node;
33860 gcc_assert (wtype != NULL_TREE);
33862 if (TREE_CODE (wtype) == ARRAY_TYPE)
33864 /* If va_list is an array type, the argument may have decayed
33865 to a pointer type, e.g. by being passed to another function.
33866 In that case, unwrap both types so that we can compare the
33867 underlying records. */
33868 if (TREE_CODE (htype) == ARRAY_TYPE
33869 || POINTER_TYPE_P (htype))
33871 wtype = TREE_TYPE (wtype);
33872 htype = TREE_TYPE (htype);
33875 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
33876 return va_list_type_node;
33877 wtype = sysv_va_list_type_node;
33878 gcc_assert (wtype != NULL_TREE);
33880 if (TREE_CODE (wtype) == ARRAY_TYPE)
33882 /* If va_list is an array type, the argument may have decayed
33883 to a pointer type, e.g. by being passed to another function.
33884 In that case, unwrap both types so that we can compare the
33885 underlying records. */
33886 if (TREE_CODE (htype) == ARRAY_TYPE
33887 || POINTER_TYPE_P (htype))
33889 wtype = TREE_TYPE (wtype);
33890 htype = TREE_TYPE (htype);
33893 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
33894 return sysv_va_list_type_node;
33895 wtype = ms_va_list_type_node;
33896 gcc_assert (wtype != NULL_TREE);
33898 if (TREE_CODE (wtype) == ARRAY_TYPE)
33900 /* If va_list is an array type, the argument may have decayed
33901 to a pointer type, e.g. by being passed to another function.
33902 In that case, unwrap both types so that we can compare the
33903 underlying records. */
33904 if (TREE_CODE (htype) == ARRAY_TYPE
33905 || POINTER_TYPE_P (htype))
33907 wtype = TREE_TYPE (wtype);
33908 htype = TREE_TYPE (htype);
33911 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
33912 return ms_va_list_type_node;
33915 return std_canonical_va_list_type (type);
33918 /* Iterate through the target-specific builtin types for va_list.
33919 IDX denotes the iterator, *PTREE is set to the result type of
33920 the va_list builtin, and *PNAME to its internal type.
33921 Returns zero if there is no element for this index, otherwise
33922 IDX should be increased upon the next call.
33923 Note, do not iterate a base builtin's name like __builtin_va_list.
33924 Used from c_common_nodes_and_builtins. */
33927 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
33937 *ptree = ms_va_list_type_node;
33938 *pname = "__builtin_ms_va_list";
33942 *ptree = sysv_va_list_type_node;
33943 *pname = "__builtin_sysv_va_list";
33951 #undef TARGET_SCHED_DISPATCH
33952 #define TARGET_SCHED_DISPATCH has_dispatch
33953 #undef TARGET_SCHED_DISPATCH_DO
33954 #define TARGET_SCHED_DISPATCH_DO do_dispatch
33956 /* The size of the dispatch window is the total number of bytes of
33957 object code allowed in a window. */
33958 #define DISPATCH_WINDOW_SIZE 16
33960 /* Number of dispatch windows considered for scheduling. */
33961 #define MAX_DISPATCH_WINDOWS 3
33963 /* Maximum number of instructions in a window. */
33966 /* Maximum number of immediate operands in a window. */
33969 /* Maximum number of immediate bits allowed in a window. */
33970 #define MAX_IMM_SIZE 128
33972 /* Maximum number of 32 bit immediates allowed in a window. */
33973 #define MAX_IMM_32 4
33975 /* Maximum number of 64 bit immediates allowed in a window. */
33976 #define MAX_IMM_64 2
33978 /* Maximum total of loads or prefetches allowed in a window. */
33981 /* Maximum total of stores allowed in a window. */
33982 #define MAX_STORE 1
33988 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
33989 enum dispatch_group {
34004 /* Number of allowable groups in a dispatch window. It is an array
34005 indexed by dispatch_group enum. 100 is used as a big number,
34006 because the number of these kinds of operations does not have any
34007 effect in the dispatch window, but we need them for other reasons in the table. */
34009 static unsigned int num_allowable_groups[disp_last] = {
34010 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
34013 char group_name[disp_last + 1][16] = {
34014 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
34015 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
34016 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
34019 /* Instruction path. */
34022 path_single, /* Single micro op. */
34023 path_double, /* Double micro op. */
34024 path_multi, /* Instructions with more than 2 micro ops. */
34028 /* sched_insn_info defines a window to the instructions scheduled in
34029 the basic block. It contains a pointer to the insn_info table and
34030 the instruction scheduled.
34032 Windows are allocated for each basic block and are linked together. */
34034 typedef struct sched_insn_info_s {
34036 enum dispatch_group group;
34037 enum insn_path path;
34042 /* Linked list of dispatch windows. This is a two-way list of
34043 dispatch windows of a basic block. It contains information about
34044 the number of uops in the window and the total number of
34045 instructions and of bytes in the object code for this dispatch window. */
34047 typedef struct dispatch_windows_s {
34048 int num_insn; /* Number of insn in the window. */
34049 int num_uops; /* Number of uops in the window. */
34050 int window_size; /* Number of bytes in the window. */
34051 int window_num; /* Window number, 0 or 1. */
34052 int num_imm; /* Number of immediates in an insn. */
34053 int num_imm_32; /* Number of 32 bit immediates in an insn. */
34054 int num_imm_64; /* Number of 64 bit immediates in an insn. */
34055 int imm_size; /* Total size of immediates in the window. */
34056 int num_loads; /* Total memory loads in the window. */
34057 int num_stores; /* Total memory stores in the window. */
34058 int violation; /* Violation exists in window. */
34059 sched_insn_info *window; /* Pointer to the window. */
34060 struct dispatch_windows_s *next;
34061 struct dispatch_windows_s *prev;
34062 } dispatch_windows;
34064 /* Immediate values used in an insn. */
34065 typedef struct imm_info_s
34072 static dispatch_windows *dispatch_window_list;
34073 static dispatch_windows *dispatch_window_list1;
34075 /* Get dispatch group of insn. */
34077 static enum dispatch_group
34078 get_mem_group (rtx insn)
34080 enum attr_memory memory;
34082 if (INSN_CODE (insn) < 0)
34083 return disp_no_group;
34084 memory = get_attr_memory (insn);
34085 if (memory == MEMORY_STORE)
34088 if (memory == MEMORY_LOAD)
34091 if (memory == MEMORY_BOTH)
34092 return disp_load_store;
34094 return disp_no_group;
34097 /* Return true if insn is a compare instruction. */
34102 enum attr_type type;
34104 type = get_attr_type (insn);
34105 return (type == TYPE_TEST
34106 || type == TYPE_ICMP
34107 || type == TYPE_FCMP
34108 || GET_CODE (PATTERN (insn)) == COMPARE);
34111 /* Return true if a dispatch violation was encountered. */
34114 dispatch_violation (void)
34116 if (dispatch_window_list->next)
34117 return dispatch_window_list->next->violation;
34118 return dispatch_window_list->violation;
34121 /* Return true if insn is a branch instruction. */
34124 is_branch (rtx insn)
34126 return (CALL_P (insn) || JUMP_P (insn));
34129 /* Return true if insn is a prefetch instruction. */
34132 is_prefetch (rtx insn)
34134 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
34137 /* This function initializes a dispatch window and the list container holding a
34138 pointer to the window. */
34141 init_window (int window_num)
34144 dispatch_windows *new_list;
34146 if (window_num == 0)
34147 new_list = dispatch_window_list;
34149 new_list = dispatch_window_list1;
34151 new_list->num_insn = 0;
34152 new_list->num_uops = 0;
34153 new_list->window_size = 0;
34154 new_list->next = NULL;
34155 new_list->prev = NULL;
34156 new_list->window_num = window_num;
34157 new_list->num_imm = 0;
34158 new_list->num_imm_32 = 0;
34159 new_list->num_imm_64 = 0;
34160 new_list->imm_size = 0;
34161 new_list->num_loads = 0;
34162 new_list->num_stores = 0;
34163 new_list->violation = false;
34165 for (i = 0; i < MAX_INSN; i++)
34167 new_list->window[i].insn = NULL;
34168 new_list->window[i].group = disp_no_group;
34169 new_list->window[i].path = no_path;
34170 new_list->window[i].byte_len = 0;
34171 new_list->window[i].imm_bytes = 0;
34176 /* This function allocates and initializes a dispatch window and the
34177 list container holding a pointer to the window. */
34179 static dispatch_windows *
34180 allocate_window (void)
34182 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
34183 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
34188 /* This routine initializes the dispatch scheduling information. It
34189 initiates building dispatch scheduler tables and constructs the
34190 first dispatch window. */
34193 init_dispatch_sched (void)
34195 /* Allocate a dispatch list and a window. */
34196 dispatch_window_list = allocate_window ();
34197 dispatch_window_list1 = allocate_window ();
34202 /* This function returns true if a branch is detected. End of a basic block
34203 does not have to be a branch, but here we assume only branches end a basic block. */
34207 is_end_basic_block (enum dispatch_group group)
34209 return group == disp_branch;
34212 /* This function is called when the end of a window processing is reached. */
34215 process_end_window (void)
34217 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
34218 if (dispatch_window_list->next)
34220 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
34221 gcc_assert (dispatch_window_list->window_size
34222 + dispatch_window_list1->window_size <= 48);
34228 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
34229 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
34230 for 48 bytes of instructions. Note that these windows are not dispatch
34231 windows whose sizes are DISPATCH_WINDOW_SIZE. */
34233 static dispatch_windows *
34234 allocate_next_window (int window_num)
34236 if (window_num == 0)
34238 if (dispatch_window_list->next)
34241 return dispatch_window_list;
34244 dispatch_window_list->next = dispatch_window_list1;
34245 dispatch_window_list1->prev = dispatch_window_list;
34247 return dispatch_window_list1;
34250 /* Increment the number of immediate operands of an instruction. */
34253 find_constant_1 (rtx *in_rtx, imm_info *imm_values)
34258 switch (GET_CODE (*in_rtx))
34263 (imm_values->imm)++;
34264 if (x86_64_immediate_operand (*in_rtx, SImode))
34265 (imm_values->imm32)++;
34267 (imm_values->imm64)++;
34271 (imm_values->imm)++;
34272 (imm_values->imm64)++;
34276 if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
34278 (imm_values->imm)++;
34279 (imm_values->imm32)++;
34290 /* Compute number of immediate operands of an instruction. */
34293 find_constant (rtx in_rtx, imm_info *imm_values)
34295 for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
34296 (rtx_function) find_constant_1, (void *) imm_values);
34299 /* Return total size of immediate operands of an instruction along with number
34300 of corresponding immediate-operands. It initializes its parameters to zero
34301 before calling FIND_CONSTANT.
34302 INSN is the input instruction. IMM is the total of immediates.
34303 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64 bit immediates. */
34307 get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
34309 imm_info imm_values = {0, 0, 0};
34311 find_constant (insn, &imm_values);
34312 *imm = imm_values.imm;
34313 *imm32 = imm_values.imm32;
34314 *imm64 = imm_values.imm64;
34315 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
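/* For illustration: an insn such as movabsq $0x123456789a, %rax carries
   one immediate that does not fit in SImode, so *IMM == 1, *IMM64 == 1
   and the returned size is 8; a movl $1, %eax instead yields
   *IMM32 == 1 and size 4.  */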
34318 /* This function indicates if an operand of an instruction is an immediate. */
34322 has_immediate (rtx insn)
34324 int num_imm_operand;
34325 int num_imm32_operand;
34326 int num_imm64_operand;
34329 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
34330 &num_imm64_operand);
34334 /* Return single or double path for instructions. */
34336 static enum insn_path
34337 get_insn_path (rtx insn)
34339 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
34341 if ((int)path == 0)
34342 return path_single;
34344 if ((int)path == 1)
34345 return path_double;
34350 /* Return insn dispatch group. */
34352 static enum dispatch_group
34353 get_insn_group (rtx insn)
34355 enum dispatch_group group = get_mem_group (insn);
34359 if (is_branch (insn))
34360 return disp_branch;
34365 if (has_immediate (insn))
34368 if (is_prefetch (insn))
34369 return disp_prefetch;
34371 return disp_no_group;
34374 /* Count number of GROUP restricted instructions in a dispatch
34375 window WINDOW_LIST. */
34378 count_num_restricted (rtx insn, dispatch_windows *window_list)
34380 enum dispatch_group group = get_insn_group (insn);
34382 int num_imm_operand;
34383 int num_imm32_operand;
34384 int num_imm64_operand;
34386 if (group == disp_no_group)
34389 if (group == disp_imm)
34391 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
34392 &num_imm64_operand);
34393 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
34394 || num_imm_operand + window_list->num_imm > MAX_IMM
34395 || (num_imm32_operand > 0
34396 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
34397 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
34398 || (num_imm64_operand > 0
34399 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
34400 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
34401 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
34402 && num_imm64_operand > 0
34403 && ((window_list->num_imm_64 > 0
34404 && window_list->num_insn >= 2)
34405 || window_list->num_insn >= 3)))
34411 if ((group == disp_load_store
34412 && (window_list->num_loads >= MAX_LOAD
34413 || window_list->num_stores >= MAX_STORE))
34414 || ((group == disp_load
34415 || group == disp_prefetch)
34416 && window_list->num_loads >= MAX_LOAD)
34417 || (group == disp_store
34418 && window_list->num_stores >= MAX_STORE))
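/* For illustration: the immediate checks above count one 64-bit
   immediate as two 32-bit slots, so a window already holding one 64-bit
   immediate can accept at most two more 32-bit immediates, since
   1 * 2 + 3 would exceed MAX_IMM_32.  */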
34424 /* This function returns true if insn satisfies dispatch rules on the
34425 last window scheduled. */
34428 fits_dispatch_window (rtx insn)
34430 dispatch_windows *window_list = dispatch_window_list;
34431 dispatch_windows *window_list_next = dispatch_window_list->next;
34432 unsigned int num_restrict;
34433 enum dispatch_group group = get_insn_group (insn);
34434 enum insn_path path = get_insn_path (insn);
34437 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
34438 instructions should be given the lowest priority in the
34439 scheduling process in the Haifa scheduler to make sure they will be
34440 scheduled in the same dispatch window as the reference to them. */
34441 if (group == disp_jcc || group == disp_cmp)
34444 /* Check nonrestricted. */
34445 if (group == disp_no_group || group == disp_branch)
34448 /* Get last dispatch window. */
34449 if (window_list_next)
34450 window_list = window_list_next;
34452 if (window_list->window_num == 1)
34454 sum = window_list->prev->window_size + window_list->window_size;
34457 || (min_insn_size (insn) + sum) >= 48)
34458 /* Window 1 is full. Go for next window. */
34462 num_restrict = count_num_restricted (insn, window_list);
34464 if (num_restrict > num_allowable_groups[group])
34467 /* See if it fits in the first window. */
34468 if (window_list->window_num == 0)
34470 /* The first window should have only single and double path uops. */
34472 if (path == path_double
34473 && (window_list->num_uops + 2) > MAX_INSN)
34475 else if (path != path_single)
34481 /* Add an instruction INSN with NUM_UOPS micro-operations to the
34482 dispatch window WINDOW_LIST. */
34485 add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
34487 int byte_len = min_insn_size (insn);
34488 int num_insn = window_list->num_insn;
34490 sched_insn_info *window = window_list->window;
34491 enum dispatch_group group = get_insn_group (insn);
34492 enum insn_path path = get_insn_path (insn);
34493 int num_imm_operand;
34494 int num_imm32_operand;
34495 int num_imm64_operand;
34497 if (!window_list->violation && group != disp_cmp
34498 && !fits_dispatch_window (insn))
34499 window_list->violation = true;
34501 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
34502 &num_imm64_operand);
34504 /* Initialize window with new instruction. */
34505 window[num_insn].insn = insn;
34506 window[num_insn].byte_len = byte_len;
34507 window[num_insn].group = group;
34508 window[num_insn].path = path;
34509 window[num_insn].imm_bytes = imm_size;
34511 window_list->window_size += byte_len;
34512 window_list->num_insn = num_insn + 1;
34513 window_list->num_uops = window_list->num_uops + num_uops;
34514 window_list->imm_size += imm_size;
34515 window_list->num_imm += num_imm_operand;
34516 window_list->num_imm_32 += num_imm32_operand;
34517 window_list->num_imm_64 += num_imm64_operand;
34519 if (group == disp_store)
34520 window_list->num_stores += 1;
34521 else if (group == disp_load
34522 || group == disp_prefetch)
34523 window_list->num_loads += 1;
34524 else if (group == disp_load_store)
34526 window_list->num_stores += 1;
34527 window_list->num_loads += 1;
34531 /* Adds a scheduled instruction, INSN, to the current dispatch window.
34532 If the total bytes of instructions or the number of instructions in
34533 the window exceed the allowable limit, it allocates a new window.
34536 add_to_dispatch_window (rtx insn)
34539 dispatch_windows *window_list;
34540 dispatch_windows *next_list;
34541 dispatch_windows *window0_list;
34542 enum insn_path path;
34543 enum dispatch_group insn_group;
34551 if (INSN_CODE (insn) < 0)
34554 byte_len = min_insn_size (insn);
34555 window_list = dispatch_window_list;
34556 next_list = window_list->next;
34557 path = get_insn_path (insn);
34558 insn_group = get_insn_group (insn);
34560 /* Get the last dispatch window. */
34562 window_list = dispatch_window_list->next;
34564 if (path == path_single)
34566 else if (path == path_double)
34569 insn_num_uops = (int) path;
34571 /* If the current window is full, get a new window.
34572 Window number zero is full if MAX_INSN uops are scheduled in it.
34573 Window number one is full if window zero's bytes plus window
34574 one's bytes reach 32, or if the bytes of the new instruction added
34575 to the total make it greater than 48, or if it already has MAX_INSN
34576 instructions in it. */
34577 num_insn = window_list->num_insn;
34578 num_uops = window_list->num_uops;
34579 window_num = window_list->window_num;
34580 insn_fits = fits_dispatch_window (insn);
34582 if (num_insn >= MAX_INSN
34583 || num_uops + insn_num_uops > MAX_INSN
34586 window_num = ~window_num & 1;
34587 window_list = allocate_next_window (window_num);
34590 if (window_num == 0)
34592 add_insn_window (insn, window_list, insn_num_uops);
34593 if (window_list->num_insn >= MAX_INSN
34594 && insn_group == disp_branch)
34596 process_end_window ();
34600 else if (window_num == 1)
34602 window0_list = window_list->prev;
34603 sum = window0_list->window_size + window_list->window_size;
34605 || (byte_len + sum) >= 48)
34607 process_end_window ();
34608 window_list = dispatch_window_list;
34611 add_insn_window (insn, window_list, insn_num_uops);
34614 gcc_unreachable ();
34616 if (is_end_basic_block (insn_group))
34618 /* End of basic block is reached do end-basic-block process. */
34619 process_end_window ();
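/* Worked example (not compiled in) of the fullness rules described in
   the comment inside add_to_dispatch_window.  Suppose window 0 holds
   18 bytes and window 1 holds 14 bytes (sum == 32): the next insn
   triggers process_end_window and dispatch starts over from window 0.
   Likewise, with a sum of 30 bytes an 18-byte insn reaches the
   48-byte limit and rotates the same way.  All byte counts here are
   hypothetical.  */
#if 0
static int
example_window_rotation (void)
{
  int window0 = 18, window1 = 14;  /* sum == 32: window 1 is full.  */
  int sum = window0 + window1;
  int next_insn = 5;

  if (sum == 32 || next_insn + sum >= 48)
    return 1;                      /* Would call process_end_window.  */
  return 0;
}
#endif
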
/* Print the dispatch window, WINDOW_NUM, to FILE.  */

DEBUG_FUNCTION static void
debug_dispatch_window_file (FILE *file, int window_num)
{
  dispatch_windows *list;
  int i;

  if (window_num == 0)
    list = dispatch_window_list;
  else
    list = dispatch_window_list1;

  fprintf (file, "Window #%d:\n", list->window_num);
  fprintf (file, "  num_insn = %d, num_uops = %d, window_size = %d\n",
           list->num_insn, list->num_uops, list->window_size);
  fprintf (file, "  num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
           list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);

  fprintf (file, "  num_loads = %d, num_stores = %d\n", list->num_loads,
           list->num_stores);
  fprintf (file, " insn info:\n");

  for (i = 0; i < MAX_INSN; i++)
    {
      if (!list->window[i].insn)
        break;
      fprintf (file, "    group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
               i, group_name[list->window[i].group],
               i, (void *) list->window[i].insn,
               i, list->window[i].path,
               i, list->window[i].byte_len,
               i, list->window[i].imm_bytes);
    }
}

/* Print to stdout a dispatch window.  */

DEBUG_FUNCTION void
debug_dispatch_window (int window_num)
{
  debug_dispatch_window_file (stdout, window_num);
}

/* Print INSN dispatch information to FILE.  */

DEBUG_FUNCTION static void
debug_insn_dispatch_info_file (FILE *file, rtx insn)
{
  int byte_len;
  enum insn_path path;
  enum dispatch_group group;
  int imm_size;
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (INSN_CODE (insn) < 0)
    return;

  byte_len = min_insn_size (insn);
  path = get_insn_path (insn);
  group = get_insn_group (insn);
  imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
                                 &num_imm64_operand);

  fprintf (file, " insn info:\n");
  fprintf (file, "  group = %s, path = %d, byte_len = %d\n",
           group_name[group], path, byte_len);
  fprintf (file, "  num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
           num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
}

/* Print to stdout the status of the ready list with respect to
   dispatch windows.  */

DEBUG_FUNCTION void
debug_ready_dispatch (void)
{
  int i;
  int no_ready = number_in_ready ();

  fprintf (stdout, "Number of ready: %d\n", no_ready);

  for (i = 0; i < no_ready; i++)
    debug_insn_dispatch_info_file (stdout, get_ready_element (i));
}

/* This routine is the driver of the dispatch scheduler.  */

static void
do_dispatch (rtx insn, int mode)
{
  if (mode == DISPATCH_INIT)
    init_dispatch_sched ();
  else if (mode == ADD_TO_DISPATCH_WINDOW)
    add_to_dispatch_window (insn);
}

/* Return TRUE if Dispatch Scheduling is supported.  */

static bool
has_dispatch (rtx insn, int action)
{
  if ((ix86_tune == PROCESSOR_BDVER1 || ix86_tune == PROCESSOR_BDVER2)
      && flag_dispatch_scheduler)
    switch (action)
      {
      default:
        return false;

      case IS_DISPATCH_ON:
        return true;

      case IS_CMP:
        return is_cmp (insn);

      case DISPATCH_VIOLATION:
        return dispatch_violation ();

      case FITS_DISPATCH_WINDOW:
        return fits_dispatch_window (insn);
      }

  return false;
}

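/* Usage sketch (not compiled in): how a scheduler loop would consult
   the two entry points above.  The assumption here is that these
   functions are wired into targetm via the TARGET_SCHED_DISPATCH and
   TARGET_SCHED_DISPATCH_DO hooks elsewhere in this file; the driver
   loop itself is hypothetical.  */
#if 0
static void
example_dispatch_driver (rtx insn)
{
  if (has_dispatch (insn, IS_DISPATCH_ON))
    {
      /* Reset window state once per region, then feed insns in.  */
      do_dispatch (NULL_RTX, DISPATCH_INIT);
      do_dispatch (insn, ADD_TO_DISPATCH_WINDOW);
    }
}
#endif
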
/* ??? No autovectorization into MMX or 3DNOW until we can reliably
   place emms and femms instructions.  */

static enum machine_mode
ix86_preferred_simd_mode (enum machine_mode mode)
{
  if (!TARGET_SSE)
    return word_mode;

  switch (mode)
    {
    case QImode:
      return TARGET_AVX2 ? V32QImode : V16QImode;
    case HImode:
      return TARGET_AVX2 ? V16HImode : V8HImode;
    case SImode:
      return TARGET_AVX2 ? V8SImode : V4SImode;
    case DImode:
      return TARGET_AVX2 ? V4DImode : V2DImode;

    case SFmode:
      if (TARGET_AVX && !TARGET_PREFER_AVX128)
        return V8SFmode;
      else
        return V4SFmode;

    case DFmode:
      if (!TARGET_VECTORIZE_DOUBLE)
        return word_mode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
        return V4DFmode;
      else if (TARGET_SSE2)
        return V2DFmode;
      return word_mode;

    default:
      return word_mode;
    }
}

/* If AVX is enabled then try vectorizing with both 256bit and 128bit
   vectors.  */

static unsigned int
ix86_autovectorize_vector_sizes (void)
{
  return (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
}

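/* Illustrative note (not compiled in): the return value above is a
   bitmask of vector sizes, in bytes, for the vectorizer to try.
   With AVX enabled and 256-bit vectors not disfavored, 32 | 16
   requests both 32-byte (256-bit) and 16-byte (128-bit) attempts;
   0 means "use only the preferred SIMD mode".  */
#if 0
static void
example_vector_sizes (void)
{
  unsigned int sizes = 32 | 16;  /* Try 32-byte, then 16-byte.  */

  if (sizes & 32)
    /* Vectorize with 256-bit (e.g. V8SF-class) modes first.  */;
  if (sizes & 16)
    /* Then retry with 128-bit (e.g. V4SF-class) modes.  */;
}
#endif
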
/* Initialize the GCC target structure.  */
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY ix86_return_in_memory

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#  undef TARGET_MERGE_DECL_ATTRIBUTES
#  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL ix86_builtin_decl
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  ix86_builtin_vectorized_function

#undef TARGET_VECTORIZE_BUILTIN_CONVERSION
#define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion

#undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue

#undef TARGET_ENCODE_SECTION_INFO
#ifndef SUBTARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
#endif

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_BYTE_OP
#define TARGET_ASM_BYTE_OP ASM_BYTE

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif

#undef TARGET_PROFILE_BEFORE_PROLOGUE
#define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND ix86_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra

#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p

#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
#endif

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START x86_file_start

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE ix86_option_override

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
#define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list

#undef TARGET_ENUM_VA_LIST_P
#define TARGET_ENUM_VA_LIST_P ix86_enum_va_list

#undef TARGET_FN_ABI_VA_LIST
#define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list

#undef TARGET_CANONICAL_VA_LIST_TYPE
#define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start

#undef TARGET_MD_ASM_CLOBBERS
#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers

#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG ix86_function_arg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
#undef TARGET_UPDATE_STACK_BOUNDARY
#define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
#undef TARGET_GET_DRAP_RTX
#define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_STATIC_CHAIN
#define TARGET_STATIC_CHAIN ix86_static_chain
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
#undef TARGET_RETURN_POPS_ARGS
#define TARGET_RETURN_POPS_ARGS ix86_return_pops_args

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
#endif

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ix86_mangle_type

#ifndef TARGET_MACHO
#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
#endif

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD ix86_secondary_reload

#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
#undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
#define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  ix86_builtin_vectorization_cost
#undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
#define TARGET_VECTORIZE_BUILTIN_VEC_PERM \
  ix86_vectorize_builtin_vec_perm
#undef TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK
#define TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK \
  ix86_vectorize_builtin_vec_perm_ok
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  ix86_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  ix86_autovectorize_vector_sizes

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE ix86_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE ix86_function_specific_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT ix86_function_specific_print

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P ix86_can_inline_p

#undef TARGET_EXPAND_TO_RTL_HOOK
#define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ix86_can_eliminate

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry

#undef TARGET_ASM_CODE_END
#define TARGET_ASM_CODE_END ix86_code_end

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage

#if TARGET_MACHO
#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS darwin_rename_builtins
#endif

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-i386.h"