/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
31 #include "insn-config.h"
32 #include "conditions.h"
34 #include "insn-codes.h"
35 #include "insn-attr.h"
42 #include "diagnostic-core.h"
44 #include "basic-block.h"
47 #include "target-def.h"
48 #include "common/common-target.h"
49 #include "langhooks.h"
54 #include "tm-constrs.h"
58 #include "sched-int.h"
62 #include "diagnostic.h"
/* State of the upper 128bits of an AVX register.  */
enum upper_128bits_state
{
  unknown = 0,
  unused,
  used
};

typedef struct block_info_def
{
  /* State of the upper 128bits of AVX registers at exit.  */
  enum upper_128bits_state state;
  /* TRUE if state of the upper 128bits of AVX registers is unchanged
     in this block.  */
  bool unchanged;
  /* TRUE if block has been processed.  */
  bool processed;
  /* TRUE if block has been scanned.  */
  bool scanned;
  /* Previous state of the upper 128bits of AVX registers at entry.  */
  enum upper_128bits_state prev;
} *block_info;

#define BLOCK_INFO(B)   ((block_info) (B)->aux)
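
/* The pass hangs one of these records off each basic_block's aux
   field.  A minimal lifecycle sketch (the aux helpers are the standard
   CFG ones used below in move_or_delete_vzeroupper):

     alloc_aux_for_blocks (sizeof (struct block_info_def));
     ...
     BLOCK_INFO (bb)->state = unknown;   // query/update per block
     ...
     free_aux_for_blocks ();             // drop all aux records  */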
enum call_avx256_state
{
  /* Callee returns 256bit AVX register.  */
  callee_return_avx256 = -1,
  /* Callee returns and passes 256bit AVX register.  */
  callee_return_pass_avx256,
  /* Callee passes 256bit AVX register.  */
  callee_pass_avx256,
  /* Callee doesn't return nor pass a 256bit AVX register, or no
     256bit AVX register in function return.  */
  call_no_avx256,
  /* vzeroupper intrinsic.  */
  vzeroupper_intrinsic
};
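
/* Sketch of how this enum reaches the pass below (an inference from
   the INTVAL (XVECEXP (pat, 0, 0)) read in move_or_delete_vzeroupper_2,
   not a quote of i386.md): each vzeroupper emitted around a call
   carries one of these values as the const_int operand of its
   UNSPEC_VOLATILE, e.g.

     (unspec_volatile [(const_int -1)] UNSPECV_VZEROUPPER)

   would mark a call whose callee returns a 256bit AVX register
   (callee_return_avx256).  */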
/* Check if a 256bit AVX register is referenced in stores.  */

static void
check_avx256_stores (rtx dest, const_rtx set, void *data)
{
  if ((REG_P (dest)
       && VALID_AVX256_REG_MODE (GET_MODE (dest)))
      || (GET_CODE (set) == SET
          && REG_P (SET_SRC (set))
          && VALID_AVX256_REG_MODE (GET_MODE (SET_SRC (set)))))
    {
      enum upper_128bits_state *state
        = (enum upper_128bits_state *) data;
      *state = used;
    }
}
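
/* Usage sketch: this is a note_stores callback.  note_stores walks
   every SET/CLOBBER in a pattern and invokes the callback with the
   store destination, so the scan below reduces to

     enum upper_128bits_state state = unused;
     note_stores (PATTERN (insn), check_avx256_stores, &state);
     // state == used iff the insn writes a register in a 256bit
     // AVX mode (or its SET source is such a register)

   which is exactly how it is invoked in move_or_delete_vzeroupper_2.  */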
/* Helper function for move_or_delete_vzeroupper_1.  Look for vzeroupper
   in basic block BB.  Delete it if upper 128bit AVX registers are
   unused.  If it isn't deleted, move it to just before a jump insn.

   STATE is state of the upper 128bits of AVX registers at entry.  */

static void
move_or_delete_vzeroupper_2 (basic_block bb,
                             enum upper_128bits_state state)
{
  rtx insn, bb_end;
  rtx vzeroupper_insn = NULL_RTX;
  rtx pat;
  int avx256;
  bool unchanged;

  if (BLOCK_INFO (bb)->unchanged)
    {
      if (dump_file)
        fprintf (dump_file, " [bb %i] unchanged: upper 128bits: %d\n",
                 bb->index, state);

      BLOCK_INFO (bb)->state = state;
      return;
    }

  if (BLOCK_INFO (bb)->scanned && BLOCK_INFO (bb)->prev == state)
    {
      if (dump_file)
        fprintf (dump_file, " [bb %i] scanned: upper 128bits: %d\n",
                 bb->index, BLOCK_INFO (bb)->state);
      return;
    }

  BLOCK_INFO (bb)->prev = state;

  if (dump_file)
    fprintf (dump_file, " [bb %i] entry: upper 128bits: %d\n",
             bb->index, state);

  unchanged = true;

  /* BB_END changes when it is deleted.  */
  bb_end = BB_END (bb);
  insn = BB_HEAD (bb);
  while (insn != bb_end)
    {
      insn = NEXT_INSN (insn);

      if (!NONDEBUG_INSN_P (insn))
        continue;

      /* Move vzeroupper before jump/call.  */
      if (JUMP_P (insn) || CALL_P (insn))
        {
          if (!vzeroupper_insn)
            continue;

          if (PREV_INSN (insn) != vzeroupper_insn)
            {
              if (dump_file)
                {
                  fprintf (dump_file, "Move vzeroupper after:\n");
                  print_rtl_single (dump_file, PREV_INSN (insn));
                  fprintf (dump_file, "before:\n");
                  print_rtl_single (dump_file, insn);
                }
              reorder_insns_nobb (vzeroupper_insn, vzeroupper_insn,
                                  PREV_INSN (insn));
            }
          vzeroupper_insn = NULL_RTX;
          continue;
        }

      pat = PATTERN (insn);

      /* Check insn for vzeroupper intrinsic.  */
      if (GET_CODE (pat) == UNSPEC_VOLATILE
          && XINT (pat, 1) == UNSPECV_VZEROUPPER)
        {
          if (dump_file)
            {
              /* Found vzeroupper intrinsic.  */
              fprintf (dump_file, "Found vzeroupper:\n");
              print_rtl_single (dump_file, insn);
            }
        }
      else
        {
          /* Check insn for vzeroall intrinsic.  */
          if (GET_CODE (pat) == PARALLEL
              && GET_CODE (XVECEXP (pat, 0, 0)) == UNSPEC_VOLATILE
              && XINT (XVECEXP (pat, 0, 0), 1) == UNSPECV_VZEROALL)
            {
              state = unused;
              unchanged = false;

              /* Delete pending vzeroupper insertion.  */
              if (vzeroupper_insn)
                {
                  delete_insn (vzeroupper_insn);
                  vzeroupper_insn = NULL_RTX;
                }
            }
          else if (state != used)
            {
              note_stores (pat, check_avx256_stores, &state);
              if (state == used)
                unchanged = false;
            }
          continue;
        }

      /* Process vzeroupper intrinsic.  */
      avx256 = INTVAL (XVECEXP (pat, 0, 0));

      if (state == unused)
        {
          /* Since the upper 128bits are cleared, callee must not pass
             256bit AVX register.  We only need to check if callee
             returns 256bit AVX register.  */
          if (avx256 == callee_return_avx256)
            {
              state = used;
              unchanged = false;
            }

          /* Remove unnecessary vzeroupper since upper 128bits are
             cleared.  */
          if (dump_file)
            {
              fprintf (dump_file, "Delete redundant vzeroupper:\n");
              print_rtl_single (dump_file, insn);
            }
          delete_insn (insn);
        }
      else
        {
          /* Set state to UNUSED if callee doesn't return 256bit AVX
             register.  */
          if (avx256 != callee_return_pass_avx256)
            state = unused;

          if (avx256 == callee_return_pass_avx256
              || avx256 == callee_pass_avx256)
            {
              /* Must remove vzeroupper since callee passes in 256bit
                 AVX register.  */
              if (dump_file)
                {
                  fprintf (dump_file, "Delete callee pass vzeroupper:\n");
                  print_rtl_single (dump_file, insn);
                }
              delete_insn (insn);
            }
          else
            {
              vzeroupper_insn = insn;
              unchanged = false;
            }
        }
    }

  BLOCK_INFO (bb)->state = state;
  BLOCK_INFO (bb)->unchanged = unchanged;
  BLOCK_INFO (bb)->scanned = true;

  if (dump_file)
    fprintf (dump_file, " [bb %i] exit: %s: upper 128bits: %d\n",
             bb->index, unchanged ? "unchanged" : "changed",
             state);
}
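
/* Effect of the scan above, informally (a sketch, not dump output):
   in a block whose incoming upper-128bit state is UNUSED,

       vzeroupper                 ;; deleted as redundant
       ...
       call foo

   while in a block where a 256bit AVX store made the state USED, the
   vzeroupper is kept, and if straight-line code separates it from the
   jump/call it protects, it is moved to just before that insn.  */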
/* Helper function for move_or_delete_vzeroupper.  Process vzeroupper
   in BLOCK and check its predecessor blocks.  Treat UNKNOWN state
   as UNUSED if UNKNOWN_IS_UNUSED is true.  Return TRUE if the exit
   state is changed.  */

static bool
move_or_delete_vzeroupper_1 (basic_block block, bool unknown_is_unused)
{
  edge e;
  edge_iterator ei;
  enum upper_128bits_state state, old_state, new_state;
  bool seen_unknown;

  if (dump_file)
    fprintf (dump_file, " Process [bb %i]: status: %d\n",
             block->index, BLOCK_INFO (block)->processed);

  if (BLOCK_INFO (block)->processed)
    return false;

  state = unused;

  /* Check all predecessor edges of this block.  */
  seen_unknown = false;
  FOR_EACH_EDGE (e, ei, block->preds)
    {
      if (e->src == block)
        continue;
      switch (BLOCK_INFO (e->src)->state)
        {
        case unknown:
          if (!unknown_is_unused)
            seen_unknown = true;
          break;
        case unused:
          break;
        case used:
          state = used;
          break;
        }
    }

  if (seen_unknown)
    state = unknown;

  old_state = BLOCK_INFO (block)->state;
  move_or_delete_vzeroupper_2 (block, state);
  new_state = BLOCK_INFO (block)->state;

  if (state != unknown || new_state == used)
    BLOCK_INFO (block)->processed = true;

  /* Need to rescan if the upper 128bits of AVX registers are changed
     to USED at exit.  */
  if (new_state != old_state)
    {
      if (new_state == used)
        cfun->machine->rescan_vzeroupper_p = 1;
      return true;
    }
  else
    return false;
}
/* Go through the instruction stream looking for vzeroupper.  Delete
   it if upper 128bit AVX registers are unused.  If it isn't deleted,
   move it to just before a jump insn.  */

static void
move_or_delete_vzeroupper (void)
{
  edge e;
  edge_iterator ei;
  basic_block bb;
  fibheap_t worklist, pending, fibheap_swap;
  sbitmap visited, in_worklist, in_pending, sbitmap_swap;
  int *bb_order;
  int *rc_order;
  int i;

  /* Set up block info for each basic block.  */
  alloc_aux_for_blocks (sizeof (struct block_info_def));

  /* Process outgoing edges of entry point.  */
  if (dump_file)
    fprintf (dump_file, "Process outgoing edges of entry point\n");

  FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR->succs)
    {
      move_or_delete_vzeroupper_2 (e->dest,
                                   cfun->machine->caller_pass_avx256_p
                                   ? used : unused);
      BLOCK_INFO (e->dest)->processed = true;
    }

  /* Compute reverse completion order of depth first search of the CFG
     so that the data-flow runs faster.  */
  rc_order = XNEWVEC (int, n_basic_blocks - NUM_FIXED_BLOCKS);
  bb_order = XNEWVEC (int, last_basic_block);
  pre_and_rev_post_order_compute (NULL, rc_order, false);
  for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
    bb_order[rc_order[i]] = i;
  free (rc_order);

  worklist = fibheap_new ();
  pending = fibheap_new ();
  visited = sbitmap_alloc (last_basic_block);
  in_worklist = sbitmap_alloc (last_basic_block);
  in_pending = sbitmap_alloc (last_basic_block);
  sbitmap_zero (in_worklist);

  /* Don't check outgoing edges of entry point.  */
  sbitmap_ones (in_pending);
  FOR_EACH_BB (bb)
    if (BLOCK_INFO (bb)->processed)
      RESET_BIT (in_pending, bb->index);
    else
      {
        move_or_delete_vzeroupper_1 (bb, false);
        fibheap_insert (pending, bb_order[bb->index], bb);
      }

  if (dump_file)
    fprintf (dump_file, "Check remaining basic blocks\n");

  while (!fibheap_empty (pending))
    {
      fibheap_swap = pending;
      pending = worklist;
      worklist = fibheap_swap;
      sbitmap_swap = in_pending;
      in_pending = in_worklist;
      in_worklist = sbitmap_swap;

      sbitmap_zero (visited);

      cfun->machine->rescan_vzeroupper_p = 0;

      while (!fibheap_empty (worklist))
        {
          bb = (basic_block) fibheap_extract_min (worklist);
          RESET_BIT (in_worklist, bb->index);
          gcc_assert (!TEST_BIT (visited, bb->index));
          if (!TEST_BIT (visited, bb->index))
            {
              edge_iterator ei;

              SET_BIT (visited, bb->index);

              if (move_or_delete_vzeroupper_1 (bb, false))
                FOR_EACH_EDGE (e, ei, bb->succs)
                  {
                    if (e->dest == EXIT_BLOCK_PTR
                        || BLOCK_INFO (e->dest)->processed)
                      continue;

                    if (TEST_BIT (visited, e->dest->index))
                      {
                        if (!TEST_BIT (in_pending, e->dest->index))
                          {
                            /* Send E->DEST to next round.  */
                            SET_BIT (in_pending, e->dest->index);
                            fibheap_insert (pending,
                                            bb_order[e->dest->index],
                                            e->dest);
                          }
                      }
                    else if (!TEST_BIT (in_worklist, e->dest->index))
                      {
                        /* Add E->DEST to current round.  */
                        SET_BIT (in_worklist, e->dest->index);
                        fibheap_insert (worklist, bb_order[e->dest->index],
                                        e->dest);
                      }
                  }
            }
        }

      if (!cfun->machine->rescan_vzeroupper_p)
        break;
    }

  free (bb_order);
  fibheap_delete (worklist);
  fibheap_delete (pending);
  sbitmap_free (visited);
  sbitmap_free (in_worklist);
  sbitmap_free (in_pending);

  if (dump_file)
    fprintf (dump_file, "Process remaining basic blocks\n");

  FOR_EACH_BB (bb)
    move_or_delete_vzeroupper_1 (bb, true);

  free_aux_for_blocks ();
}
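
/* Informal sketch of the iteration scheme above: PENDING holds blocks
   for the next round and WORKLIST blocks for the current one, both
   keyed by reverse completion order so extract-min visits blocks in an
   order friendly to forward dataflow.  Each round swaps the two heaps,
   clears VISITED, and propagates along successor edges whenever a
   block's exit state changed; iteration stops when a whole round
   completes without rescan_vzeroupper_p being set.  Blocks still
   undecided because of UNKNOWN predecessors are finished by the final
   pass with unknown_is_unused == true.  */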
static rtx legitimize_dllimport_symbol (rtx, bool);

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode)                                \
  ((mode) == QImode ? 0                                 \
   : (mode) == HImode ? 1                               \
   : (mode) == SImode ? 2                               \
   : (mode) == DImode ? 3                               \
   : 4)

/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)
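
/* Reading these two macros together (a sketch; mult_init/divide are
   assumed to be the processor_costs array fields these tables
   initialize below): MODE_INDEX turns a machine mode into a row of the
   5-entry multiply/divide cost arrays, e.g.

     cost->mult_init[MODE_INDEX (SImode)]   // SI entry, index 2

   and, under the stated assumption that COSTS_N_INSNS (N) is (N) * 4,
   COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1), so a 2-byte add scores
   the same whether a table measures size or speed.  */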
#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
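
/* How the stringop tables below read (a sketch of the convention the
   initializers follow): the first member is the algorithm for unknown
   block sizes; each following {max, alg} pair applies to known sizes
   up to MAX bytes, with -1 as the catch-all.  So

     {libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}}

   means "unknown size: call the library; up to 256 bytes: rep movsl;
   larger: call the library".  DUMMY_STRINGOP_ALGS fills the unused
   64bit variant on 32bit-only tunings.  */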
static const
struct processor_costs ix86_size_cost = {/* costs for tuning for size */
  COSTS_N_BYTES (2),            /* cost of an add instruction */
  COSTS_N_BYTES (3),            /* cost of a lea instruction */
  COSTS_N_BYTES (2),            /* variable shift costs */
  COSTS_N_BYTES (3),            /* constant shift costs */
  {COSTS_N_BYTES (3),           /* cost of starting multiply for QI */
   COSTS_N_BYTES (3),           /* HI */
   COSTS_N_BYTES (3),           /* SI */
   COSTS_N_BYTES (3),           /* DI */
   COSTS_N_BYTES (5)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_BYTES (3),           /* cost of a divide/mod for QI */
   COSTS_N_BYTES (3),           /* HI */
   COSTS_N_BYTES (3),           /* SI */
   COSTS_N_BYTES (3),           /* DI */
   COSTS_N_BYTES (5)},          /* other */
  COSTS_N_BYTES (3),            /* cost of movsx */
  COSTS_N_BYTES (3),            /* cost of movzx */
  0,                            /* "large" insn */
  2,                            /* MOVE_RATIO */
  2,                            /* cost for loading QImode using movzbl */
  {2, 2, 2},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 2, 2},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {2, 2, 2},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {2, 2, 2},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  3,                            /* cost of moving MMX register */
  {3, 3},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {3, 3},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  3,                            /* cost of moving SSE register */
  {3, 3, 3},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {3, 3, 3},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
  0,                            /* size of l1 cache */
  0,                            /* size of l2 cache */
  0,                            /* size of prefetch block */
  0,                            /* number of parallel prefetches */
  1,                            /* Branch cost */
  COSTS_N_BYTES (2),            /* cost of FADD and FSUB insns.  */
  COSTS_N_BYTES (2),            /* cost of FMUL instruction.  */
  COSTS_N_BYTES (2),            /* cost of FDIV instruction.  */
  COSTS_N_BYTES (2),            /* cost of FABS instruction.  */
  COSTS_N_BYTES (2),            /* cost of FCHS instruction.  */
  COSTS_N_BYTES (2),            /* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  1,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  1,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {    /* 386 specific costs */
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (3),            /* variable shift costs */
  COSTS_N_INSNS (2),            /* constant shift costs */
  {COSTS_N_INSNS (6),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (6),           /* HI */
   COSTS_N_INSNS (6),           /* SI */
   COSTS_N_INSNS (6),           /* DI */
   COSTS_N_INSNS (6)},          /* other */
  COSTS_N_INSNS (1),            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (23),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),          /* HI */
   COSTS_N_INSNS (23),          /* SI */
   COSTS_N_INSNS (23),          /* DI */
   COSTS_N_INSNS (23)},         /* other */
  COSTS_N_INSNS (3),            /* cost of movsx */
  COSTS_N_INSNS (2),            /* cost of movzx */
  15,                           /* "large" insn */
  3,                            /* MOVE_RATIO */
  4,                            /* cost for loading QImode using movzbl */
  {2, 4, 2},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 4, 2},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {8, 8, 8},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {8, 8, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {4, 8},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 8},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 8, 16},                   /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 8, 16},                   /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
  0,                            /* size of l1 cache */
  0,                            /* size of l2 cache */
  0,                            /* size of prefetch block */
  0,                            /* number of parallel prefetches */
  1,                            /* Branch cost */
  COSTS_N_INSNS (23),           /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (27),           /* cost of FMUL instruction.  */
  COSTS_N_INSNS (88),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (22),           /* cost of FABS instruction.  */
  COSTS_N_INSNS (24),           /* cost of FCHS instruction.  */
  COSTS_N_INSNS (122),          /* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs i486_cost = {    /* 486 specific costs */
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (3),            /* variable shift costs */
  COSTS_N_INSNS (2),            /* constant shift costs */
  {COSTS_N_INSNS (12),          /* cost of starting multiply for QI */
   COSTS_N_INSNS (12),          /* HI */
   COSTS_N_INSNS (12),          /* SI */
   COSTS_N_INSNS (12),          /* DI */
   COSTS_N_INSNS (12)},         /* other */
  1,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (40),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (40),          /* HI */
   COSTS_N_INSNS (40),          /* SI */
   COSTS_N_INSNS (40),          /* DI */
   COSTS_N_INSNS (40)},         /* other */
  COSTS_N_INSNS (3),            /* cost of movsx */
  COSTS_N_INSNS (2),            /* cost of movzx */
  15,                           /* "large" insn */
  3,                            /* MOVE_RATIO */
  4,                            /* cost for loading QImode using movzbl */
  {2, 4, 2},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 4, 2},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {8, 8, 8},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {8, 8, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {4, 8},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 8},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 8, 16},                   /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 8, 16},                   /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
  4,                            /* size of l1 cache.  486 has 8kB cache
                                   shared for code and data, so 4kB is
                                   not really precise.  */
  4,                            /* size of l2 cache */
  0,                            /* size of prefetch block */
  0,                            /* number of parallel prefetches */
  1,                            /* Branch cost */
  COSTS_N_INSNS (8),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (16),           /* cost of FMUL instruction.  */
  COSTS_N_INSNS (73),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (3),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (83),           /* cost of FSQRT instruction.  */
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (4),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (11),          /* cost of starting multiply for QI */
   COSTS_N_INSNS (11),          /* HI */
   COSTS_N_INSNS (11),          /* SI */
   COSTS_N_INSNS (11),          /* DI */
   COSTS_N_INSNS (11)},         /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (25),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (25),          /* HI */
   COSTS_N_INSNS (25),          /* SI */
   COSTS_N_INSNS (25),          /* DI */
   COSTS_N_INSNS (25)},         /* other */
  COSTS_N_INSNS (3),            /* cost of movsx */
  COSTS_N_INSNS (2),            /* cost of movzx */
  8,                            /* "large" insn */
  6,                            /* MOVE_RATIO */
  6,                            /* cost for loading QImode using movzbl */
  {2, 4, 2},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 4, 2},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {2, 2, 6},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 4, 6},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  8,                            /* cost of moving MMX register */
  {8, 8},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {8, 8},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 8, 16},                   /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 8, 16},                   /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
  8,                            /* size of l1 cache.  */
  8,                            /* size of l2 cache */
  0,                            /* size of prefetch block */
  0,                            /* number of parallel prefetches */
  2,                            /* Branch cost */
  COSTS_N_INSNS (3),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (3),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (39),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (70),           /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (4),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (4),           /* SI */
   COSTS_N_INSNS (4),           /* DI */
   COSTS_N_INSNS (4)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (17),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (17),          /* HI */
   COSTS_N_INSNS (17),          /* SI */
   COSTS_N_INSNS (17),          /* DI */
   COSTS_N_INSNS (17)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  6,                            /* MOVE_RATIO */
  2,                            /* cost for loading QImode using movzbl */
  {4, 4, 4},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 2, 2},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {2, 2, 6},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 4, 6},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {2, 2},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {2, 2},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {2, 2, 8},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {2, 2, 8},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
  8,                            /* size of l1 cache.  */
  256,                          /* size of l2 cache */
  32,                           /* size of prefetch block */
  6,                            /* number of parallel prefetches */
  2,                            /* Branch cost */
  COSTS_N_INSNS (3),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),           /* cost of FSQRT instruction.  */
  /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
     (we ensure the alignment).  For small blocks an inline loop is still a
     noticeable win; for bigger blocks either rep movsl or rep movsb is the
     way to go.  Rep movsb apparently has more expensive startup time in the
     CPU, but after 4K the difference is down in the noise.  */
  {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
                        {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{1024, unrolled_loop},
                        {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs geode_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (2),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (3),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (7),           /* SI */
   COSTS_N_INSNS (7),           /* DI */
   COSTS_N_INSNS (7)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (15),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),          /* HI */
   COSTS_N_INSNS (39),          /* SI */
   COSTS_N_INSNS (39),          /* DI */
   COSTS_N_INSNS (39)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  4,                            /* MOVE_RATIO */
  1,                            /* cost for loading QImode using movzbl */
  {1, 1, 1},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {1, 1, 1},                    /* cost of storing integer registers */
  1,                            /* cost of reg,reg fld/fst */
  {1, 1, 1},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 6, 6},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */

  1,                            /* cost of moving MMX register */
  {1, 1},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {1, 1},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  1,                            /* cost of moving SSE register */
  {1, 1, 1},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {1, 1, 1},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  1,                            /* MMX or SSE register to integer */
  64,                           /* size of l1 cache.  */
  128,                          /* size of l2 cache.  */
  32,                           /* size of prefetch block */
  1,                            /* number of parallel prefetches */
  1,                            /* Branch cost */
  COSTS_N_INSNS (6),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (11),           /* cost of FMUL instruction.  */
  COSTS_N_INSNS (47),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (54),           /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs k6_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (2),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (3),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (3),           /* HI */
   COSTS_N_INSNS (3),           /* SI */
   COSTS_N_INSNS (3),           /* DI */
   COSTS_N_INSNS (3)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (18),          /* HI */
   COSTS_N_INSNS (18),          /* SI */
   COSTS_N_INSNS (18),          /* DI */
   COSTS_N_INSNS (18)},         /* other */
  COSTS_N_INSNS (2),            /* cost of movsx */
  COSTS_N_INSNS (2),            /* cost of movzx */
  8,                            /* "large" insn */
  32,                           /* MOVE_RATIO */
  3,                            /* cost for loading QImode using movzbl */
  {4, 5, 4},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 3, 2},                    /* cost of storing integer registers */
  4,                            /* cost of reg,reg fld/fst */
  {6, 6, 6},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 4, 4},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {2, 2},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {2, 2},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {2, 2, 8},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {2, 2, 8},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  6,                            /* MMX or SSE register to integer */
  32,                           /* size of l1 cache.  */
  32,                           /* size of l2 cache.  Some models
                                   have integrated l2 cache, but
                                   optimizing for k6 is not important
                                   enough to worry about that.  */
  32,                           /* size of prefetch block */
  1,                            /* number of parallel prefetches */
  1,                            /* Branch cost */
  COSTS_N_INSNS (2),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (2),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),           /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (2),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (5),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (5),           /* HI */
   COSTS_N_INSNS (5),           /* SI */
   COSTS_N_INSNS (5),           /* DI */
   COSTS_N_INSNS (5)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),          /* HI */
   COSTS_N_INSNS (42),          /* SI */
   COSTS_N_INSNS (74),          /* DI */
   COSTS_N_INSNS (74)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  9,                            /* MOVE_RATIO */
  4,                            /* cost for loading QImode using movzbl */
  {3, 4, 3},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {3, 4, 3},                    /* cost of storing integer registers */
  4,                            /* cost of reg,reg fld/fst */
  {4, 4, 12},                   /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {6, 6, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {4, 4},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 4},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 4, 6},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 4, 5},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  5,                            /* MMX or SSE register to integer */
  64,                           /* size of l1 cache.  */
  256,                          /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  6,                            /* number of parallel prefetches */
  5,                            /* Branch cost */
  COSTS_N_INSNS (4),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (24),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),           /* cost of FSQRT instruction.  */
  /* For some reason, Athlon deals better with REP prefix (relative to loops)
     compared to K8.  Alignment becomes important after 8 bytes for memcpy and
     128 bytes for memset.  */
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs k8_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (2),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (3),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (3),           /* SI */
   COSTS_N_INSNS (4),           /* DI */
   COSTS_N_INSNS (5)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),          /* HI */
   COSTS_N_INSNS (42),          /* SI */
   COSTS_N_INSNS (74),          /* DI */
   COSTS_N_INSNS (74)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  9,                            /* MOVE_RATIO */
  4,                            /* cost for loading QImode using movzbl */
  {3, 4, 3},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {3, 4, 3},                    /* cost of storing integer registers */
  4,                            /* cost of reg,reg fld/fst */
  {4, 4, 12},                   /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {6, 6, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {3, 3},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 4},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 3, 6},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 4, 5},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  5,                            /* MMX or SSE register to integer */
  64,                           /* size of l1 cache.  */
  512,                          /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                          /* number of parallel prefetches */
  3,                            /* Branch cost */
  COSTS_N_INSNS (4),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),           /* cost of FSQRT instruction.  */
  /* K8 has optimized REP instruction for medium sized blocks, but for very
     small blocks it is better to use loop.  For large blocks, libcall can
     do nontemporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                            /* scalar_stmt_cost.  */
  2,                            /* scalar load_cost.  */
  2,                            /* scalar_store_cost.  */
  5,                            /* vec_stmt_cost.  */
  0,                            /* vec_to_scalar_cost.  */
  2,                            /* scalar_to_vec_cost.  */
  2,                            /* vec_align_load_cost.  */
  3,                            /* vec_unalign_load_cost.  */
  3,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  2,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs amdfam10_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (2),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (3),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (3),           /* SI */
   COSTS_N_INSNS (4),           /* DI */
   COSTS_N_INSNS (5)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),          /* HI */
   COSTS_N_INSNS (51),          /* SI */
   COSTS_N_INSNS (83),          /* DI */
   COSTS_N_INSNS (83)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  9,                            /* MOVE_RATIO */
  4,                            /* cost for loading QImode using movzbl */
  {3, 4, 3},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {3, 4, 3},                    /* cost of storing integer registers */
  4,                            /* cost of reg,reg fld/fst */
  {4, 4, 12},                   /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {6, 6, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {3, 3},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 4},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 4, 3},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 4, 5},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
                                /* On K8:
                                   MOVD reg64, xmmreg Double FSTORE 4
                                   MOVD reg32, xmmreg Double FSTORE 4
                                   On AMDFAM10:
                                   MOVD reg64, xmmreg Double FADD 3
                                                      1/1  1/1
                                   MOVD reg32, xmmreg Double FADD 3
                                                      1/1  1/1 */
  64,                           /* size of l1 cache.  */
  512,                          /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                          /* number of parallel prefetches */
  2,                            /* Branch cost */
  COSTS_N_INSNS (4),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),           /* cost of FSQRT instruction.  */

  /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall can
     do nontemporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                            /* scalar_stmt_cost.  */
  2,                            /* scalar load_cost.  */
  2,                            /* scalar_store_cost.  */
  6,                            /* vec_stmt_cost.  */
  0,                            /* vec_to_scalar_cost.  */
  2,                            /* scalar_to_vec_cost.  */
  2,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  2,                            /* vec_store_cost.  */
  2,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs bdver1_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (4),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (4),           /* SI */
   COSTS_N_INSNS (6),           /* DI */
   COSTS_N_INSNS (6)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),          /* HI */
   COSTS_N_INSNS (51),          /* SI */
   COSTS_N_INSNS (83),          /* DI */
   COSTS_N_INSNS (83)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  11,                           /* MOVE_RATIO */
  4,                            /* cost for loading QImode using movzbl */
  {5, 5, 4},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {4, 4, 4},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {5, 5, 12},                   /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 4, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {4, 4},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 4},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 4, 4},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 4, 4},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  2,                            /* MMX or SSE register to integer */
                                /* On K8:
                                   MOVD reg64, xmmreg Double FSTORE 4
                                   MOVD reg32, xmmreg Double FSTORE 4
                                   On AMDFAM10:
                                   MOVD reg64, xmmreg Double FADD 3
                                                      1/1  1/1
                                   MOVD reg32, xmmreg Double FADD 3
                                                      1/1  1/1 */
  16,                           /* size of l1 cache.  */
  2048,                         /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                          /* number of parallel prefetches */
  2,                            /* Branch cost */
  COSTS_N_INSNS (6),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),           /* cost of FSQRT instruction.  */

  /* BDVER1 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall
     can do nontemporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  6,                            /* scalar_stmt_cost.  */
  4,                            /* scalar load_cost.  */
  4,                            /* scalar_store_cost.  */
  6,                            /* vec_stmt_cost.  */
  0,                            /* vec_to_scalar_cost.  */
  2,                            /* scalar_to_vec_cost.  */
  4,                            /* vec_align_load_cost.  */
  4,                            /* vec_unalign_load_cost.  */
  4,                            /* vec_store_cost.  */
  2,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs bdver2_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (4),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (4),           /* SI */
   COSTS_N_INSNS (6),           /* DI */
   COSTS_N_INSNS (6)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),          /* HI */
   COSTS_N_INSNS (51),          /* SI */
   COSTS_N_INSNS (83),          /* DI */
   COSTS_N_INSNS (83)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  11,                           /* MOVE_RATIO */
  4,                            /* cost for loading QImode using movzbl */
  {5, 5, 4},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {4, 4, 4},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {5, 5, 12},                   /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 4, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {4, 4},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 4},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 4, 4},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 4, 4},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  2,                            /* MMX or SSE register to integer */
                                /* On K8:
                                   MOVD reg64, xmmreg Double FSTORE 4
                                   MOVD reg32, xmmreg Double FSTORE 4
                                   On AMDFAM10:
                                   MOVD reg64, xmmreg Double FADD 3
                                                      1/1  1/1
                                   MOVD reg32, xmmreg Double FADD 3
                                                      1/1  1/1 */
  16,                           /* size of l1 cache.  */
  2048,                         /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                          /* number of parallel prefetches */
  2,                            /* Branch cost */
  COSTS_N_INSNS (6),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),           /* cost of FSQRT instruction.  */

  /* BDVER2 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall
     can do nontemporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  6,                            /* scalar_stmt_cost.  */
  4,                            /* scalar load_cost.  */
  4,                            /* scalar_store_cost.  */
  6,                            /* vec_stmt_cost.  */
  0,                            /* vec_to_scalar_cost.  */
  2,                            /* scalar_to_vec_cost.  */
  4,                            /* vec_align_load_cost.  */
  4,                            /* vec_unalign_load_cost.  */
  4,                            /* vec_store_cost.  */
  2,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs btver1_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (2),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (3),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (3),           /* SI */
   COSTS_N_INSNS (4),           /* DI */
   COSTS_N_INSNS (5)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),          /* HI */
   COSTS_N_INSNS (51),          /* SI */
   COSTS_N_INSNS (83),          /* DI */
   COSTS_N_INSNS (83)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  9,                            /* MOVE_RATIO */
  4,                            /* cost for loading QImode using movzbl */
  {3, 4, 3},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {3, 4, 3},                    /* cost of storing integer registers */
  4,                            /* cost of reg,reg fld/fst */
  {4, 4, 12},                   /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {6, 6, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {3, 3},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 4},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 4, 3},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 4, 5},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
                                /* On K8:
                                   MOVD reg64, xmmreg Double FSTORE 4
                                   MOVD reg32, xmmreg Double FSTORE 4
                                   On AMDFAM10:
                                   MOVD reg64, xmmreg Double FADD 3
                                                      1/1  1/1
                                   MOVD reg32, xmmreg Double FADD 3
                                                      1/1  1/1 */
  32,                           /* size of l1 cache.  */
  512,                          /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  100,                          /* number of parallel prefetches */
  2,                            /* Branch cost */
  COSTS_N_INSNS (4),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),           /* cost of FSQRT instruction.  */

  /* BTVER1 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall can
     do nontemporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                            /* scalar_stmt_cost.  */
  2,                            /* scalar load_cost.  */
  2,                            /* scalar_store_cost.  */
  6,                            /* vec_stmt_cost.  */
  0,                            /* vec_to_scalar_cost.  */
  2,                            /* scalar_to_vec_cost.  */
  2,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  2,                            /* vec_store_cost.  */
  2,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs pentium4_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (3),            /* cost of a lea instruction */
  COSTS_N_INSNS (4),            /* variable shift costs */
  COSTS_N_INSNS (4),            /* constant shift costs */
  {COSTS_N_INSNS (15),          /* cost of starting multiply for QI */
   COSTS_N_INSNS (15),          /* HI */
   COSTS_N_INSNS (15),          /* SI */
   COSTS_N_INSNS (15),          /* DI */
   COSTS_N_INSNS (15)},         /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (56),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (56),          /* HI */
   COSTS_N_INSNS (56),          /* SI */
   COSTS_N_INSNS (56),          /* DI */
   COSTS_N_INSNS (56)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  16,                           /* "large" insn */
  6,                            /* MOVE_RATIO */
  2,                            /* cost for loading QImode using movzbl */
  {4, 5, 4},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 3, 2},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {2, 2, 6},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 4, 6},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {2, 2},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {2, 2},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  12,                           /* cost of moving SSE register */
  {12, 12, 12},                 /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {2, 2, 8},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  10,                           /* MMX or SSE register to integer */
  8,                            /* size of l1 cache.  */
  256,                          /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  6,                            /* number of parallel prefetches */
  2,                            /* Branch cost */
  COSTS_N_INSNS (5),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (7),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (43),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (43),           /* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
              {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs nocona_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (10),          /* cost of starting multiply for QI */
   COSTS_N_INSNS (10),          /* HI */
   COSTS_N_INSNS (10),          /* SI */
   COSTS_N_INSNS (10),          /* DI */
   COSTS_N_INSNS (10)},         /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (66),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (66),          /* HI */
   COSTS_N_INSNS (66),          /* SI */
   COSTS_N_INSNS (66),          /* DI */
   COSTS_N_INSNS (66)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  16,                           /* "large" insn */
  17,                           /* MOVE_RATIO */
  4,                            /* cost for loading QImode using movzbl */
  {4, 4, 4},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {4, 4, 4},                    /* cost of storing integer registers */
  3,                            /* cost of reg,reg fld/fst */
  {12, 12, 12},                 /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 4, 4},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  6,                            /* cost of moving MMX register */
  {12, 12},                     /* cost of loading MMX registers
                                   in SImode and DImode */
  {12, 12},                     /* cost of storing MMX registers
                                   in SImode and DImode */
  6,                            /* cost of moving SSE register */
  {12, 12, 12},                 /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {12, 12, 12},                 /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  8,                            /* MMX or SSE register to integer */
  8,                            /* size of l1 cache.  */
  1024,                         /* size of l2 cache.  */
  128,                          /* size of prefetch block */
  8,                            /* number of parallel prefetches */
  1,                            /* Branch cost */
  COSTS_N_INSNS (6),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (40),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (3),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (44),           /* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
              {100000, unrolled_loop}, {-1, libcall}}}},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
              {-1, libcall}}},
   {libcall, {{24, loop}, {64, unrolled_loop},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs atom_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,        /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (3),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (3),           /* SI */
   COSTS_N_INSNS (4),           /* DI */
   COSTS_N_INSNS (2)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),          /* HI */
   COSTS_N_INSNS (42),          /* SI */
   COSTS_N_INSNS (74),          /* DI */
   COSTS_N_INSNS (74)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  17,                           /* MOVE_RATIO */
  2,                            /* cost for loading QImode using movzbl */
  {4, 4, 4},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {4, 4, 4},                    /* cost of storing integer registers */
  4,                            /* cost of reg,reg fld/fst */
  {12, 12, 12},                 /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {6, 6, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {8, 8},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {8, 8},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {8, 8, 8},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {8, 8, 8},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  5,                            /* MMX or SSE register to integer */
  32,                           /* size of l1 cache.  */
  256,                          /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  6,                            /* number of parallel prefetches */
  3,                            /* Branch cost */
  COSTS_N_INSNS (8),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (8),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),           /* cost of FSQRT instruction.  */
  {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {64, rep_prefix_4_byte},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {15, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{24, loop}, {32, unrolled_loop},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
1727 /* Generic64 should produce code tuned for Nocona and K8. */
1729 struct processor_costs generic64_cost = {
1730 COSTS_N_INSNS (1), /* cost of an add instruction */
1731 /* On all chips taken into consideration, lea is 2 cycles or more. With
1732 this cost, however, our current implementation of synth_mult results in
1733 the use of unnecessary temporary registers, causing regressions on several
1734 SPECfp benchmarks. */
1735 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1736 COSTS_N_INSNS (1), /* variable shift costs */
1737 COSTS_N_INSNS (1), /* constant shift costs */
1738 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1739 COSTS_N_INSNS (4), /* HI */
1740 COSTS_N_INSNS (3), /* SI */
1741 COSTS_N_INSNS (4), /* DI */
1742 COSTS_N_INSNS (2)}, /* other */
1743 0, /* cost of multiply per each bit set */
1744 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1745 COSTS_N_INSNS (26), /* HI */
1746 COSTS_N_INSNS (42), /* SI */
1747 COSTS_N_INSNS (74), /* DI */
1748 COSTS_N_INSNS (74)}, /* other */
1749 COSTS_N_INSNS (1), /* cost of movsx */
1750 COSTS_N_INSNS (1), /* cost of movzx */
1751 8, /* "large" insn */
1752 17, /* MOVE_RATIO */
1753 4, /* cost for loading QImode using movzbl */
1754 {4, 4, 4}, /* cost of loading integer registers
1755 in QImode, HImode and SImode.
1756 Relative to reg-reg move (2). */
1757 {4, 4, 4}, /* cost of storing integer registers */
1758 4, /* cost of reg,reg fld/fst */
1759 {12, 12, 12}, /* cost of loading fp registers
1760 in SFmode, DFmode and XFmode */
1761 {6, 6, 8}, /* cost of storing fp registers
1762 in SFmode, DFmode and XFmode */
1763 2, /* cost of moving MMX register */
1764 {8, 8}, /* cost of loading MMX registers
1765 in SImode and DImode */
1766 {8, 8}, /* cost of storing MMX registers
1767 in SImode and DImode */
1768 2, /* cost of moving SSE register */
1769 {8, 8, 8}, /* cost of loading SSE registers
1770 in SImode, DImode and TImode */
1771 {8, 8, 8}, /* cost of storing SSE registers
1772 in SImode, DImode and TImode */
1773 5, /* MMX or SSE register to integer */
1774 32, /* size of l1 cache. */
1775 512, /* size of l2 cache. */
1776 64, /* size of prefetch block */
1777 6, /* number of parallel prefetches */
1778 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1779 value is increased to the perhaps more appropriate value of 5. */
1780 3, /* Branch cost */
1781 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1782 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1783 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1784 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1785 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1786 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1787 {DUMMY_STRINGOP_ALGS,
1788 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1789 {DUMMY_STRINGOP_ALGS,
1790 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1791 1, /* scalar_stmt_cost. */
1792 1, /* scalar load_cost. */
1793 1, /* scalar_store_cost. */
1794 1, /* vec_stmt_cost. */
1795 1, /* vec_to_scalar_cost. */
1796 1, /* scalar_to_vec_cost. */
1797 1, /* vec_align_load_cost. */
1798 2, /* vec_unalign_load_cost. */
1799 1, /* vec_store_cost. */
1800 3, /* cond_taken_branch_cost. */
1801 1, /* cond_not_taken_branch_cost. */
1804 /* Generic32 should produce code tuned for PPro, Pentium4, Nocona,
1805 Athlon and K8. */
1807 struct processor_costs generic32_cost = {
1808 COSTS_N_INSNS (1), /* cost of an add instruction */
1809 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1810 COSTS_N_INSNS (1), /* variable shift costs */
1811 COSTS_N_INSNS (1), /* constant shift costs */
1812 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1813 COSTS_N_INSNS (4), /* HI */
1814 COSTS_N_INSNS (3), /* SI */
1815 COSTS_N_INSNS (4), /* DI */
1816 COSTS_N_INSNS (2)}, /* other */
1817 0, /* cost of multiply per each bit set */
1818 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1819 COSTS_N_INSNS (26), /* HI */
1820 COSTS_N_INSNS (42), /* SI */
1821 COSTS_N_INSNS (74), /* DI */
1822 COSTS_N_INSNS (74)}, /* other */
1823 COSTS_N_INSNS (1), /* cost of movsx */
1824 COSTS_N_INSNS (1), /* cost of movzx */
1825 8, /* "large" insn */
1826 17, /* MOVE_RATIO */
1827 4, /* cost for loading QImode using movzbl */
1828 {4, 4, 4}, /* cost of loading integer registers
1829 in QImode, HImode and SImode.
1830 Relative to reg-reg move (2). */
1831 {4, 4, 4}, /* cost of storing integer registers */
1832 4, /* cost of reg,reg fld/fst */
1833 {12, 12, 12}, /* cost of loading fp registers
1834 in SFmode, DFmode and XFmode */
1835 {6, 6, 8}, /* cost of storing fp registers
1836 in SFmode, DFmode and XFmode */
1837 2, /* cost of moving MMX register */
1838 {8, 8}, /* cost of loading MMX registers
1839 in SImode and DImode */
1840 {8, 8}, /* cost of storing MMX registers
1841 in SImode and DImode */
1842 2, /* cost of moving SSE register */
1843 {8, 8, 8}, /* cost of loading SSE registers
1844 in SImode, DImode and TImode */
1845 {8, 8, 8}, /* cost of storing SSE registers
1846 in SImode, DImode and TImode */
1847 5, /* MMX or SSE register to integer */
1848 32, /* size of l1 cache. */
1849 256, /* size of l2 cache. */
1850 64, /* size of prefetch block */
1851 6, /* number of parallel prefetches */
1852 3, /* Branch cost */
1853 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1854 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1855 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1856 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1857 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1858 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1859 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1860 DUMMY_STRINGOP_ALGS},
1861 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1862 DUMMY_STRINGOP_ALGS},
1863 1, /* scalar_stmt_cost. */
1864 1, /* scalar load_cost. */
1865 1, /* scalar_store_cost. */
1866 1, /* vec_stmt_cost. */
1867 1, /* vec_to_scalar_cost. */
1868 1, /* scalar_to_vec_cost. */
1869 1, /* vec_align_load_cost. */
1870 2, /* vec_unalign_load_cost. */
1871 1, /* vec_store_cost. */
1872 3, /* cond_taken_branch_cost. */
1873 1, /* cond_not_taken_branch_cost. */
1876 const struct processor_costs *ix86_cost = &pentium_cost;
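/* ix86_cost starts out pointing at the Pentium table and is repointed
   in ix86_option_override_internal, either to
   processor_target_table[ix86_tune].cost or to the size table defined
   near the top of this file when optimizing for size.  Consumers just
   dereference the active table; a build-excluded sketch of the usual
   pattern:  */
#if 0
static int
example_add_cost (int speed)
{
  /* Speed compilation reads the tuned table, size compilation the
     size-oriented one.  */
  const struct processor_costs *cost
    = speed ? ix86_cost : &ix86_size_cost;
  return cost->add;
}
#endif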
1878 /* Processor feature/optimization bitmasks. */
1879 #define m_386 (1<<PROCESSOR_I386)
1880 #define m_486 (1<<PROCESSOR_I486)
1881 #define m_PENT (1<<PROCESSOR_PENTIUM)
1882 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1883 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1884 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1885 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
1886 #define m_CORE2_32 (1<<PROCESSOR_CORE2_32)
1887 #define m_CORE2_64 (1<<PROCESSOR_CORE2_64)
1888 #define m_COREI7_32 (1<<PROCESSOR_COREI7_32)
1889 #define m_COREI7_64 (1<<PROCESSOR_COREI7_64)
1890 #define m_COREI7 (m_COREI7_32 | m_COREI7_64)
1891 #define m_CORE2I7_32 (m_CORE2_32 | m_COREI7_32)
1892 #define m_CORE2I7_64 (m_CORE2_64 | m_COREI7_64)
1893 #define m_CORE2I7 (m_CORE2I7_32 | m_CORE2I7_64)
1894 #define m_ATOM (1<<PROCESSOR_ATOM)
1896 #define m_GEODE (1<<PROCESSOR_GEODE)
1897 #define m_K6 (1<<PROCESSOR_K6)
1898 #define m_K6_GEODE (m_K6 | m_GEODE)
1899 #define m_K8 (1<<PROCESSOR_K8)
1900 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1901 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1902 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1903 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
1904 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
1905 #define m_BDVER (m_BDVER1 | m_BDVER2)
1906 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
1907 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1)
1909 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1910 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1912 /* Generic instruction choice should be common subset of supported CPUs
1913 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1914 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
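/* Each m_* mask has one bit per processor, so a tuning entry below is
   simply the OR of the processors that want the feature, and testing a
   particular CPU is a single AND.  A build-excluded sketch (the entry
   shown is X86_TUNE_PUSH_MEMORY's from the table below, which does not
   include m_ATOM):  */
#if 0
static int
example_tune_bit_test (void)
{
  unsigned int entry = m_386 | m_P4_NOCONA | m_CORE2I7 | m_K6_GEODE
		       | m_AMD_MULTIPLE | m_GENERIC;
  return (entry & m_ATOM) != 0;	/* 0: the Atom bit is not set */
}
#endif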
1916 /* Feature tests against the various tunings. */
1917 unsigned char ix86_tune_features[X86_TUNE_LAST];
1919 /* Feature tests against the various tunings used to create ix86_tune_features
1920 based on the processor mask. */
1921 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
1922 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1923 negatively, so enabling it for Generic64 seems like a good code size
1924 tradeoff. We can't enable it for 32bit generic because it does not
1925 work well with PPro based chips. */
1926 m_386 | m_CORE2I7_64 | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC64,
1928 /* X86_TUNE_PUSH_MEMORY */
1929 m_386 | m_P4_NOCONA | m_CORE2I7 | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
1931 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1934 /* X86_TUNE_UNROLL_STRLEN */
1935 m_486 | m_PENT | m_PPRO | m_ATOM | m_CORE2I7 | m_K6 | m_AMD_MULTIPLE | m_GENERIC,
1937 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1938 on simulation results. But after P4 was made, no performance benefit
1939 was observed with branch hints. They also increase the code size.
1940 As a result, icc never generates branch hints. */
1943 /* X86_TUNE_DOUBLE_WITH_ADD */
1946 /* X86_TUNE_USE_SAHF */
1947 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC,
1949 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1950 partial dependencies. */
1951 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GEODE | m_AMD_MULTIPLE | m_GENERIC,
1953 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1954 register stalls on the Generic32 compilation setting as well. However,
1955 in the current implementation the partial register stalls are not
1956 eliminated very well - they can be introduced via subregs synthesized
1957 by combine and can happen in caller/callee saving sequences. Because
1958 this option pays back little on PPro based chips and is in conflict
1959 with the partial reg dependencies used by Athlon/P4 based chips, it is
1960 better to leave it off for generic32 for now. */
1963 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1964 m_CORE2I7 | m_GENERIC,
1966 /* X86_TUNE_USE_HIMODE_FIOP */
1967 m_386 | m_486 | m_K6_GEODE,
1969 /* X86_TUNE_USE_SIMODE_FIOP */
1970 ~(m_PENT | m_PPRO | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC),
1972 /* X86_TUNE_USE_MOV0 */
1975 /* X86_TUNE_USE_CLTD */
1976 ~(m_PENT | m_CORE2I7 | m_ATOM | m_K6 | m_GENERIC),
1978 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1981 /* X86_TUNE_SPLIT_LONG_MOVES */
1984 /* X86_TUNE_READ_MODIFY_WRITE */
1987 /* X86_TUNE_READ_MODIFY */
1990 /* X86_TUNE_PROMOTE_QIMODE */
1991 m_386 | m_486 | m_PENT | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
1993 /* X86_TUNE_FAST_PREFIX */
1994 ~(m_386 | m_486 | m_PENT),
1996 /* X86_TUNE_SINGLE_STRINGOP */
1997 m_386 | m_P4_NOCONA,
1999 /* X86_TUNE_QIMODE_MATH */
2002 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
2003 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
2004 might be considered for Generic32 if our scheme for avoiding partial
2005 stalls was more effective. */
2008 /* X86_TUNE_PROMOTE_QI_REGS */
2011 /* X86_TUNE_PROMOTE_HI_REGS */
2014 /* X86_TUNE_SINGLE_POP: Enable if single pop insn is preferred
2015 over esp addition. */
2016 m_386 | m_486 | m_PENT | m_PPRO,
2018 /* X86_TUNE_DOUBLE_POP: Enable if double pop insn is preferred
2019 over esp addition. */
2022 /* X86_TUNE_SINGLE_PUSH: Enable if single push insn is preferred
2023 over esp subtraction. */
2024 m_386 | m_486 | m_PENT | m_K6_GEODE,
2026 /* X86_TUNE_DOUBLE_PUSH. Enable if double push insn is preferred
2027 over esp subtraction. */
2028 m_PENT | m_K6_GEODE,
2030 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
2031 for DFmode copies */
2032 ~(m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GEODE | m_AMD_MULTIPLE | m_GENERIC),
2034 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
2035 m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
2037 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
2038 conflict here between PPro/Pentium4 based chips that treat 128bit
2039 SSE registers as single units and K8 based chips that divide SSE
2040 registers into two 64bit halves. This knob promotes all store
2041 destinations to be 128bit to allow register renaming on 128bit SSE
2042 units, but usually results in one extra microop on 64bit SSE units.
2043 Experimental results show that disabling this option on P4 brings over
2044 a 20% SPECfp regression, while enabling it on K8 brings a roughly 2.4%
2045 regression that can be partly masked by careful scheduling of moves. */
2046 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMDFAM10 | m_BDVER | m_GENERIC,
2048 /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
2049 m_COREI7 | m_AMDFAM10 | m_BDVER | m_BTVER1,
2051 /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
2054 /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
2057 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
2058 are resolved on SSE register parts instead of whole registers, so we may
2059 maintain just lower part of scalar values in proper format leaving the
2060 upper part undefined. */
2063 /* X86_TUNE_SSE_TYPELESS_STORES */
2066 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
2067 m_PPRO | m_P4_NOCONA,
2069 /* X86_TUNE_MEMORY_MISMATCH_STALL */
2070 m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
2072 /* X86_TUNE_PROLOGUE_USING_MOVE */
2073 m_PPRO | m_CORE2I7 | m_ATOM | m_ATHLON_K8 | m_GENERIC,
2075 /* X86_TUNE_EPILOGUE_USING_MOVE */
2076 m_PPRO | m_CORE2I7 | m_ATOM | m_ATHLON_K8 | m_GENERIC,
2078 /* X86_TUNE_SHIFT1 */
2081 /* X86_TUNE_USE_FFREEP */
2084 /* X86_TUNE_INTER_UNIT_MOVES */
2085 ~(m_AMD_MULTIPLE | m_GENERIC),
2087 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
2088 ~(m_AMDFAM10 | m_BDVER),
2090 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
2091 than 4 branch instructions in the 16 byte window. */
2092 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
2094 /* X86_TUNE_SCHEDULE */
2095 m_PENT | m_PPRO | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
2097 /* X86_TUNE_USE_BT */
2098 m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
2100 /* X86_TUNE_USE_INCDEC */
2101 ~(m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GENERIC),
2103 /* X86_TUNE_PAD_RETURNS */
2104 m_CORE2I7 | m_AMD_MULTIPLE | m_GENERIC,
2106 /* X86_TUNE_PAD_SHORT_FUNCTION: Pad short functions. */
2109 /* X86_TUNE_EXT_80387_CONSTANTS */
2110 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_ATHLON_K8 | m_GENERIC,
2112 /* X86_TUNE_SHORTEN_X87_SSE */
2115 /* X86_TUNE_AVOID_VECTOR_DECODE */
2116 m_CORE2I7_64 | m_K8 | m_GENERIC64,
2118 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for HImode
2119 and SImode multiply, but 386 and 486 do HImode multiply faster. */
2122 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of a 32-bit constant and memory
2123 takes the vector path on AMD machines. */
2124 m_CORE2I7_64 | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC64,
2126 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of an 8-bit constant takes the vector
2127 path on AMD machines. */
2128 m_CORE2I7_64 | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC64,
2130 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
2131 than a MOV. */
2134 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
2135 but one byte longer. */
2138 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
2139 operand that cannot be represented using a modRM byte. The XOR
2140 replacement is long decoded, so this split helps here as well. */
2143 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
2144 from FP to FP. */
2145 m_CORE2I7 | m_AMDFAM10 | m_GENERIC,
2147 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
2148 from integer to FP. */
2151 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
2152 with a subsequent conditional jump instruction into a single
2153 compare-and-branch uop. */
2156 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
2157 will impact LEA instruction selection. */
2160 /* X86_TUNE_VECTORIZE_DOUBLE: Enable double precision vector
2164 /* X86_TUNE_SOFTWARE_PREFETCHING_BENEFICIAL: Enable software prefetching
2165 at -O3. For the moment, the prefetching seems badly tuned for Intel
2166 chips. */
2167 m_K6_GEODE | m_AMD_MULTIPLE,
2169 /* X86_TUNE_AVX128_OPTIMAL: Enable 128-bit AVX instruction generation for
2170 the auto-vectorizer. */
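/* At option-override time the table above is folded into the boolean
   array ix86_tune_features (see ix86_option_override_internal below):
   each entry is ANDed with 1 << ix86_tune and reduced to 0/1, after
   which the TARGET_* convenience macros in i386.h just index the array.
   A build-excluded sketch mirroring that fold:  */
#if 0
static void
example_fold_tune_features (void)
{
  unsigned int ix86_tune_mask = 1u << ix86_tune;
  unsigned int i;
  for (i = 0; i < X86_TUNE_LAST; ++i)
    ix86_tune_features[i]
      = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
}
#endif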
2174 /* Feature tests against the various architecture variations. */
2175 unsigned char ix86_arch_features[X86_ARCH_LAST];
2177 /* Feature tests against the various architecture variations, used to create
2178 ix86_arch_features based on the processor mask. */
2179 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2180 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
2181 ~(m_386 | m_486 | m_PENT | m_K6),
2183 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2186 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2189 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2192 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2196 static const unsigned int x86_accumulate_outgoing_args
2197 = m_PPRO | m_P4_NOCONA | m_ATOM | m_CORE2I7 | m_AMD_MULTIPLE | m_GENERIC;
2199 static const unsigned int x86_arch_always_fancy_math_387
2200 = m_PENT | m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC;
2202 static const unsigned int x86_avx256_split_unaligned_load
2203 = m_COREI7 | m_GENERIC;
2205 static const unsigned int x86_avx256_split_unaligned_store
2206 = m_COREI7 | m_BDVER | m_GENERIC;
2208 /* In case the average insn count for a single function invocation is
2209 lower than this constant, emit fast (but longer) prologue and
2210 epilogue code. */
2211 #define FAST_PROLOGUE_INSN_COUNT 20
2213 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
2214 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2215 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2216 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2218 /* Array of the smallest class containing reg number REGNO, indexed by
2219 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2221 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2223 /* ax, dx, cx, bx */
2224 AREG, DREG, CREG, BREG,
2225 /* si, di, bp, sp */
2226 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2228 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2229 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2232 /* flags, fpsr, fpcr, frame */
2233 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2235 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2238 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2241 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2242 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2243 /* SSE REX registers */
2244 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2248 /* The "default" register map used in 32bit mode. */
2250 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2252 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2253 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2254 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2255 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2256 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2257 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2258 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2261 /* The "default" register map used in 64bit mode. */
2263 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2265 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2266 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2267 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2268 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2269 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2270 8,9,10,11,12,13,14,15, /* extended integer registers */
2271 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
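/* These maps translate gcc's internal register numbers into the
   numbering debuggers expect; -1 marks registers with no stable debug
   encoding.  DBX_REGISTER_NUMBER in i386.h (overridden to use the SVR4
   map below on SVR4-derived targets) is essentially an array lookup.
   A build-excluded sketch:  */
#if 0
static void
example_dwarf_regno (void)
{
  /* %ebp is gcc regno 6: DWARF reg 4 in the default 32-bit map,
     5 in the SVR4 map below, and 6 (%rbp) in the 64-bit map.  */
  int dbx32 = dbx_register_map[6];	/* 4 */
  int dbx64 = dbx64_register_map[6];	/* 6 */
  (void) dbx32; (void) dbx64;
}
#endif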
2274 /* Define the register numbers to be used in Dwarf debugging information.
2275 The SVR4 reference port C compiler uses the following register numbers
2276 in its Dwarf output code:
2277 0 for %eax (gcc regno = 0)
2278 1 for %ecx (gcc regno = 2)
2279 2 for %edx (gcc regno = 1)
2280 3 for %ebx (gcc regno = 3)
2281 4 for %esp (gcc regno = 7)
2282 5 for %ebp (gcc regno = 6)
2283 6 for %esi (gcc regno = 4)
2284 7 for %edi (gcc regno = 5)
2285 The following three DWARF register numbers are never generated by
2286 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2287 believes these numbers have these meanings.
2288 8 for %eip (no gcc equivalent)
2289 9 for %eflags (gcc regno = 17)
2290 10 for %trapno (no gcc equivalent)
2291 It is not at all clear how we should number the FP stack registers
2292 for the x86 architecture. If the version of SDB on x86/svr4 were
2293 a bit less brain dead with respect to floating-point then we would
2294 have a precedent to follow with respect to DWARF register numbers
2295 for x86 FP registers, but the SDB on x86/svr4 is so completely
2296 broken with respect to FP registers that it is hardly worth thinking
2297 of it as something to strive for compatibility with.
2298 The version of x86/svr4 SDB I have at the moment does (partially)
2299 seem to believe that DWARF register number 11 is associated with
2300 the x86 register %st(0), but that's about all. Higher DWARF
2301 register numbers don't seem to be associated with anything in
2302 particular, and even for DWARF regno 11, SDB only seems to under-
2303 stand that it should say that a variable lives in %st(0) (when
2304 asked via an `=' command) if we said it was in DWARF regno 11,
2305 but SDB still prints garbage when asked for the value of the
2306 variable in question (via a `/' command).
2307 (Also note that the labels SDB prints for various FP stack regs
2308 when doing an `x' command are all wrong.)
2309 Note that these problems generally don't affect the native SVR4
2310 C compiler because it doesn't allow the use of -O with -g and
2311 because when it is *not* optimizing, it allocates a memory
2312 location for each floating-point variable, and the memory
2313 location is what gets described in the DWARF AT_location
2314 attribute for the variable in question.
2315 Regardless of the severe mental illness of the x86/svr4 SDB, we
2316 do something sensible here and we use the following DWARF
2317 register numbers. Note that these are all stack-top-relative
2319 11 for %st(0) (gcc regno = 8)
2320 12 for %st(1) (gcc regno = 9)
2321 13 for %st(2) (gcc regno = 10)
2322 14 for %st(3) (gcc regno = 11)
2323 15 for %st(4) (gcc regno = 12)
2324 16 for %st(5) (gcc regno = 13)
2325 17 for %st(6) (gcc regno = 14)
2326 18 for %st(7) (gcc regno = 15)
2328 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2330 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2331 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2332 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2333 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2334 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2335 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2336 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2339 /* Define parameter passing and return registers. */
2341 static int const x86_64_int_parameter_registers[6] =
2343 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2346 static int const x86_64_ms_abi_int_parameter_registers[4] =
2348 CX_REG, DX_REG, R8_REG, R9_REG
2351 static int const x86_64_int_return_registers[4] =
2353 AX_REG, DX_REG, DI_REG, SI_REG
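/* Under the SysV AMD64 ABI the first six integer arguments travel in
   %rdi, %rsi, %rdx, %rcx, %r8, %r9 (the table order above); the MS ABI
   uses only %rcx, %rdx, %r8, %r9.  A build-excluded sketch, with the
   register assignment for a hypothetical call shown as comments:  */
#if 0
extern long f (long a, long b, long c);

/* SysV:  a -> %rdi, b -> %rsi, c -> %rdx, result -> %rax.
   MS:    a -> %rcx, b -> %rdx, c -> %r8,  result -> %rax.  */
static long
call_f (void)
{
  return f (1, 2, 3);
}
#endif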
2356 /* Define the structure for the machine field in struct function. */
2358 struct GTY(()) stack_local_entry {
2359 unsigned short mode;
2362 struct stack_local_entry *next;
2365 /* Structure describing stack frame layout.
2366 Stack grows downward:
2372 saved static chain if ix86_static_chain_on_stack
2374 saved frame pointer if frame_pointer_needed
2375 <- HARD_FRAME_POINTER
2381 <- sse_regs_save_offset
2384 [va_arg registers] |
2388 [padding2] | = to_allocate
2397 int outgoing_arguments_size;
2398 HOST_WIDE_INT frame;
2400 /* The offsets relative to ARG_POINTER. */
2401 HOST_WIDE_INT frame_pointer_offset;
2402 HOST_WIDE_INT hard_frame_pointer_offset;
2403 HOST_WIDE_INT stack_pointer_offset;
2404 HOST_WIDE_INT hfp_save_offset;
2405 HOST_WIDE_INT reg_save_offset;
2406 HOST_WIDE_INT sse_reg_save_offset;
2408 /* When save_regs_using_mov is set, emit prologue using
2409 move instead of push instructions. */
2410 bool save_regs_using_mov;
2413 /* Which cpu are we scheduling for. */
2414 enum attr_cpu ix86_schedule;
2416 /* Which cpu are we optimizing for. */
2417 enum processor_type ix86_tune;
2419 /* Which instruction set architecture to use. */
2420 enum processor_type ix86_arch;
2422 /* True if the SSE prefetch instruction is not a NOP. */
2423 int x86_prefetch_sse;
2425 /* -mstackrealign option */
2426 static const char ix86_force_align_arg_pointer_string[]
2427 = "force_align_arg_pointer";
2429 static rtx (*ix86_gen_leave) (void);
2430 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2431 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2432 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2433 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2434 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2435 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2436 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2437 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2438 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2440 /* Preferred alignment for stack boundary in bits. */
2441 unsigned int ix86_preferred_stack_boundary;
2443 /* Alignment for incoming stack boundary in bits specified at
2445 static unsigned int ix86_user_incoming_stack_boundary;
2447 /* Default alignment for incoming stack boundary in bits. */
2448 static unsigned int ix86_default_incoming_stack_boundary;
2450 /* Alignment for incoming stack boundary in bits. */
2451 unsigned int ix86_incoming_stack_boundary;
2453 /* Calling-ABI-specific va_list type nodes. */
2454 static GTY(()) tree sysv_va_list_type_node;
2455 static GTY(()) tree ms_va_list_type_node;
2457 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2458 char internal_label_prefix[16];
2459 int internal_label_prefix_len;
2461 /* Fence to use after loop using movnt. */
2464 /* Register class used for passing a given 64bit part of the argument.
2465 These represent classes as documented by the psABI, with the exception
2466 of the SSESF and SSEDF classes, which are basically SSE class; gcc just
2467 uses SF or DFmode moves instead of DImode to avoid reformatting penalties.
2469 Similarly we play games with the INTEGERSI_CLASS to use cheaper SImode
2470 moves whenever possible (the upper half does contain padding). */
2471 enum x86_64_reg_class
2474 X86_64_INTEGER_CLASS,
2475 X86_64_INTEGERSI_CLASS,
2482 X86_64_COMPLEX_X87_CLASS,
2486 #define MAX_CLASSES 4
2488 /* Table of constants used by fldpi, fldln2, etc.... */
2489 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2490 static bool ext_80387_constants_init = 0;
2493 static struct machine_function * ix86_init_machine_status (void);
2494 static rtx ix86_function_value (const_tree, const_tree, bool);
2495 static bool ix86_function_value_regno_p (const unsigned int);
2496 static unsigned int ix86_function_arg_boundary (enum machine_mode,
2498 static rtx ix86_static_chain (const_tree, bool);
2499 static int ix86_function_regparm (const_tree, const_tree);
2500 static void ix86_compute_frame_layout (struct ix86_frame *);
2501 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
2503 static void ix86_add_new_builtins (int);
2504 static rtx ix86_expand_vec_perm_builtin (tree);
2505 static tree ix86_canonical_va_list_type (tree);
2506 static void predict_jump (int);
2507 static unsigned int split_stack_prologue_scratch_regno (void);
2508 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2510 enum ix86_function_specific_strings
2512 IX86_FUNCTION_SPECIFIC_ARCH,
2513 IX86_FUNCTION_SPECIFIC_TUNE,
2514 IX86_FUNCTION_SPECIFIC_MAX
2517 static char *ix86_target_string (int, int, const char *, const char *,
2518 enum fpmath_unit, bool);
2519 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
2520 static void ix86_function_specific_save (struct cl_target_option *);
2521 static void ix86_function_specific_restore (struct cl_target_option *);
2522 static void ix86_function_specific_print (FILE *, int,
2523 struct cl_target_option *);
2524 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2525 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2526 struct gcc_options *);
2527 static bool ix86_can_inline_p (tree, tree);
2528 static void ix86_set_current_function (tree);
2529 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2531 static enum calling_abi ix86_function_abi (const_tree);
2534 #ifndef SUBTARGET32_DEFAULT_CPU
2535 #define SUBTARGET32_DEFAULT_CPU "i386"
2538 /* The svr4 ABI for the i386 says that records and unions are returned
2539 in memory. */
2540 #ifndef DEFAULT_PCC_STRUCT_RETURN
2541 #define DEFAULT_PCC_STRUCT_RETURN 1
2544 /* Whether -mtune= or -march= were specified */
2545 static int ix86_tune_defaulted;
2546 static int ix86_arch_specified;
2548 /* Vectorization library interface and handlers. */
2549 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2551 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2552 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2554 /* Processor target table, indexed by processor number */
2557 const struct processor_costs *cost; /* Processor costs */
2558 const int align_loop; /* Default alignments. */
2559 const int align_loop_max_skip;
2560 const int align_jump;
2561 const int align_jump_max_skip;
2562 const int align_func;
2565 static const struct ptt processor_target_table[PROCESSOR_max] =
2567 {&i386_cost, 4, 3, 4, 3, 4},
2568 {&i486_cost, 16, 15, 16, 15, 16},
2569 {&pentium_cost, 16, 7, 16, 7, 16},
2570 {&pentiumpro_cost, 16, 15, 16, 10, 16},
2571 {&geode_cost, 0, 0, 0, 0, 0},
2572 {&k6_cost, 32, 7, 32, 7, 32},
2573 {&athlon_cost, 16, 7, 16, 7, 16},
2574 {&pentium4_cost, 0, 0, 0, 0, 0},
2575 {&k8_cost, 16, 7, 16, 7, 16},
2576 {&nocona_cost, 0, 0, 0, 0, 0},
2577 /* Core 2 32-bit. */
2578 {&generic32_cost, 16, 10, 16, 10, 16},
2579 /* Core 2 64-bit. */
2580 {&generic64_cost, 16, 10, 16, 10, 16},
2581 /* Core i7 32-bit. */
2582 {&generic32_cost, 16, 10, 16, 10, 16},
2583 /* Core i7 64-bit. */
2584 {&generic64_cost, 16, 10, 16, 10, 16},
2585 {&generic32_cost, 16, 7, 16, 7, 16},
2586 {&generic64_cost, 16, 10, 16, 10, 16},
2587 {&amdfam10_cost, 32, 24, 32, 7, 32},
2588 {&bdver1_cost, 32, 24, 32, 7, 32},
2589 {&bdver2_cost, 32, 24, 32, 7, 32},
2590 {&btver1_cost, 32, 24, 32, 7, 32},
2591 {&atom_cost, 16, 7, 16, 7, 16}
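/* The per-processor alignment columns above become the defaults for
   -falign-loops, -falign-jumps and -falign-functions when the user
   leaves them at 0 (see ix86_option_override_internal below).  A
   build-excluded sketch of the lookup:  */
#if 0
static void
example_alignment_defaults (void)
{
  /* For -mtune=k8 the row {&k8_cost, 16, 7, 16, 7, 16} yields:  */
  const struct ptt *p = &processor_target_table[PROCESSOR_K8];
  int loops = p->align_loop;		/* 16 */
  int skip  = p->align_loop_max_skip;	/* 7 */
  int funcs = p->align_func;		/* 16 */
  (void) loops; (void) skip; (void) funcs;
}
#endif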
2594 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
2624 /* Return true if a red-zone is in use. */
2627 ix86_using_red_zone (void)
2629 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2632 /* Return a string that documents the current -m options. The caller is
2633 responsible for freeing the string. */
2636 ix86_target_string (int isa, int flags, const char *arch, const char *tune,
2637 enum fpmath_unit fpmath, bool add_nl_p)
2639 struct ix86_target_opts
2641 const char *option; /* option string */
2642 int mask; /* isa mask options */
2645 /* This table is ordered so that options like -msse4.2 that imply
2646 preceding options match those first. */
2647 static struct ix86_target_opts isa_opts[] =
2649 { "-m64", OPTION_MASK_ISA_64BIT },
2650 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2651 { "-mfma", OPTION_MASK_ISA_FMA },
2652 { "-mxop", OPTION_MASK_ISA_XOP },
2653 { "-mlwp", OPTION_MASK_ISA_LWP },
2654 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2655 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2656 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2657 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2658 { "-msse3", OPTION_MASK_ISA_SSE3 },
2659 { "-msse2", OPTION_MASK_ISA_SSE2 },
2660 { "-msse", OPTION_MASK_ISA_SSE },
2661 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2662 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2663 { "-mmmx", OPTION_MASK_ISA_MMX },
2664 { "-mabm", OPTION_MASK_ISA_ABM },
2665 { "-mbmi", OPTION_MASK_ISA_BMI },
2666 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
2667 { "-mtbm", OPTION_MASK_ISA_TBM },
2668 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2669 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2670 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2671 { "-maes", OPTION_MASK_ISA_AES },
2672 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2673 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2674 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2675 { "-mf16c", OPTION_MASK_ISA_F16C },
2679 static struct ix86_target_opts flag_opts[] =
2681 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2682 { "-m80387", MASK_80387 },
2683 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2684 { "-malign-double", MASK_ALIGN_DOUBLE },
2685 { "-mcld", MASK_CLD },
2686 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2687 { "-mieee-fp", MASK_IEEE_FP },
2688 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2689 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2690 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2691 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2692 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2693 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2694 { "-mno-red-zone", MASK_NO_RED_ZONE },
2695 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2696 { "-mrecip", MASK_RECIP },
2697 { "-mrtd", MASK_RTD },
2698 { "-msseregparm", MASK_SSEREGPARM },
2699 { "-mstack-arg-probe", MASK_STACK_PROBE },
2700 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2701 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2702 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2703 { "-mvzeroupper", MASK_VZEROUPPER },
2704 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
2705 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
2706 { "-mprefer-avx128", MASK_PREFER_AVX128},
2709 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2712 char target_other[40];
2721 memset (opts, '\0', sizeof (opts));
2723 /* Add -march= option. */
2726 opts[num][0] = "-march=";
2727 opts[num++][1] = arch;
2730 /* Add -mtune= option. */
2733 opts[num][0] = "-mtune=";
2734 opts[num++][1] = tune;
2737 /* Pick out the options in isa options. */
2738 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2740 if ((isa & isa_opts[i].mask) != 0)
2742 opts[num++][0] = isa_opts[i].option;
2743 isa &= ~ isa_opts[i].mask;
2747 if (isa && add_nl_p)
2749 opts[num++][0] = isa_other;
2750 sprintf (isa_other, "(other isa: %#x)", isa);
2753 /* Add flag options. */
2754 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2756 if ((flags & flag_opts[i].mask) != 0)
2758 opts[num++][0] = flag_opts[i].option;
2759 flags &= ~ flag_opts[i].mask;
2763 if (flags && add_nl_p)
2765 opts[num++][0] = target_other;
2766 sprintf (target_other, "(other flags: %#x)", flags);
2769 /* Add -fpmath= option. */
2772 opts[num][0] = "-mfpmath=";
2773 switch ((int) fpmath)
2776 opts[num++][1] = "387";
2780 opts[num++][1] = "sse";
2783 case FPMATH_387 | FPMATH_SSE:
2784 opts[num++][1] = "sse+387";
2796 gcc_assert (num < ARRAY_SIZE (opts));
2798 /* Size the string. */
2800 sep_len = (add_nl_p) ? 3 : 1;
2801 for (i = 0; i < num; i++)
2804 for (j = 0; j < 2; j++)
2806 len += strlen (opts[i][j]);
2809 /* Build the string. */
2810 ret = ptr = (char *) xmalloc (len);
2813 for (i = 0; i < num; i++)
2817 for (j = 0; j < 2; j++)
2818 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2825 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2833 for (j = 0; j < 2; j++)
2836 memcpy (ptr, opts[i][j], len2[j]);
2838 line_len += len2[j];
2843 gcc_assert (ret + len >= ptr);
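/* A build-excluded usage sketch: the caller owns the returned string.
   The exact option set in the result depends on which ISA bits are
   enabled; the string shown is illustrative only.  */
#if 0
static void
example_print_target_string (void)
{
  char *s = ix86_target_string (ix86_isa_flags, target_flags,
				"corei7", "corei7", FPMATH_SSE, true);
  /* e.g. "-march=corei7 -mtune=corei7 -m64 -msse4.2 -msse4.1 -mssse3
     -msse3 -msse2 -msse -mmmx -mpopcnt -mfpmath=sse" */
  fprintf (stderr, "%s\n", s);
  free (s);
}
#endif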
2848 /* Return true if profiling code should be emitted before the
2849 prologue, false otherwise.
2850 Note: for x86, the "hotfix" case is handled via sorry(). */
2852 ix86_profile_before_prologue (void)
2854 return flag_fentry != 0;
2857 /* Function that is callable from the debugger to print the current
2858 options. */
2860 ix86_debug_options (void)
2862 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2863 ix86_arch_string, ix86_tune_string,
2868 fprintf (stderr, "%s\n\n", opts);
2872 fputs ("<no options>\n\n", stderr);
2877 /* Override various settings based on options. If MAIN_ARGS_P, the
2878 options are from the command line, otherwise they are from
2879 attribute(target). */
2882 ix86_option_override_internal (bool main_args_p)
2885 unsigned int ix86_arch_mask, ix86_tune_mask;
2886 const bool ix86_tune_specified = (ix86_tune_string != NULL);
2897 PTA_PREFETCH_SSE = 1 << 4,
2899 PTA_3DNOW_A = 1 << 6,
2903 PTA_POPCNT = 1 << 10,
2905 PTA_SSE4A = 1 << 12,
2906 PTA_NO_SAHF = 1 << 13,
2907 PTA_SSE4_1 = 1 << 14,
2908 PTA_SSE4_2 = 1 << 15,
2910 PTA_PCLMUL = 1 << 17,
2913 PTA_MOVBE = 1 << 20,
2917 PTA_FSGSBASE = 1 << 24,
2918 PTA_RDRND = 1 << 25,
2923 /* if this reaches 32, need to widen struct pta flags below */
2928 const char *const name; /* processor name or nickname. */
2929 const enum processor_type processor;
2930 const enum attr_cpu schedule;
2931 const unsigned /*enum pta_flags*/ flags;
2933 const processor_alias_table[] =
2935 {"i386", PROCESSOR_I386, CPU_NONE, 0},
2936 {"i486", PROCESSOR_I486, CPU_NONE, 0},
2937 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2938 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2939 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
2940 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
2941 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2942 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2943 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
2944 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2945 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2946 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
2947 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2949 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2951 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2952 PTA_MMX | PTA_SSE | PTA_SSE2},
2953 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
2954 PTA_MMX | PTA_SSE | PTA_SSE2},
2955 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
2956 PTA_MMX | PTA_SSE | PTA_SSE2},
2957 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
2958 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2959 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
2960 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2961 | PTA_CX16 | PTA_NO_SAHF},
2962 {"core2", PROCESSOR_CORE2_64, CPU_CORE2,
2963 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2964 | PTA_SSSE3 | PTA_CX16},
2965 {"corei7", PROCESSOR_COREI7_64, CPU_COREI7,
2966 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2967 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_CX16},
2968 {"corei7-avx", PROCESSOR_COREI7_64, CPU_COREI7,
2969 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2970 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX
2971 | PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL},
2972 {"core-avx-i", PROCESSOR_COREI7_64, CPU_COREI7,
2973 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2974 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX
2975 | PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL | PTA_FSGSBASE
2976 | PTA_RDRND | PTA_F16C},
2977 {"atom", PROCESSOR_ATOM, CPU_ATOM,
2978 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2979 | PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
2980 {"geode", PROCESSOR_GEODE, CPU_GEODE,
2981 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2982 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
2983 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2984 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2985 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
2986 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2987 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
2988 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2989 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
2990 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2991 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
2992 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2993 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
2994 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2995 {"x86-64", PROCESSOR_K8, CPU_K8,
2996 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
2997 {"k8", PROCESSOR_K8, CPU_K8,
2998 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2999 | PTA_SSE2 | PTA_NO_SAHF},
3000 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3001 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3002 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3003 {"opteron", PROCESSOR_K8, CPU_K8,
3004 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3005 | PTA_SSE2 | PTA_NO_SAHF},
3006 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3007 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3008 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3009 {"athlon64", PROCESSOR_K8, CPU_K8,
3010 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3011 | PTA_SSE2 | PTA_NO_SAHF},
3012 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3013 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3014 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3015 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3016 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3017 | PTA_SSE2 | PTA_NO_SAHF},
3018 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3019 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3020 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
3021 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3022 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3023 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
3024 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3025 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3026 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3027 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3028 | PTA_XOP | PTA_LWP},
3029 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3030 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3031 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3032 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3033 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3035 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC64,
3036 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3037 | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16},
3038 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
3039 0 /* flags are only used for -march switch. */ },
3040 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
3041 PTA_64BIT /* flags are only used for -march switch. */ },
3044 int const pta_size = ARRAY_SIZE (processor_alias_table);
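/* -march= and -mtune= are resolved by a linear scan of
   processor_alias_table (the loops below); the matching row supplies
   the scheduling model and the PTA_* ISA baseline.  A build-excluded
   sketch for "-march=core2":  */
#if 0
  {
    int i;
    for (i = 0; i < pta_size; i++)
      if (! strcmp ("core2", processor_alias_table[i].name))
	break;
    /* Row found: processor = PROCESSOR_CORE2_64, schedule = CPU_CORE2,
       flags = PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
	       | PTA_SSSE3 | PTA_CX16,
       each of which turns on the matching OPTION_MASK_ISA_* bit below
       unless the user set that bit explicitly.  */
  }
#endif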
3046 /* Set up prefix/suffix so the error messages refer to either the command
3047 line argument, or the attribute(target). */
3056 prefix = "option(\"";
3061 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3062 SUBTARGET_OVERRIDE_OPTIONS;
3065 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3066 SUBSUBTARGET_OVERRIDE_OPTIONS;
3070 ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3072 /* -fPIC is the default for x86_64. */
3073 if (TARGET_MACHO && TARGET_64BIT)
3076 /* Need to check -mtune=generic first. */
3077 if (ix86_tune_string)
3079 if (!strcmp (ix86_tune_string, "generic")
3080 || !strcmp (ix86_tune_string, "i686")
3081 /* As special support for cross compilers we read -mtune=native
3082 as -mtune=generic. With native compilers we won't see the
3083 -mtune=native, as it was changed by the driver. */
3084 || !strcmp (ix86_tune_string, "native"))
3087 ix86_tune_string = "generic64";
3089 ix86_tune_string = "generic32";
3091 /* If this call is for setting the option attribute, allow the
3092 generic32/generic64 that was previously set. */
3093 else if (!main_args_p
3094 && (!strcmp (ix86_tune_string, "generic32")
3095 || !strcmp (ix86_tune_string, "generic64")))
3097 else if (!strncmp (ix86_tune_string, "generic", 7))
3098 error ("bad value (%s) for %stune=%s %s",
3099 ix86_tune_string, prefix, suffix, sw);
3100 else if (!strcmp (ix86_tune_string, "x86-64"))
3101 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3102 "%stune=k8%s or %stune=generic%s instead as appropriate",
3103 prefix, suffix, prefix, suffix, prefix, suffix);
3107 if (ix86_arch_string)
3108 ix86_tune_string = ix86_arch_string;
3109 if (!ix86_tune_string)
3111 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
3112 ix86_tune_defaulted = 1;
3115 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
3116 need to use a sensible tune option. */
3117 if (!strcmp (ix86_tune_string, "generic")
3118 || !strcmp (ix86_tune_string, "x86-64")
3119 || !strcmp (ix86_tune_string, "i686"))
3122 ix86_tune_string = "generic64";
3124 ix86_tune_string = "generic32";
3128 if (ix86_stringop_alg == rep_prefix_8_byte && !TARGET_64BIT)
3130 /* rep; movq isn't available in 32-bit code. */
3131 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3132 ix86_stringop_alg = no_stringop;
3135 if (!ix86_arch_string)
3136 ix86_arch_string = TARGET_64BIT ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3138 ix86_arch_specified = 1;
3140 if (!global_options_set.x_ix86_abi)
3141 ix86_abi = DEFAULT_ABI;
3143 if (global_options_set.x_ix86_cmodel)
3145 switch (ix86_cmodel)
3150 ix86_cmodel = CM_SMALL_PIC;
3152 error ("code model %qs not supported in the %s bit mode",
3159 ix86_cmodel = CM_MEDIUM_PIC;
3161 error ("code model %qs not supported in the %s bit mode",
3163 else if (TARGET_X32)
3164 error ("code model %qs not supported in x32 mode",
3171 ix86_cmodel = CM_LARGE_PIC;
3173 error ("code model %qs not supported in the %s bit mode",
3175 else if (TARGET_X32)
3176 error ("code model %qs not supported in x32 mode",
3182 error ("code model %s does not support PIC mode", "32");
3184 error ("code model %qs not supported in the %s bit mode",
3191 error ("code model %s does not support PIC mode", "kernel");
3192 ix86_cmodel = CM_32;
3195 error ("code model %qs not supported in the %s bit mode",
3205 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3206 use of rip-relative addressing. This eliminates fixups that
3207 would otherwise be needed if this object is to be placed in a
3208 DLL, and is essentially just as efficient as direct addressing. */
3209 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
3210 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
3211 else if (TARGET_64BIT)
3212 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
3214 ix86_cmodel = CM_32;
3216 if (TARGET_MACHO && ix86_asm_dialect == ASM_INTEL)
3218 error ("-masm=intel not supported in this configuration");
3219 ix86_asm_dialect = ASM_ATT;
3221 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3222 sorry ("%i-bit mode not compiled in",
3223 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3225 for (i = 0; i < pta_size; i++)
3226 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
3228 ix86_schedule = processor_alias_table[i].schedule;
3229 ix86_arch = processor_alias_table[i].processor;
3230 /* Default cpu tuning to the architecture. */
3231 ix86_tune = ix86_arch;
3233 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3234 error ("CPU you selected does not support x86-64 "
3237 if (processor_alias_table[i].flags & PTA_MMX
3238 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3239 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3240 if (processor_alias_table[i].flags & PTA_3DNOW
3241 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3242 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3243 if (processor_alias_table[i].flags & PTA_3DNOW_A
3244 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3245 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3246 if (processor_alias_table[i].flags & PTA_SSE
3247 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3248 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3249 if (processor_alias_table[i].flags & PTA_SSE2
3250 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3251 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3252 if (processor_alias_table[i].flags & PTA_SSE3
3253 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3254 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3255 if (processor_alias_table[i].flags & PTA_SSSE3
3256 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3257 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3258 if (processor_alias_table[i].flags & PTA_SSE4_1
3259 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3260 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3261 if (processor_alias_table[i].flags & PTA_SSE4_2
3262 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3263 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3264 if (processor_alias_table[i].flags & PTA_AVX
3265 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3266 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3267 if (processor_alias_table[i].flags & PTA_FMA
3268 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3269 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3270 if (processor_alias_table[i].flags & PTA_SSE4A
3271 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3272 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3273 if (processor_alias_table[i].flags & PTA_FMA4
3274 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3275 ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3276 if (processor_alias_table[i].flags & PTA_XOP
3277 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3278 ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3279 if (processor_alias_table[i].flags & PTA_LWP
3280 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3281 ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3282 if (processor_alias_table[i].flags & PTA_ABM
3283 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3284 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3285 if (processor_alias_table[i].flags & PTA_BMI
3286 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3287 ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3288 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
3289 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
3290 ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
3291 if (processor_alias_table[i].flags & PTA_TBM
3292 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3293 ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3294 if (processor_alias_table[i].flags & PTA_CX16
3295 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3296 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3297 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3298 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3299 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3300 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
3301 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3302 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3303 if (processor_alias_table[i].flags & PTA_MOVBE
3304 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3305 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3306 if (processor_alias_table[i].flags & PTA_AES
3307 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3308 ix86_isa_flags |= OPTION_MASK_ISA_AES;
3309 if (processor_alias_table[i].flags & PTA_PCLMUL
3310 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3311 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3312 if (processor_alias_table[i].flags & PTA_FSGSBASE
3313 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3314 ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3315 if (processor_alias_table[i].flags & PTA_RDRND
3316 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3317 ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3318 if (processor_alias_table[i].flags & PTA_F16C
3319 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3320 ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3321 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3322 x86_prefetch_sse = true;
3327 if (!strcmp (ix86_arch_string, "generic"))
3328 error ("generic CPU can be used only for %stune=%s %s",
3329 prefix, suffix, sw);
3330 else if (!strncmp (ix86_arch_string, "generic", 7) || i == pta_size)
3331 error ("bad value (%s) for %sarch=%s %s",
3332 ix86_arch_string, prefix, suffix, sw);
3334 ix86_arch_mask = 1u << ix86_arch;
3335 for (i = 0; i < X86_ARCH_LAST; ++i)
3336 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3338 for (i = 0; i < pta_size; i++)
3339 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
3341 ix86_schedule = processor_alias_table[i].schedule;
3342 ix86_tune = processor_alias_table[i].processor;
3345 if (!(processor_alias_table[i].flags & PTA_64BIT))
3347 if (ix86_tune_defaulted)
3349 ix86_tune_string = "x86-64";
3350 for (i = 0; i < pta_size; i++)
3351 if (! strcmp (ix86_tune_string,
3352 processor_alias_table[i].name))
3354 ix86_schedule = processor_alias_table[i].schedule;
3355 ix86_tune = processor_alias_table[i].processor;
3358 error ("CPU you selected does not support x86-64 "
3364 /* Adjust tuning when compiling for 32-bit ABI. */
3367 case PROCESSOR_GENERIC64:
3368 ix86_tune = PROCESSOR_GENERIC32;
3369 ix86_schedule = CPU_PENTIUMPRO;
3372 case PROCESSOR_CORE2_64:
3373 ix86_tune = PROCESSOR_CORE2_32;
3376 case PROCESSOR_COREI7_64:
3377 ix86_tune = PROCESSOR_COREI7_32;
3384 /* Intel CPUs have always interpreted SSE prefetch instructions as
3385 NOPs; so, we can enable SSE prefetch instructions even when
3386 -mtune (rather than -march) points us to a processor that has them.
3387 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3388 higher processors. */
3390 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3391 x86_prefetch_sse = true;
3395 if (ix86_tune_specified && i == pta_size)
3396 error ("bad value (%s) for %stune=%s %s",
3397 ix86_tune_string, prefix, suffix, sw);
3399 ix86_tune_mask = 1u << ix86_tune;
3400 for (i = 0; i < X86_TUNE_LAST; ++i)
3401 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3403 #ifndef USE_IX86_FRAME_POINTER
3404 #define USE_IX86_FRAME_POINTER 0
3407 #ifndef USE_X86_64_FRAME_POINTER
3408 #define USE_X86_64_FRAME_POINTER 0
3411 /* Set the default values for switches whose default depends on TARGET_64BIT
3412 in case they weren't overwritten by command line options. */
3415 if (optimize > 1 && !global_options_set.x_flag_zee)
3417 if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
3418 flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3419 if (flag_asynchronous_unwind_tables == 2)
3420 flag_unwind_tables = flag_asynchronous_unwind_tables = 1;
3421 if (flag_pcc_struct_return == 2)
3422 flag_pcc_struct_return = 0;
3426 if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
3427 flag_omit_frame_pointer = !(USE_IX86_FRAME_POINTER || optimize_size);
3428 if (flag_asynchronous_unwind_tables == 2)
3429 flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3430 if (flag_pcc_struct_return == 2)
3431 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3435 ix86_cost = &ix86_size_cost;
3437 ix86_cost = processor_target_table[ix86_tune].cost;
3439 /* Arrange to set up i386_stack_locals for all functions. */
3440 init_machine_status = ix86_init_machine_status;
3442 /* Validate -mregparm= value. */
3443 if (global_options_set.x_ix86_regparm)
3446 warning (0, "-mregparm is ignored in 64-bit mode");
3447 if (ix86_regparm > REGPARM_MAX)
3449 error ("-mregparm=%d is not between 0 and %d",
3450 ix86_regparm, REGPARM_MAX);
3455 ix86_regparm = REGPARM_MAX;
3457 /* Default align_* from the processor table. */
3458 if (align_loops == 0)
3460 align_loops = processor_target_table[ix86_tune].align_loop;
3461 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3463 if (align_jumps == 0)
3465 align_jumps = processor_target_table[ix86_tune].align_jump;
3466 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3468 if (align_functions == 0)
3470 align_functions = processor_target_table[ix86_tune].align_func;
3473 /* Provide default for -mbranch-cost= value. */
3474 if (!global_options_set.x_ix86_branch_cost)
3475 ix86_branch_cost = ix86_cost->branch_cost;
3479 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3481 /* Enable by default the SSE and MMX builtins. Do allow the user to
3482 explicitly disable any of these. In particular, disabling SSE and
3483 MMX for kernel code is extremely useful. */
3484 if (!ix86_arch_specified)
3486 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3487 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3490 warning (0, "%srtd%s is ignored in 64-bit mode", prefix, suffix);
3494 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3496 if (!ix86_arch_specified)
3498 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3500 /* i386 ABI does not specify red zone. It still makes sense to use it
3501 when the programmer takes care to keep the stack from being destroyed. */
3502 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3503 target_flags |= MASK_NO_RED_ZONE;
3506 /* Keep nonleaf frame pointers. */
3507 if (flag_omit_frame_pointer)
3508 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3509 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3510 flag_omit_frame_pointer = 1;
3512 /* If we're doing fast math, we don't care about comparison order
3513 wrt NaNs. This lets us use a shorter comparison sequence. */
3514 if (flag_finite_math_only)
3515 target_flags &= ~MASK_IEEE_FP;
3517 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3518 since the insns won't need emulation. */
3519 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3520 target_flags &= ~MASK_NO_FANCY_MATH_387;
3522 /* Likewise, if the target doesn't have a 387, or we've specified
3523 software floating point, don't use 387 inline intrinsics. */
3525 target_flags |= MASK_NO_FANCY_MATH_387;
3527 /* Turn on MMX builtins for -msse. */
3530 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3531 x86_prefetch_sse = true;
3534 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3535 if (TARGET_SSE4_2 || TARGET_ABM)
3536 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3538 /* Turn on lzcnt instruction for -mabm. */
3540 ix86_isa_flags |= OPTION_MASK_ISA_LZCNT & ~ix86_isa_flags_explicit;
3542 /* Validate -mpreferred-stack-boundary= value or default it to
3543 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3544 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3545 if (global_options_set.x_ix86_preferred_stack_boundary_arg)
3547 int min = (TARGET_64BIT ? 4 : 2);
3548 int max = (TARGET_SEH ? 4 : 12);
3550 if (ix86_preferred_stack_boundary_arg < min
3551 || ix86_preferred_stack_boundary_arg > max)
3554 error ("-mpreferred-stack-boundary is not supported "
3557 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
3558 ix86_preferred_stack_boundary_arg, min, max);
3561 ix86_preferred_stack_boundary
3562 = (1 << ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
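/* Worked example of the computation above (illustrative only): with
   BITS_PER_UNIT == 8, -mpreferred-stack-boundary=4 yields
   (1 << 4) * 8 == 128 bits, i.e. a 16-byte boundary, while the
   32-bit minimum of 2 corresponds to (1 << 2) * 8 == 32 bits.  */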
3565 /* Set the default value for -mstackrealign. */
3566 if (ix86_force_align_arg_pointer == -1)
3567 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3569 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3571 /* Validate -mincoming-stack-boundary= value or default it to
3572 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3573 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3574 if (global_options_set.x_ix86_incoming_stack_boundary_arg)
3576 if (ix86_incoming_stack_boundary_arg < (TARGET_64BIT ? 4 : 2)
3577 || ix86_incoming_stack_boundary_arg > 12)
3578 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3579 ix86_incoming_stack_boundary_arg, TARGET_64BIT ? 4 : 2);
3582 ix86_user_incoming_stack_boundary
3583 = (1 << ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
3584 ix86_incoming_stack_boundary
3585 = ix86_user_incoming_stack_boundary;
3589 /* Accept -msseregparm only if at least SSE support is enabled. */
3590 if (TARGET_SSEREGPARM
3592 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3594 if (global_options_set.x_ix86_fpmath)
3596 if (ix86_fpmath & FPMATH_SSE)
3600 warning (0, "SSE instruction set disabled, using 387 arithmetic");
3601 ix86_fpmath = FPMATH_387;
3603 else if ((ix86_fpmath & FPMATH_387) && !TARGET_80387)
3605 warning (0, "387 instruction set disabled, using SSE arithmetic");
3606 ix86_fpmath = FPMATH_SSE;
3611 ix86_fpmath = TARGET_FPMATH_DEFAULT;
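/* Illustrative consequence of the fallback above: a command line such
   as "-m32 -mfpmath=sse" without any -msse* option leaves TARGET_SSE
   clear, so the warning fires and ix86_fpmath is quietly downgraded
   to FPMATH_387; the symmetric case applies to -mfpmath=387 when the
   80387 is disabled.  */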
3613 /* If the i387 is disabled, then do not return values in it. */
3615 target_flags &= ~MASK_FLOAT_RETURNS;
3617 /* Use external vectorized library in vectorizing intrinsics. */
3618 if (global_options_set.x_ix86_veclibabi_type)
3619 switch (ix86_veclibabi_type)
3621 case ix86_veclibabi_type_svml:
3622 ix86_veclib_handler = ix86_veclibabi_svml;
3625 case ix86_veclibabi_type_acml:
3626 ix86_veclib_handler = ix86_veclibabi_acml;
3633 if ((!USE_IX86_FRAME_POINTER
3634 || (x86_accumulate_outgoing_args & ix86_tune_mask))
3635 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3637 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3639 /* ??? Unwind info is not correct around the CFG unless either a frame
3640 pointer is present or M_A_O_A is set. Fixing this requires rewriting
3641 unwind info generation to be aware of the CFG and propagating states
3643 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
3644 || flag_exceptions || flag_non_call_exceptions)
3645 && flag_omit_frame_pointer
3646 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3648 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3649 warning (0, "unwind tables currently require either a frame pointer "
3650 "or %saccumulate-outgoing-args%s for correctness",
3652 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3655 /* If stack probes are required, the space used for large function
3656 arguments on the stack must also be probed, so enable
3657 -maccumulate-outgoing-args so this happens in the prologue. */
3658 if (TARGET_STACK_PROBE
3659 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3661 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3662 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3663 "for correctness", prefix, suffix);
3664 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3667 /* For sane SSE instruction set generation we need the fcomi instruction.
3668 It is safe to enable all CMOVE instructions. Also, RDRAND intrinsic
3669 expands to a sequence that includes conditional move. */
3670 if (TARGET_SSE || TARGET_RDRND)
3673 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3676 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
3677 p = strchr (internal_label_prefix, 'X');
3678 internal_label_prefix_len = p - internal_label_prefix;
3682 /* When scheduling description is not available, disable scheduler pass
3683 so it won't slow down the compilation and make x87 code slower. */
3684 if (!TARGET_SCHEDULE)
3685 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
3687 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3688 ix86_cost->simultaneous_prefetches,
3689 global_options.x_param_values,
3690 global_options_set.x_param_values);
3691 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, ix86_cost->prefetch_block,
3692 global_options.x_param_values,
3693 global_options_set.x_param_values);
3694 maybe_set_param_value (PARAM_L1_CACHE_SIZE, ix86_cost->l1_cache_size,
3695 global_options.x_param_values,
3696 global_options_set.x_param_values);
3697 maybe_set_param_value (PARAM_L2_CACHE_SIZE, ix86_cost->l2_cache_size,
3698 global_options.x_param_values,
3699 global_options_set.x_param_values);
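/* maybe_set_param_value installs the cost-table numbers only when the
   user did not set the parameter explicitly; e.g. a hypothetical
   invocation "gcc -O2 --param l1-cache-size=16 ..." keeps the user's
   value and ignores ix86_cost->l1_cache_size.  */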
3701 /* Enable software prefetching at -O3 for CPUs where prefetching is helpful. */
3702 if (flag_prefetch_loop_arrays < 0
3705 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
3706 flag_prefetch_loop_arrays = 1;
3708 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3709 can be optimized to ap = __builtin_next_arg (0). */
3710 if (!TARGET_64BIT && !flag_split_stack)
3711 targetm.expand_builtin_va_start = NULL;
3715 ix86_gen_leave = gen_leave_rex64;
3716 ix86_gen_add3 = gen_adddi3;
3717 ix86_gen_sub3 = gen_subdi3;
3718 ix86_gen_sub3_carry = gen_subdi3_carry;
3719 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
3720 ix86_gen_monitor = gen_sse3_monitor64;
3721 ix86_gen_andsp = gen_anddi3;
3722 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
3723 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
3724 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
3728 ix86_gen_leave = gen_leave;
3729 ix86_gen_add3 = gen_addsi3;
3730 ix86_gen_sub3 = gen_subsi3;
3731 ix86_gen_sub3_carry = gen_subsi3_carry;
3732 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
3733 ix86_gen_monitor = gen_sse3_monitor;
3734 ix86_gen_andsp = gen_andsi3;
3735 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
3736 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
3737 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
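/* A minimal sketch of how these hooks are used elsewhere in this file
   (hypothetical call site): callers emit stack adjustments without
   testing TARGET_64BIT, since the function pointer already selects
   the DImode or SImode pattern:

     emit_insn (ix86_gen_add3 (stack_pointer_rtx, stack_pointer_rtx,
                               GEN_INT (-16)));  */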
3741 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
3743 target_flags |= MASK_CLD & ~target_flags_explicit;
3746 if (!TARGET_64BIT && flag_pic)
3748 if (flag_fentry > 0)
3749 sorry ("-mfentry isn%'t supported for 32-bit in combination "
3753 else if (TARGET_SEH)
3755 if (flag_fentry == 0)
3756 sorry ("-mno-fentry isn%'t compatible with SEH");
3759 else if (flag_fentry < 0)
3761 #if defined(PROFILE_BEFORE_PROLOGUE)
3770 /* When not optimizing for size, enable the vzeroupper optimization for
3771 TARGET_AVX with -fexpensive-optimizations and split 32-byte
3772 AVX unaligned load/store. */
3775 if (flag_expensive_optimizations
3776 && !(target_flags_explicit & MASK_VZEROUPPER))
3777 target_flags |= MASK_VZEROUPPER;
3778 if ((x86_avx256_split_unaligned_load & ix86_tune_mask)
3779 && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
3780 target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
3781 if ((x86_avx256_split_unaligned_store & ix86_tune_mask)
3782 && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_STORE))
3783 target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
3784 /* Enable 128-bit AVX instruction generation for the auto-vectorizer. */
3785 if (TARGET_AVX128_OPTIMAL && !(target_flags_explicit & MASK_PREFER_AVX128))
3786 target_flags |= MASK_PREFER_AVX128;
3791 /* Disable vzeroupper pass if TARGET_AVX is disabled. */
3792 target_flags &= ~MASK_VZEROUPPER;
3795 /* Save the initial options in case the user does function specific
3798 target_option_default_node = target_option_current_node
3799 = build_target_option_node ();
3802 /* Return TRUE if VAL is passed in register with 256bit AVX modes. */
3805 function_pass_avx256_p (const_rtx val)
3810 if (REG_P (val) && VALID_AVX256_REG_MODE (GET_MODE (val)))
3813 if (GET_CODE (val) == PARALLEL)
3818 for (i = XVECLEN (val, 0) - 1; i >= 0; i--)
3820 r = XVECEXP (val, 0, i);
3821 if (GET_CODE (r) == EXPR_LIST
3823 && REG_P (XEXP (r, 0))
3824 && (GET_MODE (XEXP (r, 0)) == OImode
3825 || VALID_AVX256_REG_MODE (GET_MODE (XEXP (r, 0)))))
3833 /* Implement the TARGET_OPTION_OVERRIDE hook. */
3836 ix86_option_override (void)
3838 ix86_option_override_internal (true);
3841 /* Update register usage after having seen the compiler flags. */
3844 ix86_conditional_register_usage (void)
3849 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3851 if (fixed_regs[i] > 1)
3852 fixed_regs[i] = (fixed_regs[i] == (TARGET_64BIT ? 3 : 2));
3853 if (call_used_regs[i] > 1)
3854 call_used_regs[i] = (call_used_regs[i] == (TARGET_64BIT ? 3 : 2));
3857 /* The PIC register, if it exists, is fixed. */
3858 j = PIC_OFFSET_TABLE_REGNUM;
3859 if (j != INVALID_REGNUM)
3860 fixed_regs[j] = call_used_regs[j] = 1;
3862 /* The 64-bit MS_ABI changes the set of call-used registers. */
3863 if (TARGET_64BIT_MS_ABI)
3865 call_used_regs[SI_REG] = 0;
3866 call_used_regs[DI_REG] = 0;
3867 call_used_regs[XMM6_REG] = 0;
3868 call_used_regs[XMM7_REG] = 0;
3869 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
3870 call_used_regs[i] = 0;
3873 /* The default setting of CLOBBERED_REGS is for 32-bit; add in the
3874 other call-clobbered regs for 64-bit. */
3877 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
3879 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3880 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
3881 && call_used_regs[i])
3882 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
3885 /* If MMX is disabled, squash the registers. */
3887 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3888 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
3889 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3891 /* If SSE is disabled, squash the registers. */
3893 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3894 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
3895 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3897 /* If the FPU is disabled, squash the registers. */
3898 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
3899 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3900 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
3901 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3903 /* If 32-bit, squash the 64-bit registers. */
3906 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
3908 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
3914 /* Save the current options */
3917 ix86_function_specific_save (struct cl_target_option *ptr)
3919 ptr->arch = ix86_arch;
3920 ptr->schedule = ix86_schedule;
3921 ptr->tune = ix86_tune;
3922 ptr->branch_cost = ix86_branch_cost;
3923 ptr->tune_defaulted = ix86_tune_defaulted;
3924 ptr->arch_specified = ix86_arch_specified;
3925 ptr->x_ix86_isa_flags_explicit = ix86_isa_flags_explicit;
3926 ptr->ix86_target_flags_explicit = target_flags_explicit;
3928 /* The fields are char but the variables are not; make sure the
3929 values fit in the fields. */
3930 gcc_assert (ptr->arch == ix86_arch);
3931 gcc_assert (ptr->schedule == ix86_schedule);
3932 gcc_assert (ptr->tune == ix86_tune);
3933 gcc_assert (ptr->branch_cost == ix86_branch_cost);
3936 /* Restore the current options */
3939 ix86_function_specific_restore (struct cl_target_option *ptr)
3941 enum processor_type old_tune = ix86_tune;
3942 enum processor_type old_arch = ix86_arch;
3943 unsigned int ix86_arch_mask, ix86_tune_mask;
3946 ix86_arch = (enum processor_type) ptr->arch;
3947 ix86_schedule = (enum attr_cpu) ptr->schedule;
3948 ix86_tune = (enum processor_type) ptr->tune;
3949 ix86_branch_cost = ptr->branch_cost;
3950 ix86_tune_defaulted = ptr->tune_defaulted;
3951 ix86_arch_specified = ptr->arch_specified;
3952 ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
3953 target_flags_explicit = ptr->ix86_target_flags_explicit;
3955 /* Recreate the arch feature tests if the arch changed */
3956 if (old_arch != ix86_arch)
3958 ix86_arch_mask = 1u << ix86_arch;
3959 for (i = 0; i < X86_ARCH_LAST; ++i)
3960 ix86_arch_features[i]
3961 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3964 /* Recreate the tune optimization tests */
3965 if (old_tune != ix86_tune)
3967 ix86_tune_mask = 1u << ix86_tune;
3968 for (i = 0; i < X86_TUNE_LAST; ++i)
3969 ix86_tune_features[i]
3970 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3974 /* Print the current options */
3977 ix86_function_specific_print (FILE *file, int indent,
3978 struct cl_target_option *ptr)
3981 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
3982 NULL, NULL, ptr->x_ix86_fpmath, false);
3984 fprintf (file, "%*sarch = %d (%s)\n",
3987 ((ptr->arch < TARGET_CPU_DEFAULT_max)
3988 ? cpu_names[ptr->arch]
3991 fprintf (file, "%*stune = %d (%s)\n",
3994 ((ptr->tune < TARGET_CPU_DEFAULT_max)
3995 ? cpu_names[ptr->tune]
3998 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4002 fprintf (file, "%*s%s\n", indent, "", target_string);
4003 free (target_string);
4008 /* Inner function to process the attribute((target(...))), take an argument and
4009 set the current options from the argument. If we have a list, recursively go
4013 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
4014 struct gcc_options *enum_opts_set)
4019 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4020 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4021 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4022 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4023 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4039 enum ix86_opt_type type;
4044 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4045 IX86_ATTR_ISA ("abm", OPT_mabm),
4046 IX86_ATTR_ISA ("bmi", OPT_mbmi),
4047 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
4048 IX86_ATTR_ISA ("tbm", OPT_mtbm),
4049 IX86_ATTR_ISA ("aes", OPT_maes),
4050 IX86_ATTR_ISA ("avx", OPT_mavx),
4051 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4052 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4053 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4054 IX86_ATTR_ISA ("sse", OPT_msse),
4055 IX86_ATTR_ISA ("sse2", OPT_msse2),
4056 IX86_ATTR_ISA ("sse3", OPT_msse3),
4057 IX86_ATTR_ISA ("sse4", OPT_msse4),
4058 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4059 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4060 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4061 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4062 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4063 IX86_ATTR_ISA ("xop", OPT_mxop),
4064 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4065 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4066 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4067 IX86_ATTR_ISA ("f16c", OPT_mf16c),
4070 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
4072 /* string options */
4073 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4074 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
4077 IX86_ATTR_YES ("cld",
4081 IX86_ATTR_NO ("fancy-math-387",
4082 OPT_mfancy_math_387,
4083 MASK_NO_FANCY_MATH_387),
4085 IX86_ATTR_YES ("ieee-fp",
4089 IX86_ATTR_YES ("inline-all-stringops",
4090 OPT_minline_all_stringops,
4091 MASK_INLINE_ALL_STRINGOPS),
4093 IX86_ATTR_YES ("inline-stringops-dynamically",
4094 OPT_minline_stringops_dynamically,
4095 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4097 IX86_ATTR_NO ("align-stringops",
4098 OPT_mno_align_stringops,
4099 MASK_NO_ALIGN_STRINGOPS),
4101 IX86_ATTR_YES ("recip",
4107 /* If this is a list, recurse to get the options. */
4108 if (TREE_CODE (args) == TREE_LIST)
4112 for (; args; args = TREE_CHAIN (args))
4113 if (TREE_VALUE (args)
4114 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
4115 p_strings, enum_opts_set))
4121 else if (TREE_CODE (args) != STRING_CST)
4124 /* Handle multiple arguments separated by commas. */
4125 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4127 while (next_optstr && *next_optstr != '\0')
4129 char *p = next_optstr;
4131 char *comma = strchr (next_optstr, ',');
4132 const char *opt_string;
4133 size_t len, opt_len;
4138 enum ix86_opt_type type = ix86_opt_unknown;
4144 len = comma - next_optstr;
4145 next_optstr = comma + 1;
4153 /* Recognize no-xxx. */
4154 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4163 /* Find the option. */
4166 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4168 type = attrs[i].type;
4169 opt_len = attrs[i].len;
4170 if (ch == attrs[i].string[0]
4171 && ((type != ix86_opt_str && type != ix86_opt_enum)
4174 && memcmp (p, attrs[i].string, opt_len) == 0)
4177 mask = attrs[i].mask;
4178 opt_string = attrs[i].string;
4183 /* Process the option. */
4186 error ("attribute(target(\"%s\")) is unknown", orig_p);
4190 else if (type == ix86_opt_isa)
4192 struct cl_decoded_option decoded;
4194 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
4195 ix86_handle_option (&global_options, &global_options_set,
4196 &decoded, input_location);
4199 else if (type == ix86_opt_yes || type == ix86_opt_no)
4201 if (type == ix86_opt_no)
4202 opt_set_p = !opt_set_p;
4205 target_flags |= mask;
4207 target_flags &= ~mask;
4210 else if (type == ix86_opt_str)
4214 error ("option(\"%s\") was already specified", opt_string);
4218 p_strings[opt] = xstrdup (p + opt_len);
4221 else if (type == ix86_opt_enum)
4226 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
4228 set_option (&global_options, enum_opts_set, opt, value,
4229 p + opt_len, DK_UNSPECIFIED, input_location,
4233 error ("attribute(target(\"%s\")) is unknown", orig_p);
4245 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4248 ix86_valid_target_attribute_tree (tree args)
4250 const char *orig_arch_string = ix86_arch_string;
4251 const char *orig_tune_string = ix86_tune_string;
4252 enum fpmath_unit orig_fpmath_set = global_options_set.x_ix86_fpmath;
4253 int orig_tune_defaulted = ix86_tune_defaulted;
4254 int orig_arch_specified = ix86_arch_specified;
4255 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
4258 struct cl_target_option *def
4259 = TREE_TARGET_OPTION (target_option_default_node);
4260 struct gcc_options enum_opts_set;
4262 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
4264 /* Process each of the options on the chain. */
4265 if (! ix86_valid_target_attribute_inner_p (args, option_strings,
4269 /* If the changed options are different from the default, rerun
4270 ix86_option_override_internal, and then save the options away.
4271 The string options are attribute options, and will be undone
4272 when we copy the save structure. */
4273 if (ix86_isa_flags != def->x_ix86_isa_flags
4274 || target_flags != def->x_target_flags
4275 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4276 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4277 || enum_opts_set.x_ix86_fpmath)
4279 /* If we are using the default tune= or arch=, undo the string assigned,
4280 and use the default. */
4281 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4282 ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4283 else if (!orig_arch_specified)
4284 ix86_arch_string = NULL;
4286 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4287 ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4288 else if (orig_tune_defaulted)
4289 ix86_tune_string = NULL;
4291 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4292 if (enum_opts_set.x_ix86_fpmath)
4293 global_options_set.x_ix86_fpmath = (enum fpmath_unit) 1;
4294 else if (!TARGET_64BIT && TARGET_SSE)
4296 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
4297 global_options_set.x_ix86_fpmath = (enum fpmath_unit) 1;
4300 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4301 ix86_option_override_internal (false);
4303 /* Add any builtin functions with the new isa if any. */
4304 ix86_add_new_builtins (ix86_isa_flags);
4306 /* Save the current options unless we are validating options for
4308 t = build_target_option_node ();
4310 ix86_arch_string = orig_arch_string;
4311 ix86_tune_string = orig_tune_string;
4312 global_options_set.x_ix86_fpmath = orig_fpmath_set;
4314 /* Free up memory allocated to hold the strings */
4315 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4316 free (option_strings[i]);
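/* Example of user code that reaches this function (illustrative):

     int foo (void) __attribute__((target ("sse4.2,arch=core2")));

   The ISA items toggle ix86_isa_flags directly, while the "arch="
   string is stashed in option_strings and applied by rerunning
   ix86_option_override_internal above.  */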
4322 /* Hook to validate attribute((target("string"))). */
4325 ix86_valid_target_attribute_p (tree fndecl,
4326 tree ARG_UNUSED (name),
4328 int ARG_UNUSED (flags))
4330 struct cl_target_option cur_target;
4332 tree old_optimize = build_optimization_node ();
4333 tree new_target, new_optimize;
4334 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4336 /* If the function changed the optimization levels as well as setting target
4337 options, start with the optimizations specified. */
4338 if (func_optimize && func_optimize != old_optimize)
4339 cl_optimization_restore (&global_options,
4340 TREE_OPTIMIZATION (func_optimize));
4342 /* The target attributes may also change some optimization flags, so update
4343 the optimization options if necessary. */
4344 cl_target_option_save (&cur_target, &global_options);
4345 new_target = ix86_valid_target_attribute_tree (args);
4346 new_optimize = build_optimization_node ();
4353 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4355 if (old_optimize != new_optimize)
4356 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4359 cl_target_option_restore (&global_options, &cur_target);
4361 if (old_optimize != new_optimize)
4362 cl_optimization_restore (&global_options,
4363 TREE_OPTIMIZATION (old_optimize));
4369 /* Hook to determine if one function can safely inline another. */
4372 ix86_can_inline_p (tree caller, tree callee)
4375 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
4376 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
4378 /* If callee has no option attributes, then it is ok to inline. */
4382 /* If caller has no option attributes but callee does, then it is not ok to
4384 else if (!caller_tree)
4389 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
4390 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
4392 /* Callee's isa options should be a subset of the caller's, i.e. an SSE4 function
4393 can inline an SSE2 function but an SSE2 function can't inline an SSE4
4395 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
4396 != callee_opts->x_ix86_isa_flags)
4399 /* See if we have the same non-isa options. */
4400 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
4403 /* See if arch, tune, etc. are the same. */
4404 else if (caller_opts->arch != callee_opts->arch)
4407 else if (caller_opts->tune != callee_opts->tune)
4410 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
4413 else if (caller_opts->branch_cost != callee_opts->branch_cost)
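/* Illustrative consequence of the ISA-subset check above: a caller
   marked __attribute__((target ("sse4.2"))) may inline a callee
   marked target ("sse2"), because the callee's ISA bits are a subset
   of the caller's, but an sse2 caller may not inline an sse4.2
   callee.  */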
4424 /* Remember the last target of ix86_set_current_function. */
4425 static GTY(()) tree ix86_previous_fndecl;
4427 /* Establish appropriate back-end context for processing the function
4428 FNDECL. The argument might be NULL to indicate processing at top
4429 level, outside of any function scope. */
4431 ix86_set_current_function (tree fndecl)
4433 /* Only change the context if the function changes. This hook is called
4434 several times in the course of compiling a function, and we don't want to
4435 slow things down too much or call target_reinit when it isn't safe. */
4436 if (fndecl && fndecl != ix86_previous_fndecl)
4438 tree old_tree = (ix86_previous_fndecl
4439 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
4442 tree new_tree = (fndecl
4443 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
4446 ix86_previous_fndecl = fndecl;
4447 if (old_tree == new_tree)
4452 cl_target_option_restore (&global_options,
4453 TREE_TARGET_OPTION (new_tree));
4459 struct cl_target_option *def
4460 = TREE_TARGET_OPTION (target_option_current_node);
4462 cl_target_option_restore (&global_options, def);
4469 /* Return true if this goes in large data/bss. */
4472 ix86_in_large_data_p (tree exp)
4474 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
4477 /* Functions are never large data. */
4478 if (TREE_CODE (exp) == FUNCTION_DECL)
4481 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
4483 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
4484 if (strcmp (section, ".ldata") == 0
4485 || strcmp (section, ".lbss") == 0)
4491 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
4493 /* If this is an incomplete type with size 0, then we can't put it
4494 in data because it might be too big when completed. */
4495 if (!size || size > ix86_section_threshold)
4502 /* Switch to the appropriate section for output of DECL.
4503 DECL is either a `VAR_DECL' node or a constant of some sort.
4504 RELOC indicates whether forming the initial value of DECL requires
4505 link-time relocations. */
4507 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
4511 x86_64_elf_select_section (tree decl, int reloc,
4512 unsigned HOST_WIDE_INT align)
4514 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4515 && ix86_in_large_data_p (decl))
4517 const char *sname = NULL;
4518 unsigned int flags = SECTION_WRITE;
4519 switch (categorize_decl_for_section (decl, reloc))
4524 case SECCAT_DATA_REL:
4525 sname = ".ldata.rel";
4527 case SECCAT_DATA_REL_LOCAL:
4528 sname = ".ldata.rel.local";
4530 case SECCAT_DATA_REL_RO:
4531 sname = ".ldata.rel.ro";
4533 case SECCAT_DATA_REL_RO_LOCAL:
4534 sname = ".ldata.rel.ro.local";
4538 flags |= SECTION_BSS;
4541 case SECCAT_RODATA_MERGE_STR:
4542 case SECCAT_RODATA_MERGE_STR_INIT:
4543 case SECCAT_RODATA_MERGE_CONST:
4547 case SECCAT_SRODATA:
4554 /* We don't split these for the medium model. Place them into
4555 default sections and hope for the best. */
4560 /* We might get called with string constants, but get_named_section
4561 doesn't like them as they are not DECLs. Also, we need to set
4562 flags in that case. */
4564 return get_section (sname, flags, NULL);
4565 return get_named_section (decl, sname, reloc);
4568 return default_elf_select_section (decl, reloc, align);
4571 /* Build up a unique section name, expressed as a
4572 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
4573 RELOC indicates whether the initial value of EXP requires
4574 link-time relocations. */
4576 static void ATTRIBUTE_UNUSED
4577 x86_64_elf_unique_section (tree decl, int reloc)
4579 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4580 && ix86_in_large_data_p (decl))
4582 const char *prefix = NULL;
4583 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
4584 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
4586 switch (categorize_decl_for_section (decl, reloc))
4589 case SECCAT_DATA_REL:
4590 case SECCAT_DATA_REL_LOCAL:
4591 case SECCAT_DATA_REL_RO:
4592 case SECCAT_DATA_REL_RO_LOCAL:
4593 prefix = one_only ? ".ld" : ".ldata";
4596 prefix = one_only ? ".lb" : ".lbss";
4599 case SECCAT_RODATA_MERGE_STR:
4600 case SECCAT_RODATA_MERGE_STR_INIT:
4601 case SECCAT_RODATA_MERGE_CONST:
4602 prefix = one_only ? ".lr" : ".lrodata";
4604 case SECCAT_SRODATA:
4611 /* We don't split these for the medium model. Place them into
4612 default sections and hope for the best. */
4617 const char *name, *linkonce;
4620 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
4621 name = targetm.strip_name_encoding (name);
4623 /* If we're using one_only, then there needs to be a .gnu.linkonce
4624 prefix to the section name. */
4625 linkonce = one_only ? ".gnu.linkonce" : "";
4627 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
4629 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
4633 default_unique_section (decl, reloc);
4636 #ifdef COMMON_ASM_OP
4637 /* This says how to output assembler code to declare an
4638 uninitialized external linkage data object.
4640 For medium model x86-64 we need to use the .largecomm directive for
4643 x86_elf_aligned_common (FILE *file,
4644 const char *name, unsigned HOST_WIDE_INT size,
4647 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4648 && size > (unsigned int)ix86_section_threshold)
4649 fputs (".largecomm\t", file);
4651 fputs (COMMON_ASM_OP, file);
4652 assemble_name (file, name);
4653 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
4654 size, align / BITS_PER_UNIT);
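/* Sketch of the resulting assembly for a hypothetical symbol: a
   medium-model object larger than ix86_section_threshold produces

       .largecomm  big_array,65536,32

   (size in bytes, then byte alignment), while smaller objects use
   the ordinary COMMON_ASM_OP form.  */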
4658 /* Utility function for targets to use in implementing
4659 ASM_OUTPUT_ALIGNED_BSS. */
4662 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
4663 const char *name, unsigned HOST_WIDE_INT size,
4666 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4667 && size > (unsigned int)ix86_section_threshold)
4668 switch_to_section (get_named_section (decl, ".lbss", 0));
4670 switch_to_section (bss_section);
4671 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
4672 #ifdef ASM_DECLARE_OBJECT_NAME
4673 last_assemble_variable_decl = decl;
4674 ASM_DECLARE_OBJECT_NAME (file, name, decl);
4676 /* Standard thing is just output label for the object. */
4677 ASM_OUTPUT_LABEL (file, name);
4678 #endif /* ASM_DECLARE_OBJECT_NAME */
4679 ASM_OUTPUT_SKIP (file, size ? size : 1);
4682 /* Decide whether we must probe the stack before any space allocation
4683 on this target. It's essentially TARGET_STACK_PROBE except when
4684 -fstack-check causes the stack to be already probed differently. */
4687 ix86_target_stack_probe (void)
4689 /* Do not probe the stack twice if static stack checking is enabled. */
4690 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
4693 return TARGET_STACK_PROBE;
4696 /* Decide whether we can make a sibling call to a function. DECL is the
4697 declaration of the function being targeted by the call and EXP is the
4698 CALL_EXPR representing the call. */
4701 ix86_function_ok_for_sibcall (tree decl, tree exp)
4703 tree type, decl_or_type;
4706 /* If we are generating position-independent code, we cannot sibcall
4707 optimize any indirect call, or a direct call to a global function,
4708 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
4712 && (!decl || !targetm.binds_local_p (decl)))
4715 /* If we need to align the outgoing stack, then sibcalling would
4716 unalign the stack, which may break the called function. */
4717 if (ix86_minimum_incoming_stack_boundary (true)
4718 < PREFERRED_STACK_BOUNDARY)
4723 decl_or_type = decl;
4724 type = TREE_TYPE (decl);
4728 /* We're looking at the CALL_EXPR, we need the type of the function. */
4729 type = CALL_EXPR_FN (exp); /* pointer expression */
4730 type = TREE_TYPE (type); /* pointer type */
4731 type = TREE_TYPE (type); /* function type */
4732 decl_or_type = type;
4735 /* Check that the return value locations are the same. Like
4736 if we are returning floats on the 80387 register stack, we cannot
4737 make a sibcall from a function that doesn't return a float to a
4738 function that does or, conversely, from a function that does return
4739 a float to a function that doesn't; the necessary stack adjustment
4740 would not be executed. This is also the place we notice
4741 differences in the return value ABI. Note that it is ok for one
4742 of the functions to have void return type as long as the return
4743 value of the other is passed in a register. */
4744 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
4745 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4747 if (STACK_REG_P (a) || STACK_REG_P (b))
4749 if (!rtx_equal_p (a, b))
4752 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4754 /* Disable sibcall if we need to generate vzeroupper after
4756 if (TARGET_VZEROUPPER
4757 && cfun->machine->callee_return_avx256_p
4758 && !cfun->machine->caller_return_avx256_p)
4761 else if (!rtx_equal_p (a, b))
4766 /* The SYSV ABI has more call-clobbered registers;
4767 disallow sibcalls from MS to SYSV. */
4768 if (cfun->machine->call_abi == MS_ABI
4769 && ix86_function_type_abi (type) == SYSV_ABI)
4774 /* If this call is indirect, we'll need to be able to use a
4775 call-clobbered register for the address of the target function.
4776 Make sure that all such registers are not used for passing
4777 parameters. Note that DLLIMPORT functions are indirect. */
4779 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
4781 if (ix86_function_regparm (type, NULL) >= 3)
4783 /* ??? Need to count the actual number of registers to be used,
4784 not the possible number of registers. Fix later. */
4790 /* Otherwise okay. That also includes certain types of indirect calls. */
4794 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
4795 and "sseregparm" calling convention attributes;
4796 arguments as in struct attribute_spec.handler. */
4799 ix86_handle_cconv_attribute (tree *node, tree name,
4801 int flags ATTRIBUTE_UNUSED,
4804 if (TREE_CODE (*node) != FUNCTION_TYPE
4805 && TREE_CODE (*node) != METHOD_TYPE
4806 && TREE_CODE (*node) != FIELD_DECL
4807 && TREE_CODE (*node) != TYPE_DECL)
4809 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4811 *no_add_attrs = true;
4815 /* Can combine regparm with all attributes but fastcall and thiscall. */
4816 if (is_attribute_p ("regparm", name))
4820 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4822 error ("fastcall and regparm attributes are not compatible");
4825 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4827 error ("regparm and thiscall attributes are not compatible");
4830 cst = TREE_VALUE (args);
4831 if (TREE_CODE (cst) != INTEGER_CST)
4833 warning (OPT_Wattributes,
4834 "%qE attribute requires an integer constant argument",
4836 *no_add_attrs = true;
4838 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
4840 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
4842 *no_add_attrs = true;
4850 /* Do not warn when emulating the MS ABI. */
4851 if ((TREE_CODE (*node) != FUNCTION_TYPE
4852 && TREE_CODE (*node) != METHOD_TYPE)
4853 || ix86_function_type_abi (*node) != MS_ABI)
4854 warning (OPT_Wattributes, "%qE attribute ignored",
4856 *no_add_attrs = true;
4860 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
4861 if (is_attribute_p ("fastcall", name))
4863 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4865 error ("fastcall and cdecl attributes are not compatible");
4867 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4869 error ("fastcall and stdcall attributes are not compatible");
4871 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
4873 error ("fastcall and regparm attributes are not compatible");
4875 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4877 error ("fastcall and thiscall attributes are not compatible");
4881 /* Can combine stdcall with fastcall (redundant), regparm and
4883 else if (is_attribute_p ("stdcall", name))
4885 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4887 error ("stdcall and cdecl attributes are not compatible");
4889 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4891 error ("stdcall and fastcall attributes are not compatible");
4893 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4895 error ("stdcall and thiscall attributes are not compatible");
4899 /* Can combine cdecl with regparm and sseregparm. */
4900 else if (is_attribute_p ("cdecl", name))
4902 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4904 error ("stdcall and cdecl attributes are not compatible");
4906 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4908 error ("fastcall and cdecl attributes are not compatible");
4910 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4912 error ("cdecl and thiscall attributes are not compatible");
4915 else if (is_attribute_p ("thiscall", name))
4917 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
4918 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
4920 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4922 error ("stdcall and thiscall attributes are not compatible");
4924 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4926 error ("fastcall and thiscall attributes are not compatible");
4928 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4930 error ("cdecl and thiscall attributes are not compatible");
4934 /* Can combine sseregparm with all attributes. */
4939 /* This function determines the calling convention from TYPE. */
4942 ix86_get_callcvt (const_tree type)
4944 unsigned int ret = 0;
4949 return IX86_CALLCVT_CDECL;
4951 attrs = TYPE_ATTRIBUTES (type);
4952 if (attrs != NULL_TREE)
4954 if (lookup_attribute ("cdecl", attrs))
4955 ret |= IX86_CALLCVT_CDECL;
4956 else if (lookup_attribute ("stdcall", attrs))
4957 ret |= IX86_CALLCVT_STDCALL;
4958 else if (lookup_attribute ("fastcall", attrs))
4959 ret |= IX86_CALLCVT_FASTCALL;
4960 else if (lookup_attribute ("thiscall", attrs))
4961 ret |= IX86_CALLCVT_THISCALL;
4963 /* Regparm isn't allowed for thiscall and fastcall. */
4964 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
4966 if (lookup_attribute ("regparm", attrs))
4967 ret |= IX86_CALLCVT_REGPARM;
4968 if (lookup_attribute ("sseregparm", attrs))
4969 ret |= IX86_CALLCVT_SSEREGPARM;
4972 if (IX86_BASE_CALLCVT(ret) != 0)
4976 is_stdarg = stdarg_p (type);
4977 if (TARGET_RTD && !is_stdarg)
4978 return IX86_CALLCVT_STDCALL | ret;
4982 || TREE_CODE (type) != METHOD_TYPE
4983 || ix86_function_type_abi (type) != MS_ABI)
4984 return IX86_CALLCVT_CDECL | ret;
4986 return IX86_CALLCVT_THISCALL;
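/* Illustrative mapping for hypothetical declarations:

     void __attribute__((fastcall)) f (int, int);
       -> IX86_CALLCVT_FASTCALL
     void __attribute__((stdcall, regparm (2))) g (int);
       -> IX86_CALLCVT_STDCALL | IX86_CALLCVT_REGPARM
     void h (int, ...);
       -> IX86_CALLCVT_CDECL even under -mrtd, since the TARGET_RTD
          branch above is skipped for stdarg functions.  */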
4989 /* Return 0 if the attributes for two types are incompatible, 1 if they
4990 are compatible, and 2 if they are nearly compatible (which causes a
4991 warning to be generated). */
4994 ix86_comp_type_attributes (const_tree type1, const_tree type2)
4996 unsigned int ccvt1, ccvt2;
4998 if (TREE_CODE (type1) != FUNCTION_TYPE
4999 && TREE_CODE (type1) != METHOD_TYPE)
5002 ccvt1 = ix86_get_callcvt (type1);
5003 ccvt2 = ix86_get_callcvt (type2);
5006 if (ix86_function_regparm (type1, NULL)
5007 != ix86_function_regparm (type2, NULL))
5013 /* Return the regparm value for a function with the indicated TYPE and DECL.
5014 DECL may be NULL when calling function indirectly
5015 or considering a libcall. */
5018 ix86_function_regparm (const_tree type, const_tree decl)
5025 return (ix86_function_type_abi (type) == SYSV_ABI
5026 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5027 ccvt = ix86_get_callcvt (type);
5028 regparm = ix86_regparm;
5030 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
5032 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5035 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5039 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5041 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5044 /* Use register calling convention for local functions when possible. */
5046 && TREE_CODE (decl) == FUNCTION_DECL
5048 && !(profile_flag && !flag_fentry))
5050 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5051 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
5052 if (i && i->local && i->can_change_signature)
5054 int local_regparm, globals = 0, regno;
5056 /* Make sure no regparm register is taken by a
5057 fixed register variable. */
5058 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
5059 if (fixed_regs[local_regparm])
5062 /* We don't want to use regparm(3) for nested functions as
5063 these use a static chain pointer in the third argument. */
5064 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
5067 /* In 32-bit mode save a register for the split stack. */
5068 if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
5071 /* Each fixed register usage increases register pressure,
5072 so fewer registers should be used for argument passing.
5073 This functionality can be overridden by an explicit
5075 for (regno = 0; regno <= DI_REG; regno++)
5076 if (fixed_regs[regno])
5080 = globals < local_regparm ? local_regparm - globals : 0;
5082 if (local_regparm > regparm)
5083 regparm = local_regparm;
5090 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5091 DFmode (2) arguments in SSE registers for a function with the
5092 indicated TYPE and DECL. DECL may be NULL when calling function
5093 indirectly or considering a libcall. Otherwise return 0. */
5096 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
5098 gcc_assert (!TARGET_64BIT);
5100 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5101 by the sseregparm attribute. */
5102 if (TARGET_SSEREGPARM
5103 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
5110 error ("calling %qD with attribute sseregparm without "
5111 "SSE/SSE2 enabled", decl);
5113 error ("calling %qT with attribute sseregparm without "
5114 "SSE/SSE2 enabled", type);
5122 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5123 (and DFmode for SSE2) arguments in SSE registers. */
5124 if (decl && TARGET_SSE_MATH && optimize
5125 && !(profile_flag && !flag_fentry))
5127 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5128 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
5129 if (i && i->local && i->can_change_signature)
5130 return TARGET_SSE2 ? 2 : 1;
5136 /* Return true if EAX is live at the start of the function. Used by
5137 ix86_expand_prologue to determine if we need special help before
5138 calling allocate_stack_worker. */
5141 ix86_eax_live_at_start_p (void)
5143 /* Cheat. Don't bother working forward from ix86_function_regparm
5144 to the function type to whether an actual argument is located in
5145 eax. Instead just look at cfg info, which is still close enough
5146 to correct at this point. This gives false positives for broken
5147 functions that might use uninitialized data that happens to be
5148 allocated in eax, but who cares? */
5149 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
5153 ix86_keep_aggregate_return_pointer (tree fntype)
5159 attr = lookup_attribute ("callee_pop_aggregate_return",
5160 TYPE_ATTRIBUTES (fntype));
5162 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
5164 /* For 32-bit MS-ABI the default is to keep aggregate
5166 if (ix86_function_type_abi (fntype) == MS_ABI)
5169 return KEEP_AGGREGATE_RETURN_POINTER != 0;
5172 /* Value is the number of bytes of arguments automatically
5173 popped when returning from a subroutine call.
5174 FUNDECL is the declaration node of the function (as a tree),
5175 FUNTYPE is the data type of the function (as a tree),
5176 or for a library call it is an identifier node for the subroutine name.
5177 SIZE is the number of bytes of arguments passed on the stack.
5179 On the 80386, the RTD insn may be used to pop them if the number
5180 of args is fixed, but if the number is variable then the caller
5181 must pop them all. RTD can't be used for library calls now
5182 because the library is compiled with the Unix compiler.
5183 Use of RTD is a selectable option, since it is incompatible with
5184 standard Unix calling sequences. If the option is not selected,
5185 the caller must always pop the args.
5187 The attribute stdcall is equivalent to RTD on a per module basis. */
5190 ix86_return_pops_args (tree fundecl, tree funtype, int size)
5194 /* None of the 64-bit ABIs pop arguments. */
5198 ccvt = ix86_get_callcvt (funtype);
5200 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
5201 | IX86_CALLCVT_THISCALL)) != 0
5202 && ! stdarg_p (funtype))
5205 /* Lose any fake structure return argument if it is passed on the stack. */
5206 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
5207 && !ix86_keep_aggregate_return_pointer (funtype))
5209 int nregs = ix86_function_regparm (funtype, fundecl);
5211 return GET_MODE_SIZE (Pmode);
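/* Worked example (illustrative): for

     void __attribute__((stdcall)) f (int a, int b);

   SIZE is 8 on ia32 and the stdcall branch above returns 8, so f
   pops its own arguments with "ret $8"; a varargs prototype would
   instead return 0 and leave the pop to the caller.  */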
5217 /* Argument support functions. */
5219 /* Return true when register may be used to pass function parameters. */
5221 ix86_function_arg_regno_p (int regno)
5224 const int *parm_regs;
5229 return (regno < REGPARM_MAX
5230 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
5232 return (regno < REGPARM_MAX
5233 || (TARGET_MMX && MMX_REGNO_P (regno)
5234 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
5235 || (TARGET_SSE && SSE_REGNO_P (regno)
5236 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
5241 if (SSE_REGNO_P (regno) && TARGET_SSE)
5246 if (TARGET_SSE && SSE_REGNO_P (regno)
5247 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
5251 /* TODO: The function should depend on current function ABI but
5252 builtins.c would need updating then. Therefore we use the
5255 /* RAX is used as hidden argument to va_arg functions. */
5256 if (ix86_abi == SYSV_ABI && regno == AX_REG)
5259 if (ix86_abi == MS_ABI)
5260 parm_regs = x86_64_ms_abi_int_parameter_registers;
5262 parm_regs = x86_64_int_parameter_registers;
5263 for (i = 0; i < (ix86_abi == MS_ABI
5264 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
5265 if (regno == parm_regs[i])
5270 /* Return true if we do not know how to pass TYPE solely in registers. */
5273 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
5275 if (must_pass_in_stack_var_size_or_pad (mode, type))
5278 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5279 The layout_type routine is crafty and tries to trick us into passing
5280 currently unsupported vector types on the stack by using TImode. */
5281 return (!TARGET_64BIT && mode == TImode
5282 && type && TREE_CODE (type) != VECTOR_TYPE);
5285 /* Return the size, in bytes, of the area reserved for arguments passed
5286 in registers for the function represented by FNDECL, depending on the used
5289 ix86_reg_parm_stack_space (const_tree fndecl)
5291 enum calling_abi call_abi = SYSV_ABI;
5292 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
5293 call_abi = ix86_function_abi (fndecl);
5295 call_abi = ix86_function_type_abi (fndecl);
5296 if (TARGET_64BIT && call_abi == MS_ABI)
5301 /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
5304 ix86_function_type_abi (const_tree fntype)
5306 if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
5308 enum calling_abi abi = ix86_abi;
5309 if (abi == SYSV_ABI)
5311 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
5314 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
5322 ix86_function_ms_hook_prologue (const_tree fn)
5324 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
5326 if (decl_function_context (fn) != NULL_TREE)
5327 error_at (DECL_SOURCE_LOCATION (fn),
5328 "ms_hook_prologue is not compatible with nested function");
5335 static enum calling_abi
5336 ix86_function_abi (const_tree fndecl)
5340 return ix86_function_type_abi (TREE_TYPE (fndecl));
5343 /* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
5346 ix86_cfun_abi (void)
5350 return cfun->machine->call_abi;
5353 /* Write the extra assembler code needed to declare a function properly. */
5356 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
5359 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
5363 int i, filler_count = (TARGET_64BIT ? 32 : 16);
5364 unsigned int filler_cc = 0xcccccccc;
5366 for (i = 0; i < filler_count; i += 4)
5367 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
5370 #ifdef SUBTARGET_ASM_UNWIND_INIT
5371 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
5374 ASM_OUTPUT_LABEL (asm_out_file, fname);
5376 /* Output magic byte marker, if hot-patch attribute is set. */
5381 /* leaq [%rsp + 0], %rsp */
5382 asm_fprintf (asm_out_file, ASM_BYTE
5383 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
5387 /* movl.s %edi, %edi
5389 movl.s %esp, %ebp */
5390 asm_fprintf (asm_out_file, ASM_BYTE
5391 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
5397 extern void init_regs (void);
5399 /* Implementation of the call abi switching target hook. The call register
5400 set specific to FNDECL is selected. See also
5401 ix86_conditional_register_usage for more details. */
5403 ix86_call_abi_override (const_tree fndecl)
5405 if (fndecl == NULL_TREE)
5406 cfun->machine->call_abi = ix86_abi;
5408 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
5411 /* The 64-bit MS and SYSV ABIs have different sets of call-used registers. Avoid
5412 expensive re-initialization of init_regs each time we switch function context
5413 since this is needed only during RTL expansion. */
5415 ix86_maybe_switch_abi (void)
5418 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
5422 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5423 for a call to a function whose data type is FNTYPE.
5424 For a library call, FNTYPE is 0. */
5427 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
5428 tree fntype, /* tree ptr for function decl */
5429 rtx libname, /* SYMBOL_REF of library name or 0 */
5433 struct cgraph_local_info *i;
5436 memset (cum, 0, sizeof (*cum));
5438 /* Initialize for the current callee. */
5441 cfun->machine->callee_pass_avx256_p = false;
5442 cfun->machine->callee_return_avx256_p = false;
5447 i = cgraph_local_info (fndecl);
5448 cum->call_abi = ix86_function_abi (fndecl);
5449 fnret_type = TREE_TYPE (TREE_TYPE (fndecl));
5454 cum->call_abi = ix86_function_type_abi (fntype);
5456 fnret_type = TREE_TYPE (fntype);
5461 if (TARGET_VZEROUPPER && fnret_type)
5463 rtx fnret_value = ix86_function_value (fnret_type, fntype,
5465 if (function_pass_avx256_p (fnret_value))
5467 /* The return value of this function uses 256bit AVX modes. */
5469 cfun->machine->callee_return_avx256_p = true;
5471 cfun->machine->caller_return_avx256_p = true;
5475 cum->caller = caller;
5477 /* Set up the number of registers to use for passing arguments. */
5479 if (TARGET_64BIT && cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
5480 sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
5481 "or subtarget optimization implying it");
5482 cum->nregs = ix86_regparm;
5485 cum->nregs = (cum->call_abi == SYSV_ABI
5486 ? X86_64_REGPARM_MAX
5487 : X86_64_MS_REGPARM_MAX);
5491 cum->sse_nregs = SSE_REGPARM_MAX;
5494 cum->sse_nregs = (cum->call_abi == SYSV_ABI
5495 ? X86_64_SSE_REGPARM_MAX
5496 : X86_64_MS_SSE_REGPARM_MAX);
5500 cum->mmx_nregs = MMX_REGPARM_MAX;
5501 cum->warn_avx = true;
5502 cum->warn_sse = true;
5503 cum->warn_mmx = true;
5505 /* Because the type might mismatch between caller and callee, we need to
5506 use actual type of function for local calls.
5507 FIXME: cgraph_analyze can be told to actually record if function uses
5508 va_start so for local functions maybe_vaarg can be made aggressive
5510 FIXME: once the type system is fixed, we won't need this code anymore. */
5511 if (i && i->local && i->can_change_signature)
5512 fntype = TREE_TYPE (fndecl);
5513 cum->maybe_vaarg = (fntype
5514 ? (!prototype_p (fntype) || stdarg_p (fntype))
5519 /* If there are variable arguments, then we won't pass anything
5520 in registers in 32-bit mode. */
5521 if (stdarg_p (fntype))
5532 /* Use ecx and edx registers if function has fastcall attribute,
5533 else look for regparm information. */
5536 unsigned int ccvt = ix86_get_callcvt (fntype);
5537 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5540 cum->fastcall = 1; /* Same first register as in fastcall. */
5542 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5548 cum->nregs = ix86_function_regparm (fntype, fndecl);
5551 /* Set up the number of SSE registers used for passing SFmode
5552 and DFmode arguments. Warn for mismatching ABI. */
5553 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
5557 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
5558 But in the case of vector types, it is some vector mode.
5560 When we have only some of our vector isa extensions enabled, then there
5561 are some modes for which vector_mode_supported_p is false. For these
5562 modes, the generic vector support in gcc will choose some non-vector mode
5563 in order to implement the type. By computing the natural mode, we'll
5564 select the proper ABI location for the operand and not depend on whatever
5565 the middle-end decides to do with these vector types.
5567 The middle-end can't deal with vector types > 16 bytes. In this
5568 case, we return the original mode and warn ABI change if CUM isn't
5571 static enum machine_mode
5572 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum)
5574 enum machine_mode mode = TYPE_MODE (type);
5576 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
5578 HOST_WIDE_INT size = int_size_in_bytes (type);
5579 if ((size == 8 || size == 16 || size == 32)
5580 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
5581 && TYPE_VECTOR_SUBPARTS (type) > 1)
5583 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
5585 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
5586 mode = MIN_MODE_VECTOR_FLOAT;
5588 mode = MIN_MODE_VECTOR_INT;
5590 /* Get the mode which has this inner mode and number of units. */
5591 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
5592 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
5593 && GET_MODE_INNER (mode) == innermode)
5595 if (size == 32 && !TARGET_AVX)
5597 static bool warnedavx;
5604 warning (0, "AVX vector argument without AVX "
5605 "enabled changes the ABI");
5607 return TYPE_MODE (type);
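/* Illustrative example (a sketch only, not part of the original code; kept
   under #if 0 so it is never compiled): for a generic vector type built
   without the matching ISA enabled, TYPE_MODE is a non-vector mode, but
   type_natural_mode still reports the vector mode, so the ABI rules see
   the real shape of the value.  */
#if 0
typedef float v4sf __attribute__ ((vector_size (16)));
extern void callee (v4sf x);	/* Natural mode: V4SFmode, even with -mno-sse
				   (the 32-bit code then warns about it).  */
#endif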
5620 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
5621 this may not agree with the mode that the type system has chosen for the
5622 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
5623 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
5626 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
5631 if (orig_mode != BLKmode)
5632 tmp = gen_rtx_REG (orig_mode, regno);
5635 tmp = gen_rtx_REG (mode, regno);
5636 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
5637 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
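/* Illustration only (not from the original source): for a BLKmode aggregate
   whose natural mode is, say, TImode in an SSE register, the PARALLEL built
   above looks roughly like
     (parallel:BLK [(expr_list (reg:TI xmm0) (const_int 0))])
   i.e. a single register piece starting at byte offset 0.  */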
5643 /* x86-64 register passing implementation. See the x86-64 ABI for details. The goal
5644 of this code is to classify each 8 bytes of an incoming argument by register
5645 class and assign registers accordingly. */
5647 /* Return the union class of CLASS1 and CLASS2.
5648 See the x86-64 PS ABI for details. */
5650 static enum x86_64_reg_class
5651 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
5653 /* Rule #1: If both classes are equal, this is the resulting class. */
5654 if (class1 == class2)
5657 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
5659 if (class1 == X86_64_NO_CLASS)
5661 if (class2 == X86_64_NO_CLASS)
5664 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
5665 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
5666 return X86_64_MEMORY_CLASS;
5668 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
5669 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
5670 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
5671 return X86_64_INTEGERSI_CLASS;
5672 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
5673 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
5674 return X86_64_INTEGER_CLASS;
5676 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
5678 if (class1 == X86_64_X87_CLASS
5679 || class1 == X86_64_X87UP_CLASS
5680 || class1 == X86_64_COMPLEX_X87_CLASS
5681 || class2 == X86_64_X87_CLASS
5682 || class2 == X86_64_X87UP_CLASS
5683 || class2 == X86_64_COMPLEX_X87_CLASS)
5684 return X86_64_MEMORY_CLASS;
5686 /* Rule #6: Otherwise class SSE is used. */
5687 return X86_64_SSE_CLASS;
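/* Worked example (a sketch of the rules above, illustration only): for an
   eightbyte containing
     struct { int i; float f; }
   the two halves classify as X86_64_INTEGERSI_CLASS and X86_64_SSESF_CLASS;
   rule #4 merges them to X86_64_INTEGERSI_CLASS, so the whole pair travels
   in one general-purpose register.  */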
5690 /* Classify the argument of type TYPE and mode MODE.
5691 CLASSES will be filled by the register class used to pass each word
5692 of the operand. The number of words is returned. In case the parameter
5693 should be passed in memory, 0 is returned. As a special case for zero
5694 sized containers, classes[0] will be NO_CLASS and 1 is returned.
5696 BIT_OFFSET is used internally for handling records; it specifies the
5697 offset in bits, modulo 256 to avoid overflow cases.
5699 See the x86-64 PS ABI for details.
5703 classify_argument (enum machine_mode mode, const_tree type,
5704 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
5706 HOST_WIDE_INT bytes =
5707 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5708 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5710 /* Variable sized entities are always passed/returned in memory. */
5714 if (mode != VOIDmode
5715 && targetm.calls.must_pass_in_stack (mode, type))
5718 if (type && AGGREGATE_TYPE_P (type))
5722 enum x86_64_reg_class subclasses[MAX_CLASSES];
5724 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
5728 for (i = 0; i < words; i++)
5729 classes[i] = X86_64_NO_CLASS;
5731 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
5732 signal the memory class, so handle this as a special case. */
5735 classes[0] = X86_64_NO_CLASS;
5739 /* Classify each field of record and merge classes. */
5740 switch (TREE_CODE (type))
5743 /* And now merge the fields of structure. */
5744 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5746 if (TREE_CODE (field) == FIELD_DECL)
5750 if (TREE_TYPE (field) == error_mark_node)
5753 /* Bitfields are always classified as integer. Handle them
5754 early, since later code would consider them to be
5755 misaligned integers. */
5756 if (DECL_BIT_FIELD (field))
5758 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5759 i < ((int_bit_position (field) + (bit_offset % 64))
5760 + tree_low_cst (DECL_SIZE (field), 0)
5763 merge_classes (X86_64_INTEGER_CLASS,
5770 type = TREE_TYPE (field);
5772 /* Flexible array member is ignored. */
5773 if (TYPE_MODE (type) == BLKmode
5774 && TREE_CODE (type) == ARRAY_TYPE
5775 && TYPE_SIZE (type) == NULL_TREE
5776 && TYPE_DOMAIN (type) != NULL_TREE
5777 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
5782 if (!warned && warn_psabi)
5785 inform (input_location,
5786 "the ABI of passing struct with"
5787 " a flexible array member has"
5788 " changed in GCC 4.4");
5792 num = classify_argument (TYPE_MODE (type), type,
5794 (int_bit_position (field)
5795 + bit_offset) % 256);
5798 pos = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5799 for (i = 0; i < num && (i + pos) < words; i++)
5801 merge_classes (subclasses[i], classes[i + pos]);
5808 /* Arrays are handled as small records. */
5811 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
5812 TREE_TYPE (type), subclasses, bit_offset);
5816 /* The partial classes are now full classes. */
5817 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
5818 subclasses[0] = X86_64_SSE_CLASS;
5819 if (subclasses[0] == X86_64_INTEGERSI_CLASS
5820 && !((bit_offset % 64) == 0 && bytes == 4))
5821 subclasses[0] = X86_64_INTEGER_CLASS;
5823 for (i = 0; i < words; i++)
5824 classes[i] = subclasses[i % num];
5829 case QUAL_UNION_TYPE:
5830 /* Unions are similar to RECORD_TYPE but the offset is always 0. */
5832 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5834 if (TREE_CODE (field) == FIELD_DECL)
5838 if (TREE_TYPE (field) == error_mark_node)
5841 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
5842 TREE_TYPE (field), subclasses,
5846 for (i = 0; i < num; i++)
5847 classes[i] = merge_classes (subclasses[i], classes[i]);
5858 /* When size > 16 bytes, if the first class isn't
5859 X86_64_SSE_CLASS or any of the others aren't
5860 X86_64_SSEUP_CLASS, everything should be passed in memory. */
5862 if (classes[0] != X86_64_SSE_CLASS)
5865 for (i = 1; i < words; i++)
5866 if (classes[i] != X86_64_SSEUP_CLASS)
5870 /* Final merger cleanup. */
5871 for (i = 0; i < words; i++)
5873 /* If one class is MEMORY, everything should be passed in
5875 if (classes[i] == X86_64_MEMORY_CLASS)
5878 /* X86_64_SSEUP_CLASS should always be preceded by
5879 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
5880 if (classes[i] == X86_64_SSEUP_CLASS
5881 && classes[i - 1] != X86_64_SSE_CLASS
5882 && classes[i - 1] != X86_64_SSEUP_CLASS)
5884 /* The first one should never be X86_64_SSEUP_CLASS. */
5885 gcc_assert (i != 0);
5886 classes[i] = X86_64_SSE_CLASS;
5889 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
5890 everything should be passed in memory. */
5891 if (classes[i] == X86_64_X87UP_CLASS
5892 && (classes[i - 1] != X86_64_X87_CLASS))
5896 /* The first one should never be X86_64_X87UP_CLASS. */
5897 gcc_assert (i != 0);
5898 if (!warned && warn_psabi)
5901 inform (input_location,
5902 "the ABI of passing union with long double"
5903 " has changed in GCC 4.4");
5911 /* Compute the alignment needed. We align all types to their natural boundaries,
5912 with the exception of XFmode, which is aligned to 64bits. */
5913 if (mode != VOIDmode && mode != BLKmode)
5915 int mode_alignment = GET_MODE_BITSIZE (mode);
5918 mode_alignment = 128;
5919 else if (mode == XCmode)
5920 mode_alignment = 256;
5921 if (COMPLEX_MODE_P (mode))
5922 mode_alignment /= 2;
5923 /* Misaligned fields are always returned in memory. */
5924 if (bit_offset % mode_alignment)
5928 /* For V1xx modes, just use the base mode. */
5929 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
5930 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
5931 mode = GET_MODE_INNER (mode);
5933 /* Classification of atomic types. */
5938 classes[0] = X86_64_SSE_CLASS;
5941 classes[0] = X86_64_SSE_CLASS;
5942 classes[1] = X86_64_SSEUP_CLASS;
5952 int size = (bit_offset % 64) + (int) GET_MODE_BITSIZE (mode);
5956 classes[0] = X86_64_INTEGERSI_CLASS;
5959 else if (size <= 64)
5961 classes[0] = X86_64_INTEGER_CLASS;
5964 else if (size <= 64+32)
5966 classes[0] = X86_64_INTEGER_CLASS;
5967 classes[1] = X86_64_INTEGERSI_CLASS;
5970 else if (size <= 64+64)
5972 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5980 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5984 /* OImode shouldn't be used directly. */
5989 if (!(bit_offset % 64))
5990 classes[0] = X86_64_SSESF_CLASS;
5992 classes[0] = X86_64_SSE_CLASS;
5995 classes[0] = X86_64_SSEDF_CLASS;
5998 classes[0] = X86_64_X87_CLASS;
5999 classes[1] = X86_64_X87UP_CLASS;
6002 classes[0] = X86_64_SSE_CLASS;
6003 classes[1] = X86_64_SSEUP_CLASS;
6006 classes[0] = X86_64_SSE_CLASS;
6007 if (!(bit_offset % 64))
6013 if (!warned && warn_psabi)
6016 inform (input_location,
6017 "the ABI of passing structure with complex float"
6018 " member has changed in GCC 4.4");
6020 classes[1] = X86_64_SSESF_CLASS;
6024 classes[0] = X86_64_SSEDF_CLASS;
6025 classes[1] = X86_64_SSEDF_CLASS;
6028 classes[0] = X86_64_COMPLEX_X87_CLASS;
6031 /* These modes are larger than 16 bytes. */
6039 classes[0] = X86_64_SSE_CLASS;
6040 classes[1] = X86_64_SSEUP_CLASS;
6041 classes[2] = X86_64_SSEUP_CLASS;
6042 classes[3] = X86_64_SSEUP_CLASS;
6050 classes[0] = X86_64_SSE_CLASS;
6051 classes[1] = X86_64_SSEUP_CLASS;
6059 classes[0] = X86_64_SSE_CLASS;
6065 gcc_assert (VECTOR_MODE_P (mode));
6070 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
6072 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
6073 classes[0] = X86_64_INTEGERSI_CLASS;
6075 classes[0] = X86_64_INTEGER_CLASS;
6076 classes[1] = X86_64_INTEGER_CLASS;
6077 return 1 + (bytes > 8);
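/* Worked example (per the SysV x86-64 psABI; illustration only, not part of
   the original code): classify_argument on
     struct { double d; long l; }
   yields classes[0] = X86_64_SSEDF_CLASS and classes[1] = X86_64_INTEGER_CLASS
   and returns 2, so the value is passed in one SSE register and one integer
   register.  */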
6081 /* Examine the argument and set the number of registers required in each
6082 class. Return 0 iff the parameter should be passed in memory. */
6084 examine_argument (enum machine_mode mode, const_tree type, int in_return,
6085 int *int_nregs, int *sse_nregs)
6087 enum x86_64_reg_class regclass[MAX_CLASSES];
6088 int n = classify_argument (mode, type, regclass, 0);
6094 for (n--; n >= 0; n--)
6095 switch (regclass[n])
6097 case X86_64_INTEGER_CLASS:
6098 case X86_64_INTEGERSI_CLASS:
6101 case X86_64_SSE_CLASS:
6102 case X86_64_SSESF_CLASS:
6103 case X86_64_SSEDF_CLASS:
6106 case X86_64_NO_CLASS:
6107 case X86_64_SSEUP_CLASS:
6109 case X86_64_X87_CLASS:
6110 case X86_64_X87UP_CLASS:
6114 case X86_64_COMPLEX_X87_CLASS:
6115 return in_return ? 2 : 0;
6116 case X86_64_MEMORY_CLASS:
6122 /* Construct container for the argument used by GCC interface. See
6123 FUNCTION_ARG for the detailed description. */
6126 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
6127 const_tree type, int in_return, int nintregs, int nsseregs,
6128 const int *intreg, int sse_regno)
6130 /* The following variables hold the static issued_error state. */
6131 static bool issued_sse_arg_error;
6132 static bool issued_sse_ret_error;
6133 static bool issued_x87_ret_error;
6135 enum machine_mode tmpmode;
6137 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6138 enum x86_64_reg_class regclass[MAX_CLASSES];
6142 int needed_sseregs, needed_intregs;
6143 rtx exp[MAX_CLASSES];
6146 n = classify_argument (mode, type, regclass, 0);
6149 if (!examine_argument (mode, type, in_return, &needed_intregs,
6152 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
6155 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
6156 some less clueful developer tries to use floating-point anyway. */
6157 if (needed_sseregs && !TARGET_SSE)
6161 if (!issued_sse_ret_error)
6163 error ("SSE register return with SSE disabled");
6164 issued_sse_ret_error = true;
6167 else if (!issued_sse_arg_error)
6169 error ("SSE register argument with SSE disabled");
6170 issued_sse_arg_error = true;
6175 /* Likewise, error if the ABI requires us to return values in the
6176 x87 registers and the user specified -mno-80387. */
6177 if (!TARGET_80387 && in_return)
6178 for (i = 0; i < n; i++)
6179 if (regclass[i] == X86_64_X87_CLASS
6180 || regclass[i] == X86_64_X87UP_CLASS
6181 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
6183 if (!issued_x87_ret_error)
6185 error ("x87 register return with x87 disabled");
6186 issued_x87_ret_error = true;
6191 /* First construct simple cases. Avoid SCmode, since we want to use
6192 single register to pass this type. */
6193 if (n == 1 && mode != SCmode)
6194 switch (regclass[0])
6196 case X86_64_INTEGER_CLASS:
6197 case X86_64_INTEGERSI_CLASS:
6198 return gen_rtx_REG (mode, intreg[0]);
6199 case X86_64_SSE_CLASS:
6200 case X86_64_SSESF_CLASS:
6201 case X86_64_SSEDF_CLASS:
6202 if (mode != BLKmode)
6203 return gen_reg_or_parallel (mode, orig_mode,
6204 SSE_REGNO (sse_regno));
6206 case X86_64_X87_CLASS:
6207 case X86_64_COMPLEX_X87_CLASS:
6208 return gen_rtx_REG (mode, FIRST_STACK_REG);
6209 case X86_64_NO_CLASS:
6210 /* Zero sized array, struct or class. */
6215 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
6216 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
6217 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
6219 && regclass[0] == X86_64_SSE_CLASS
6220 && regclass[1] == X86_64_SSEUP_CLASS
6221 && regclass[2] == X86_64_SSEUP_CLASS
6222 && regclass[3] == X86_64_SSEUP_CLASS
6224 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
6227 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
6228 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
6229 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
6230 && regclass[1] == X86_64_INTEGER_CLASS
6231 && (mode == CDImode || mode == TImode || mode == TFmode)
6232 && intreg[0] + 1 == intreg[1])
6233 return gen_rtx_REG (mode, intreg[0]);
6235 /* Otherwise figure out the entries of the PARALLEL. */
6236 for (i = 0; i < n; i++)
6240 switch (regclass[i])
6242 case X86_64_NO_CLASS:
6244 case X86_64_INTEGER_CLASS:
6245 case X86_64_INTEGERSI_CLASS:
6246 /* Merge TImodes on aligned occasions here too. */
6247 if (i * 8 + 8 > bytes)
6248 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
6249 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
6253 /* We've requested 24 bytes, for which we don't have a mode. Use DImode. */
6254 if (tmpmode == BLKmode)
6256 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6257 gen_rtx_REG (tmpmode, *intreg),
6261 case X86_64_SSESF_CLASS:
6262 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6263 gen_rtx_REG (SFmode,
6264 SSE_REGNO (sse_regno)),
6268 case X86_64_SSEDF_CLASS:
6269 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6270 gen_rtx_REG (DFmode,
6271 SSE_REGNO (sse_regno)),
6275 case X86_64_SSE_CLASS:
6283 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
6293 && regclass[1] == X86_64_SSEUP_CLASS
6294 && regclass[2] == X86_64_SSEUP_CLASS
6295 && regclass[3] == X86_64_SSEUP_CLASS);
6302 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6303 gen_rtx_REG (tmpmode,
6304 SSE_REGNO (sse_regno)),
6313 /* Empty aligned struct, union or class. */
6317 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
6318 for (i = 0; i < nexps; i++)
6319 XVECEXP (ret, 0, i) = exp [i];
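/* Illustration only (not from the original source): for the { double; long }
   example above, the PARALLEL built here is roughly
     (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                (expr_list (reg:DI di) (const_int 8))])
   recording the register and the byte offset of each eightbyte.  */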
6323 /* Update the data in CUM to advance over an argument of mode MODE
6324 and data type TYPE. (TYPE is null for libcalls where that information
6325 may not be available.) */
6328 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6329 const_tree type, HOST_WIDE_INT bytes,
6330 HOST_WIDE_INT words)
6346 cum->words += words;
6347 cum->nregs -= words;
6348 cum->regno += words;
6350 if (cum->nregs <= 0)
6358 /* OImode shouldn't be used directly. */
6362 if (cum->float_in_sse < 2)
6365 if (cum->float_in_sse < 1)
6382 if (!type || !AGGREGATE_TYPE_P (type))
6384 cum->sse_words += words;
6385 cum->sse_nregs -= 1;
6386 cum->sse_regno += 1;
6387 if (cum->sse_nregs <= 0)
6401 if (!type || !AGGREGATE_TYPE_P (type))
6403 cum->mmx_words += words;
6404 cum->mmx_nregs -= 1;
6405 cum->mmx_regno += 1;
6406 if (cum->mmx_nregs <= 0)
6417 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6418 const_tree type, HOST_WIDE_INT words, bool named)
6420 int int_nregs, sse_nregs;
6422 /* Unnamed 256bit vector mode parameters are passed on stack. */
6423 if (!named && VALID_AVX256_REG_MODE (mode))
6426 if (examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
6427 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
6429 cum->nregs -= int_nregs;
6430 cum->sse_nregs -= sse_nregs;
6431 cum->regno += int_nregs;
6432 cum->sse_regno += sse_nregs;
6436 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
6437 cum->words = (cum->words + align - 1) & ~(align - 1);
6438 cum->words += words;
6443 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
6444 HOST_WIDE_INT words)
6446 /* Otherwise, this should be passed indirectly. */
6447 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
6449 cum->words += words;
6457 /* Update the data in CUM to advance over an argument of mode MODE and
6458 data type TYPE. (TYPE is null for libcalls where that information
6459 may not be available.) */
6462 ix86_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
6463 const_tree type, bool named)
6465 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6466 HOST_WIDE_INT bytes, words;
6468 if (mode == BLKmode)
6469 bytes = int_size_in_bytes (type);
6471 bytes = GET_MODE_SIZE (mode);
6472 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6475 mode = type_natural_mode (type, NULL);
6477 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6478 function_arg_advance_ms_64 (cum, bytes, words);
6479 else if (TARGET_64BIT)
6480 function_arg_advance_64 (cum, mode, type, words, named);
6482 function_arg_advance_32 (cum, mode, type, bytes, words);
6485 /* Define where to put the arguments to a function.
6486 Value is zero to push the argument on the stack,
6487 or a hard register in which to store the argument.
6489 MODE is the argument's machine mode.
6490 TYPE is the data type of the argument (as a tree).
6491 This is null for libcalls where that information may
6493 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6494 the preceding args and about the function being called.
6495 NAMED is nonzero if this argument is a named parameter
6496 (otherwise it is an extra parameter matching an ellipsis). */
6499 function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6500 enum machine_mode orig_mode, const_tree type,
6501 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
6503 static bool warnedsse, warnedmmx;
6505 /* Avoid the AL settings for the Unix64 ABI. */
6506 if (mode == VOIDmode)
6522 if (words <= cum->nregs)
6524 int regno = cum->regno;
6526 /* Fastcall allocates the first two DWORD (SImode) or
6527 smaller arguments to ECX and EDX if it isn't an aggregate. */
6533 || (type && AGGREGATE_TYPE_P (type)))
6536 /* ECX, not EAX, is the first allocated register. */
6537 if (regno == AX_REG)
6540 return gen_rtx_REG (mode, regno);
6545 if (cum->float_in_sse < 2)
6548 if (cum->float_in_sse < 1)
6552 /* In 32bit, we pass TImode in xmm registers. */
6559 if (!type || !AGGREGATE_TYPE_P (type))
6561 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
6564 warning (0, "SSE vector argument without SSE enabled "
6568 return gen_reg_or_parallel (mode, orig_mode,
6569 cum->sse_regno + FIRST_SSE_REG);
6574 /* OImode shouldn't be used directly. */
6583 if (!type || !AGGREGATE_TYPE_P (type))
6586 return gen_reg_or_parallel (mode, orig_mode,
6587 cum->sse_regno + FIRST_SSE_REG);
6597 if (!type || !AGGREGATE_TYPE_P (type))
6599 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
6602 warning (0, "MMX vector argument without MMX enabled "
6606 return gen_reg_or_parallel (mode, orig_mode,
6607 cum->mmx_regno + FIRST_MMX_REG);
6616 function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6617 enum machine_mode orig_mode, const_tree type, bool named)
6619 /* Handle a hidden AL argument containing number of registers
6620 for varargs x86-64 functions. */
6621 if (mode == VOIDmode)
6622 return GEN_INT (cum->maybe_vaarg
6623 ? (cum->sse_nregs < 0
6624 ? X86_64_SSE_REGPARM_MAX
6639 /* Unnamed 256bit vector mode parameters are passed on stack. */
6645 return construct_container (mode, orig_mode, type, 0, cum->nregs,
6647 &x86_64_int_parameter_registers [cum->regno],
6652 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6653 enum machine_mode orig_mode, bool named,
6654 HOST_WIDE_INT bytes)
6658 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
6659 We use a value of -2 to specify that the current function call is MS ABI. */
6660 if (mode == VOIDmode)
6661 return GEN_INT (-2);
6663 /* If we've run out of registers, it goes on the stack. */
6664 if (cum->nregs == 0)
6667 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
6669 /* Only floating point modes are passed in anything but integer regs. */
6670 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
6673 regno = cum->regno + FIRST_SSE_REG;
6678 /* Unnamed floating parameters are passed in both the
6679 SSE and integer registers. */
6680 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
6681 t2 = gen_rtx_REG (mode, regno);
6682 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
6683 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
6684 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
6687 /* Handle aggregate types passed in registers. */
6688 if (orig_mode == BLKmode)
6690 if (bytes > 0 && bytes <= 8)
6691 mode = (bytes > 4 ? DImode : SImode);
6692 if (mode == BLKmode)
6696 return gen_reg_or_parallel (mode, orig_mode, regno);
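/* Note (Microsoft x64 convention; illustration only): the Nth argument slot
   uses either the Nth integer register (rcx, rdx, r8, r9) or the Nth SSE
   register (xmm0-xmm3); a slot consumed by one kind is not available to the
   other, which is why cum->regno indexes both register tables above.  */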
6699 /* Return where to put the arguments to a function.
6700 Return zero to push the argument on the stack, or a hard register in which to store the argument.
6702 MODE is the argument's machine mode. TYPE is the data type of the
6703 argument. It is null for libcalls where that information may not be
6704 available. CUM gives information about the preceding args and about
6705 the function being called. NAMED is nonzero if this argument is a
6706 named parameter (otherwise it is an extra parameter matching an
6710 ix86_function_arg (cumulative_args_t cum_v, enum machine_mode omode,
6711 const_tree type, bool named)
6713 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6714 enum machine_mode mode = omode;
6715 HOST_WIDE_INT bytes, words;
6718 if (mode == BLKmode)
6719 bytes = int_size_in_bytes (type);
6721 bytes = GET_MODE_SIZE (mode);
6722 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6724 /* To simplify the code below, represent vector types with a vector mode
6725 even if MMX/SSE are not active. */
6726 if (type && TREE_CODE (type) == VECTOR_TYPE)
6727 mode = type_natural_mode (type, cum);
6729 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6730 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
6731 else if (TARGET_64BIT)
6732 arg = function_arg_64 (cum, mode, omode, type, named);
6734 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
6736 if (TARGET_VZEROUPPER && function_pass_avx256_p (arg))
6738 /* This argument uses 256bit AVX modes. */
6740 cfun->machine->callee_pass_avx256_p = true;
6742 cfun->machine->caller_pass_avx256_p = true;
6748 /* A C expression that indicates when an argument must be passed by
6749 reference. If nonzero for an argument, a copy of that argument is
6750 made in memory and a pointer to the argument is passed instead of
6751 the argument itself. The pointer is passed in whatever way is
6752 appropriate for passing a pointer to that type. */
6755 ix86_pass_by_reference (cumulative_args_t cum_v ATTRIBUTE_UNUSED,
6756 enum machine_mode mode ATTRIBUTE_UNUSED,
6757 const_tree type, bool named ATTRIBUTE_UNUSED)
6759 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6761 /* See Windows x64 Software Convention. */
6762 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6764 int msize = (int) GET_MODE_SIZE (mode);
6767 /* Arrays are passed by reference. */
6768 if (TREE_CODE (type) == ARRAY_TYPE)
6771 if (AGGREGATE_TYPE_P (type))
6773 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
6774 are passed by reference. */
6775 msize = int_size_in_bytes (type);
6779 /* __m128 is passed by reference. */
6781 case 1: case 2: case 4: case 8:
6787 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
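/* Examples of the Windows x64 rules above (illustration only): a 12-byte
   struct or a 16-byte __m128 is passed by reference, while an 8-byte struct
   goes by value in a register.  */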
6793 /* Return true when TYPE should be 128bit aligned for 32bit argument
6794 passing ABI. XXX: This function is obsolete and is only used for
6795 checking psABI compatibility with previous versions of GCC. */
6798 ix86_compat_aligned_value_p (const_tree type)
6800 enum machine_mode mode = TYPE_MODE (type);
6801 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
6805 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
6807 if (TYPE_ALIGN (type) < 128)
6810 if (AGGREGATE_TYPE_P (type))
6812 /* Walk the aggregates recursively. */
6813 switch (TREE_CODE (type))
6817 case QUAL_UNION_TYPE:
6821 /* Walk all the structure fields. */
6822 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6824 if (TREE_CODE (field) == FIELD_DECL
6825 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
6832 /* Just for use if some languages pass arrays by value. */
6833 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
6844 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
6845 XXX: This function is obsolete and is only used for checking psABI
6846 compatibility with previous versions of GCC. */
6849 ix86_compat_function_arg_boundary (enum machine_mode mode,
6850 const_tree type, unsigned int align)
6852 /* In 32bit, only _Decimal128 and __float128 are aligned to their
6853 natural boundaries. */
6854 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
6856 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
6857 make an exception for SSE modes since these require 128bit alignment.
6860 The handling here differs from field_alignment. ICC aligns MMX
6861 arguments to 4 byte boundaries, while structure fields are aligned
6862 to 8 byte boundaries. */
6865 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
6866 align = PARM_BOUNDARY;
6870 if (!ix86_compat_aligned_value_p (type))
6871 align = PARM_BOUNDARY;
6874 if (align > BIGGEST_ALIGNMENT)
6875 align = BIGGEST_ALIGNMENT;
6879 /* Return true when TYPE should be 128bit aligned for 32bit argument passing ABI. */
6883 ix86_contains_aligned_value_p (const_tree type)
6885 enum machine_mode mode = TYPE_MODE (type);
6887 if (mode == XFmode || mode == XCmode)
6890 if (TYPE_ALIGN (type) < 128)
6893 if (AGGREGATE_TYPE_P (type))
6895 /* Walk the aggregates recursively. */
6896 switch (TREE_CODE (type))
6900 case QUAL_UNION_TYPE:
6904 /* Walk all the structure fields. */
6905 for (field = TYPE_FIELDS (type);
6907 field = DECL_CHAIN (field))
6909 if (TREE_CODE (field) == FIELD_DECL
6910 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
6917 /* Just for use if some languages pass arrays by value. */
6918 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
6927 return TYPE_ALIGN (type) >= 128;
6932 /* Gives the alignment boundary, in bits, of an argument with the
6933 specified mode and type. */
6936 ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
6941 /* Since the main variant type is used for the call, convert the type
6942 to its main variant. */
6943 type = TYPE_MAIN_VARIANT (type);
6944 align = TYPE_ALIGN (type);
6947 align = GET_MODE_ALIGNMENT (mode);
6948 if (align < PARM_BOUNDARY)
6949 align = PARM_BOUNDARY;
6953 unsigned int saved_align = align;
6957 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
6960 if (mode == XFmode || mode == XCmode)
6961 align = PARM_BOUNDARY;
6963 else if (!ix86_contains_aligned_value_p (type))
6964 align = PARM_BOUNDARY;
6967 align = PARM_BOUNDARY;
6972 && align != ix86_compat_function_arg_boundary (mode, type,
6976 inform (input_location,
6977 "The ABI for passing parameters with %d-byte"
6978 " alignment has changed in GCC 4.6",
6979 align / BITS_PER_UNIT);
6986 /* Return true if N is a possible register number of function value. */
6989 ix86_function_value_regno_p (const unsigned int regno)
6996 case FIRST_FLOAT_REG:
6997 /* TODO: The function should depend on the current function's ABI, but
6998 builtins.c would need updating then. Therefore we use the default ABI. */
7000 if (TARGET_64BIT && ix86_abi == MS_ABI)
7002 return TARGET_FLOAT_RETURNS_IN_80387;
7008 if (TARGET_MACHO || TARGET_64BIT)
7016 /* Define how to find the value returned by a function.
7017 VALTYPE is the data type of the value (as a tree).
7018 If the precise function being called is known, FUNC is its FUNCTION_DECL;
7019 otherwise, FUNC is 0. */
7022 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
7023 const_tree fntype, const_tree fn)
7027 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
7028 we normally prevent this case when mmx is not available. However
7029 some ABIs may require the result to be returned like DImode. */
7030 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7031 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
7033 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
7034 we prevent this case when sse is not available. However some ABIs
7035 may require the result to be returned like integer TImode. */
7036 else if (mode == TImode
7037 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7038 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
7040 /* 32-byte vector modes in %ymm0. */
7041 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
7042 regno = TARGET_AVX ? FIRST_SSE_REG : 0;
7044 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
7045 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
7046 regno = FIRST_FLOAT_REG;
7048 /* Most things go in %eax. */
7051 /* Override FP return register with %xmm0 for local functions when
7052 SSE math is enabled or for functions with sseregparm attribute. */
7053 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
7055 int sse_level = ix86_function_sseregparm (fntype, fn, false);
7056 if ((sse_level >= 1 && mode == SFmode)
7057 || (sse_level == 2 && mode == DFmode))
7058 regno = FIRST_SSE_REG;
7061 /* OImode shouldn't be used directly. */
7062 gcc_assert (mode != OImode);
7064 return gen_rtx_REG (orig_mode, regno);
7068 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
7073 /* Handle libcalls, which don't provide a type node. */
7074 if (valtype == NULL)
7086 return gen_rtx_REG (mode, FIRST_SSE_REG);
7089 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
7093 return gen_rtx_REG (mode, AX_REG);
7096 else if (POINTER_TYPE_P (valtype))
7098 /* Pointers are always returned in Pmode. */
7102 ret = construct_container (mode, orig_mode, valtype, 1,
7103 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
7104 x86_64_int_return_registers, 0);
7106 /* For zero sized structures, construct_container returns NULL, but we
7107 need to keep the rest of the compiler happy by returning a meaningful value. */
7109 ret = gen_rtx_REG (orig_mode, AX_REG);
7115 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
7117 unsigned int regno = AX_REG;
7121 switch (GET_MODE_SIZE (mode))
7124 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7125 && !COMPLEX_MODE_P (mode))
7126 regno = FIRST_SSE_REG;
7130 if (mode == SFmode || mode == DFmode)
7131 regno = FIRST_SSE_REG;
7137 return gen_rtx_REG (orig_mode, regno);
7141 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
7142 enum machine_mode orig_mode, enum machine_mode mode)
7144 const_tree fn, fntype;
7147 if (fntype_or_decl && DECL_P (fntype_or_decl))
7148 fn = fntype_or_decl;
7149 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
7151 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
7152 return function_value_ms_64 (orig_mode, mode);
7153 else if (TARGET_64BIT)
7154 return function_value_64 (orig_mode, mode, valtype);
7156 return function_value_32 (orig_mode, mode, fntype, fn);
7160 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
7161 bool outgoing ATTRIBUTE_UNUSED)
7163 enum machine_mode mode, orig_mode;
7165 orig_mode = TYPE_MODE (valtype);
7166 mode = type_natural_mode (valtype, NULL);
7167 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
7170 /* Pointer function arguments and return values are promoted to Pmode. */
7172 static enum machine_mode
7173 ix86_promote_function_mode (const_tree type, enum machine_mode mode,
7174 int *punsignedp, const_tree fntype,
7177 if (type != NULL_TREE && POINTER_TYPE_P (type))
7179 *punsignedp = POINTERS_EXTEND_UNSIGNED;
7182 return default_promote_function_mode (type, mode, punsignedp, fntype,
7187 ix86_libcall_value (enum machine_mode mode)
7189 return ix86_function_value_1 (NULL, NULL, mode, mode);
7192 /* Return true iff type is returned in memory. */
7194 static bool ATTRIBUTE_UNUSED
7195 return_in_memory_32 (const_tree type, enum machine_mode mode)
7199 if (mode == BLKmode)
7202 size = int_size_in_bytes (type);
7204 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
7207 if (VECTOR_MODE_P (mode) || mode == TImode)
7209 /* User-created vectors small enough to fit in EAX. */
7213 /* MMX/3dNow values are returned in MM0,
7214 except when it doesn't exist or the ABI prescribes otherwise. */
7216 return !TARGET_MMX || TARGET_VECT8_RETURNS;
7218 /* SSE values are returned in XMM0, except when it doesn't exist. */
7222 /* AVX values are returned in YMM0, except when it doesn't exist. */
7233 /* OImode shouldn't be used directly. */
7234 gcc_assert (mode != OImode);
7239 static bool ATTRIBUTE_UNUSED
7240 return_in_memory_64 (const_tree type, enum machine_mode mode)
7242 int needed_intregs, needed_sseregs;
7243 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
7246 static bool ATTRIBUTE_UNUSED
7247 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
7249 HOST_WIDE_INT size = int_size_in_bytes (type);
7251 /* __m128 is returned in xmm0. */
7252 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7253 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
7256 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes. */
7257 return size != 1 && size != 2 && size != 4 && size != 8;
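/* Illustration of the rules above (not part of the original code): a
   1/2/4/8-byte aggregate comes back in RAX, a 16-byte vector such as __m128
   in XMM0, and anything else (e.g. a 12-byte struct) via a hidden return
   pointer.  */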
7261 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7263 #ifdef SUBTARGET_RETURN_IN_MEMORY
7264 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
7266 const enum machine_mode mode = type_natural_mode (type, NULL);
7270 if (ix86_function_type_abi (fntype) == MS_ABI)
7271 return return_in_memory_ms_64 (type, mode);
7273 return return_in_memory_64 (type, mode);
7276 return return_in_memory_32 (type, mode);
7280 /* When returning SSE vector types, we have a choice of either
7281 (1) being ABI incompatible with a -march switch, or
7282 (2) generating an error.
7283 Given no good solution, I think the safest thing is one warning.
7284 The user won't be able to use -Werror, but....
7286 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
7287 called in response to actually generating a caller or callee that
7288 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
7289 via aggregate_value_p for general type probing from tree-ssa. */
7292 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
7294 static bool warnedsse, warnedmmx;
7296 if (!TARGET_64BIT && type)
7298 /* Look at the return type of the function, not the function type. */
7299 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
7301 if (!TARGET_SSE && !warnedsse)
7304 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7307 warning (0, "SSE vector return without SSE enabled "
7312 if (!TARGET_MMX && !warnedmmx)
7314 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7317 warning (0, "MMX vector return without MMX enabled "
7327 /* Create the va_list data type. */
7329 /* Return the calling convention specific va_list data type.
7330 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
7333 ix86_build_builtin_va_list_abi (enum calling_abi abi)
7335 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
7337 /* For i386 we use a plain pointer to the argument area. */
7338 if (!TARGET_64BIT || abi == MS_ABI)
7339 return build_pointer_type (char_type_node);
7341 record = lang_hooks.types.make_type (RECORD_TYPE);
7342 type_decl = build_decl (BUILTINS_LOCATION,
7343 TYPE_DECL, get_identifier ("__va_list_tag"), record);
7345 f_gpr = build_decl (BUILTINS_LOCATION,
7346 FIELD_DECL, get_identifier ("gp_offset"),
7347 unsigned_type_node);
7348 f_fpr = build_decl (BUILTINS_LOCATION,
7349 FIELD_DECL, get_identifier ("fp_offset"),
7350 unsigned_type_node);
7351 f_ovf = build_decl (BUILTINS_LOCATION,
7352 FIELD_DECL, get_identifier ("overflow_arg_area"),
7354 f_sav = build_decl (BUILTINS_LOCATION,
7355 FIELD_DECL, get_identifier ("reg_save_area"),
7358 va_list_gpr_counter_field = f_gpr;
7359 va_list_fpr_counter_field = f_fpr;
7361 DECL_FIELD_CONTEXT (f_gpr) = record;
7362 DECL_FIELD_CONTEXT (f_fpr) = record;
7363 DECL_FIELD_CONTEXT (f_ovf) = record;
7364 DECL_FIELD_CONTEXT (f_sav) = record;
7366 TYPE_STUB_DECL (record) = type_decl;
7367 TYPE_NAME (record) = type_decl;
7368 TYPE_FIELDS (record) = f_gpr;
7369 DECL_CHAIN (f_gpr) = f_fpr;
7370 DECL_CHAIN (f_fpr) = f_ovf;
7371 DECL_CHAIN (f_ovf) = f_sav;
7373 layout_type (record);
7375 /* The correct type is an array type of one element. */
7376 return build_array_type (record, build_index_type (size_zero_node));
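/* A sketch of the record built above, written as C (this matches the layout
   mandated by the SysV x86-64 psABI; illustration only, never compiled):  */
#if 0
typedef struct __va_list_tag
{
  unsigned int gp_offset;	/* Byte offset of the next GPR slot.  */
  unsigned int fp_offset;	/* Byte offset of the next SSE slot.  */
  void *overflow_arg_area;	/* Arguments passed on the stack.  */
  void *reg_save_area;		/* Saved argument registers.  */
} __va_list_tag;
typedef __va_list_tag va_list[1];	/* Array type of one element.  */
#endif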
7379 /* Set up the builtin va_list data type and, for 64-bit, the additional
7380 calling convention specific va_list data types. */
7383 ix86_build_builtin_va_list (void)
7385 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
7387 /* Initialize ABI-specific va_list builtin types. */
7391 if (ix86_abi == MS_ABI)
7393 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
7394 if (TREE_CODE (t) != RECORD_TYPE)
7395 t = build_variant_type_copy (t);
7396 sysv_va_list_type_node = t;
7401 if (TREE_CODE (t) != RECORD_TYPE)
7402 t = build_variant_type_copy (t);
7403 sysv_va_list_type_node = t;
7405 if (ix86_abi != MS_ABI)
7407 t = ix86_build_builtin_va_list_abi (MS_ABI);
7408 if (TREE_CODE (t) != RECORD_TYPE)
7409 t = build_variant_type_copy (t);
7410 ms_va_list_type_node = t;
7415 if (TREE_CODE (t) != RECORD_TYPE)
7416 t = build_variant_type_copy (t);
7417 ms_va_list_type_node = t;
7424 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
7427 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
7433 /* GPR size of varargs save area. */
7434 if (cfun->va_list_gpr_size)
7435 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
7437 ix86_varargs_gpr_size = 0;
7439 /* FPR size of varargs save area. We don't need it if we don't pass
7440 anything in SSE registers. */
7441 if (TARGET_SSE && cfun->va_list_fpr_size)
7442 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
7444 ix86_varargs_fpr_size = 0;
7446 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
7449 save_area = frame_pointer_rtx;
7450 set = get_varargs_alias_set ();
7452 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
7453 if (max > X86_64_REGPARM_MAX)
7454 max = X86_64_REGPARM_MAX;
7456 for (i = cum->regno; i < max; i++)
7458 mem = gen_rtx_MEM (Pmode,
7459 plus_constant (save_area, i * UNITS_PER_WORD));
7460 MEM_NOTRAP_P (mem) = 1;
7461 set_mem_alias_set (mem, set);
7462 emit_move_insn (mem, gen_rtx_REG (Pmode,
7463 x86_64_int_parameter_registers[i]));
7466 if (ix86_varargs_fpr_size)
7468 enum machine_mode smode;
7471 /* Now emit code to save SSE registers. The AX parameter contains number
7472 of SSE parameter registers used to call this function, though all we
7473 actually check here is the zero/non-zero status. */
7475 label = gen_label_rtx ();
7476 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
7477 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
7480 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
7481 we used movdqa (i.e. TImode) instead? Perhaps even better would
7482 be if we could determine the real mode of the data, via a hook
7483 into pass_stdarg. Ignore all that for now. */
7485 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
7486 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
7488 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
7489 if (max > X86_64_SSE_REGPARM_MAX)
7490 max = X86_64_SSE_REGPARM_MAX;
7492 for (i = cum->sse_regno; i < max; ++i)
7494 mem = plus_constant (save_area, i * 16 + ix86_varargs_gpr_size);
7495 mem = gen_rtx_MEM (smode, mem);
7496 MEM_NOTRAP_P (mem) = 1;
7497 set_mem_alias_set (mem, set);
7498 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
7500 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
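/* Note (illustration only): the save area written above holds the six
   integer argument registers in 8-byte slots at offsets 0-47, followed by
   the SSE argument registers in 16-byte slots; the gp_offset and fp_offset
   fields of the va_list index into this block.  */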
7508 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
7510 alias_set_type set = get_varargs_alias_set ();
7513 /* Reset to zero, as there might be a sysv vaarg used before. */
7515 ix86_varargs_gpr_size = 0;
7516 ix86_varargs_fpr_size = 0;
7518 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
7522 mem = gen_rtx_MEM (Pmode,
7523 plus_constant (virtual_incoming_args_rtx,
7524 i * UNITS_PER_WORD));
7525 MEM_NOTRAP_P (mem) = 1;
7526 set_mem_alias_set (mem, set);
7528 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
7529 emit_move_insn (mem, reg);
7534 ix86_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
7535 tree type, int *pretend_size ATTRIBUTE_UNUSED,
7538 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7539 CUMULATIVE_ARGS next_cum;
7542 /* This argument doesn't appear to be used anymore, which is good,
7543 because the old code here didn't suppress rtl generation. */
7544 gcc_assert (!no_rtl);
7549 fntype = TREE_TYPE (current_function_decl);
7551 /* For varargs, we do not want to skip the dummy va_dcl argument.
7552 For stdargs, we do want to skip the last named argument. */
7554 if (stdarg_p (fntype))
7555 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
7558 if (cum->call_abi == MS_ABI)
7559 setup_incoming_varargs_ms_64 (&next_cum);
7561 setup_incoming_varargs_64 (&next_cum);
7564 /* Check whether TYPE is a va_list that is a plain char pointer. */
7567 is_va_list_char_pointer (tree type)
7571 /* For 32-bit it is always true. */
7574 canonic = ix86_canonical_va_list_type (type);
7575 return (canonic == ms_va_list_type_node
7576 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
7579 /* Implement va_start. */
7582 ix86_va_start (tree valist, rtx nextarg)
7584 HOST_WIDE_INT words, n_gpr, n_fpr;
7585 tree f_gpr, f_fpr, f_ovf, f_sav;
7586 tree gpr, fpr, ovf, sav, t;
7590 if (flag_split_stack
7591 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7593 unsigned int scratch_regno;
7595 /* When we are splitting the stack, we can't refer to the stack
7596 arguments using internal_arg_pointer, because they may be on
7597 the old stack. The split stack prologue will arrange to
7598 leave a pointer to the old stack arguments in a scratch
7599 register, which we here copy to a pseudo-register. The split
7600 stack prologue can't set the pseudo-register directly because
7601 it (the prologue) runs before any registers have been saved. */
7603 scratch_regno = split_stack_prologue_scratch_regno ();
7604 if (scratch_regno != INVALID_REGNUM)
7608 reg = gen_reg_rtx (Pmode);
7609 cfun->machine->split_stack_varargs_pointer = reg;
7612 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
7616 push_topmost_sequence ();
7617 emit_insn_after (seq, entry_of_function ());
7618 pop_topmost_sequence ();
7622 /* Only a 64bit target needs something special. */
7623 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7625 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7626 std_expand_builtin_va_start (valist, nextarg);
7631 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
7632 next = expand_binop (ptr_mode, add_optab,
7633 cfun->machine->split_stack_varargs_pointer,
7634 crtl->args.arg_offset_rtx,
7635 NULL_RTX, 0, OPTAB_LIB_WIDEN);
7636 convert_move (va_r, next, 0);
7641 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7642 f_fpr = DECL_CHAIN (f_gpr);
7643 f_ovf = DECL_CHAIN (f_fpr);
7644 f_sav = DECL_CHAIN (f_ovf);
7646 valist = build_simple_mem_ref (valist);
7647 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
7648 /* The following should be folded into the MEM_REF offset. */
7649 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
7651 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
7653 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
7655 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
7658 /* Count number of gp and fp argument registers used. */
7659 words = crtl->args.info.words;
7660 n_gpr = crtl->args.info.regno;
7661 n_fpr = crtl->args.info.sse_regno;
7663 if (cfun->va_list_gpr_size)
7665 type = TREE_TYPE (gpr);
7666 t = build2 (MODIFY_EXPR, type,
7667 gpr, build_int_cst (type, n_gpr * 8));
7668 TREE_SIDE_EFFECTS (t) = 1;
7669 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7672 if (TARGET_SSE && cfun->va_list_fpr_size)
7674 type = TREE_TYPE (fpr);
7675 t = build2 (MODIFY_EXPR, type, fpr,
7676 build_int_cst (type, n_fpr * 16 + 8 * X86_64_REGPARM_MAX));
7677 TREE_SIDE_EFFECTS (t) = 1;
7678 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7681 /* Find the overflow area. */
7682 type = TREE_TYPE (ovf);
7683 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7684 ovf_rtx = crtl->args.internal_arg_pointer;
7686 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
7687 t = make_tree (type, ovf_rtx);
7689 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
7690 t = build2 (MODIFY_EXPR, type, ovf, t);
7691 TREE_SIDE_EFFECTS (t) = 1;
7692 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7694 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
7696 /* Find the register save area.
7697 The function prologue saves it right above the stack frame. */
7698 type = TREE_TYPE (sav);
7699 t = make_tree (type, frame_pointer_rtx);
7700 if (!ix86_varargs_gpr_size)
7701 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
7702 t = build2 (MODIFY_EXPR, type, sav, t);
7703 TREE_SIDE_EFFECTS (t) = 1;
7704 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
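/* Worked example (illustration only): for
     void f (int a, int b, ...)
   with no named SSE arguments, va_start stores gp_offset = 2 * 8 = 16 and
   fp_offset = 8 * X86_64_REGPARM_MAX = 48, so the first va_arg reads the
   third integer register slot of reg_save_area.  */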
7708 /* Implement va_arg. */
7711 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7714 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
7715 tree f_gpr, f_fpr, f_ovf, f_sav;
7716 tree gpr, fpr, ovf, sav, t;
7718 tree lab_false, lab_over = NULL_TREE;
7723 enum machine_mode nat_mode;
7724 unsigned int arg_boundary;
7726 /* Only a 64bit target needs something special. */
7727 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7728 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
7730 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7731 f_fpr = DECL_CHAIN (f_gpr);
7732 f_ovf = DECL_CHAIN (f_fpr);
7733 f_sav = DECL_CHAIN (f_ovf);
7735 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
7736 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
7737 valist = build_va_arg_indirect_ref (valist);
7738 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
7739 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
7740 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
7742 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
7744 type = build_pointer_type (type);
7745 size = int_size_in_bytes (type);
7746 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7748 nat_mode = type_natural_mode (type, NULL);
7757 /* Unnamed 256bit vector mode parameters are passed on stack. */
7758 if (!TARGET_64BIT_MS_ABI)
7765 container = construct_container (nat_mode, TYPE_MODE (type),
7766 type, 0, X86_64_REGPARM_MAX,
7767 X86_64_SSE_REGPARM_MAX, intreg,
7772 /* Pull the value out of the saved registers. */
7774 addr = create_tmp_var (ptr_type_node, "addr");
7778 int needed_intregs, needed_sseregs;
7780 tree int_addr, sse_addr;
7782 lab_false = create_artificial_label (UNKNOWN_LOCATION);
7783 lab_over = create_artificial_label (UNKNOWN_LOCATION);
7785 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
7787 need_temp = (!REG_P (container)
7788 && ((needed_intregs && TYPE_ALIGN (type) > 64)
7789 || TYPE_ALIGN (type) > 128));
7791 /* In case we are passing a structure, verify that it is a consecutive block
7792 in the register save area. If not, we need to do moves. */
7793 if (!need_temp && !REG_P (container))
7795 /* Verify that all registers are strictly consecutive. */
7796 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
7800 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7802 rtx slot = XVECEXP (container, 0, i);
7803 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
7804 || INTVAL (XEXP (slot, 1)) != i * 16)
7812 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7814 rtx slot = XVECEXP (container, 0, i);
7815 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
7816 || INTVAL (XEXP (slot, 1)) != i * 8)
7828 int_addr = create_tmp_var (ptr_type_node, "int_addr");
7829 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
7832 /* First ensure that we fit completely in registers. */
7835 t = build_int_cst (TREE_TYPE (gpr),
7836 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
7837 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
7838 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7839 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7840 gimplify_and_add (t, pre_p);
7844 t = build_int_cst (TREE_TYPE (fpr),
7845 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
7846 + X86_64_REGPARM_MAX * 8);
7847 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
7848 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7849 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7850 gimplify_and_add (t, pre_p);
7853 /* Compute index to start of area used for integer regs. */
7856 /* int_addr = gpr + sav; */
7857 t = fold_build_pointer_plus (sav, gpr);
7858 gimplify_assign (int_addr, t, pre_p);
7862 /* sse_addr = fpr + sav; */
7863 t = fold_build_pointer_plus (sav, fpr);
7864 gimplify_assign (sse_addr, t, pre_p);
7868 int i, prev_size = 0;
7869 tree temp = create_tmp_var (type, "va_arg_tmp");
7872 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
7873 gimplify_assign (addr, t, pre_p);
7875 for (i = 0; i < XVECLEN (container, 0); i++)
7877 rtx slot = XVECEXP (container, 0, i);
7878 rtx reg = XEXP (slot, 0);
7879 enum machine_mode mode = GET_MODE (reg);
7885 tree dest_addr, dest;
7886 int cur_size = GET_MODE_SIZE (mode);
7888 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
7889 prev_size = INTVAL (XEXP (slot, 1));
7890 if (prev_size + cur_size > size)
7892 cur_size = size - prev_size;
7893 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
7894 if (mode == BLKmode)
7897 piece_type = lang_hooks.types.type_for_mode (mode, 1);
7898 if (mode == GET_MODE (reg))
7899 addr_type = build_pointer_type (piece_type);
7901 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
7903 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
7906 if (SSE_REGNO_P (REGNO (reg)))
7908 src_addr = sse_addr;
7909 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
7913 src_addr = int_addr;
7914 src_offset = REGNO (reg) * 8;
7916 src_addr = fold_convert (addr_type, src_addr);
7917 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
7919 dest_addr = fold_convert (daddr_type, addr);
7920 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
7921 if (cur_size == GET_MODE_SIZE (mode))
7923 src = build_va_arg_indirect_ref (src_addr);
7924 dest = build_va_arg_indirect_ref (dest_addr);
7926 gimplify_assign (dest, src, pre_p);
7931 = build_call_expr (implicit_built_in_decls[BUILT_IN_MEMCPY],
7932 3, dest_addr, src_addr,
7933 size_int (cur_size));
7934 gimplify_and_add (copy, pre_p);
7936 prev_size += cur_size;
7942 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
7943 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
7944 gimplify_assign (gpr, t, pre_p);
7949 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
7950 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
7951 gimplify_assign (fpr, t, pre_p);
7954 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
7956 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
7959 /* ... otherwise out of the overflow area. */
7961 /* When we align a parameter on the stack for the caller, if its
7962 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
7963 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We match the callee
7964 here with the caller. */
7965 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
7966 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
7967 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
7969 /* Care for on-stack alignment if needed. */
7970 if (arg_boundary <= 64 || size == 0)
7974 HOST_WIDE_INT align = arg_boundary / 8;
7975 t = fold_build_pointer_plus_hwi (ovf, align - 1);
7976 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
7977 build_int_cst (TREE_TYPE (t), -align));
7980 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
7981 gimplify_assign (addr, t, pre_p);
7983 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
7984 gimplify_assign (unshare_expr (ovf), t, pre_p);
7987 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
7989 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
7990 addr = fold_convert (ptrtype, addr);
7993 addr = build_va_arg_indirect_ref (addr);
7994 return build_va_arg_indirect_ref (addr);
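/* A sketch of the sequence gimplified above for a single integer argument,
   written as plain C (a simplification of the psABI va_arg algorithm;
   illustration only, never compiled):  */
#if 0
void *addr;
if (ap->gp_offset <= 48 - 8)		/* Still fits in the save area.  */
  {
    addr = (char *) ap->reg_save_area + ap->gp_offset;
    ap->gp_offset += 8;
  }
else					/* Take it from the stack.  */
  {
    addr = ap->overflow_arg_area;
    ap->overflow_arg_area = (char *) ap->overflow_arg_area + 8;
  }
#endif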
7997 /* Return true if OPNUM's MEM should be matched
7998 in movabs* patterns. */
8001 ix86_check_movabs (rtx insn, int opnum)
8005 set = PATTERN (insn);
8006 if (GET_CODE (set) == PARALLEL)
8007 set = XVECEXP (set, 0, 0);
8008 gcc_assert (GET_CODE (set) == SET);
8009 mem = XEXP (set, opnum);
8010 while (GET_CODE (mem) == SUBREG)
8011 mem = SUBREG_REG (mem);
8012 gcc_assert (MEM_P (mem));
8013 return volatile_ok || !MEM_VOLATILE_P (mem);
8016 /* Initialize the table of extra 80387 mathematical constants. */
8019 init_ext_80387_constants (void)
8021 static const char * cst[5] =
8023 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
8024 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
8025 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
8026 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
8027 "3.1415926535897932385128089594061862044", /* 4: fldpi */
8031 for (i = 0; i < 5; i++)
8033 real_from_string (&ext_80387_constants_table[i], cst[i]);
8034 /* Ensure each constant is rounded to XFmode precision. */
8035 real_convert (&ext_80387_constants_table[i],
8036 XFmode, &ext_80387_constants_table[i]);
8039 ext_80387_constants_init = 1;
8042 /* Return non-zero if the constant is something that
8043 can be loaded with a special instruction. */
8046 standard_80387_constant_p (rtx x)
8048 enum machine_mode mode = GET_MODE (x);
8052 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
8055 if (x == CONST0_RTX (mode))
8057 if (x == CONST1_RTX (mode))
8060 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8062 /* For XFmode constants, try to find a special 80387 instruction when
8063 optimizing for size or on those CPUs that benefit from them. */
8065 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
8069 if (! ext_80387_constants_init)
8070 init_ext_80387_constants ();
8072 for (i = 0; i < 5; i++)
8073 if (real_identical (&r, &ext_80387_constants_table[i]))
8077 /* A load of the constant -0.0 or -1.0 will be split into an
8078 fldz;fchs or fld1;fchs sequence. */
8079 if (real_isnegzero (&r))
8081 if (real_identical (&r, &dconstm1))
8087 /* Return the opcode of the special instruction to be used to load the constant X. */
8091 standard_80387_constant_opcode (rtx x)
8093 switch (standard_80387_constant_p (x))
8117 /* Return the CONST_DOUBLE representing the 80387 constant that is
8118 loaded by the specified special instruction. The argument IDX
8119 matches the return value from standard_80387_constant_p. */
8122 standard_80387_constant_rtx (int idx)
8126 if (! ext_80387_constants_init)
8127 init_ext_80387_constants ();
8143 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
/* Return 1 if X is all 0s and 2 if X is all 1s
8148 in supported SSE vector mode. */
8151 standard_sse_constant_p (rtx x)
8153 enum machine_mode mode = GET_MODE (x);
8155 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
8157 if (vector_all_ones_operand (x, mode))
8173 /* Return the opcode of the special instruction to be used to load
8177 standard_sse_constant_opcode (rtx insn, rtx x)
8179 switch (standard_sse_constant_p (x))
8182 switch (get_attr_mode (insn))
8185 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8186 return "%vpxor\t%0, %d0";
8188 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8189 return "%vxorpd\t%0, %d0";
8191 return "%vxorps\t%0, %d0";
8194 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8195 return "vpxor\t%x0, %x0, %x0";
8197 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8198 return "vxorpd\t%x0, %x0, %x0";
8200 return "vxorps\t%x0, %x0, %x0";
8207 return "%vpcmpeqd\t%0, %d0";
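/* Notes on the templates above (based on the i386 operand-printing
   conventions): the %v prefix emits the AVX "v" mnemonic prefix when
   AVX is enabled, %d0 duplicates operand 0 under AVX so that the
   three-operand form is produced, and comparing a register with
   itself for equality (pcmpeqd) sets every bit, the standard way to
   materialize all-ones without a memory load. */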
/* Return true if OP contains a symbol reference. */
8217 symbolic_reference_mentioned_p (rtx op)
8222 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
8225 fmt = GET_RTX_FORMAT (GET_CODE (op));
8226 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
8232 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
8233 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
8237 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
8244 /* Return true if it is appropriate to emit `ret' instructions in the
8245 body of a function. Do this only if the epilogue is simple, needing a
8246 couple of insns. Prior to reloading, we can't tell how many registers
8247 must be saved, so return false then. Return false if there is no frame
8248 marker to de-allocate. */
8251 ix86_can_use_return_insn_p (void)
8253 struct ix86_frame frame;
8255 if (! reload_completed || frame_pointer_needed)
8258 /* Don't allow more than 32k pop, since that's all we can do
8259 with one instruction. */
8260 if (crtl->args.pops_args && crtl->args.size >= 32768)
8263 ix86_compute_frame_layout (&frame);
8264 return (frame.stack_pointer_offset == UNITS_PER_WORD
8265 && (frame.nregs + frame.nsseregs) == 0);
8268 /* Value should be nonzero if functions must have frame pointers.
8269 Zero means the frame pointer need not be set up (and parms may
8270 be accessed via the stack pointer) in functions that seem suitable. */
8273 ix86_frame_pointer_required (void)
8275 /* If we accessed previous frames, then the generated code expects
8276 to be able to access the saved ebp value in our frame. */
8277 if (cfun->machine->accesses_prev_frame)
/* Several x86 OSes need a frame pointer for other reasons,
8281 usually pertaining to setjmp. */
8282 if (SUBTARGET_FRAME_POINTER_REQUIRED)
8285 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
8286 turns off the frame pointer by default. Turn it back on now if
the function is not a leaf. */
8288 if (TARGET_OMIT_LEAF_FRAME_POINTER
8289 && (!current_function_is_leaf
8290 || ix86_current_function_calls_tls_descriptor))
8293 if (crtl->profile && !flag_fentry)
8299 /* Record that the current function accesses previous call frames. */
8302 ix86_setup_frame_addresses (void)
8304 cfun->machine->accesses_prev_frame = 1;
8307 #ifndef USE_HIDDEN_LINKONCE
8308 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
8309 # define USE_HIDDEN_LINKONCE 1
8311 # define USE_HIDDEN_LINKONCE 0
8315 static int pic_labels_used;
8317 /* Fills in the label name that should be used for a pc thunk for
8318 the given register. */
8321 get_pc_thunk_name (char name[32], unsigned int regno)
8323 gcc_assert (!TARGET_64BIT);
8325 if (USE_HIDDEN_LINKONCE)
8326 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
8328 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
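/* For example, with USE_HIDDEN_LINKONCE the thunk for %ebx is named
   "__i686.get_pc_thunk.bx"; otherwise an internal label of the form
   "LPR<regno>" is generated (exact spelling is assembler-dependent). */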
8332 /* This function generates code for -fpic that loads %ebx with
8333 the return address of the caller and then returns. */
8336 ix86_code_end (void)
8341 for (regno = AX_REG; regno <= SP_REG; regno++)
8346 if (!(pic_labels_used & (1 << regno)))
8349 get_pc_thunk_name (name, regno);
8351 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
8352 get_identifier (name),
8353 build_function_type_list (void_type_node, NULL_TREE));
8354 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
8355 NULL_TREE, void_type_node);
8356 TREE_PUBLIC (decl) = 1;
8357 TREE_STATIC (decl) = 1;
8362 switch_to_section (darwin_sections[text_coal_section]);
8363 fputs ("\t.weak_definition\t", asm_out_file);
8364 assemble_name (asm_out_file, name);
8365 fputs ("\n\t.private_extern\t", asm_out_file);
8366 assemble_name (asm_out_file, name);
8367 putc ('\n', asm_out_file);
8368 ASM_OUTPUT_LABEL (asm_out_file, name);
8369 DECL_WEAK (decl) = 1;
8373 if (USE_HIDDEN_LINKONCE)
8375 DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
8377 targetm.asm_out.unique_section (decl, 0);
8378 switch_to_section (get_named_section (decl, NULL, 0));
8380 targetm.asm_out.globalize_label (asm_out_file, name);
8381 fputs ("\t.hidden\t", asm_out_file);
8382 assemble_name (asm_out_file, name);
8383 putc ('\n', asm_out_file);
8384 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
8388 switch_to_section (text_section);
8389 ASM_OUTPUT_LABEL (asm_out_file, name);
8392 DECL_INITIAL (decl) = make_node (BLOCK);
8393 current_function_decl = decl;
8394 init_function_start (decl);
8395 first_function_block_is_cold = false;
8396 /* Make sure unwind info is emitted for the thunk if needed. */
8397 final_start_function (emit_barrier (), asm_out_file, 1);
/* Pad the stack-IP move with 4 instructions (two NOPs count
   as one instruction). */
8401 if (TARGET_PAD_SHORT_FUNCTION)
8406 fputs ("\tnop\n", asm_out_file);
8409 xops[0] = gen_rtx_REG (Pmode, regno);
8410 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
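/* In the template below, the {att|intel} braces select the operand
   order for the current assembler dialect, and %z0 prints the
   operand-size suffix derived from the mode of operand 0 (e.g. "l"
   for SImode). */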
8411 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
8412 fputs ("\tret\n", asm_out_file);
8413 final_end_function ();
8414 init_insn_lengths ();
8415 free_after_compilation (cfun);
8417 current_function_decl = NULL;
8420 if (flag_split_stack)
8421 file_end_indicate_split_stack ();
8424 /* Emit code for the SET_GOT patterns. */
8427 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
8433 if (TARGET_VXWORKS_RTP && flag_pic)
8435 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
8436 xops[2] = gen_rtx_MEM (Pmode,
8437 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
8438 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
8440 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
8441 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
8442 an unadorned address. */
8443 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
8444 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
8445 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
8449 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
8453 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
8455 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
8458 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8459 is what will be referenced by the Mach-O PIC subsystem. */
8461 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8464 targetm.asm_out.internal_label (asm_out_file, "L",
8465 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
8470 get_pc_thunk_name (name, REGNO (dest));
8471 pic_labels_used |= 1 << REGNO (dest);
8473 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
8474 xops[2] = gen_rtx_MEM (QImode, xops[2]);
8475 output_asm_insn ("call\t%X2", xops);
8476 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8477 is what will be referenced by the Mach-O PIC subsystem. */
8480 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8482 targetm.asm_out.internal_label (asm_out_file, "L",
8483 CODE_LABEL_NUMBER (label));
8488 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
/* Generate a "push" pattern for input ARG. */
8498 struct machine_function *m = cfun->machine;
8500 if (m->fs.cfa_reg == stack_pointer_rtx)
8501 m->fs.cfa_offset += UNITS_PER_WORD;
8502 m->fs.sp_offset += UNITS_PER_WORD;
8504 return gen_rtx_SET (VOIDmode,
8506 gen_rtx_PRE_DEC (Pmode,
8507 stack_pointer_rtx)),
/* Generate a "pop" pattern for input ARG. */
8516 return gen_rtx_SET (VOIDmode,
8519 gen_rtx_POST_INC (Pmode,
8520 stack_pointer_rtx)));
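/* Illustrative note: gen_push produces (set (mem (pre_dec sp)) arg)
   and gen_pop produces (set arg (mem (post_inc sp))); the pre/post
   side effects model the implicit stack-pointer update done by the
   x86 push and pop instructions. */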
8523 /* Return >= 0 if there is an unused call-clobbered register available
8524 for the entire function. */
8527 ix86_select_alt_pic_regnum (void)
8529 if (current_function_is_leaf
8531 && !ix86_current_function_calls_tls_descriptor)
8534 /* Can't use the same register for both PIC and DRAP. */
8536 drap = REGNO (crtl->drap_reg);
8539 for (i = 2; i >= 0; --i)
8540 if (i != drap && !df_regs_ever_live_p (i))
8544 return INVALID_REGNUM;
8547 /* Return TRUE if we need to save REGNO. */
8550 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
8552 if (pic_offset_table_rtx
8553 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
8554 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
8556 || crtl->calls_eh_return
8557 || crtl->uses_const_pool))
8558 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
8560 if (crtl->calls_eh_return && maybe_eh_return)
8565 unsigned test = EH_RETURN_DATA_REGNO (i);
8566 if (test == INVALID_REGNUM)
8573 if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
8576 return (df_regs_ever_live_p (regno)
8577 && !call_used_regs[regno]
8578 && !fixed_regs[regno]
8579 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
/* Return the number of saved general-purpose registers. */
8585 ix86_nsaved_regs (void)
8590 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8591 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
/* Return the number of saved SSE registers. */
8599 ix86_nsaved_sseregs (void)
8604 if (!TARGET_64BIT_MS_ABI)
8606 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8607 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8612 /* Given FROM and TO register numbers, say whether this elimination is
8613 allowed. If stack alignment is needed, we can only replace argument
8614 pointer with hard frame pointer, or replace frame pointer with stack
8615 pointer. Otherwise, frame pointer elimination is automatically
8616 handled and all other eliminations are valid. */
8619 ix86_can_eliminate (const int from, const int to)
8621 if (stack_realign_fp)
8622 return ((from == ARG_POINTER_REGNUM
8623 && to == HARD_FRAME_POINTER_REGNUM)
8624 || (from == FRAME_POINTER_REGNUM
8625 && to == STACK_POINTER_REGNUM));
8627 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
8630 /* Return the offset between two registers, one to be eliminated, and the other
8631 its replacement, at the start of a routine. */
8634 ix86_initial_elimination_offset (int from, int to)
8636 struct ix86_frame frame;
8637 ix86_compute_frame_layout (&frame);
8639 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8640 return frame.hard_frame_pointer_offset;
8641 else if (from == FRAME_POINTER_REGNUM
8642 && to == HARD_FRAME_POINTER_REGNUM)
8643 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
8646 gcc_assert (to == STACK_POINTER_REGNUM);
8648 if (from == ARG_POINTER_REGNUM)
8649 return frame.stack_pointer_offset;
8651 gcc_assert (from == FRAME_POINTER_REGNUM);
8652 return frame.stack_pointer_offset - frame.frame_pointer_offset;
8656 /* In a dynamically-aligned function, we can't know the offset from
8657 stack pointer to frame pointer, so we must ensure that setjmp
8658 eliminates fp against the hard fp (%ebp) rather than trying to
8659 index from %esp up to the top of the frame across a gap that is
8660 of unknown (at compile-time) size. */
8662 ix86_builtin_setjmp_frame_value (void)
8664 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
8667 /* When using -fsplit-stack, the allocation routines set a field in
the TCB to the bottom of the stack plus this much space, measured in bytes. */
8671 #define SPLIT_STACK_AVAILABLE 256
/* Fill the ix86_frame structure describing the frame of the current function. */
8676 ix86_compute_frame_layout (struct ix86_frame *frame)
8678 unsigned int stack_alignment_needed;
8679 HOST_WIDE_INT offset;
8680 unsigned int preferred_alignment;
8681 HOST_WIDE_INT size = get_frame_size ();
8682 HOST_WIDE_INT to_allocate;
8684 frame->nregs = ix86_nsaved_regs ();
8685 frame->nsseregs = ix86_nsaved_sseregs ();
8687 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
8688 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
/* The 64-bit MS ABI seems to require stack alignment to always be 16,
   except in function prologues and leaf functions. */
8692 if ((TARGET_64BIT_MS_ABI && preferred_alignment < 16)
8693 && (!current_function_is_leaf || cfun->calls_alloca != 0
8694 || ix86_current_function_calls_tls_descriptor))
8696 preferred_alignment = 16;
8697 stack_alignment_needed = 16;
8698 crtl->preferred_stack_boundary = 128;
8699 crtl->stack_alignment_needed = 128;
8702 gcc_assert (!size || stack_alignment_needed);
8703 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
8704 gcc_assert (preferred_alignment <= stack_alignment_needed);
8706 /* For SEH we have to limit the amount of code movement into the prologue.
8707 At present we do this via a BLOCKAGE, at which point there's very little
8708 scheduling that can be done, which means that there's very little point
8709 in doing anything except PUSHs. */
8711 cfun->machine->use_fast_prologue_epilogue = false;
/* During reload iterations the number of registers saved can change.
   Recompute the value as needed.  Do not recompute when the number of
   registers didn't change, as reload does multiple calls to this
   function and does not expect the decision to change within a single
   iteration. */
8717 else if (!optimize_function_for_size_p (cfun)
8718 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
8720 int count = frame->nregs;
8721 struct cgraph_node *node = cgraph_get_node (current_function_decl);
8723 cfun->machine->use_fast_prologue_epilogue_nregs = count;
8725 /* The fast prologue uses move instead of push to save registers. This
8726 is significantly longer, but also executes faster as modern hardware
8727 can execute the moves in parallel, but can't do that for push/pop.
Be careful about choosing which prologue to emit: when the function
takes many instructions to execute, we may use the slow version, as
well as when the function is known to be outside a hot spot (known
only with profile feedback).  Weight the size of the function by the
number of registers to save, as it is cheap to use one or two push
instructions but very slow to use many of them. */
8736 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
8737 if (node->frequency < NODE_FREQUENCY_NORMAL
8738 || (flag_branch_probabilities
8739 && node->frequency < NODE_FREQUENCY_HOT))
8740 cfun->machine->use_fast_prologue_epilogue = false;
8742 cfun->machine->use_fast_prologue_epilogue
8743 = !expensive_function_p (count);
8746 frame->save_regs_using_mov
8747 = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
8748 /* If static stack checking is enabled and done with probes,
8749 the registers need to be saved before allocating the frame. */
8750 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
8752 /* Skip return address. */
8753 offset = UNITS_PER_WORD;
8755 /* Skip pushed static chain. */
8756 if (ix86_static_chain_on_stack)
8757 offset += UNITS_PER_WORD;
8759 /* Skip saved base pointer. */
8760 if (frame_pointer_needed)
8761 offset += UNITS_PER_WORD;
8762 frame->hfp_save_offset = offset;
8764 /* The traditional frame pointer location is at the top of the frame. */
8765 frame->hard_frame_pointer_offset = offset;
8767 /* Register save area */
8768 offset += frame->nregs * UNITS_PER_WORD;
8769 frame->reg_save_offset = offset;
8771 /* Align and set SSE register save area. */
8772 if (frame->nsseregs)
8774 /* The only ABI that has saved SSE registers (Win64) also has a
8775 16-byte aligned default stack, and thus we don't need to be
8776 within the re-aligned local stack frame to save them. */
8777 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
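/* Round the save-area start up to a 16-byte boundary so that each
   16-byte SSE slot counted below is naturally aligned. */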
8778 offset = (offset + 16 - 1) & -16;
8779 offset += frame->nsseregs * 16;
8781 frame->sse_reg_save_offset = offset;
8783 /* The re-aligned stack starts here. Values before this point are not
8784 directly comparable with values below this point. In order to make
8785 sure that no value happens to be the same before and after, force
8786 the alignment computation below to add a non-zero value. */
8787 if (stack_realign_fp)
8788 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
8791 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
8792 offset += frame->va_arg_size;
8794 /* Align start of frame for local function. */
8795 if (stack_realign_fp
8796 || offset != frame->sse_reg_save_offset
8798 || !current_function_is_leaf
8799 || cfun->calls_alloca
8800 || ix86_current_function_calls_tls_descriptor)
8801 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
8803 /* Frame pointer points here. */
8804 frame->frame_pointer_offset = offset;
8808 /* Add outgoing arguments area. Can be skipped if we eliminated
8809 all the function calls as dead code.
Skipping is, however, impossible when the function calls alloca: the
alloca expander assumes that the last crtl->outgoing_args_size bytes
of the stack frame are unused. */
8813 if (ACCUMULATE_OUTGOING_ARGS
8814 && (!current_function_is_leaf || cfun->calls_alloca
8815 || ix86_current_function_calls_tls_descriptor))
8817 offset += crtl->outgoing_args_size;
8818 frame->outgoing_arguments_size = crtl->outgoing_args_size;
8821 frame->outgoing_arguments_size = 0;
8823 /* Align stack boundary. Only needed if we're calling another function
8825 if (!current_function_is_leaf || cfun->calls_alloca
8826 || ix86_current_function_calls_tls_descriptor)
8827 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
8829 /* We've reached end of stack frame. */
8830 frame->stack_pointer_offset = offset;
8832 /* Size prologue needs to allocate. */
8833 to_allocate = offset - frame->sse_reg_save_offset;
8835 if ((!to_allocate && frame->nregs <= 1)
8836 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
8837 frame->save_regs_using_mov = false;
8839 if (ix86_using_red_zone ()
8840 && current_function_sp_is_unchanging
8841 && current_function_is_leaf
8842 && !ix86_current_function_calls_tls_descriptor)
8844 frame->red_zone_size = to_allocate;
8845 if (frame->save_regs_using_mov)
8846 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
8847 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
8848 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
8851 frame->red_zone_size = 0;
8852 frame->stack_pointer_offset -= frame->red_zone_size;
8854 /* The SEH frame pointer location is near the bottom of the frame.
8855 This is enforced by the fact that the difference between the
8856 stack pointer and the frame pointer is limited to 240 bytes in
8857 the unwind data structure. */
8862 /* If we can leave the frame pointer where it is, do so. */
8863 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
8864 if (diff > 240 || (diff & 15) != 0)
8866 /* Ideally we'd determine what portion of the local stack frame
8867 (within the constraint of the lowest 240) is most heavily used.
8868 But without that complication, simply bias the frame pointer
8869 by 128 bytes so as to maximize the amount of the local stack
8870 frame that is addressable with 8-bit offsets. */
8871 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
8876 /* This is semi-inlined memory_address_length, but simplified
8877 since we know that we're always dealing with reg+offset, and
8878 to avoid having to create and discard all that rtl. */
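/* The x86 encoding facts behind the computation: a zero displacement
   normally needs no bytes, but %ebp and %r13 as base always require
   at least a disp8; offsets in [-128, 127] fit in one byte while
   larger ones need four; and %esp or %r12 as base cost one extra SIB
   byte. */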
8881 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
8887 /* EBP and R13 cannot be encoded without an offset. */
8888 len = (regno == BP_REG || regno == R13_REG);
8890 else if (IN_RANGE (offset, -128, 127))
8893 /* ESP and R12 must be encoded with a SIB byte. */
8894 if (regno == SP_REG || regno == R12_REG)
8900 /* Return an RTX that points to CFA_OFFSET within the stack frame.
8901 The valid base registers are taken from CFUN->MACHINE->FS. */
8904 choose_baseaddr (HOST_WIDE_INT cfa_offset)
8906 const struct machine_function *m = cfun->machine;
8907 rtx base_reg = NULL;
8908 HOST_WIDE_INT base_offset = 0;
8910 if (m->use_fast_prologue_epilogue)
8912 /* Choose the base register most likely to allow the most scheduling
opportunities.  Generally FP is valid throughout the function,
8914 while DRAP must be reloaded within the epilogue. But choose either
8915 over the SP due to increased encoding size. */
8919 base_reg = hard_frame_pointer_rtx;
8920 base_offset = m->fs.fp_offset - cfa_offset;
8922 else if (m->fs.drap_valid)
8924 base_reg = crtl->drap_reg;
8925 base_offset = 0 - cfa_offset;
8927 else if (m->fs.sp_valid)
8929 base_reg = stack_pointer_rtx;
8930 base_offset = m->fs.sp_offset - cfa_offset;
8935 HOST_WIDE_INT toffset;
8938 /* Choose the base register with the smallest address encoding.
8939 With a tie, choose FP > DRAP > SP. */
8942 base_reg = stack_pointer_rtx;
8943 base_offset = m->fs.sp_offset - cfa_offset;
8944 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
8946 if (m->fs.drap_valid)
8948 toffset = 0 - cfa_offset;
8949 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
8952 base_reg = crtl->drap_reg;
8953 base_offset = toffset;
8959 toffset = m->fs.fp_offset - cfa_offset;
8960 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
8963 base_reg = hard_frame_pointer_rtx;
8964 base_offset = toffset;
8969 gcc_assert (base_reg != NULL);
8971 return plus_constant (base_reg, base_offset);
8974 /* Emit code to save registers in the prologue. */
8977 ix86_emit_save_regs (void)
8982 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
8983 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8985 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
8986 RTX_FRAME_RELATED_P (insn) = 1;
8990 /* Emit a single register save at CFA - CFA_OFFSET. */
8993 ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
8994 HOST_WIDE_INT cfa_offset)
8996 struct machine_function *m = cfun->machine;
8997 rtx reg = gen_rtx_REG (mode, regno);
8998 rtx mem, addr, base, insn;
9000 addr = choose_baseaddr (cfa_offset);
9001 mem = gen_frame_mem (mode, addr);
9003 /* For SSE saves, we need to indicate the 128-bit alignment. */
9004 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
9006 insn = emit_move_insn (mem, reg);
9007 RTX_FRAME_RELATED_P (insn) = 1;
9010 if (GET_CODE (base) == PLUS)
9011 base = XEXP (base, 0);
9012 gcc_checking_assert (REG_P (base));
9014 /* When saving registers into a re-aligned local stack frame, avoid
9015 any tricky guessing by dwarf2out. */
9016 if (m->fs.realigned)
9018 gcc_checking_assert (stack_realign_drap);
9020 if (regno == REGNO (crtl->drap_reg))
9022 /* A bit of a hack. We force the DRAP register to be saved in
9023 the re-aligned stack frame, which provides us with a copy
9024 of the CFA that will last past the prologue. Install it. */
9025 gcc_checking_assert (cfun->machine->fs.fp_valid);
9026 addr = plus_constant (hard_frame_pointer_rtx,
9027 cfun->machine->fs.fp_offset - cfa_offset);
9028 mem = gen_rtx_MEM (mode, addr);
9029 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
9033 /* The frame pointer is a stable reference within the
9034 aligned frame. Use it. */
9035 gcc_checking_assert (cfun->machine->fs.fp_valid);
9036 addr = plus_constant (hard_frame_pointer_rtx,
9037 cfun->machine->fs.fp_offset - cfa_offset);
9038 mem = gen_rtx_MEM (mode, addr);
9039 add_reg_note (insn, REG_CFA_EXPRESSION,
9040 gen_rtx_SET (VOIDmode, mem, reg));
9044 /* The memory may not be relative to the current CFA register,
9045 which means that we may need to generate a new pattern for
9046 use by the unwind info. */
9047 else if (base != m->fs.cfa_reg)
9049 addr = plus_constant (m->fs.cfa_reg, m->fs.cfa_offset - cfa_offset);
9050 mem = gen_rtx_MEM (mode, addr);
9051 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
9055 /* Emit code to save registers using MOV insns.
9056 First register is stored at CFA - CFA_OFFSET. */
9058 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
9062 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9063 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9065 ix86_emit_save_reg_using_mov (Pmode, regno, cfa_offset);
9066 cfa_offset -= UNITS_PER_WORD;
9070 /* Emit code to save SSE registers using MOV insns.
9071 First register is stored at CFA - CFA_OFFSET. */
9073 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
9077 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9078 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9080 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
9085 static GTY(()) rtx queued_cfa_restores;
/* Add a REG_CFA_RESTORE REG note to INSN, or queue it until the next
   stack manipulation insn.  The value is on the stack at CFA - CFA_OFFSET.
   Don't add the note if the previously saved value will be left untouched
   within the stack red zone until return, as unwinders can find the same
   value in the register and on the stack. */
9094 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
9096 if (cfa_offset <= cfun->machine->fs.red_zone_offset)
9101 add_reg_note (insn, REG_CFA_RESTORE, reg);
9102 RTX_FRAME_RELATED_P (insn) = 1;
9106 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
9109 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
9112 ix86_add_queued_cfa_restore_notes (rtx insn)
9115 if (!queued_cfa_restores)
9117 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
9119 XEXP (last, 1) = REG_NOTES (insn);
9120 REG_NOTES (insn) = queued_cfa_restores;
9121 queued_cfa_restores = NULL_RTX;
9122 RTX_FRAME_RELATED_P (insn) = 1;
9125 /* Expand prologue or epilogue stack adjustment.
The pattern exists to put a dependency on all ebp-based memory accesses.
9127 STYLE should be negative if instructions should be marked as frame related,
zero if the %r11 register is live and cannot be freely used, and positive
9132 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
9133 int style, bool set_cfa)
9135 struct machine_function *m = cfun->machine;
9137 bool add_frame_related_expr = false;
9140 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
9141 else if (x86_64_immediate_operand (offset, DImode))
9142 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
/* r11 is used by the indirect sibcall return as well; it is set before
   the epilogue and used after the epilogue. */
9149 tmp = gen_rtx_REG (DImode, R11_REG);
9152 gcc_assert (src != hard_frame_pointer_rtx
9153 && dest != hard_frame_pointer_rtx);
9154 tmp = hard_frame_pointer_rtx;
9156 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
9158 add_frame_related_expr = true;
9160 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
9163 insn = emit_insn (insn);
9165 ix86_add_queued_cfa_restore_notes (insn);
9171 gcc_assert (m->fs.cfa_reg == src);
9172 m->fs.cfa_offset += INTVAL (offset);
9173 m->fs.cfa_reg = dest;
9175 r = gen_rtx_PLUS (Pmode, src, offset);
9176 r = gen_rtx_SET (VOIDmode, dest, r);
9177 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
9178 RTX_FRAME_RELATED_P (insn) = 1;
9182 RTX_FRAME_RELATED_P (insn) = 1;
9183 if (add_frame_related_expr)
9185 rtx r = gen_rtx_PLUS (Pmode, src, offset);
9186 r = gen_rtx_SET (VOIDmode, dest, r);
9187 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
9191 if (dest == stack_pointer_rtx)
9193 HOST_WIDE_INT ooffset = m->fs.sp_offset;
9194 bool valid = m->fs.sp_valid;
9196 if (src == hard_frame_pointer_rtx)
9198 valid = m->fs.fp_valid;
9199 ooffset = m->fs.fp_offset;
9201 else if (src == crtl->drap_reg)
9203 valid = m->fs.drap_valid;
9208 /* Else there are two possibilities: SP itself, which we set
up as the default above, or EH_RETURN_STACKADJ_RTX, which is
taken care of by hand along the eh_return path. */
9211 gcc_checking_assert (src == stack_pointer_rtx
9212 || offset == const0_rtx);
9215 m->fs.sp_offset = ooffset - INTVAL (offset);
9216 m->fs.sp_valid = valid;
/* Find an available register to be used as the dynamic realign argument
   pointer register.  Such a register will be written in the prologue and
   used at the beginning of the body, so it must not be
9223 1. parameter passing register.
9225 We reuse static-chain register if it is available. Otherwise, we
9226 use DI for i386 and R13 for x86-64. We chose R13 since it has
9229 Return: the regno of chosen register. */
9232 find_drap_reg (void)
9234 tree decl = cfun->decl;
/* Use R13 for nested functions or functions that need a static chain.
   Since a function with a tail call may use any caller-saved register
   in the epilogue, DRAP must not use a caller-saved register in that
   case. */
9242 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
/* Use DI for nested functions or functions that need a static chain.
   Since a function with a tail call may use any caller-saved register
   in the epilogue, DRAP must not use a caller-saved register in that
   case. */
9253 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
9256 /* Reuse static chain register if it isn't used for parameter
9258 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
9260 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
9261 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
9268 /* Return minimum incoming stack alignment. */
9271 ix86_minimum_incoming_stack_boundary (bool sibcall)
9273 unsigned int incoming_stack_boundary;
9275 /* Prefer the one specified at command line. */
9276 if (ix86_user_incoming_stack_boundary)
9277 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
/* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
   when -mstackrealign is used, we are not doing the sibcall check, and
   the estimated stack alignment is 128 bits. */
9283 && ix86_force_align_arg_pointer
9284 && crtl->stack_alignment_estimated == 128)
9285 incoming_stack_boundary = MIN_STACK_BOUNDARY;
9287 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
9289 /* Incoming stack alignment can be changed on individual functions
9290 via force_align_arg_pointer attribute. We use the smallest
9291 incoming stack boundary. */
9292 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
9293 && lookup_attribute (ix86_force_align_arg_pointer_string,
9294 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
9295 incoming_stack_boundary = MIN_STACK_BOUNDARY;
9297 /* The incoming stack frame has to be aligned at least at
9298 parm_stack_boundary. */
9299 if (incoming_stack_boundary < crtl->parm_stack_boundary)
9300 incoming_stack_boundary = crtl->parm_stack_boundary;
/* The stack at the entry of main is aligned by the runtime.  We use
   the smallest incoming stack boundary. */
9304 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
9305 && DECL_NAME (current_function_decl)
9306 && MAIN_NAME_P (DECL_NAME (current_function_decl))
9307 && DECL_FILE_SCOPE_P (current_function_decl))
9308 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
9310 return incoming_stack_boundary;
9313 /* Update incoming stack boundary and estimated stack alignment. */
9316 ix86_update_stack_boundary (void)
9318 ix86_incoming_stack_boundary
9319 = ix86_minimum_incoming_stack_boundary (false);
9321 /* x86_64 vararg needs 16byte stack alignment for register save
9325 && crtl->stack_alignment_estimated < 128)
9326 crtl->stack_alignment_estimated = 128;
9329 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
9330 needed or an rtx for DRAP otherwise. */
9333 ix86_get_drap_rtx (void)
9335 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
9336 crtl->need_drap = true;
9338 if (stack_realign_drap)
/* Assign DRAP to vDRAP and return vDRAP. */
9341 unsigned int regno = find_drap_reg ();
9346 arg_ptr = gen_rtx_REG (Pmode, regno);
9347 crtl->drap_reg = arg_ptr;
9350 drap_vreg = copy_to_reg (arg_ptr);
9354 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
9357 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
9358 RTX_FRAME_RELATED_P (insn) = 1;
9366 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
9369 ix86_internal_arg_pointer (void)
9371 return virtual_incoming_args_rtx;
9374 struct scratch_reg {
9379 /* Return a short-lived scratch register for use on function entry.
9380 In 32-bit mode, it is valid only after the registers are saved
9381 in the prologue. This register must be released by means of
9382 release_scratch_register_on_entry once it is dead. */
9385 get_scratch_register_on_entry (struct scratch_reg *sr)
9393 /* We always use R11 in 64-bit mode. */
9398 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
9400 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
9401 bool static_chain_p = DECL_STATIC_CHAIN (decl);
9402 int regparm = ix86_function_regparm (fntype, decl);
9404 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
9406 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
9407 for the static chain register. */
9408 if ((regparm < 1 || (fastcall_p && !static_chain_p))
9409 && drap_regno != AX_REG)
9411 else if (regparm < 2 && drap_regno != DX_REG)
9413 /* ecx is the static chain register. */
9414 else if (regparm < 3 && !fastcall_p && !static_chain_p
9415 && drap_regno != CX_REG)
9417 else if (ix86_save_reg (BX_REG, true))
9419 /* esi is the static chain register. */
9420 else if (!(regparm == 3 && static_chain_p)
9421 && ix86_save_reg (SI_REG, true))
9423 else if (ix86_save_reg (DI_REG, true))
9427 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
9432 sr->reg = gen_rtx_REG (Pmode, regno);
9435 rtx insn = emit_insn (gen_push (sr->reg));
9436 RTX_FRAME_RELATED_P (insn) = 1;
9440 /* Release a scratch register obtained from the preceding function. */
9443 release_scratch_register_on_entry (struct scratch_reg *sr)
9447 rtx x, insn = emit_insn (gen_pop (sr->reg));
9449 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
9450 RTX_FRAME_RELATED_P (insn) = 1;
9451 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
9452 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
9453 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
9457 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
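/* With the default STACK_CHECK_PROBE_INTERVAL_EXP of 12 this is
   4096 bytes, i.e. one probe per page on typical configurations. */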
9459 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
9462 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
9464 /* We skip the probe for the first interval + a small dope of 4 words and
9465 probe that many bytes past the specified size to maintain a protection
area at the bottom of the stack. */
9467 const int dope = 4 * UNITS_PER_WORD;
9468 rtx size_rtx = GEN_INT (size), last;
9470 /* See if we have a constant small number of probes to generate. If so,
9471 that's the easy case. The run-time loop is made up of 11 insns in the
9472 generic case while the compile-time loop is made up of 3+2*(n-1) insns
9473 for n # of intervals. */
9474 if (size <= 5 * PROBE_INTERVAL)
9476 HOST_WIDE_INT i, adjust;
9477 bool first_probe = true;
9479 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
9480 values of N from 1 until it exceeds SIZE. If only one probe is
9481 needed, this will not generate any code. Then adjust and probe
9482 to PROBE_INTERVAL + SIZE. */
9483 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
9487 adjust = 2 * PROBE_INTERVAL + dope;
9488 first_probe = false;
9491 adjust = PROBE_INTERVAL;
9493 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9494 plus_constant (stack_pointer_rtx, -adjust)));
9495 emit_stack_probe (stack_pointer_rtx);
9499 adjust = size + PROBE_INTERVAL + dope;
9501 adjust = size + PROBE_INTERVAL - i;
9503 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9504 plus_constant (stack_pointer_rtx, -adjust)));
9505 emit_stack_probe (stack_pointer_rtx);
9507 /* Adjust back to account for the additional first interval. */
9508 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9509 plus_constant (stack_pointer_rtx,
9510 PROBE_INTERVAL + dope)));
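/* Illustrative trace, assuming size == 2 * PROBE_INTERVAL: the first
   iteration drops SP by 2 * PROBE_INTERVAL + dope and probes, the
   tail drops it by the remaining PROBE_INTERVAL and probes again,
   and the adjustment above restores PROBE_INTERVAL + dope, leaving
   SP lowered by exactly size bytes. */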
9513 /* Otherwise, do the same as above, but in a loop. Note that we must be
9514 extra careful with variables wrapping around because we might be at
9515 the very top (or the very bottom) of the address space and we have
9516 to be able to handle this case properly; in particular, we use an
9517 equality test for the loop condition. */
9520 HOST_WIDE_INT rounded_size;
9521 struct scratch_reg sr;
9523 get_scratch_register_on_entry (&sr);
9526 /* Step 1: round SIZE to the previous multiple of the interval. */
9528 rounded_size = size & -PROBE_INTERVAL;
9531 /* Step 2: compute initial and final value of the loop counter. */
9533 /* SP = SP_0 + PROBE_INTERVAL. */
9534 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9535 plus_constant (stack_pointer_rtx,
9536 - (PROBE_INTERVAL + dope))));
9538 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
9539 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
9540 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
9541 gen_rtx_PLUS (Pmode, sr.reg,
9542 stack_pointer_rtx)));
9547 while (SP != LAST_ADDR)
9549 SP = SP + PROBE_INTERVAL
9553 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
9554 values of N from 1 until it is equal to ROUNDED_SIZE. */
9556 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
9559 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
9560 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
9562 if (size != rounded_size)
9564 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9565 plus_constant (stack_pointer_rtx,
9566 rounded_size - size)));
9567 emit_stack_probe (stack_pointer_rtx);
9570 /* Adjust back to account for the additional first interval. */
9571 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9572 plus_constant (stack_pointer_rtx,
9573 PROBE_INTERVAL + dope)));
9575 release_scratch_register_on_entry (&sr);
9578 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
9580 /* Even if the stack pointer isn't the CFA register, we need to correctly
9581 describe the adjustments made to it, in particular differentiate the
9582 frame-related ones from the frame-unrelated ones. */
9585 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
9586 XVECEXP (expr, 0, 0)
9587 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9588 plus_constant (stack_pointer_rtx, -size));
9589 XVECEXP (expr, 0, 1)
9590 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9591 plus_constant (stack_pointer_rtx,
9592 PROBE_INTERVAL + dope + size));
9593 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
9594 RTX_FRAME_RELATED_P (last) = 1;
9596 cfun->machine->fs.sp_offset += size;
9599 /* Make sure nothing is scheduled before we are done. */
9600 emit_insn (gen_blockage ());
9603 /* Adjust the stack pointer up to REG while probing it. */
9606 output_adjust_stack_and_probe (rtx reg)
9608 static int labelno = 0;
9609 char loop_lab[32], end_lab[32];
9612 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
9613 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
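/* Illustrative output (AT&T syntax, assuming the default 4096-byte
   PROBE_INTERVAL and REG in %eax; exact label spelling is
   target-dependent):

	LPSRL0:	cmpl %eax, %esp
		je LPSRE0
		subl $4096, %esp
		orl $0, (%esp)
		jmp LPSRL0
	LPSRE0:  */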
9615 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
9617 /* Jump to END_LAB if SP == LAST_ADDR. */
9618 xops[0] = stack_pointer_rtx;
9620 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
9621 fputs ("\tje\t", asm_out_file);
9622 assemble_name_raw (asm_out_file, end_lab);
9623 fputc ('\n', asm_out_file);
9625 /* SP = SP + PROBE_INTERVAL. */
9626 xops[1] = GEN_INT (PROBE_INTERVAL);
9627 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
9630 xops[1] = const0_rtx;
9631 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
9633 fprintf (asm_out_file, "\tjmp\t");
9634 assemble_name_raw (asm_out_file, loop_lab);
9635 fputc ('\n', asm_out_file);
9637 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
9642 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
9643 inclusive. These are offsets from the current stack pointer. */
9646 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
9648 /* See if we have a constant small number of probes to generate. If so,
9649 that's the easy case. The run-time loop is made up of 7 insns in the
9650 generic case while the compile-time loop is made up of n insns for n #
9652 if (size <= 7 * PROBE_INTERVAL)
9656 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
9657 it exceeds SIZE. If only one probe is needed, this will not
9658 generate any code. Then probe at FIRST + SIZE. */
9659 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
9660 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + i)));
9662 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + size)));
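/* For example, with first == 0 and size == 2 * PROBE_INTERVAL + 100
   this emits probes at -PROBE_INTERVAL, -2 * PROBE_INTERVAL and
   -(2 * PROBE_INTERVAL + 100), so consecutive probes are never more
   than PROBE_INTERVAL bytes apart. */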
9665 /* Otherwise, do the same as above, but in a loop. Note that we must be
9666 extra careful with variables wrapping around because we might be at
9667 the very top (or the very bottom) of the address space and we have
9668 to be able to handle this case properly; in particular, we use an
9669 equality test for the loop condition. */
9672 HOST_WIDE_INT rounded_size, last;
9673 struct scratch_reg sr;
9675 get_scratch_register_on_entry (&sr);
9678 /* Step 1: round SIZE to the previous multiple of the interval. */
9680 rounded_size = size & -PROBE_INTERVAL;
9683 /* Step 2: compute initial and final value of the loop counter. */
9685 /* TEST_OFFSET = FIRST. */
9686 emit_move_insn (sr.reg, GEN_INT (-first));
9688 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
9689 last = first + rounded_size;
9694 while (TEST_ADDR != LAST_ADDR)
9696 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
9700 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
9701 until it is equal to ROUNDED_SIZE. */
9703 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
9706 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
9707 that SIZE is equal to ROUNDED_SIZE. */
9709 if (size != rounded_size)
9710 emit_stack_probe (plus_constant (gen_rtx_PLUS (Pmode,
9713 rounded_size - size));
9715 release_scratch_register_on_entry (&sr);
9718 /* Make sure nothing is scheduled before we are done. */
9719 emit_insn (gen_blockage ());
9722 /* Probe a range of stack addresses from REG to END, inclusive. These are
9723 offsets from the current stack pointer. */
9726 output_probe_stack_range (rtx reg, rtx end)
9728 static int labelno = 0;
9729 char loop_lab[32], end_lab[32];
9732 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
9733 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
9735 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
9737 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
9740 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
9741 fputs ("\tje\t", asm_out_file);
9742 assemble_name_raw (asm_out_file, end_lab);
9743 fputc ('\n', asm_out_file);
9745 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
9746 xops[1] = GEN_INT (PROBE_INTERVAL);
9747 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
9749 /* Probe at TEST_ADDR. */
9750 xops[0] = stack_pointer_rtx;
9752 xops[2] = const0_rtx;
9753 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
9755 fprintf (asm_out_file, "\tjmp\t");
9756 assemble_name_raw (asm_out_file, loop_lab);
9757 fputc ('\n', asm_out_file);
9759 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
/* Finalize the stack_realign_needed flag, which guides the prologue and
   epilogue so they are generated in the correct form. */
9767 ix86_finalize_stack_realign_flags (void)
/* Check whether stack realignment is really needed after reload, and
   store the result in cfun. */
9771 unsigned int incoming_stack_boundary
9772 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
9773 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
9774 unsigned int stack_realign = (incoming_stack_boundary
9775 < (current_function_is_leaf
9776 ? crtl->max_used_stack_slot_alignment
9777 : crtl->stack_alignment_needed));
9779 if (crtl->stack_realign_finalized)
/* After stack_realign_needed is finalized, we can no longer change it. */
9783 gcc_assert (crtl->stack_realign_needed == stack_realign);
9787 crtl->stack_realign_needed = stack_realign;
9788 crtl->stack_realign_finalized = true;
9792 /* Expand the prologue into a bunch of separate insns. */
9795 ix86_expand_prologue (void)
9797 struct machine_function *m = cfun->machine;
9800 struct ix86_frame frame;
9801 HOST_WIDE_INT allocate;
9802 bool int_registers_saved;
9804 ix86_finalize_stack_realign_flags ();
9806 /* DRAP should not coexist with stack_realign_fp */
9807 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
9809 memset (&m->fs, 0, sizeof (m->fs));
9811 /* Initialize CFA state for before the prologue. */
9812 m->fs.cfa_reg = stack_pointer_rtx;
9813 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
9815 /* Track SP offset to the CFA. We continue tracking this after we've
9816 swapped the CFA register away from SP. In the case of re-alignment
this is fudged; we're interested in offsets within the local frame. */
9818 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
9819 m->fs.sp_valid = true;
9821 ix86_compute_frame_layout (&frame);
9823 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
9825 /* We should have already generated an error for any use of
9826 ms_hook on a nested function. */
9827 gcc_checking_assert (!ix86_static_chain_on_stack);
/* Check whether profiling is active and we shall use the
   profiling-before-prologue variant.  If so, issue a sorry. */
9831 if (crtl->profile && flag_fentry != 0)
9832 sorry ("ms_hook_prologue attribute isn%'t compatible "
9833 "with -mfentry for 32-bit");
9835 /* In ix86_asm_output_function_label we emitted:
9836 8b ff movl.s %edi,%edi
9838 8b ec movl.s %esp,%ebp
9840 This matches the hookable function prologue in Win32 API
9841 functions in Microsoft Windows XP Service Pack 2 and newer.
9842 Wine uses this to enable Windows apps to hook the Win32 API
9843 functions provided by Wine.
9845 What that means is that we've already set up the frame pointer. */
9847 if (frame_pointer_needed
9848 && !(crtl->drap_reg && crtl->stack_realign_needed))
9852 /* We've decided to use the frame pointer already set up.
9853 Describe this to the unwinder by pretending that both
9854 push and mov insns happen right here.
9856 Putting the unwind info here at the end of the ms_hook
9857 is done so that we can make absolutely certain we get
9858 the required byte sequence at the start of the function,
9859 rather than relying on an assembler that can produce
9860 the exact encoding required.
9862 However it does mean (in the unpatched case) that we have
9863 a 1 insn window where the asynchronous unwind info is
9864 incorrect. However, if we placed the unwind info at
9865 its correct location we would have incorrect unwind info
9866 in the patched case. Which is probably all moot since
9867 I don't expect Wine generates dwarf2 unwind info for the
9868 system libraries that use this feature. */
9870 insn = emit_insn (gen_blockage ());
9872 push = gen_push (hard_frame_pointer_rtx);
9873 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
9875 RTX_FRAME_RELATED_P (push) = 1;
9876 RTX_FRAME_RELATED_P (mov) = 1;
9878 RTX_FRAME_RELATED_P (insn) = 1;
9879 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
9880 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
9882 /* Note that gen_push incremented m->fs.cfa_offset, even
9883 though we didn't emit the push insn here. */
9884 m->fs.cfa_reg = hard_frame_pointer_rtx;
9885 m->fs.fp_offset = m->fs.cfa_offset;
9886 m->fs.fp_valid = true;
9890 /* The frame pointer is not needed so pop %ebp again.
9891 This leaves us with a pristine state. */
9892 emit_insn (gen_pop (hard_frame_pointer_rtx));
9896 /* The first insn of a function that accepts its static chain on the
9897 stack is to push the register that would be filled in by a direct
9898 call. This insn will be skipped by the trampoline. */
9899 else if (ix86_static_chain_on_stack)
9901 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
9902 emit_insn (gen_blockage ());
9904 /* We don't want to interpret this push insn as a register save,
9905 only as a stack adjustment. The real copy of the register as
9906 a save will be done later, if needed. */
9907 t = plus_constant (stack_pointer_rtx, -UNITS_PER_WORD);
9908 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
9909 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
9910 RTX_FRAME_RELATED_P (insn) = 1;
/* Emit prologue code to adjust stack alignment and set up DRAP, in case
   DRAP is needed and stack realignment is really needed after reload. */
9915 if (stack_realign_drap)
9917 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
/* Only need to push the parameter pointer reg if it is call-preserved
   (i.e. not in call_used_regs). */
9920 if (!call_used_regs[REGNO (crtl->drap_reg)])
9922 /* Push arg pointer reg */
9923 insn = emit_insn (gen_push (crtl->drap_reg));
9924 RTX_FRAME_RELATED_P (insn) = 1;
9927 /* Grab the argument pointer. */
9928 t = plus_constant (stack_pointer_rtx, m->fs.sp_offset);
9929 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
9930 RTX_FRAME_RELATED_P (insn) = 1;
9931 m->fs.cfa_reg = crtl->drap_reg;
9932 m->fs.cfa_offset = 0;
9934 /* Align the stack. */
9935 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
9937 GEN_INT (-align_bytes)));
9938 RTX_FRAME_RELATED_P (insn) = 1;
/* Replicate the return address on the stack so that the return
   address can be reached via the (argp - 1) slot.  This is needed
9942 to implement macro RETURN_ADDR_RTX and intrinsic function
9943 expand_builtin_return_addr etc. */
9944 t = plus_constant (crtl->drap_reg, -UNITS_PER_WORD);
9945 t = gen_frame_mem (Pmode, t);
9946 insn = emit_insn (gen_push (t));
9947 RTX_FRAME_RELATED_P (insn) = 1;
9949 /* For the purposes of frame and register save area addressing,
9950 we've started over with a new frame. */
9951 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
9952 m->fs.realigned = true;
9955 if (frame_pointer_needed && !m->fs.fp_valid)
9957 /* Note: AT&T enter does NOT have reversed args. Enter is probably
9958 slower on all targets. Also sdb doesn't like it. */
9959 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
9960 RTX_FRAME_RELATED_P (insn) = 1;
9962 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
9964 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
9965 RTX_FRAME_RELATED_P (insn) = 1;
9967 if (m->fs.cfa_reg == stack_pointer_rtx)
9968 m->fs.cfa_reg = hard_frame_pointer_rtx;
9969 m->fs.fp_offset = m->fs.sp_offset;
9970 m->fs.fp_valid = true;
9974 int_registers_saved = (frame.nregs == 0);
9976 if (!int_registers_saved)
9978 /* If saving registers via PUSH, do so now. */
9979 if (!frame.save_regs_using_mov)
9981 ix86_emit_save_regs ();
9982 int_registers_saved = true;
9983 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
/* When using the red zone, we may start saving registers before allocating
   the stack frame, saving one cycle of the prologue.  However, avoid
9988 doing this if we have to probe the stack; at least on x86_64 the
9989 stack probe can turn into a call that clobbers a red zone location. */
9990 else if (ix86_using_red_zone ()
9991 && (! TARGET_STACK_PROBE
9992 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
9994 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
9995 int_registers_saved = true;
9999 if (stack_realign_fp)
10001 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10002 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
10004 /* The computation of the size of the re-aligned stack frame means
10005 that we must allocate the size of the register save area before
10006 performing the actual alignment. Otherwise we cannot guarantee
10007 that there's enough storage above the realignment point. */
10008 if (m->fs.sp_offset != frame.sse_reg_save_offset)
10009 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10010 GEN_INT (m->fs.sp_offset
10011 - frame.sse_reg_save_offset),
10014 /* Align the stack. */
10015 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10017 GEN_INT (-align_bytes)));
10019 /* For the purposes of register save area addressing, the stack
10020 pointer is no longer valid. As for the value of sp_offset,
10021 see ix86_compute_frame_layout, which we need to match in order
10022 to pass verification of stack_pointer_offset at the end. */
10023 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
10024 m->fs.sp_valid = false;
10027 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
10029 if (flag_stack_usage_info)
10031 /* We start to count from ARG_POINTER. */
10032 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
10034 /* If it was realigned, take into account the fake frame. */
10035 if (stack_realign_drap)
10037 if (ix86_static_chain_on_stack)
10038 stack_size += UNITS_PER_WORD;
10040 if (!call_used_regs[REGNO (crtl->drap_reg)])
10041 stack_size += UNITS_PER_WORD;
10043 /* This over-estimates by 1 minimal-stack-alignment-unit but
10044 mitigates that by counting in the new return address slot. */
10045 current_function_dynamic_stack_size
10046 += crtl->stack_alignment_needed / BITS_PER_UNIT;
10049 current_function_static_stack_size = stack_size;
10052 /* The stack has already been decremented by the instruction calling us
10053 so probe if the size is non-negative to preserve the protection area. */
10054 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
10056 /* We expect the registers to be saved when probes are used. */
10057 gcc_assert (int_registers_saved);
10059 if (STACK_CHECK_MOVING_SP)
10061 ix86_adjust_stack_and_probe (allocate);
10066 HOST_WIDE_INT size = allocate;
10068 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
10069 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
10071 if (TARGET_STACK_PROBE)
10072 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
10074 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
10080 else if (!ix86_target_stack_probe ()
10081 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
10083 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10084 GEN_INT (-allocate), -1,
10085 m->fs.cfa_reg == stack_pointer_rtx);
10089 rtx eax = gen_rtx_REG (Pmode, AX_REG);
10091 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
10093 bool eax_live = false;
10094 bool r10_live = false;
10097 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
10098 if (!TARGET_64BIT_MS_ABI)
10099 eax_live = ix86_eax_live_at_start_p ();
10103 emit_insn (gen_push (eax));
10104 allocate -= UNITS_PER_WORD;
10108 r10 = gen_rtx_REG (Pmode, R10_REG);
10109 emit_insn (gen_push (r10));
10110 allocate -= UNITS_PER_WORD;
10113 emit_move_insn (eax, GEN_INT (allocate));
10114 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
10116 /* Use the fact that AX still contains ALLOCATE. */
10117 adjust_stack_insn = (TARGET_64BIT
10118 ? gen_pro_epilogue_adjust_stack_di_sub
10119 : gen_pro_epilogue_adjust_stack_si_sub);
10121 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
10122 stack_pointer_rtx, eax));
10124 /* Note that SEH directives need to continue tracking the stack
10125 pointer even after the frame pointer has been set up. */
10126 if (m->fs.cfa_reg == stack_pointer_rtx || TARGET_SEH)
10128 if (m->fs.cfa_reg == stack_pointer_rtx)
10129 m->fs.cfa_offset += allocate;
10131 RTX_FRAME_RELATED_P (insn) = 1;
10132 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10133 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10134 plus_constant (stack_pointer_rtx,
10137 m->fs.sp_offset += allocate;
10139 if (r10_live && eax_live)
10141 t = choose_baseaddr (m->fs.sp_offset - allocate);
10142 emit_move_insn (r10, gen_frame_mem (Pmode, t));
10143 t = choose_baseaddr (m->fs.sp_offset - allocate - UNITS_PER_WORD);
10144 emit_move_insn (eax, gen_frame_mem (Pmode, t));
10146 else if (eax_live || r10_live)
10148 t = choose_baseaddr (m->fs.sp_offset - allocate);
10149 emit_move_insn ((eax_live ? eax : r10), gen_frame_mem (Pmode, t));
10152 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
/* If we haven't already set up the frame pointer, do so now. */
10155 if (frame_pointer_needed && !m->fs.fp_valid)
10157 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
10158 GEN_INT (frame.stack_pointer_offset
10159 - frame.hard_frame_pointer_offset));
10160 insn = emit_insn (insn);
10161 RTX_FRAME_RELATED_P (insn) = 1;
10162 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
10164 if (m->fs.cfa_reg == stack_pointer_rtx)
10165 m->fs.cfa_reg = hard_frame_pointer_rtx;
10166 m->fs.fp_offset = frame.hard_frame_pointer_offset;
10167 m->fs.fp_valid = true;
10170 if (!int_registers_saved)
10171 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10172 if (frame.nsseregs)
10173 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
10175 pic_reg_used = false;
10176 if (pic_offset_table_rtx
10177 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
10180 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
10182 if (alt_pic_reg_used != INVALID_REGNUM)
10183 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
10185 pic_reg_used = true;
10192 if (ix86_cmodel == CM_LARGE_PIC)
10194 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
10195 rtx label = gen_label_rtx ();
10196 emit_label (label);
10197 LABEL_PRESERVE_P (label) = 1;
10198 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
10199 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
10200 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
10201 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
10202 pic_offset_table_rtx, tmp_reg));
10205 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
10209 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
10210 RTX_FRAME_RELATED_P (insn) = 1;
10211 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
10215 /* In the pic_reg_used case, make sure that the got load isn't deleted
10216 when mcount needs it. Blockage to avoid call movement across mcount
10217 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
10219 if (crtl->profile && !flag_fentry && pic_reg_used)
10220 emit_insn (gen_prologue_use (pic_offset_table_rtx));
10222 if (crtl->drap_reg && !crtl->stack_realign_needed)
10224 /* vDRAP is set up, but after reload it turns out stack realignment
10225 isn't necessary; here we emit the prologue to set up DRAP
10226 without the stack realignment adjustment. */
10227 t = choose_baseaddr (0);
10228 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10231 /* Prevent instructions from being scheduled into the register save push
10232 sequence when access to the red-zone area is done through the frame pointer.
10233 The offset between the frame pointer and the stack pointer is calculated
10234 relative to the value of the stack pointer at the end of the function
10235 prologue, and moving instructions that access the red-zone area via the
10236 frame pointer into the push sequence violates this assumption. */
10237 if (frame_pointer_needed && frame.red_zone_size)
10238 emit_insn (gen_memory_blockage ());
10240 /* Emit cld instruction if stringops are used in the function. */
10241 if (TARGET_CLD && ix86_current_function_needs_cld)
10242 emit_insn (gen_cld ());
10244 /* SEH requires that the prologue end within 256 bytes of the start of
10245 the function. Prevent instruction schedules that would extend that.
10246 Further, prevent alloca modifications to the stack pointer from being
10247 combined with prologue modifications. */
10249 emit_insn (gen_prologue_use (stack_pointer_rtx));
10252 /* Emit code to restore REG using a POP insn. */
10255 ix86_emit_restore_reg_using_pop (rtx reg)
10257 struct machine_function *m = cfun->machine;
10258 rtx insn = emit_insn (gen_pop (reg));
10260 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
10261 m->fs.sp_offset -= UNITS_PER_WORD;
10263 if (m->fs.cfa_reg == crtl->drap_reg
10264 && REGNO (reg) == REGNO (crtl->drap_reg))
10266 /* Previously we'd represented the CFA as an expression
10267 like *(%ebp - 8). We've just popped that value from
10268 the stack, which means we need to reset the CFA to
10269 the drap register. This will remain until we restore
10270 the stack pointer. */
10271 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
10272 RTX_FRAME_RELATED_P (insn) = 1;
10274 /* This means that the DRAP register is valid for addressing too. */
10275 m->fs.drap_valid = true;
10279 if (m->fs.cfa_reg == stack_pointer_rtx)
10281 rtx x = plus_constant (stack_pointer_rtx, UNITS_PER_WORD);
10282 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10283 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
10284 RTX_FRAME_RELATED_P (insn) = 1;
10286 m->fs.cfa_offset -= UNITS_PER_WORD;
10289 /* When the frame pointer is the CFA, and we pop it, we are
10290 swapping back to the stack pointer as the CFA. This happens
10291 for stack frames that don't allocate other data, so we assume
10292 the stack pointer is now pointing at the return address, i.e.
10293 the function entry state, which makes the offset one word. */
10294 if (reg == hard_frame_pointer_rtx)
10296 m->fs.fp_valid = false;
10297 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
10299 m->fs.cfa_reg = stack_pointer_rtx;
10300 m->fs.cfa_offset -= UNITS_PER_WORD;
10302 add_reg_note (insn, REG_CFA_DEF_CFA,
10303 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10304 GEN_INT (m->fs.cfa_offset)));
10305 RTX_FRAME_RELATED_P (insn) = 1;
10310 /* Emit code to restore saved registers using POP insns. */
10313 ix86_emit_restore_regs_using_pop (void)
10315 unsigned int regno;
10317 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10318 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
10319 ix86_emit_restore_reg_using_pop (gen_rtx_REG (Pmode, regno));
10322 /* Emit code and notes for the LEAVE instruction. */
10325 ix86_emit_leave (void)
10327 struct machine_function *m = cfun->machine;
10328 rtx insn = emit_insn (ix86_gen_leave ());
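  /* A leave insn is equivalent to (64-bit form shown):
	 movq %rbp, %rsp
	 popq %rbp
     so afterwards the stack pointer is valid again and sits one word
     above the slot where the frame pointer was saved.  */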
10330 ix86_add_queued_cfa_restore_notes (insn);
10332 gcc_assert (m->fs.fp_valid);
10333 m->fs.sp_valid = true;
10334 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
10335 m->fs.fp_valid = false;
10337 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
10339 m->fs.cfa_reg = stack_pointer_rtx;
10340 m->fs.cfa_offset = m->fs.sp_offset;
10342 add_reg_note (insn, REG_CFA_DEF_CFA,
10343 plus_constant (stack_pointer_rtx, m->fs.sp_offset));
10344 RTX_FRAME_RELATED_P (insn) = 1;
10345 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
10350 /* Emit code to restore saved registers using MOV insns.
10351 First register is restored from CFA - CFA_OFFSET. */
10353 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
10354 bool maybe_eh_return)
10356 struct machine_function *m = cfun->machine;
10357 unsigned int regno;
10359 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10360 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
10362 rtx reg = gen_rtx_REG (Pmode, regno);
10365 mem = choose_baseaddr (cfa_offset);
10366 mem = gen_frame_mem (Pmode, mem);
10367 insn = emit_move_insn (reg, mem);
10369 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
10371 /* Previously we'd represented the CFA as an expression
10372 like *(%ebp - 8). We've just reloaded that value from
10373 the stack, which means we need to reset the CFA to
10374 the drap register. This will remain until we restore
10375 the stack pointer. */
10376 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
10377 RTX_FRAME_RELATED_P (insn) = 1;
10379 /* This means that the DRAP register is valid for addressing. */
10380 m->fs.drap_valid = true;
10383 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
10385 cfa_offset -= UNITS_PER_WORD;
10389 /* Emit code to restore saved SSE registers using MOV insns.
10390 First register is restored from CFA - CFA_OFFSET. */
10392 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
10393 bool maybe_eh_return)
10395 unsigned int regno;
10397 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10398 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
10400 rtx reg = gen_rtx_REG (V4SFmode, regno);
10403 mem = choose_baseaddr (cfa_offset);
10404 mem = gen_rtx_MEM (V4SFmode, mem);
10405 set_mem_align (mem, 128);
10406 emit_move_insn (reg, mem);
10408 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
10414 /* Restore function stack, frame, and registers. */
10417 ix86_expand_epilogue (int style)
10419 struct machine_function *m = cfun->machine;
10420 struct machine_frame_state frame_state_save = m->fs;
10421 struct ix86_frame frame;
10422 bool restore_regs_via_mov;
10425 ix86_finalize_stack_realign_flags ();
10426 ix86_compute_frame_layout (&frame);
10428 m->fs.sp_valid = (!frame_pointer_needed
10429 || (current_function_sp_is_unchanging
10430 && !stack_realign_fp));
10431 gcc_assert (!m->fs.sp_valid
10432 || m->fs.sp_offset == frame.stack_pointer_offset);
10434 /* The FP must be valid if the frame pointer is present. */
10435 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
10436 gcc_assert (!m->fs.fp_valid
10437 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
10439 /* We must have *some* valid pointer to the stack frame. */
10440 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
10442 /* The DRAP is never valid at this point. */
10443 gcc_assert (!m->fs.drap_valid);
10445 /* See the comment about red zone and frame
10446 pointer usage in ix86_expand_prologue. */
10447 if (frame_pointer_needed && frame.red_zone_size)
10448 emit_insn (gen_memory_blockage ());
10450 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
10451 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
10453 /* Determine the CFA offset of the end of the red-zone. */
10454 m->fs.red_zone_offset = 0;
10455 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
10457 /* The red-zone begins below the return address. */
10458 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
10460 /* When the register save area is in the aligned portion of
10461 the stack, determine the maximum runtime displacement that
10462 matches up with the aligned frame. */
10463 if (stack_realign_drap)
10464 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
10468 /* Special care must be taken for the normal return case of a function
10469 using eh_return: the eax and edx registers are marked as saved, but
10470 not restored along this path. Adjust the save location to match. */
10471 if (crtl->calls_eh_return && style != 2)
10472 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
10474 /* EH_RETURN requires the use of moves to function properly. */
10475 if (crtl->calls_eh_return)
10476 restore_regs_via_mov = true;
10477 /* SEH requires the use of pops to identify the epilogue. */
10478 else if (TARGET_SEH)
10479 restore_regs_via_mov = false;
10480 /* If we're only restoring one register and sp is not valid then
10481 use a move instruction to restore the register, since it's
10482 less work than reloading sp and popping the register. */
10483 else if (!m->fs.sp_valid && frame.nregs <= 1)
10484 restore_regs_via_mov = true;
10485 else if (TARGET_EPILOGUE_USING_MOVE
10486 && cfun->machine->use_fast_prologue_epilogue
10487 && (frame.nregs > 1
10488 || m->fs.sp_offset != frame.reg_save_offset))
10489 restore_regs_via_mov = true;
10490 else if (frame_pointer_needed
10492 && m->fs.sp_offset != frame.reg_save_offset)
10493 restore_regs_via_mov = true;
10494 else if (frame_pointer_needed
10495 && TARGET_USE_LEAVE
10496 && cfun->machine->use_fast_prologue_epilogue
10497 && frame.nregs == 1)
10498 restore_regs_via_mov = true;
10500 restore_regs_via_mov = false;
10502 if (restore_regs_via_mov || frame.nsseregs)
10504 /* Ensure that the entire register save area is addressable via
10505 the stack pointer, if we will restore via sp. */
10507 && m->fs.sp_offset > 0x7fffffff
10508 && !(m->fs.fp_valid || m->fs.drap_valid)
10509 && (frame.nsseregs + frame.nregs) != 0)
10511 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10512 GEN_INT (m->fs.sp_offset
10513 - frame.sse_reg_save_offset),
10515 m->fs.cfa_reg == stack_pointer_rtx);
10519 /* If there are any SSE registers to restore, then we have to do it
10520 via moves, since there's obviously no pop for SSE regs. */
10521 if (frame.nsseregs)
10522 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
10525 if (restore_regs_via_mov)
10530 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
10532 /* eh_return epilogues need %ecx added to the stack pointer. */
10535 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
10537 /* Stack align doesn't work with eh_return. */
10538 gcc_assert (!stack_realign_drap);
10539 /* Neither do regparm nested functions. */
10540 gcc_assert (!ix86_static_chain_on_stack);
10542 if (frame_pointer_needed)
10544 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
10545 t = plus_constant (t, m->fs.fp_offset - UNITS_PER_WORD);
10546 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
10548 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
10549 insn = emit_move_insn (hard_frame_pointer_rtx, t);
10551 /* Note that we use SA as a temporary CFA, as the return
10552 address is at the proper place relative to it. We
10553 pretend this happens at the FP restore insn because
10554 prior to this insn the FP would be stored at the wrong
10555 offset relative to SA, and after this insn we have no
10556 other reasonable register to use for the CFA. We don't
10557 bother resetting the CFA to the SP for the duration of
10558 the return insn. */
10559 add_reg_note (insn, REG_CFA_DEF_CFA,
10560 plus_constant (sa, UNITS_PER_WORD));
10561 ix86_add_queued_cfa_restore_notes (insn);
10562 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
10563 RTX_FRAME_RELATED_P (insn) = 1;
10565 m->fs.cfa_reg = sa;
10566 m->fs.cfa_offset = UNITS_PER_WORD;
10567 m->fs.fp_valid = false;
10569 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
10570 const0_rtx, style, false);
10574 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
10575 t = plus_constant (t, m->fs.sp_offset - UNITS_PER_WORD);
10576 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
10577 ix86_add_queued_cfa_restore_notes (insn);
10579 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
10580 if (m->fs.cfa_offset != UNITS_PER_WORD)
10582 m->fs.cfa_offset = UNITS_PER_WORD;
10583 add_reg_note (insn, REG_CFA_DEF_CFA,
10584 plus_constant (stack_pointer_rtx,
10586 RTX_FRAME_RELATED_P (insn) = 1;
10589 m->fs.sp_offset = UNITS_PER_WORD;
10590 m->fs.sp_valid = true;
10595 /* SEH requires that the function end with (1) a stack adjustment
10596 if necessary, (2) a sequence of pops, and (3) a return or
10597 jump instruction. Prevent insns from the function body from
10598 being scheduled into this sequence. */
10601 /* Prevent a catch region from being adjacent to the standard
10602 epilogue sequence. Unfortunately, neither crtl->uses_eh_lsda nor
10603 several other flags that would be interesting to test are
10605 if (flag_non_call_exceptions)
10606 emit_insn (gen_nops (const1_rtx));
10608 emit_insn (gen_blockage ());
10611 /* The first step is to deallocate the stack frame so that we can
10612 pop the registers. */
10613 if (!m->fs.sp_valid)
10615 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
10616 GEN_INT (m->fs.fp_offset
10617 - frame.reg_save_offset),
10620 else if (m->fs.sp_offset != frame.reg_save_offset)
10622 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10623 GEN_INT (m->fs.sp_offset
10624 - frame.reg_save_offset),
10626 m->fs.cfa_reg == stack_pointer_rtx);
10629 ix86_emit_restore_regs_using_pop ();
10632 /* If we used a frame pointer and haven't already got rid of it,
10634 if (m->fs.fp_valid)
10636 /* If the stack pointer is valid and pointing at the frame
10637 pointer store address, then we only need a pop. */
10638 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
10639 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10640 /* Leave results in shorter dependency chains on CPUs that are
10641 able to grok it fast. */
10642 else if (TARGET_USE_LEAVE
10643 || optimize_function_for_size_p (cfun)
10644 || !cfun->machine->use_fast_prologue_epilogue)
10645 ix86_emit_leave ();
10648 pro_epilogue_adjust_stack (stack_pointer_rtx,
10649 hard_frame_pointer_rtx,
10650 const0_rtx, style, !using_drap);
10651 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10657 int param_ptr_offset = UNITS_PER_WORD;
10660 gcc_assert (stack_realign_drap);
10662 if (ix86_static_chain_on_stack)
10663 param_ptr_offset += UNITS_PER_WORD;
10664 if (!call_used_regs[REGNO (crtl->drap_reg)])
10665 param_ptr_offset += UNITS_PER_WORD;
10667 insn = emit_insn (gen_rtx_SET
10668 (VOIDmode, stack_pointer_rtx,
10669 gen_rtx_PLUS (Pmode,
10671 GEN_INT (-param_ptr_offset))));
10672 m->fs.cfa_reg = stack_pointer_rtx;
10673 m->fs.cfa_offset = param_ptr_offset;
10674 m->fs.sp_offset = param_ptr_offset;
10675 m->fs.realigned = false;
10677 add_reg_note (insn, REG_CFA_DEF_CFA,
10678 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10679 GEN_INT (param_ptr_offset)));
10680 RTX_FRAME_RELATED_P (insn) = 1;
10682 if (!call_used_regs[REGNO (crtl->drap_reg)])
10683 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
10686 /* At this point the stack pointer must be valid, and we must have
10687 restored all of the registers. We may not have deallocated the
10688 entire stack frame. We've delayed this until now because it may
10689 be possible to merge the local stack deallocation with the
10690 deallocation forced by ix86_static_chain_on_stack. */
10691 gcc_assert (m->fs.sp_valid);
10692 gcc_assert (!m->fs.fp_valid);
10693 gcc_assert (!m->fs.realigned);
10694 if (m->fs.sp_offset != UNITS_PER_WORD)
10696 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10697 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
10701 /* Sibcall epilogues don't want a return instruction. */
10704 m->fs = frame_state_save;
10708 /* Emit vzeroupper if needed. */
10709 if (TARGET_VZEROUPPER
10710 && !TREE_THIS_VOLATILE (cfun->decl)
10711 && !cfun->machine->caller_return_avx256_p)
10712 emit_insn (gen_avx_vzeroupper (GEN_INT (call_no_avx256)));
10714 if (crtl->args.pops_args && crtl->args.size)
10716 rtx popc = GEN_INT (crtl->args.pops_args);
10718 /* i386 can only pop 64K bytes. If asked to pop more, pop return
10719 address, do explicit add, and jump indirectly to the caller. */
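      /* E.g. for crtl->args.pops_args == 0x10008 the tail emitted below
	 resembles (illustrative):
	     popl %ecx            ; return address
	     addl $0x10008, %esp  ; drop the arguments
	     jmp  *%ecx           ; return to the caller  */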
10721 if (crtl->args.pops_args >= 65536)
10723 rtx ecx = gen_rtx_REG (SImode, CX_REG);
10726 /* There is no "pascal" calling convention in any 64bit ABI. */
10727 gcc_assert (!TARGET_64BIT);
10729 insn = emit_insn (gen_pop (ecx));
10730 m->fs.cfa_offset -= UNITS_PER_WORD;
10731 m->fs.sp_offset -= UNITS_PER_WORD;
10733 add_reg_note (insn, REG_CFA_ADJUST_CFA,
10734 copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
10735 add_reg_note (insn, REG_CFA_REGISTER,
10736 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
10737 RTX_FRAME_RELATED_P (insn) = 1;
10739 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10741 emit_jump_insn (gen_return_indirect_internal (ecx));
10744 emit_jump_insn (gen_return_pop_internal (popc));
10747 emit_jump_insn (gen_return_internal ());
10749 /* Restore the state back to the state from the prologue,
10750 so that it's correct for the next epilogue. */
10751 m->fs = frame_state_save;
10754 /* Reset from the function's potential modifications. */
10757 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
10758 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
10760 if (pic_offset_table_rtx)
10761 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
10763 /* Mach-O doesn't support labels at the end of objects, so if
10764 it looks like we might want one, insert a NOP. */
10766 rtx insn = get_last_insn ();
10769 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
10770 insn = PREV_INSN (insn);
10774 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
10775 fputs ("\tnop\n", file);
10781 /* Return a scratch register to use in the split stack prologue. The
10782 split stack prologue is used for -fsplit-stack. It is the first
10783 instructions in the function, even before the regular prologue.
10784 The scratch register can be any caller-saved register which is not
10785 used for parameters or for the static chain. */
10787 static unsigned int
10788 split_stack_prologue_scratch_regno (void)
10797 is_fastcall = (lookup_attribute ("fastcall",
10798 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
10800 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
10804 if (DECL_STATIC_CHAIN (cfun->decl))
10806 sorry ("-fsplit-stack does not support fastcall with "
10807 "nested function");
10808 return INVALID_REGNUM;
10812 else if (regparm < 3)
10814 if (!DECL_STATIC_CHAIN (cfun->decl))
10820 sorry ("-fsplit-stack does not support 2 register "
10821 "parameters for a nested function");
10822 return INVALID_REGNUM;
10829 /* FIXME: We could make this work by pushing a register
10830 around the addition and comparison. */
10831 sorry ("-fsplit-stack does not support 3 register parameters");
10832 return INVALID_REGNUM;
10837 /* A SYMBOL_REF for the function which allocates new stack space for
10840 static GTY(()) rtx split_stack_fn;
10842 /* A SYMBOL_REF for the more stack function when using the large
10845 static GTY(()) rtx split_stack_fn_large;
10847 /* Handle -fsplit-stack. These are the first instructions in the
10848 function, even before the regular prologue. */
10851 ix86_expand_split_stack_prologue (void)
10853 struct ix86_frame frame;
10854 HOST_WIDE_INT allocate;
10855 unsigned HOST_WIDE_INT args_size;
10856 rtx label, limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
10857 rtx scratch_reg = NULL_RTX;
10858 rtx varargs_label = NULL_RTX;
10861 gcc_assert (flag_split_stack && reload_completed);
10863 ix86_finalize_stack_realign_flags ();
10864 ix86_compute_frame_layout (&frame);
10865 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
10867 /* This is the label we will branch to if we have enough stack
10868 space. We expect the basic block reordering pass to reverse this
10869 branch if optimizing, so that we branch in the unlikely case. */
10870 label = gen_label_rtx ();
10872 /* We need to compare the stack pointer minus the frame size with
10873 the stack boundary in the TCB. The stack boundary always gives
10874 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
10875 can compare directly. Otherwise we need to do an addition. */
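  /* On GNU/Linux, for instance, the comparison built below comes out
     roughly as (the TCB slot offset is illustrative):
	 lea  -FRAME(%rsp), %r11
	 cmp  %fs:<stack boundary slot>, %r11
	 jae  .Lhave_enough_stack
     with the lea omitted when FRAME < SPLIT_STACK_AVAILABLE, in which
     case the stack pointer is compared directly.  */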
10877 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
10878 UNSPEC_STACK_CHECK);
10879 limit = gen_rtx_CONST (Pmode, limit);
10880 limit = gen_rtx_MEM (Pmode, limit);
10881 if (allocate < SPLIT_STACK_AVAILABLE)
10882 current = stack_pointer_rtx;
10885 unsigned int scratch_regno;
10888 /* We need a scratch register to hold the stack pointer minus
10889 the required frame size. Since this is the very start of the
10890 function, the scratch register can be any caller-saved
10891 register which is not used for parameters. */
10892 offset = GEN_INT (- allocate);
10893 scratch_regno = split_stack_prologue_scratch_regno ();
10894 if (scratch_regno == INVALID_REGNUM)
10896 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
10897 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
10899 /* We don't use ix86_gen_add3 in this case because it will
10900 want to split to lea, but when not optimizing the insn
10901 will not be split after this point. */
10902 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
10903 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10908 emit_move_insn (scratch_reg, offset);
10909 emit_insn (gen_adddi3 (scratch_reg, scratch_reg,
10910 stack_pointer_rtx));
10912 current = scratch_reg;
10915 ix86_expand_branch (GEU, current, limit, label);
10916 jump_insn = get_last_insn ();
10917 JUMP_LABEL (jump_insn) = label;
10919 /* Mark the jump as very likely to be taken. */
10920 add_reg_note (jump_insn, REG_BR_PROB,
10921 GEN_INT (REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100));
10923 if (split_stack_fn == NULL_RTX)
10924 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
10925 fn = split_stack_fn;
10927 /* Get more stack space. We pass in the desired stack space and the
10928 size of the arguments to copy to the new stack. In 32-bit mode
10929 we push the parameters; __morestack will return on a new stack
10930 anyhow. In 64-bit mode we pass the parameters in r10 and
10932 allocate_rtx = GEN_INT (allocate);
10933 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
10934 call_fusage = NULL_RTX;
10939 reg10 = gen_rtx_REG (Pmode, R10_REG);
10940 reg11 = gen_rtx_REG (Pmode, R11_REG);
10942 /* If this function uses a static chain, it will be in %r10.
10943 Preserve it across the call to __morestack. */
10944 if (DECL_STATIC_CHAIN (cfun->decl))
10948 rax = gen_rtx_REG (Pmode, AX_REG);
10949 emit_move_insn (rax, reg10);
10950 use_reg (&call_fusage, rax);
10953 if (ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
10955 HOST_WIDE_INT argval;
10957 /* When using the large model we need to load the address
10958 into a register, and we've run out of registers. So we
10959 switch to a different calling convention, and we call a
10960 different function: __morestack_large_model. We pass the
10961 argument size in the upper 32 bits of r10 and pass the
10962 frame size in the lower 32 bits. */
10963 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
10964 gcc_assert ((args_size & 0xffffffff) == args_size);
10966 if (split_stack_fn_large == NULL_RTX)
10967 split_stack_fn_large =
10968 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
10970 if (ix86_cmodel == CM_LARGE_PIC)
10974 label = gen_label_rtx ();
10975 emit_label (label);
10976 LABEL_PRESERVE_P (label) = 1;
10977 emit_insn (gen_set_rip_rex64 (reg10, label));
10978 emit_insn (gen_set_got_offset_rex64 (reg11, label));
10979 emit_insn (gen_adddi3 (reg10, reg10, reg11));
10980 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
10982 x = gen_rtx_CONST (Pmode, x);
10983 emit_move_insn (reg11, x);
10984 x = gen_rtx_PLUS (Pmode, reg10, reg11);
10985 x = gen_const_mem (Pmode, x);
10986 emit_move_insn (reg11, x);
10989 emit_move_insn (reg11, split_stack_fn_large);
10993 argval = ((args_size << 16) << 16) + allocate;
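	  /* E.g. args_size == 0x20 and allocate == 0x1000 give
	     argval == 0x0000002000001000, i.e. the argument size in the
	     upper half of r10 and the frame size in the lower half.
	     The double 16-bit shift presumably keeps the expression
	     well defined even if HOST_WIDE_INT were only 32 bits.  */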
10994 emit_move_insn (reg10, GEN_INT (argval));
10998 emit_move_insn (reg10, allocate_rtx);
10999 emit_move_insn (reg11, GEN_INT (args_size));
11000 use_reg (&call_fusage, reg11);
11003 use_reg (&call_fusage, reg10);
11007 emit_insn (gen_push (GEN_INT (args_size)));
11008 emit_insn (gen_push (allocate_rtx));
11010 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
11011 GEN_INT (UNITS_PER_WORD), constm1_rtx,
11013 add_function_usage_to (call_insn, call_fusage);
11015 /* In order to make call/return prediction work right, we now need
11016 to execute a return instruction. See
11017 libgcc/config/i386/morestack.S for the details on how this works.
11019 For flow purposes gcc must not see this as a return
11020 instruction--we need control flow to continue at the subsequent
11021 label. Therefore, we use an unspec. */
11022 gcc_assert (crtl->args.pops_args < 65536);
11023 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
11025 /* If we are in 64-bit mode and this function uses a static chain,
11026 we saved %r10 in %rax before calling __morestack. */
11027 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
11028 emit_move_insn (gen_rtx_REG (Pmode, R10_REG),
11029 gen_rtx_REG (Pmode, AX_REG));
11031 /* If this function calls va_start, we need to store a pointer to
11032 the arguments on the old stack, because they may not have been
11033 all copied to the new stack. At this point the old stack can be
11034 found at the frame pointer value used by __morestack, because
11035 __morestack has set that up before calling back to us. Here we
11036 store that pointer in a scratch register, and in
11037 ix86_expand_prologue we store the scratch register in a stack
11039 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11041 unsigned int scratch_regno;
11045 scratch_regno = split_stack_prologue_scratch_regno ();
11046 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11047 frame_reg = gen_rtx_REG (Pmode, BP_REG);
11051 return address within this function
11052 return address of caller of this function
11054 So we add three words to get to the stack arguments.
11058 return address within this function
11059 first argument to __morestack
11060 second argument to __morestack
11061 return address of caller of this function
11063 So we add five words to get to the stack arguments.
11065 words = TARGET_64BIT ? 3 : 5;
11066 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11067 gen_rtx_PLUS (Pmode, frame_reg,
11068 GEN_INT (words * UNITS_PER_WORD))));
11070 varargs_label = gen_label_rtx ();
11071 emit_jump_insn (gen_jump (varargs_label));
11072 JUMP_LABEL (get_last_insn ()) = varargs_label;
11077 emit_label (label);
11078 LABEL_NUSES (label) = 1;
11080 /* If this function calls va_start, we now have to set the scratch
11081 register for the case where we do not call __morestack. In this
11082 case we need to set it based on the stack pointer. */
11083 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11085 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11086 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11087 GEN_INT (UNITS_PER_WORD))));
11089 emit_label (varargs_label);
11090 LABEL_NUSES (varargs_label) = 1;
11094 /* We may have to tell the dataflow pass that the split stack prologue
11095 is initializing a scratch register. */
11098 ix86_live_on_entry (bitmap regs)
11100 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11102 gcc_assert (flag_split_stack);
11103 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
11107 /* Determine if OP is a suitable SUBREG RTX for an address. */
11110 ix86_address_subreg_operand (rtx op)
11112 enum machine_mode mode;
11117 mode = GET_MODE (op);
11119 if (GET_MODE_CLASS (mode) != MODE_INT)
11122 /* Don't allow SUBREGs that span more than a word. It can lead to spill
11123 failures when the register is one word out of a two word structure. */
11124 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
11127 /* Allow only SUBREGs of non-eliminable hard registers. */
11128 return register_no_elim_operand (op, mode);
11131 /* Extract the parts of an RTL expression that is a valid memory address
11132 for an instruction. Return 0 if the structure of the address is
11133 grossly off. Return -1 if the address contains ASHIFT, so it is not
11134 strictly valid, but is still used for computing the length of the lea instruction. */
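/* For example, the canonical address

       (plus (plus (mult (reg B) (const_int 4)) (reg A)) (const_int 12))

   -- i.e. 12(%eax,%ebx,4) with A in %eax and B in %ebx -- decomposes
   into base = A, index = B, scale = 4, disp = 12.  */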
11137 ix86_decompose_address (rtx addr, struct ix86_address *out)
11139 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
11140 rtx base_reg, index_reg;
11141 HOST_WIDE_INT scale = 1;
11142 rtx scale_rtx = NULL_RTX;
11145 enum ix86_address_seg seg = SEG_DEFAULT;
11147 /* Allow zero-extended SImode addresses;
11148 they will be emitted with the addr32 prefix. */
11150 && GET_CODE (addr) == ZERO_EXTEND
11151 && GET_MODE (addr) == DImode
11152 && GET_MODE (XEXP (addr, 0)) == SImode)
11153 addr = XEXP (addr, 0);
11157 else if (GET_CODE (addr) == SUBREG)
11159 if (ix86_address_subreg_operand (SUBREG_REG (addr)))
11164 else if (GET_CODE (addr) == PLUS)
11166 rtx addends[4], op;
11174 addends[n++] = XEXP (op, 1);
11177 while (GET_CODE (op) == PLUS);
11182 for (i = n; i >= 0; --i)
11185 switch (GET_CODE (op))
11190 index = XEXP (op, 0);
11191 scale_rtx = XEXP (op, 1);
11197 index = XEXP (op, 0);
11198 tmp = XEXP (op, 1);
11199 if (!CONST_INT_P (tmp))
11201 scale = INTVAL (tmp);
11202 if ((unsigned HOST_WIDE_INT) scale > 3)
11204 scale = 1 << scale;
11208 if (XINT (op, 1) == UNSPEC_TP
11209 && TARGET_TLS_DIRECT_SEG_REFS
11210 && seg == SEG_DEFAULT)
11211 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
11217 if (!ix86_address_subreg_operand (SUBREG_REG (op)))
11244 else if (GET_CODE (addr) == MULT)
11246 index = XEXP (addr, 0); /* index*scale */
11247 scale_rtx = XEXP (addr, 1);
11249 else if (GET_CODE (addr) == ASHIFT)
11251 /* We're called for lea too, which implements ashift on occasion. */
11252 index = XEXP (addr, 0);
11253 tmp = XEXP (addr, 1);
11254 if (!CONST_INT_P (tmp))
11256 scale = INTVAL (tmp);
11257 if ((unsigned HOST_WIDE_INT) scale > 3)
11259 scale = 1 << scale;
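      /* E.g. (ashift (reg) (const_int 3)) denotes index*8, the same
	 scaling that MULT would express as (mult (reg) (const_int 8)).  */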
11263 disp = addr; /* displacement */
11269 else if (GET_CODE (index) == SUBREG
11270 && ix86_address_subreg_operand (SUBREG_REG (index)))
11276 /* Extract the integral value of scale. */
11279 if (!CONST_INT_P (scale_rtx))
11281 scale = INTVAL (scale_rtx);
11284 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
11285 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
11287 /* Avoid useless 0 displacement. */
11288 if (disp == const0_rtx && (base || index))
11291 /* Allow the arg pointer and stack pointer as an index if there is no scaling. */
11292 if (base_reg && index_reg && scale == 1
11293 && (index_reg == arg_pointer_rtx
11294 || index_reg == frame_pointer_rtx
11295 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
11298 tmp = base, base = index, index = tmp;
11299 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
11302 /* Special case: %ebp cannot be encoded as a base without a displacement.
11306 && (base_reg == hard_frame_pointer_rtx
11307 || base_reg == frame_pointer_rtx
11308 || base_reg == arg_pointer_rtx
11309 || (REG_P (base_reg)
11310 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
11311 || REGNO (base_reg) == R13_REG))))
11314 /* Special case: on K6, [%esi] makes the instruction vector decoded.
11315 Avoid this by transforming to [%esi+0].
11316 Reload calls address legitimization without cfun defined, so we need
11317 to test cfun for being non-NULL. */
11318 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
11319 && base_reg && !index_reg && !disp
11320 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
11323 /* Special case: encode reg+reg instead of reg*2. */
11324 if (!base && index && scale == 2)
11325 base = index, base_reg = index_reg, scale = 1;
11327 /* Special case: scaling cannot be encoded without base or displacement. */
11328 if (!base && !disp && index && scale != 1)
11332 out->index = index;
11334 out->scale = scale;
11340 /* Return cost of the memory address x.
11341 For i386, it is better to use a complex address than let gcc copy
11342 the address into a reg and make a new pseudo. But not if the address
11343 requires two regs - that would mean more pseudos with longer
11346 ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
11348 struct ix86_address parts;
11350 int ok = ix86_decompose_address (x, &parts);
11354 if (parts.base && GET_CODE (parts.base) == SUBREG)
11355 parts.base = SUBREG_REG (parts.base);
11356 if (parts.index && GET_CODE (parts.index) == SUBREG)
11357 parts.index = SUBREG_REG (parts.index);
11359 /* Attempt to minimize the number of registers in the address. */
11361 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
11363 && (!REG_P (parts.index)
11364 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
11368 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
11370 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
11371 && parts.base != parts.index)
11374 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
11375 since its predecode logic can't detect the length of such instructions
11376 and it degenerates to vector decoding. Increase the cost of such
11377 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
11378 to split such addresses or even refuse them altogether.
11380 The following addressing modes are affected:
11385 The first and last case may be avoidable by explicitly coding the zero into
11386 the memory address, but I don't have an AMD-K6 machine handy to check this
11390 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
11391 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
11392 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
11398 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
11399 this is used to form addresses to local data when -fPIC is in
11403 darwin_local_data_pic (rtx disp)
11405 return (GET_CODE (disp) == UNSPEC
11406 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
11409 /* Determine if a given RTX is a valid constant. We already know this
11410 satisfies CONSTANT_P. */
11413 ix86_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
11415 switch (GET_CODE (x))
11420 if (GET_CODE (x) == PLUS)
11422 if (!CONST_INT_P (XEXP (x, 1)))
11427 if (TARGET_MACHO && darwin_local_data_pic (x))
11430 /* Only some unspecs are valid as "constants". */
11431 if (GET_CODE (x) == UNSPEC)
11432 switch (XINT (x, 1))
11435 case UNSPEC_GOTOFF:
11436 case UNSPEC_PLTOFF:
11437 return TARGET_64BIT;
11439 case UNSPEC_NTPOFF:
11440 x = XVECEXP (x, 0, 0);
11441 return (GET_CODE (x) == SYMBOL_REF
11442 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11443 case UNSPEC_DTPOFF:
11444 x = XVECEXP (x, 0, 0);
11445 return (GET_CODE (x) == SYMBOL_REF
11446 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
11451 /* We must have drilled down to a symbol. */
11452 if (GET_CODE (x) == LABEL_REF)
11454 if (GET_CODE (x) != SYMBOL_REF)
11459 /* TLS symbols are never valid. */
11460 if (SYMBOL_REF_TLS_MODEL (x))
11463 /* DLLIMPORT symbols are never valid. */
11464 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
11465 && SYMBOL_REF_DLLIMPORT_P (x))
11469 /* mdynamic-no-pic */
11470 if (MACHO_DYNAMIC_NO_PIC_P)
11471 return machopic_symbol_defined_p (x);
11476 if (GET_MODE (x) == TImode
11477 && x != CONST0_RTX (TImode)
11483 if (!standard_sse_constant_p (x))
11490 /* Otherwise we handle everything else in the move patterns. */
11494 /* Determine if it's legal to put X into the constant pool. This
11495 is not possible for the address of thread-local symbols, which
11496 is checked above. */
11499 ix86_cannot_force_const_mem (enum machine_mode mode, rtx x)
11501 /* We can always put integral constants and vectors in memory. */
11502 switch (GET_CODE (x))
11512 return !ix86_legitimate_constant_p (mode, x);
11516 /* Nonzero if the constant value X is a legitimate general operand
11517 when generating PIC code. It is given that flag_pic is on and
11518 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
11521 legitimate_pic_operand_p (rtx x)
11525 switch (GET_CODE (x))
11528 inner = XEXP (x, 0);
11529 if (GET_CODE (inner) == PLUS
11530 && CONST_INT_P (XEXP (inner, 1)))
11531 inner = XEXP (inner, 0);
11533 /* Only some unspecs are valid as "constants". */
11534 if (GET_CODE (inner) == UNSPEC)
11535 switch (XINT (inner, 1))
11538 case UNSPEC_GOTOFF:
11539 case UNSPEC_PLTOFF:
11540 return TARGET_64BIT;
11542 x = XVECEXP (inner, 0, 0);
11543 return (GET_CODE (x) == SYMBOL_REF
11544 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11545 case UNSPEC_MACHOPIC_OFFSET:
11546 return legitimate_pic_address_disp_p (x);
11554 return legitimate_pic_address_disp_p (x);
11561 /* Determine if a given CONST RTX is a valid memory displacement
11565 legitimate_pic_address_disp_p (rtx disp)
11569 /* In 64bit mode we can allow direct addresses of symbols and labels
11570 when they are not dynamic symbols. */
11573 rtx op0 = disp, op1;
11575 switch (GET_CODE (disp))
11581 if (GET_CODE (XEXP (disp, 0)) != PLUS)
11583 op0 = XEXP (XEXP (disp, 0), 0);
11584 op1 = XEXP (XEXP (disp, 0), 1);
11585 if (!CONST_INT_P (op1)
11586 || INTVAL (op1) >= 16*1024*1024
11587 || INTVAL (op1) < -16*1024*1024)
11589 if (GET_CODE (op0) == LABEL_REF)
11591 if (GET_CODE (op0) != SYMBOL_REF)
11596 /* TLS references should always be enclosed in UNSPEC. */
11597 if (SYMBOL_REF_TLS_MODEL (op0))
11599 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
11600 && ix86_cmodel != CM_LARGE_PIC)
11608 if (GET_CODE (disp) != CONST)
11610 disp = XEXP (disp, 0);
11614 /* It is unsafe to allow PLUS expressions here; the allowed distance of
11615 GOT table references is limited. We should not need these anyway. */
11616 if (GET_CODE (disp) != UNSPEC
11617 || (XINT (disp, 1) != UNSPEC_GOTPCREL
11618 && XINT (disp, 1) != UNSPEC_GOTOFF
11619 && XINT (disp, 1) != UNSPEC_PCREL
11620 && XINT (disp, 1) != UNSPEC_PLTOFF))
11623 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
11624 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
11630 if (GET_CODE (disp) == PLUS)
11632 if (!CONST_INT_P (XEXP (disp, 1)))
11634 disp = XEXP (disp, 0);
11638 if (TARGET_MACHO && darwin_local_data_pic (disp))
11641 if (GET_CODE (disp) != UNSPEC)
11644 switch (XINT (disp, 1))
11649 /* We need to check for both symbols and labels because VxWorks loads
11650 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
11652 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
11653 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
11654 case UNSPEC_GOTOFF:
11655 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
11656 While the ABI also specifies a 32bit relocation, we don't produce it in
11657 the small PIC model at all. */
11658 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
11659 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
11661 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
11663 case UNSPEC_GOTTPOFF:
11664 case UNSPEC_GOTNTPOFF:
11665 case UNSPEC_INDNTPOFF:
11668 disp = XVECEXP (disp, 0, 0);
11669 return (GET_CODE (disp) == SYMBOL_REF
11670 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
11671 case UNSPEC_NTPOFF:
11672 disp = XVECEXP (disp, 0, 0);
11673 return (GET_CODE (disp) == SYMBOL_REF
11674 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
11675 case UNSPEC_DTPOFF:
11676 disp = XVECEXP (disp, 0, 0);
11677 return (GET_CODE (disp) == SYMBOL_REF
11678 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
11684 /* Recognizes RTL expressions that are valid memory addresses for an
11685 instruction. The MODE argument is the machine mode for the MEM
11686 expression that wants to use this address.
11688 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
11689 convert common non-canonical forms to canonical form so that they will
11693 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
11694 rtx addr, bool strict)
11696 struct ix86_address parts;
11697 rtx base, index, disp;
11698 HOST_WIDE_INT scale;
11700 if (ix86_decompose_address (addr, &parts) <= 0)
11701 /* Decomposition failed. */
11705 index = parts.index;
11707 scale = parts.scale;
11709 /* Validate base register. */
11716 else if (GET_CODE (base) == SUBREG && REG_P (SUBREG_REG (base)))
11717 reg = SUBREG_REG (base);
11719 /* Base is not a register. */
11722 if (GET_MODE (base) != SImode && GET_MODE (base) != DImode)
11725 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
11726 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
11727 /* Base is not valid. */
11731 /* Validate index register. */
11738 else if (GET_CODE (index) == SUBREG && REG_P (SUBREG_REG (index)))
11739 reg = SUBREG_REG (index);
11741 /* Index is not a register. */
11744 if (GET_MODE (index) != SImode && GET_MODE (index) != DImode)
11747 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
11748 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
11749 /* Index is not valid. */
11753 /* Index and base should have the same mode. */
11755 && GET_MODE (base) != GET_MODE (index))
11758 /* Validate scale factor. */
11762 /* Scale without index. */
11765 if (scale != 2 && scale != 4 && scale != 8)
11766 /* Scale is not a valid multiplier. */
11770 /* Validate displacement. */
11773 if (GET_CODE (disp) == CONST
11774 && GET_CODE (XEXP (disp, 0)) == UNSPEC
11775 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
11776 switch (XINT (XEXP (disp, 0), 1))
11778 /* Refuse GOTOFF and GOT in 64bit mode since they are always 64bit when
11779 used. While the ABI also specifies 32bit relocations, we don't produce
11780 them at all and use IP-relative addressing instead. */
11782 case UNSPEC_GOTOFF:
11783 gcc_assert (flag_pic);
11785 goto is_legitimate_pic;
11787 /* 64bit address unspec. */
11790 case UNSPEC_GOTPCREL:
11792 gcc_assert (flag_pic);
11793 goto is_legitimate_pic;
11795 case UNSPEC_GOTTPOFF:
11796 case UNSPEC_GOTNTPOFF:
11797 case UNSPEC_INDNTPOFF:
11798 case UNSPEC_NTPOFF:
11799 case UNSPEC_DTPOFF:
11802 case UNSPEC_STACK_CHECK:
11803 gcc_assert (flag_split_stack);
11807 /* Invalid address unspec. */
11811 else if (SYMBOLIC_CONST (disp)
11815 && MACHOPIC_INDIRECT
11816 && !machopic_operand_p (disp)
11822 if (TARGET_64BIT && (index || base))
11824 /* foo@dtpoff(%rX) is ok. */
11825 if (GET_CODE (disp) != CONST
11826 || GET_CODE (XEXP (disp, 0)) != PLUS
11827 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
11828 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
11829 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
11830 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
11831 /* Non-constant pic memory reference. */
11834 else if ((!TARGET_MACHO || flag_pic)
11835 && ! legitimate_pic_address_disp_p (disp))
11836 /* Displacement is an invalid pic construct. */
11839 else if (MACHO_DYNAMIC_NO_PIC_P
11840 && !ix86_legitimate_constant_p (Pmode, disp))
11841 /* The displacement must be referenced via a non_lazy_pointer. */
11845 /* This code used to verify that a symbolic pic displacement
11846 includes the pic_offset_table_rtx register.
11848 While this is a good idea, unfortunately these constructs may
11849 be created by the "adds using lea" optimization for incorrect
11858 This code is nonsensical, but results in addressing the
11859 GOT table with a pic_offset_table_rtx base. We can't
11860 just refuse it easily, since it gets matched by the
11861 "addsi3" pattern, which later gets split to lea when the
11862 output register differs from the input. While this
11863 could be handled by a separate addsi pattern for this case
11864 that never results in lea, disabling this test seems the
11865 easier and correct fix for the crash. */
11867 else if (GET_CODE (disp) != LABEL_REF
11868 && !CONST_INT_P (disp)
11869 && (GET_CODE (disp) != CONST
11870 || !ix86_legitimate_constant_p (Pmode, disp))
11871 && (GET_CODE (disp) != SYMBOL_REF
11872 || !ix86_legitimate_constant_p (Pmode, disp)))
11873 /* Displacement is not constant. */
11875 else if (TARGET_64BIT
11876 && !x86_64_immediate_operand (disp, VOIDmode))
11877 /* Displacement is out of range. */
11881 /* Everything looks valid. */
11885 /* Determine if a given RTX is a valid constant address. */
11888 constant_address_p (rtx x)
11890 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
11893 /* Return a unique alias set for the GOT. */
11895 static alias_set_type
11896 ix86_GOT_alias_set (void)
11898 static alias_set_type set = -1;
11900 set = new_alias_set ();
11904 /* Return a legitimate reference for ORIG (an address) using the
11905 register REG. If REG is 0, a new pseudo is generated.
11907 There are two types of references that must be handled:
11909 1. Global data references must load the address from the GOT, via
11910 the PIC reg. An insn is emitted to do this load, and the reg is
11913 2. Static data references, constant pool addresses, and code labels
11914 compute the address as an offset from the GOT, whose base is in
11915 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
11916 differentiate them from global data objects. The returned
11917 address is the PIC reg + an unspec constant.
11919 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
11920 reg also appears in the address. */
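/* As a 32-bit ELF illustration (register choice and assembly syntax
   are incidental): a local symbol is addressed relative to the PIC
   base, e.g.  leal local@GOTOFF(%ebx), %eax,  while a global symbol
   is loaded from its GOT slot, e.g.  movl global@GOT(%ebx), %eax.  */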
11923 legitimize_pic_address (rtx orig, rtx reg)
11926 rtx new_rtx = orig;
11930 if (TARGET_MACHO && !TARGET_64BIT)
11933 reg = gen_reg_rtx (Pmode);
11934 /* Use the generic Mach-O PIC machinery. */
11935 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
11939 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
11941 else if (TARGET_64BIT
11942 && ix86_cmodel != CM_SMALL_PIC
11943 && gotoff_operand (addr, Pmode))
11946 /* This symbol may be referenced via a displacement from the PIC
11947 base address (@GOTOFF). */
11949 if (reload_in_progress)
11950 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11951 if (GET_CODE (addr) == CONST)
11952 addr = XEXP (addr, 0);
11953 if (GET_CODE (addr) == PLUS)
11955 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
11957 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
11960 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
11961 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11963 tmpreg = gen_reg_rtx (Pmode);
11966 emit_move_insn (tmpreg, new_rtx);
11970 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
11971 tmpreg, 1, OPTAB_DIRECT);
11974 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
11976 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
11978 /* This symbol may be referenced via a displacement from the PIC
11979 base address (@GOTOFF). */
11981 if (reload_in_progress)
11982 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11983 if (GET_CODE (addr) == CONST)
11984 addr = XEXP (addr, 0);
11985 if (GET_CODE (addr) == PLUS)
11987 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
11989 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
11992 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
11993 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11994 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
11998 emit_move_insn (reg, new_rtx);
12002 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
12003 /* We can't use @GOTOFF for text labels on VxWorks;
12004 see gotoff_operand. */
12005 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
12007 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
12009 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
12010 return legitimize_dllimport_symbol (addr, true);
12011 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
12012 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
12013 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
12015 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
12016 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
12020 /* For x64 PE-COFF there is no GOT table. So we use address
12022 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
12024 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
12025 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12028 reg = gen_reg_rtx (Pmode);
12029 emit_move_insn (reg, new_rtx);
12032 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
12034 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
12035 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12036 new_rtx = gen_const_mem (Pmode, new_rtx);
12037 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
12040 reg = gen_reg_rtx (Pmode);
12041 /* Use gen_movsi directly; otherwise the address is loaded
12042 into a register for CSE. We don't want to CSE these addresses;
12043 instead we CSE addresses from the GOT table, so skip this. */
12044 emit_insn (gen_movsi (reg, new_rtx));
12049 /* This symbol must be referenced via a load from the
12050 Global Offset Table (@GOT). */
12052 if (reload_in_progress)
12053 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12054 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
12055 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12057 new_rtx = force_reg (Pmode, new_rtx);
12058 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12059 new_rtx = gen_const_mem (Pmode, new_rtx);
12060 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
12063 reg = gen_reg_rtx (Pmode);
12064 emit_move_insn (reg, new_rtx);
12070 if (CONST_INT_P (addr)
12071 && !x86_64_immediate_operand (addr, VOIDmode))
12075 emit_move_insn (reg, addr);
12079 new_rtx = force_reg (Pmode, addr);
12081 else if (GET_CODE (addr) == CONST)
12083 addr = XEXP (addr, 0);
12085 /* We must match stuff we generated before. Assume the only
12086 unspecs that can get here are ours. Not that we could do
12087 anything with them anyway.... */
12088 if (GET_CODE (addr) == UNSPEC
12089 || (GET_CODE (addr) == PLUS
12090 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
12092 gcc_assert (GET_CODE (addr) == PLUS);
12094 if (GET_CODE (addr) == PLUS)
12096 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
12098 /* Check first to see if this is a constant offset from a @GOTOFF
12099 symbol reference. */
12100 if (gotoff_operand (op0, Pmode)
12101 && CONST_INT_P (op1))
12105 if (reload_in_progress)
12106 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12107 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
12109 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
12110 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12111 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12115 emit_move_insn (reg, new_rtx);
12121 if (INTVAL (op1) < -16*1024*1024
12122 || INTVAL (op1) >= 16*1024*1024)
12124 if (!x86_64_immediate_operand (op1, Pmode))
12125 op1 = force_reg (Pmode, op1);
12126 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
12132 base = legitimize_pic_address (XEXP (addr, 0), reg);
12133 new_rtx = legitimize_pic_address (XEXP (addr, 1),
12134 base == reg ? NULL_RTX : reg);
12136 if (CONST_INT_P (new_rtx))
12137 new_rtx = plus_constant (base, INTVAL (new_rtx));
12140 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
12142 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
12143 new_rtx = XEXP (new_rtx, 1);
12145 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
12153 /* Load the thread pointer. If TO_REG is true, force it into a register. */
12156 get_thread_pointer (bool to_reg)
12158 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
12160 if (GET_MODE (tp) != Pmode)
12161 tp = convert_to_mode (Pmode, tp, 1);
12164 tp = copy_addr_to_reg (tp);
12169 /* Construct the SYMBOL_REF for the tls_get_addr function. */
12171 static GTY(()) rtx ix86_tls_symbol;
12174 ix86_tls_get_addr (void)
12176 if (!ix86_tls_symbol)
12179 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
12180 ? "___tls_get_addr" : "__tls_get_addr");
12182 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
12185 return ix86_tls_symbol;
12188 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
12190 static GTY(()) rtx ix86_tls_module_base_symbol;
12193 ix86_tls_module_base (void)
12195 if (!ix86_tls_module_base_symbol)
12197 ix86_tls_module_base_symbol
12198 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
12200 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
12201 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
12204 return ix86_tls_module_base_symbol;
12207 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
12208 false if we expect this to be used for a memory address and true if
12209 we expect to load the address into a register. */
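/* As a rough x86-64 GNU-TLS illustration of two of the models handled
   below (relocation spellings per the ELF TLS ABI; register choice is
   incidental):

       local-exec:    movq %fs:0, %rax
		      leaq x@tpoff(%rax), %rax

       initial-exec:  movq x@gottpoff(%rip), %rax
		      movq %fs:(%rax), %rdx  */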
12212 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
12214 rtx dest, base, off;
12215 rtx pic = NULL_RTX, tp = NULL_RTX;
12220 case TLS_MODEL_GLOBAL_DYNAMIC:
12221 dest = gen_reg_rtx (Pmode);
12226 pic = pic_offset_table_rtx;
12229 pic = gen_reg_rtx (Pmode);
12230 emit_insn (gen_set_got (pic));
12234 if (TARGET_GNU2_TLS)
12237 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
12239 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
12241 tp = get_thread_pointer (true);
12242 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
12244 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
12248 rtx caddr = ix86_tls_get_addr ();
12252 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
12255 emit_call_insn (gen_tls_global_dynamic_64 (rax, x, caddr));
12256 insns = get_insns ();
12259 RTL_CONST_CALL_P (insns) = 1;
12260 emit_libcall_block (insns, dest, rax, x);
12263 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
12267 case TLS_MODEL_LOCAL_DYNAMIC:
12268 base = gen_reg_rtx (Pmode);
12273 pic = pic_offset_table_rtx;
12276 pic = gen_reg_rtx (Pmode);
12277 emit_insn (gen_set_got (pic));
12281 if (TARGET_GNU2_TLS)
12283 rtx tmp = ix86_tls_module_base ();
12286 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
12288 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
12290 tp = get_thread_pointer (true);
12291 set_unique_reg_note (get_last_insn (), REG_EQUIV,
12292 gen_rtx_MINUS (Pmode, tmp, tp));
12296 rtx caddr = ix86_tls_get_addr ();
12300 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, eqv;
12303 emit_call_insn (gen_tls_local_dynamic_base_64 (rax, caddr));
12304 insns = get_insns ();
12307 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
12308 share the LD_BASE result with other LD model accesses. */
12309 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
12310 UNSPEC_TLS_LD_BASE);
12312 RTL_CONST_CALL_P (insns) = 1;
12313 emit_libcall_block (insns, base, rax, eqv);
12316 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
12319 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
12320 off = gen_rtx_CONST (Pmode, off);
12322 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
12324 if (TARGET_GNU2_TLS)
12326 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
12328 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
12332 case TLS_MODEL_INITIAL_EXEC:
12335 if (TARGET_SUN_TLS)
12337 /* The Sun linker took the AMD64 TLS spec literally
12338 and can only handle %rax as the destination of the
12339 initial-exec code sequence. */
12341 dest = gen_reg_rtx (Pmode);
12342 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
12347 type = UNSPEC_GOTNTPOFF;
12351 if (reload_in_progress)
12352 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12353 pic = pic_offset_table_rtx;
12354 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
12356 else if (!TARGET_ANY_GNU_TLS)
12358 pic = gen_reg_rtx (Pmode);
12359 emit_insn (gen_set_got (pic));
12360 type = UNSPEC_GOTTPOFF;
12365 type = UNSPEC_INDNTPOFF;
12368 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
12369 off = gen_rtx_CONST (Pmode, off);
12371 off = gen_rtx_PLUS (Pmode, pic, off);
12372 off = gen_const_mem (Pmode, off);
12373 set_mem_alias_set (off, ix86_GOT_alias_set ());
12375 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12377 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
12378 off = force_reg (Pmode, off);
12379 return gen_rtx_PLUS (Pmode, base, off);
12383 base = get_thread_pointer (true);
12384 dest = gen_reg_rtx (Pmode);
12385 emit_insn (gen_subsi3 (dest, base, off));
12389 case TLS_MODEL_LOCAL_EXEC:
12390 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
12391 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12392 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
12393 off = gen_rtx_CONST (Pmode, off);
12395 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12397 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
12398 return gen_rtx_PLUS (Pmode, base, off);
12402 base = get_thread_pointer (true);
12403 dest = gen_reg_rtx (Pmode);
12404 emit_insn (gen_subsi3 (dest, base, off));
12409 gcc_unreachable ();
12415 /* Create or return the unique __imp_DECL dllimport symbol corresponding to symbol DECL. */
12418 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
12419 htab_t dllimport_map;
12422 get_dllimport_decl (tree decl)
12424 struct tree_map *h, in;
12427 const char *prefix;
12428 size_t namelen, prefixlen;
12433 if (!dllimport_map)
12434 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
12436 in.hash = htab_hash_pointer (decl);
12437 in.base.from = decl;
12438 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
12439 h = (struct tree_map *) *loc;
12443 *loc = h = ggc_alloc_tree_map ();
12445 h->base.from = decl;
12446 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
12447 VAR_DECL, NULL, ptr_type_node);
12448 DECL_ARTIFICIAL (to) = 1;
12449 DECL_IGNORED_P (to) = 1;
12450 DECL_EXTERNAL (to) = 1;
12451 TREE_READONLY (to) = 1;
12453 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
12454 name = targetm.strip_name_encoding (name);
12455 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
12456 ? "*__imp_" : "*__imp__";
12457 namelen = strlen (name);
12458 prefixlen = strlen (prefix);
12459 imp_name = (char *) alloca (namelen + prefixlen + 1);
12460 memcpy (imp_name, prefix, prefixlen);
12461 memcpy (imp_name + prefixlen, name, namelen + 1);
12463 name = ggc_alloc_string (imp_name, namelen + prefixlen);
12464 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
12465 SET_SYMBOL_REF_DECL (rtl, to);
12466 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
12468 rtl = gen_const_mem (Pmode, rtl);
12469 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
12471 SET_DECL_RTL (to, rtl);
12472 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
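/* Worked example (added commentary): with the default "_" user label
   prefix, a dllimport reference to a function "foo" is rewritten to go
   through the import-table slot "__imp__foo" built above, so a call
   ends up as an indirect call through that pointer, roughly

       call  *__imp__foo

   The leading '*' in the prefix strings above tells the assembler
   output machinery not to prepend the user label prefix again.  */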
12477 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
12478 true if we require the result to be in a register. */
12481 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
12486 gcc_assert (SYMBOL_REF_DECL (symbol));
12487 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
12489 x = DECL_RTL (imp_decl);
12491 x = force_reg (Pmode, x);
12495 /* Try machine-dependent ways of modifying an illegitimate address
12496 to be legitimate. If we find one, return the new, valid address.
12497 This macro is used in only one place: `memory_address' in explow.c.
12499 OLDX is the address as it was before break_out_memory_refs was called.
12500 In some cases it is useful to look at this to decide what needs to be done.
12502 It is always safe for this macro to do nothing. It exists to recognize
12503 opportunities to optimize the output.
12505 For the 80386, we handle X+REG by loading X into a register R and
12506 using R+REG. R will go in a general reg and indexing will be used.
12507 However, if REG is a broken-out memory address or multiplication,
12508 nothing needs to be done because REG can certainly go in a general reg.
12510 When -fpic is used, special handling is needed for symbolic references.
12511 See comments by legitimize_pic_address in i386.c for details. */
12514 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
12515 enum machine_mode mode)
12520 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
12522 return legitimize_tls_address (x, (enum tls_model) log, false);
12523 if (GET_CODE (x) == CONST
12524 && GET_CODE (XEXP (x, 0)) == PLUS
12525 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
12526 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
12528 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
12529 (enum tls_model) log, false);
12530 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
12533 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
12535 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
12536 return legitimize_dllimport_symbol (x, true);
12537 if (GET_CODE (x) == CONST
12538 && GET_CODE (XEXP (x, 0)) == PLUS
12539 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
12540 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
12542 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
12543 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
12547 if (flag_pic && SYMBOLIC_CONST (x))
12548 return legitimize_pic_address (x, 0);
12551 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
12552 return machopic_indirect_data_reference (x, 0);
12555 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
12556 if (GET_CODE (x) == ASHIFT
12557 && CONST_INT_P (XEXP (x, 1))
12558 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
12561 log = INTVAL (XEXP (x, 1));
12562 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
12563 GEN_INT (1 << log));
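/* Illustrative example (added commentary): (ashift (reg) (const_int 2))
   becomes (mult (reg) (const_int 4)), matching the scaled-index part
   of an x86 effective address such as (%eax,%ebx,4).  */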
12566 if (GET_CODE (x) == PLUS)
12568 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
12570 if (GET_CODE (XEXP (x, 0)) == ASHIFT
12571 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
12572 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
12575 log = INTVAL (XEXP (XEXP (x, 0), 1));
12576 XEXP (x, 0) = gen_rtx_MULT (Pmode,
12577 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
12578 GEN_INT (1 << log));
12581 if (GET_CODE (XEXP (x, 1)) == ASHIFT
12582 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
12583 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
12586 log = INTVAL (XEXP (XEXP (x, 1), 1));
12587 XEXP (x, 1) = gen_rtx_MULT (Pmode,
12588 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
12589 GEN_INT (1 << log));
12592 /* Put multiply first if it isn't already. */
12593 if (GET_CODE (XEXP (x, 1)) == MULT)
12595 rtx tmp = XEXP (x, 0);
12596 XEXP (x, 0) = XEXP (x, 1);
12601 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
12602 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
12603 created by virtual register instantiation, register elimination, and
12604 similar optimizations. */
12605 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
12608 x = gen_rtx_PLUS (Pmode,
12609 gen_rtx_PLUS (Pmode, XEXP (x, 0),
12610 XEXP (XEXP (x, 1), 0)),
12611 XEXP (XEXP (x, 1), 1));
12615 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
12616 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
12617 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
12618 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
12619 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
12620 && CONSTANT_P (XEXP (x, 1)))
12623 rtx other = NULL_RTX;
12625 if (CONST_INT_P (XEXP (x, 1)))
12627 constant = XEXP (x, 1);
12628 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
12630 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
12632 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
12633 other = XEXP (x, 1);
12641 x = gen_rtx_PLUS (Pmode,
12642 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
12643 XEXP (XEXP (XEXP (x, 0), 1), 0)),
12644 plus_constant (other, INTVAL (constant)));
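/* Worked example (added commentary): starting from

     (plus (plus (mult (reg A) (const_int 4))
                 (plus (reg B) (const_int 8)))
           (const_int 4))

   the rewrite above yields

     (plus (plus (mult (reg A) (const_int 4)) (reg B))
           (const_int 12))

   which fits the base + index*scale + disp form that
   ix86_legitimate_address_p accepts.  */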
12648 if (changed && ix86_legitimate_address_p (mode, x, false))
12651 if (GET_CODE (XEXP (x, 0)) == MULT)
12654 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
12657 if (GET_CODE (XEXP (x, 1)) == MULT)
12660 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
12664 && REG_P (XEXP (x, 1))
12665 && REG_P (XEXP (x, 0)))
12668 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
12671 x = legitimize_pic_address (x, 0);
12674 if (changed && ix86_legitimate_address_p (mode, x, false))
12677 if (REG_P (XEXP (x, 0)))
12679 rtx temp = gen_reg_rtx (Pmode);
12680 rtx val = force_operand (XEXP (x, 1), temp);
12683 if (GET_MODE (val) != Pmode)
12684 val = convert_to_mode (Pmode, val, 1);
12685 emit_move_insn (temp, val);
12688 XEXP (x, 1) = temp;
12692 else if (REG_P (XEXP (x, 1)))
12694 rtx temp = gen_reg_rtx (Pmode);
12695 rtx val = force_operand (XEXP (x, 0), temp);
12698 if (GET_MODE (val) != Pmode)
12699 val = convert_to_mode (Pmode, val, 1);
12700 emit_move_insn (temp, val);
12703 XEXP (x, 0) = temp;
12711 /* Print an integer constant expression in assembler syntax. Addition
12712 and subtraction are the only arithmetic that may appear in these
12713 expressions. FILE is the stdio stream to write to, X is the rtx, and
12714 CODE is the operand print code from the output string. */
12717 output_pic_addr_const (FILE *file, rtx x, int code)
12721 switch (GET_CODE (x))
12724 gcc_assert (flag_pic);
12729 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
12730 output_addr_const (file, x);
12733 const char *name = XSTR (x, 0);
12735 /* Mark the decl as referenced so that cgraph will
12736 output the function. */
12737 if (SYMBOL_REF_DECL (x))
12738 mark_decl_referenced (SYMBOL_REF_DECL (x));
12741 if (MACHOPIC_INDIRECT
12742 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
12743 name = machopic_indirection_name (x, /*stub_p=*/true);
12745 assemble_name (file, name);
12747 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
12748 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
12749 fputs ("@PLT", file);
12756 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
12757 assemble_name (asm_out_file, buf);
12761 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
12765 /* This used to output parentheses around the expression,
12766 but that does not work on the 386 (either ATT or BSD assembler). */
12767 output_pic_addr_const (file, XEXP (x, 0), code);
12771 if (GET_MODE (x) == VOIDmode)
12773 /* We can use %d if the number is <32 bits and positive. */
12774 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
12775 fprintf (file, "0x%lx%08lx",
12776 (unsigned long) CONST_DOUBLE_HIGH (x),
12777 (unsigned long) CONST_DOUBLE_LOW (x));
12779 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
12782 /* We can't handle floating point constants;
12783 TARGET_PRINT_OPERAND must handle them. */
12784 output_operand_lossage ("floating constant misused");
12788 /* Some assemblers need integer constants to appear first. */
12789 if (CONST_INT_P (XEXP (x, 0)))
12791 output_pic_addr_const (file, XEXP (x, 0), code);
12793 output_pic_addr_const (file, XEXP (x, 1), code);
12797 gcc_assert (CONST_INT_P (XEXP (x, 1)));
12798 output_pic_addr_const (file, XEXP (x, 1), code);
12800 output_pic_addr_const (file, XEXP (x, 0), code);
12806 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
12807 output_pic_addr_const (file, XEXP (x, 0), code);
12809 output_pic_addr_const (file, XEXP (x, 1), code);
12811 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
12815 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
12817 bool f = i386_asm_output_addr_const_extra (file, x);
12822 gcc_assert (XVECLEN (x, 0) == 1);
12823 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
12824 switch (XINT (x, 1))
12827 fputs ("@GOT", file);
12829 case UNSPEC_GOTOFF:
12830 fputs ("@GOTOFF", file);
12832 case UNSPEC_PLTOFF:
12833 fputs ("@PLTOFF", file);
12836 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12837 "(%rip)" : "[rip]", file);
12839 case UNSPEC_GOTPCREL:
12840 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12841 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
12843 case UNSPEC_GOTTPOFF:
12844 /* FIXME: This might be @TPOFF in Sun ld too. */
12845 fputs ("@gottpoff", file);
12848 fputs ("@tpoff", file);
12850 case UNSPEC_NTPOFF:
12852 fputs ("@tpoff", file);
12854 fputs ("@ntpoff", file);
12856 case UNSPEC_DTPOFF:
12857 fputs ("@dtpoff", file);
12859 case UNSPEC_GOTNTPOFF:
12861 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12862 "@gottpoff(%rip)": "@gottpoff[rip]", file);
12864 fputs ("@gotntpoff", file);
12866 case UNSPEC_INDNTPOFF:
12867 fputs ("@indntpoff", file);
12870 case UNSPEC_MACHOPIC_OFFSET:
12872 machopic_output_function_base_name (file);
12876 output_operand_lossage ("invalid UNSPEC as operand");
12882 output_operand_lossage ("invalid expression as operand");
12886 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
12887 We need to emit DTP-relative relocations. */
12889 static void ATTRIBUTE_UNUSED
12890 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
12892 fputs (ASM_LONG, file);
12893 output_addr_const (file, x);
12894 fputs ("@dtpoff", file);
12900 fputs (", 0", file);
12903 gcc_unreachable ();
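/* Illustrative output (added commentary): for SIZE == 4 this emits
   e.g. ".long x@dtpoff"; for SIZE == 8 the ", 0" above pads the
   value to eight bytes, giving ".long x@dtpoff, 0".  */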
12907 /* Return true if X is a representation of the PIC register. This copes
12908 with calls from ix86_find_base_term, where the register might have
12909 been replaced by a cselib value. */
12912 ix86_pic_register_p (rtx x)
12914 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
12915 return (pic_offset_table_rtx
12916 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
12918 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
12921 /* Helper function for ix86_delegitimize_address.
12922 Attempt to delegitimize TLS local-exec accesses. */
12925 ix86_delegitimize_tls_address (rtx orig_x)
12927 rtx x = orig_x, unspec;
12928 struct ix86_address addr;
12930 if (!TARGET_TLS_DIRECT_SEG_REFS)
12934 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
12936 if (ix86_decompose_address (x, &addr) == 0
12937 || addr.seg != (TARGET_64BIT ? SEG_FS : SEG_GS)
12938 || addr.disp == NULL_RTX
12939 || GET_CODE (addr.disp) != CONST)
12941 unspec = XEXP (addr.disp, 0);
12942 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
12943 unspec = XEXP (unspec, 0);
12944 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
12946 x = XVECEXP (unspec, 0, 0);
12947 gcc_assert (GET_CODE (x) == SYMBOL_REF);
12948 if (unspec != XEXP (addr.disp, 0))
12949 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
12952 rtx idx = addr.index;
12953 if (addr.scale != 1)
12954 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
12955 x = gen_rtx_PLUS (Pmode, idx, x);
12958 x = gen_rtx_PLUS (Pmode, addr.base, x);
12959 if (MEM_P (orig_x))
12960 x = replace_equiv_address_nv (orig_x, x);
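/* Example of what this recognizes (added commentary): an x86-64
   local-exec load such as

       movl %fs:x@tpoff, %eax

   arrives here with addr.seg == SEG_FS and a displacement of the form
   (const (unspec [x] UNSPEC_NTPOFF)); it is rewritten back into a
   plain reference to the symbol "x" for debug output.  */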
12964 /* In the name of slightly smaller debug output, and to cater to
12965 general assembler lossage, recognize PIC+GOTOFF and turn it back
12966 into a direct symbol reference.
12968 On Darwin, this is necessary to avoid a crash, because Darwin
12969 has a different PIC label for each routine but the DWARF debugging
12970 information is not associated with any particular routine, so it's
12971 necessary to remove references to the PIC label from RTL stored by
12972 the DWARF output code. */
12975 ix86_delegitimize_address (rtx x)
12977 rtx orig_x = delegitimize_mem_from_attrs (x);
12978 /* addend is NULL or some rtx if x is something+GOTOFF where
12979 something doesn't include the PIC register. */
12980 rtx addend = NULL_RTX;
12981 /* reg_addend is NULL or a multiple of some register. */
12982 rtx reg_addend = NULL_RTX;
12983 /* const_addend is NULL or a const_int. */
12984 rtx const_addend = NULL_RTX;
12985 /* This is the result, or NULL. */
12986 rtx result = NULL_RTX;
12995 if (GET_CODE (x) != CONST
12996 || GET_CODE (XEXP (x, 0)) != UNSPEC
12997 || (XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
12998 && XINT (XEXP (x, 0), 1) != UNSPEC_PCREL)
12999 || !MEM_P (orig_x))
13000 return ix86_delegitimize_tls_address (orig_x);
13001 x = XVECEXP (XEXP (x, 0), 0, 0);
13002 if (GET_MODE (orig_x) != GET_MODE (x))
13004 x = simplify_gen_subreg (GET_MODE (orig_x), x,
13012 if (GET_CODE (x) != PLUS
13013 || GET_CODE (XEXP (x, 1)) != CONST)
13014 return ix86_delegitimize_tls_address (orig_x);
13016 if (ix86_pic_register_p (XEXP (x, 0)))
13017 /* %ebx + GOT/GOTOFF */
13019 else if (GET_CODE (XEXP (x, 0)) == PLUS)
13021 /* %ebx + %reg * scale + GOT/GOTOFF */
13022 reg_addend = XEXP (x, 0);
13023 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
13024 reg_addend = XEXP (reg_addend, 1);
13025 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
13026 reg_addend = XEXP (reg_addend, 0);
13029 reg_addend = NULL_RTX;
13030 addend = XEXP (x, 0);
13034 addend = XEXP (x, 0);
13036 x = XEXP (XEXP (x, 1), 0);
13037 if (GET_CODE (x) == PLUS
13038 && CONST_INT_P (XEXP (x, 1)))
13040 const_addend = XEXP (x, 1);
13044 if (GET_CODE (x) == UNSPEC
13045 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
13046 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
13047 result = XVECEXP (x, 0, 0);
13049 if (TARGET_MACHO && darwin_local_data_pic (x)
13050 && !MEM_P (orig_x))
13051 result = XVECEXP (x, 0, 0);
13054 return ix86_delegitimize_tls_address (orig_x);
13057 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
13059 result = gen_rtx_PLUS (Pmode, reg_addend, result);
13062 /* If the rest of the original X doesn't involve the PIC register, add
13063 addend and subtract pic_offset_table_rtx. This can happen e.g.
13065 leal (%ebx, %ecx, 4), %ecx
13067 movl foo@GOTOFF(%ecx), %edx
13068 in which case we return (%ecx - %ebx) + foo. */
13069 if (pic_offset_table_rtx)
13070 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
13071 pic_offset_table_rtx),
13076 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
13078 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
13079 if (result == NULL_RTX)
13085 /* If X is a machine specific address (i.e. a symbol or label being
13086 referenced as a displacement from the GOT implemented using an
13087 UNSPEC), then return the base term. Otherwise return X. */
13090 ix86_find_base_term (rtx x)
13096 if (GET_CODE (x) != CONST)
13098 term = XEXP (x, 0);
13099 if (GET_CODE (term) == PLUS
13100 && (CONST_INT_P (XEXP (term, 1))
13101 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
13102 term = XEXP (term, 0);
13103 if (GET_CODE (term) != UNSPEC
13104 || (XINT (term, 1) != UNSPEC_GOTPCREL
13105 && XINT (term, 1) != UNSPEC_PCREL))
13108 return XVECEXP (term, 0, 0);
13111 return ix86_delegitimize_address (x);
13115 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
13116 int fp, FILE *file)
13118 const char *suffix;
13120 if (mode == CCFPmode || mode == CCFPUmode)
13122 code = ix86_fp_compare_code_to_integer (code);
13126 code = reverse_condition (code);
13177 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
13181 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
13182 Those same assemblers have the same but opposite lossage on cmov. */
13183 if (mode == CCmode)
13184 suffix = fp ? "nbe" : "a";
13185 else if (mode == CCCmode)
13188 gcc_unreachable ();
13204 gcc_unreachable ();
13208 gcc_assert (mode == CCmode || mode == CCCmode);
13225 gcc_unreachable ();
13229 /* ??? As above. */
13230 gcc_assert (mode == CCmode || mode == CCCmode);
13231 suffix = fp ? "nb" : "ae";
13234 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
13238 /* ??? As above. */
13239 if (mode == CCmode)
13241 else if (mode == CCCmode)
13242 suffix = fp ? "nb" : "ae";
13244 gcc_unreachable ();
13247 suffix = fp ? "u" : "p";
13250 suffix = fp ? "nu" : "np";
13253 gcc_unreachable ();
13255 fputs (suffix, file);
13258 /* Print the name of register X to FILE based on its machine mode and number.
13259 If CODE is 'w', pretend the mode is HImode.
13260 If CODE is 'b', pretend the mode is QImode.
13261 If CODE is 'k', pretend the mode is SImode.
13262 If CODE is 'q', pretend the mode is DImode.
13263 If CODE is 'x', pretend the mode is V4SFmode.
13264 If CODE is 't', pretend the mode is V8SFmode.
13265 If CODE is 'h', pretend the reg is the 'high' byte register.
13266 If CODE is 'y', print "st(0)" instead of "st" if the register is a stack operand.
13267 If CODE is 'd', duplicate the operand for an AVX instruction.
13271 print_reg (rtx x, int code, FILE *file)
13274 bool duplicated = code == 'd' && TARGET_AVX;
13276 gcc_assert (x == pc_rtx
13277 || (REGNO (x) != ARG_POINTER_REGNUM
13278 && REGNO (x) != FRAME_POINTER_REGNUM
13279 && REGNO (x) != FLAGS_REG
13280 && REGNO (x) != FPSR_REG
13281 && REGNO (x) != FPCR_REG));
13283 if (ASSEMBLER_DIALECT == ASM_ATT)
13288 gcc_assert (TARGET_64BIT);
13289 fputs ("rip", file);
13293 if (code == 'w' || MMX_REG_P (x))
13295 else if (code == 'b')
13297 else if (code == 'k')
13299 else if (code == 'q')
13301 else if (code == 'y')
13303 else if (code == 'h')
13305 else if (code == 'x')
13307 else if (code == 't')
13310 code = GET_MODE_SIZE (GET_MODE (x));
13312 /* Irritatingly, AMD extended registers use a different naming convention
13313 from the normal registers. */
13314 if (REX_INT_REG_P (x))
13316 gcc_assert (TARGET_64BIT);
13320 error ("extended registers have no high halves");
13323 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
13326 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
13329 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
13332 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
13335 error ("unsupported operand size for extended register");
13345 if (STACK_TOP_P (x))
13354 if (! ANY_FP_REG_P (x))
13355 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
13360 reg = hi_reg_name[REGNO (x)];
13363 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
13365 reg = qi_reg_name[REGNO (x)];
13368 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
13370 reg = qi_high_reg_name[REGNO (x)];
13375 gcc_assert (!duplicated);
13377 fputs (hi_reg_name[REGNO (x)] + 1, file);
13382 gcc_unreachable ();
13388 if (ASSEMBLER_DIALECT == ASM_ATT)
13389 fprintf (file, ", %%%s", reg);
13391 fprintf (file, ", %s", reg);
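/* Illustrative examples (added commentary): for (reg:SI 0 ax), code
   'k' prints "eax", 'w' prints "ax", 'b' prints "al", 'h' prints "ah"
   and, in 64-bit mode, 'q' prints "rax"; for the REX register r8 the
   sized forms printed above are r8b/r8w/r8d.  */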
13395 /* Locate some local-dynamic symbol still in use by this function
13396 so that we can print its name in some tls_local_dynamic_base pattern. */
13400 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
13404 if (GET_CODE (x) == SYMBOL_REF
13405 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
13407 cfun->machine->some_ld_name = XSTR (x, 0);
13414 static const char *
13415 get_some_local_dynamic_name (void)
13419 if (cfun->machine->some_ld_name)
13420 return cfun->machine->some_ld_name;
13422 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
13423 if (NONDEBUG_INSN_P (insn)
13424 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
13425 return cfun->machine->some_ld_name;
13430 /* Meaning of CODE:
13431 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
13432 C -- print opcode suffix for set/cmov insn.
13433 c -- like C, but print reversed condition
13434 F,f -- likewise, but for floating-point.
13435 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
13437 R -- print the prefix for register names.
13438 z -- print the opcode suffix for the size of the current operand.
13439 Z -- likewise, with special suffixes for x87 instructions.
13440 * -- print a star (in certain assembler syntax)
13441 A -- print an absolute memory reference.
13442 w -- print the operand as if it's a "word" (HImode) even if it isn't.
13443 s -- print a shift double count, followed by the assembler's argument
13445 b -- print the QImode name of the register for the indicated operand.
13446 %b0 would print %al if operands[0] is reg 0.
13447 w -- likewise, print the HImode name of the register.
13448 k -- likewise, print the SImode name of the register.
13449 q -- likewise, print the DImode name of the register.
13450 x -- likewise, print the V4SFmode name of the register.
13451 t -- likewise, print the V8SFmode name of the register.
13452 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
13453 y -- print "st(0)" instead of "st" as a register.
13454 d -- print duplicated register operand for an AVX instruction.
13455 D -- print condition for SSE cmp instruction.
13456 P -- if PIC, print an @PLT suffix.
13457 p -- print raw symbol name.
13458 X -- don't print any sort of PIC '@' suffix for a symbol.
13459 & -- print some in-use local-dynamic symbol name.
13460 H -- print a memory address offset by 8; used for sse high-parts
13461 Y -- print condition for XOP pcom* instruction.
13462 + -- print a branch hint as 'cs' or 'ds' prefix
13463 ; -- print a semicolon (after prefixes, due to a bug in older gas).
13464 @ -- print a segment register of thread base pointer load
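For example (added commentary): with operands[0] = (reg:SI 0 ax), the
template fragment "%k0" prints "%eax" in AT&T syntax or "eax" in Intel
syntax, and "%b0" prints "%al" / "al".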
13468 ix86_print_operand (FILE *file, rtx x, int code)
13475 if (ASSEMBLER_DIALECT == ASM_ATT)
13481 const char *name = get_some_local_dynamic_name ();
13483 output_operand_lossage ("'%%&' used without any "
13484 "local dynamic TLS references");
13486 assemble_name (file, name);
13491 switch (ASSEMBLER_DIALECT)
13498 /* Intel syntax. For absolute addresses, registers should not
13499 be surrounded by braces. */
13503 ix86_print_operand (file, x, 0);
13510 gcc_unreachable ();
13513 ix86_print_operand (file, x, 0);
13518 if (ASSEMBLER_DIALECT == ASM_ATT)
13523 if (ASSEMBLER_DIALECT == ASM_ATT)
13528 if (ASSEMBLER_DIALECT == ASM_ATT)
13533 if (ASSEMBLER_DIALECT == ASM_ATT)
13538 if (ASSEMBLER_DIALECT == ASM_ATT)
13543 if (ASSEMBLER_DIALECT == ASM_ATT)
13548 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
13550 /* Opcodes don't get size suffixes when using Intel syntax. */
13551 if (ASSEMBLER_DIALECT == ASM_INTEL)
13554 switch (GET_MODE_SIZE (GET_MODE (x)))
13573 output_operand_lossage
13574 ("invalid operand size for operand code '%c'", code);
13579 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
13581 (0, "non-integer operand used with operand code '%c'", code);
13585 /* 387 opcodes don't get size suffixes when using Intel syntax. */
13586 if (ASSEMBLER_DIALECT == ASM_INTEL)
13589 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
13591 switch (GET_MODE_SIZE (GET_MODE (x)))
13594 #ifdef HAVE_AS_IX86_FILDS
13604 #ifdef HAVE_AS_IX86_FILDQ
13607 fputs ("ll", file);
13615 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
13617 /* 387 opcodes don't get size suffixes
13618 if the operands are registers. */
13619 if (STACK_REG_P (x))
13622 switch (GET_MODE_SIZE (GET_MODE (x)))
13643 output_operand_lossage
13644 ("invalid operand type used with operand code '%c'", code);
13648 output_operand_lossage
13649 ("invalid operand size for operand code '%c'", code);
13667 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
13669 ix86_print_operand (file, x, 0);
13670 fputs (", ", file);
13675 /* A bit of brain damage here. The SSE compare instructions
13676 use completely different names for the comparisons than the
13677 fp conditional moves do. */
13680 switch (GET_CODE (x))
13683 fputs ("eq", file);
13686 fputs ("eq_us", file);
13689 fputs ("lt", file);
13692 fputs ("nge", file);
13695 fputs ("le", file);
13698 fputs ("ngt", file);
13701 fputs ("unord", file);
13704 fputs ("neq", file);
13707 fputs ("neq_oq", file);
13710 fputs ("ge", file);
13713 fputs ("nlt", file);
13716 fputs ("gt", file);
13719 fputs ("nle", file);
13722 fputs ("ord", file);
13725 output_operand_lossage ("operand is not a condition code, "
13726 "invalid operand code 'D'");
13732 switch (GET_CODE (x))
13736 fputs ("eq", file);
13740 fputs ("lt", file);
13744 fputs ("le", file);
13747 fputs ("unord", file);
13751 fputs ("neq", file);
13755 fputs ("nlt", file);
13759 fputs ("nle", file);
13762 fputs ("ord", file);
13765 output_operand_lossage ("operand is not a condition code, "
13766 "invalid operand code 'D'");
13772 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13773 if (ASSEMBLER_DIALECT == ASM_ATT)
13775 switch (GET_MODE (x))
13777 case HImode: putc ('w', file); break;
13779 case SFmode: putc ('l', file); break;
13781 case DFmode: putc ('q', file); break;
13782 default: gcc_unreachable ();
13789 if (!COMPARISON_P (x))
13791 output_operand_lossage ("operand is neither a constant nor a "
13792 "condition code, invalid operand code "
13796 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
13799 if (!COMPARISON_P (x))
13801 output_operand_lossage ("operand is neither a constant nor a "
13802 "condition code, invalid operand code "
13806 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13807 if (ASSEMBLER_DIALECT == ASM_ATT)
13810 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
13813 /* Like above, but reverse condition */
13815 /* Check to see if argument to %c is really a constant
13816 and not a condition code which needs to be reversed. */
13817 if (!COMPARISON_P (x))
13819 output_operand_lossage ("operand is neither a constant nor a "
13820 "condition code, invalid operand "
13824 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
13827 if (!COMPARISON_P (x))
13829 output_operand_lossage ("operand is neither a constant nor a "
13830 "condition code, invalid operand "
13834 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13835 if (ASSEMBLER_DIALECT == ASM_ATT)
13838 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
13842 /* It doesn't actually matter what mode we use here, as we're
13843 only going to use this for printing. */
13844 x = adjust_address_nv (x, DImode, 8);
13852 || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS)
13855 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
13858 int pred_val = INTVAL (XEXP (x, 0));
13860 if (pred_val < REG_BR_PROB_BASE * 45 / 100
13861 || pred_val > REG_BR_PROB_BASE * 55 / 100)
13863 int taken = pred_val > REG_BR_PROB_BASE / 2;
13864 int cputaken = final_forward_branch_p (current_output_insn) == 0;
13866 /* Emit hints only in cases where the default branch-prediction
13867 heuristics would fail. */
13868 if (taken != cputaken)
13870 /* We use the 0x3e (DS) prefix for taken branches and
13871 the 0x2e (CS) prefix for not-taken branches. */
13873 fputs ("ds ; ", file);
13875 fputs ("cs ; ", file);
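/* Added note: the emitted "ds ; " / "cs ; " place a 0x3e or 0x2e
   segment-override byte in front of the branch, which Pentium 4
   class CPUs interpret as static taken / not-taken hints.  */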
13883 switch (GET_CODE (x))
13886 fputs ("neq", file);
13889 fputs ("eq", file);
13893 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
13897 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
13901 fputs ("le", file);
13905 fputs ("lt", file);
13908 fputs ("unord", file);
13911 fputs ("ord", file);
13914 fputs ("ueq", file);
13917 fputs ("nlt", file);
13920 fputs ("nle", file);
13923 fputs ("ule", file);
13926 fputs ("ult", file);
13929 fputs ("une", file);
13932 output_operand_lossage ("operand is not a condition code, "
13933 "invalid operand code 'Y'");
13939 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
13945 if (ASSEMBLER_DIALECT == ASM_ATT)
13948 /* The kernel uses a different segment register for performance
13949 reasons; this way a system call does not have to trash the userspace
13950 segment register, which would be expensive. */
13951 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
13952 fputs ("fs", file);
13954 fputs ("gs", file);
13958 output_operand_lossage ("invalid operand code '%c'", code);
13963 print_reg (x, code, file);
13965 else if (MEM_P (x))
13967 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
13968 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
13969 && GET_MODE (x) != BLKmode)
13972 switch (GET_MODE_SIZE (GET_MODE (x)))
13974 case 1: size = "BYTE"; break;
13975 case 2: size = "WORD"; break;
13976 case 4: size = "DWORD"; break;
13977 case 8: size = "QWORD"; break;
13978 case 12: size = "TBYTE"; break;
13980 if (GET_MODE (x) == XFmode)
13985 case 32: size = "YMMWORD"; break;
13987 gcc_unreachable ();
13990 /* Check for explicit size override (codes 'b', 'w' and 'k'). */
13993 else if (code == 'w')
13995 else if (code == 'k')
13998 fputs (size, file);
13999 fputs (" PTR ", file);
14003 /* Avoid (%rip) for call operands. */
14004 if (CONSTANT_ADDRESS_P (x) && code == 'P'
14005 && !CONST_INT_P (x))
14006 output_addr_const (file, x);
14007 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
14008 output_operand_lossage ("invalid constraints for operand");
14010 output_address (x);
14013 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
14018 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
14019 REAL_VALUE_TO_TARGET_SINGLE (r, l);
14021 if (ASSEMBLER_DIALECT == ASM_ATT)
14023 /* Sign extend 32bit SFmode immediate to 8 bytes. */
14025 fprintf (file, "0x%08llx", (unsigned long long) (int) l);
14027 fprintf (file, "0x%08x", (unsigned int) l);
14030 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
14035 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
14036 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
14038 if (ASSEMBLER_DIALECT == ASM_ATT)
14040 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
14043 /* These float cases don't actually occur as immediate operands. */
14044 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
14048 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
14049 fputs (dstr, file);
14054 /* We have patterns that allow zero sets of memory, for instance.
14055 In 64-bit mode, we should probably support all 8-byte vectors,
14056 since we can in fact encode that into an immediate. */
14057 if (GET_CODE (x) == CONST_VECTOR)
14059 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
14063 if (code != 'P' && code != 'p')
14065 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
14067 if (ASSEMBLER_DIALECT == ASM_ATT)
14070 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
14071 || GET_CODE (x) == LABEL_REF)
14073 if (ASSEMBLER_DIALECT == ASM_ATT)
14076 fputs ("OFFSET FLAT:", file);
14079 if (CONST_INT_P (x))
14080 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14081 else if (flag_pic || MACHOPIC_INDIRECT)
14082 output_pic_addr_const (file, x, code);
14084 output_addr_const (file, x);
14089 ix86_print_operand_punct_valid_p (unsigned char code)
14091 return (code == '@' || code == '*' || code == '+'
14092 || code == '&' || code == ';');
14095 /* Print a memory operand whose address is ADDR. */
14098 ix86_print_operand_address (FILE *file, rtx addr)
14100 struct ix86_address parts;
14101 rtx base, index, disp;
14103 int ok = ix86_decompose_address (addr, &parts);
14107 if (parts.base && GET_CODE (parts.base) == SUBREG)
14109 rtx tmp = SUBREG_REG (parts.base);
14110 parts.base = simplify_subreg (GET_MODE (parts.base),
14111 tmp, GET_MODE (tmp), 0);
14114 if (parts.index && GET_CODE (parts.index) == SUBREG)
14116 rtx tmp = SUBREG_REG (parts.index);
14117 parts.index = simplify_subreg (GET_MODE (parts.index),
14118 tmp, GET_MODE (tmp), 0);
14122 index = parts.index;
14124 scale = parts.scale;
14132 if (ASSEMBLER_DIALECT == ASM_ATT)
14134 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
14137 gcc_unreachable ();
14140 /* Use the one-byte-shorter RIP-relative addressing in 64-bit mode. */
14141 if (TARGET_64BIT && !base && !index)
14145 if (GET_CODE (disp) == CONST
14146 && GET_CODE (XEXP (disp, 0)) == PLUS
14147 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
14148 symbol = XEXP (XEXP (disp, 0), 0);
14150 if (GET_CODE (symbol) == LABEL_REF
14151 || (GET_CODE (symbol) == SYMBOL_REF
14152 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
14155 if (!base && !index)
14157 /* Displacement only requires special attention. */
14159 if (CONST_INT_P (disp))
14161 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
14162 fputs ("ds:", file);
14163 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
14166 output_pic_addr_const (file, disp, 0);
14168 output_addr_const (file, disp);
14174 /* Print SImode registers for zero-extended addresses to force
14175 addr32 prefix. Otherwise print DImode registers to avoid it. */
14177 code = (GET_CODE (addr) == ZERO_EXTEND) ? 'l' : 'q';
14179 if (ASSEMBLER_DIALECT == ASM_ATT)
14184 output_pic_addr_const (file, disp, 0);
14185 else if (GET_CODE (disp) == LABEL_REF)
14186 output_asm_label (disp);
14188 output_addr_const (file, disp);
14193 print_reg (base, code, file);
14197 print_reg (index, code, file);
14199 fprintf (file, ",%d", scale);
14205 rtx offset = NULL_RTX;
14209 /* Pull out the offset of a symbol; print any symbol itself. */
14210 if (GET_CODE (disp) == CONST
14211 && GET_CODE (XEXP (disp, 0)) == PLUS
14212 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
14214 offset = XEXP (XEXP (disp, 0), 1);
14215 disp = gen_rtx_CONST (VOIDmode,
14216 XEXP (XEXP (disp, 0), 0));
14220 output_pic_addr_const (file, disp, 0);
14221 else if (GET_CODE (disp) == LABEL_REF)
14222 output_asm_label (disp);
14223 else if (CONST_INT_P (disp))
14226 output_addr_const (file, disp);
14232 print_reg (base, code, file);
14235 if (INTVAL (offset) >= 0)
14237 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
14241 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
14248 print_reg (index, code, file);
14250 fprintf (file, "*%d", scale);
14257 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
14260 i386_asm_output_addr_const_extra (FILE *file, rtx x)
14264 if (GET_CODE (x) != UNSPEC)
14267 op = XVECEXP (x, 0, 0);
14268 switch (XINT (x, 1))
14270 case UNSPEC_GOTTPOFF:
14271 output_addr_const (file, op);
14272 /* FIXME: This might be @TPOFF in Sun ld. */
14273 fputs ("@gottpoff", file);
14276 output_addr_const (file, op);
14277 fputs ("@tpoff", file);
14279 case UNSPEC_NTPOFF:
14280 output_addr_const (file, op);
14282 fputs ("@tpoff", file);
14284 fputs ("@ntpoff", file);
14286 case UNSPEC_DTPOFF:
14287 output_addr_const (file, op);
14288 fputs ("@dtpoff", file);
14290 case UNSPEC_GOTNTPOFF:
14291 output_addr_const (file, op);
14293 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14294 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
14296 fputs ("@gotntpoff", file);
14298 case UNSPEC_INDNTPOFF:
14299 output_addr_const (file, op);
14300 fputs ("@indntpoff", file);
14303 case UNSPEC_MACHOPIC_OFFSET:
14304 output_addr_const (file, op);
14306 machopic_output_function_base_name (file);
14310 case UNSPEC_STACK_CHECK:
14314 gcc_assert (flag_split_stack);
14316 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
14317 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
14319 gcc_unreachable ();
14322 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
14333 /* Split one or more double-mode RTL references into pairs of half-mode
14334 references. The RTL can be REG, offsettable MEM, integer constant, or
14335 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
14336 split and "num" is its length. lo_half and hi_half are output arrays
14337 that parallel "operands". */
14340 split_double_mode (enum machine_mode mode, rtx operands[],
14341 int num, rtx lo_half[], rtx hi_half[])
14343 enum machine_mode half_mode;
14349 half_mode = DImode;
14352 half_mode = SImode;
14355 gcc_unreachable ();
14358 byte = GET_MODE_SIZE (half_mode);
14362 rtx op = operands[num];
14364 /* simplify_subreg refuses to split volatile memory addresses,
14365 but we still have to handle them. */
14368 lo_half[num] = adjust_address (op, half_mode, 0);
14369 hi_half[num] = adjust_address (op, half_mode, byte);
14373 lo_half[num] = simplify_gen_subreg (half_mode, op,
14374 GET_MODE (op) == VOIDmode
14375 ? mode : GET_MODE (op), 0);
14376 hi_half[num] = simplify_gen_subreg (half_mode, op,
14377 GET_MODE (op) == VOIDmode
14378 ? mode : GET_MODE (op), byte);
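/* Usage sketch (added commentary; variable names are hypothetical):

     rtx ops[1] = { some_dimode_value };
     rtx lo[1], hi[1];
     split_double_mode (DImode, ops, 1, lo, hi);

   On a 32-bit target this leaves the SImode halves in lo[0]/hi[0],
   using adjust_address for volatile MEMs and simplify_gen_subreg
   otherwise, as above.  */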
14383 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
14384 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
14385 is the expression of the binary operation. The output may either be
14386 emitted here, or returned to the caller, like all output_* functions.
14388 There is no guarantee that the operands are the same mode, as they
14389 might be within FLOAT or FLOAT_EXTEND expressions. */
14391 #ifndef SYSV386_COMPAT
14392 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
14393 wants to fix the assemblers because that causes incompatibility
14394 with gcc. No-one wants to fix gcc because that causes
14395 incompatibility with assemblers... You can use the option of
14396 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
14397 #define SYSV386_COMPAT 1
14401 output_387_binary_op (rtx insn, rtx *operands)
14403 static char buf[40];
14406 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
14408 #ifdef ENABLE_CHECKING
14409 /* Even if we do not want to check the inputs, this documents the input
14410 constraints, which helps in understanding the following code. */
14411 if (STACK_REG_P (operands[0])
14412 && ((REG_P (operands[1])
14413 && REGNO (operands[0]) == REGNO (operands[1])
14414 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
14415 || (REG_P (operands[2])
14416 && REGNO (operands[0]) == REGNO (operands[2])
14417 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
14418 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
14421 gcc_assert (is_sse);
14424 switch (GET_CODE (operands[3]))
14427 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14428 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14436 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14437 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14445 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14446 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14454 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14455 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14463 gcc_unreachable ();
14470 strcpy (buf, ssep);
14471 if (GET_MODE (operands[0]) == SFmode)
14472 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
14474 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
14478 strcpy (buf, ssep + 1);
14479 if (GET_MODE (operands[0]) == SFmode)
14480 strcat (buf, "ss\t{%2, %0|%0, %2}");
14482 strcat (buf, "sd\t{%2, %0|%0, %2}");
14488 switch (GET_CODE (operands[3]))
14492 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
14494 rtx temp = operands[2];
14495 operands[2] = operands[1];
14496 operands[1] = temp;
14499 /* Now we know operands[0] == operands[1]. */
14501 if (MEM_P (operands[2]))
14507 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
14509 if (STACK_TOP_P (operands[0]))
14510 /* How is it that we are storing to a dead operand[2]?
14511 Well, presumably operands[1] is dead too. We can't
14512 store the result to st(0) as st(0) gets popped on this
14513 instruction. Instead store to operands[2] (which I
14514 think has to be st(1)). st(1) will be popped later.
14515 gcc <= 2.8.1 didn't have this check and generated
14516 assembly code that the Unixware assembler rejected. */
14517 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
14519 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
14523 if (STACK_TOP_P (operands[0]))
14524 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
14526 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
14531 if (MEM_P (operands[1]))
14537 if (MEM_P (operands[2]))
14543 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
14546 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
14547 derived assemblers, confusingly reverse the direction of
14548 the operation for fsub{r} and fdiv{r} when the
14549 destination register is not st(0). The Intel assembler
14550 doesn't have this brain damage. Read !SYSV386_COMPAT to
14551 figure out what the hardware really does. */
14552 if (STACK_TOP_P (operands[0]))
14553 p = "{p\t%0, %2|rp\t%2, %0}";
14555 p = "{rp\t%2, %0|p\t%0, %2}";
14557 if (STACK_TOP_P (operands[0]))
14558 /* As above for fmul/fadd, we can't store to st(0). */
14559 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
14561 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
14566 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
14569 if (STACK_TOP_P (operands[0]))
14570 p = "{rp\t%0, %1|p\t%1, %0}";
14572 p = "{p\t%1, %0|rp\t%0, %1}";
14574 if (STACK_TOP_P (operands[0]))
14575 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
14577 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
14582 if (STACK_TOP_P (operands[0]))
14584 if (STACK_TOP_P (operands[1]))
14585 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
14587 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
14590 else if (STACK_TOP_P (operands[1]))
14593 p = "{\t%1, %0|r\t%0, %1}";
14595 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
14601 p = "{r\t%2, %0|\t%0, %2}";
14603 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
14609 gcc_unreachable ();
14616 /* Return needed mode for entity in optimize_mode_switching pass. */
14619 ix86_mode_needed (int entity, rtx insn)
14621 enum attr_i387_cw mode;
14623 /* The mode UNINITIALIZED is used to store the control word after a
14624 function call or an ASM pattern. The mode ANY specifies that the function
14625 has no requirements on the control word and makes no changes to the
14626 bits we are interested in. */
14629 || (NONJUMP_INSN_P (insn)
14630 && (asm_noperands (PATTERN (insn)) >= 0
14631 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
14632 return I387_CW_UNINITIALIZED;
14634 if (recog_memoized (insn) < 0)
14635 return I387_CW_ANY;
14637 mode = get_attr_i387_cw (insn);
14642 if (mode == I387_CW_TRUNC)
14647 if (mode == I387_CW_FLOOR)
14652 if (mode == I387_CW_CEIL)
14657 if (mode == I387_CW_MASK_PM)
14662 gcc_unreachable ();
14665 return I387_CW_ANY;
14668 /* Output code to initialize the control word copies used by trunc?f?i and
14669 rounding patterns. CURRENT_MODE is set to the current control word,
14670 while NEW_MODE is set to the new control word. */
14673 emit_i387_cw_initialization (int mode)
14675 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
14678 enum ix86_stack_slot slot;
14680 rtx reg = gen_reg_rtx (HImode);
14682 emit_insn (gen_x86_fnstcw_1 (stored_mode));
14683 emit_move_insn (reg, copy_rtx (stored_mode));
14685 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
14686 || optimize_function_for_size_p (cfun))
14690 case I387_CW_TRUNC:
14691 /* round toward zero (truncate) */
14692 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
14693 slot = SLOT_CW_TRUNC;
14696 case I387_CW_FLOOR:
14697 /* round down toward -oo */
14698 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
14699 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
14700 slot = SLOT_CW_FLOOR;
14704 /* round up toward +oo */
14705 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
14706 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
14707 slot = SLOT_CW_CEIL;
14710 case I387_CW_MASK_PM:
14711 /* mask precision exception for nearbyint() */
14712 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
14713 slot = SLOT_CW_MASK_PM;
14717 gcc_unreachable ();
14724 case I387_CW_TRUNC:
14725 /* round toward zero (truncate) */
14726 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
14727 slot = SLOT_CW_TRUNC;
14730 case I387_CW_FLOOR:
14731 /* round down toward -oo */
14732 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
14733 slot = SLOT_CW_FLOOR;
14737 /* round up toward +oo */
14738 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
14739 slot = SLOT_CW_CEIL;
14742 case I387_CW_MASK_PM:
14743 /* mask precision exception for nearbyint() */
14744 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
14745 slot = SLOT_CW_MASK_PM;
14749 gcc_unreachable ();
14753 gcc_assert (slot < MAX_386_STACK_LOCALS);
14755 new_mode = assign_386_stack_local (HImode, slot);
14756 emit_move_insn (new_mode, reg);
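/* Background (added commentary): bits 10-11 of the x87 control word
   select the rounding mode -- 0x0000 nearest, 0x0400 down, 0x0800 up,
   0x0c00 toward zero -- and bit 5 (0x0020) masks the precision
   exception; hence the particular constants OR'ed in above.  */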
14759 /* Output code for INSN to convert a float to a signed int. OPERANDS
14760 are the insn operands. The output may be [HSD]Imode and the input
14761 operand may be [SDX]Fmode. */
14764 output_fix_trunc (rtx insn, rtx *operands, bool fisttp)
14766 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
14767 int dimode_p = GET_MODE (operands[0]) == DImode;
14768 int round_mode = get_attr_i387_cw (insn);
14770 /* Jump through a hoop or two for DImode, since the hardware has no
14771 non-popping instruction. We used to do this a different way, but
14772 that was somewhat fragile and broke with post-reload splitters. */
14773 if ((dimode_p || fisttp) && !stack_top_dies)
14774 output_asm_insn ("fld\t%y1", operands);
14776 gcc_assert (STACK_TOP_P (operands[1]));
14777 gcc_assert (MEM_P (operands[0]));
14778 gcc_assert (GET_MODE (operands[1]) != TFmode);
14781 output_asm_insn ("fisttp%Z0\t%0", operands);
14784 if (round_mode != I387_CW_ANY)
14785 output_asm_insn ("fldcw\t%3", operands);
14786 if (stack_top_dies || dimode_p)
14787 output_asm_insn ("fistp%Z0\t%0", operands);
14789 output_asm_insn ("fist%Z0\t%0", operands);
14790 if (round_mode != I387_CW_ANY)
14791 output_asm_insn ("fldcw\t%2", operands);
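/* Added note: a typical DImode conversion therefore comes out as

       fldcw   %3              # switch to the truncating control word
       fistpll %0              # convert, store and pop
       fldcw   %2              # restore the saved control word

   (illustrative; %2/%3 are the stack slots set up by
   emit_i387_cw_initialization).  */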
14797 /* Output code for x87 ffreep insn. The OPNO argument, which may only
14798 have the values zero or one, indicates the ffreep insn's operand
14799 from the OPERANDS array. */
14801 static const char *
14802 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
14804 if (TARGET_USE_FFREEP)
14805 #ifdef HAVE_AS_IX86_FFREEP
14806 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
14809 static char retval[32];
14810 int regno = REGNO (operands[opno]);
14812 gcc_assert (FP_REGNO_P (regno));
14814 regno -= FIRST_STACK_REG;
14816 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
14821 return opno ? "fstp\t%y1" : "fstp\t%y0";
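/* Added note: ffreep %st(N) encodes as 0xdf 0xc0+N, so when the
   assembler lacks the mnemonic the snprintf above emits the raw
   encoding via ASM_SHORT -- e.g. ".word 0xc0df" for %st(0), whose
   little-endian bytes are df c0.  */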
14825 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
14826 should be used. UNORDERED_P is true when fucom should be used. */
14829 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
14831 int stack_top_dies;
14832 rtx cmp_op0, cmp_op1;
14833 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
14837 cmp_op0 = operands[0];
14838 cmp_op1 = operands[1];
14842 cmp_op0 = operands[1];
14843 cmp_op1 = operands[2];
14848 static const char ucomiss[] = "vucomiss\t{%1, %0|%0, %1}";
14849 static const char ucomisd[] = "vucomisd\t{%1, %0|%0, %1}";
14850 static const char comiss[] = "vcomiss\t{%1, %0|%0, %1}";
14851 static const char comisd[] = "vcomisd\t{%1, %0|%0, %1}";
14853 if (GET_MODE (operands[0]) == SFmode)
14855 return &ucomiss[TARGET_AVX ? 0 : 1];
14857 return &comiss[TARGET_AVX ? 0 : 1];
14860 return &ucomisd[TARGET_AVX ? 0 : 1];
14862 return &comisd[TARGET_AVX ? 0 : 1];
14865 gcc_assert (STACK_TOP_P (cmp_op0));
14867 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
14869 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
14871 if (stack_top_dies)
14873 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
14874 return output_387_ffreep (operands, 1);
14877 return "ftst\n\tfnstsw\t%0";
14880 if (STACK_REG_P (cmp_op1)
14882 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
14883 && REGNO (cmp_op1) != FIRST_STACK_REG)
14885 /* If the top of the 387 stack dies, and the other operand
14886 is also a stack register that dies, then this must be a
14887 `fcompp' float compare. */
14891 /* There is no double-popping fcomi variant. Fortunately,
14892 eflags is immune to the fstp's cc clobbering. */
14894 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
14896 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
14897 return output_387_ffreep (operands, 0);
14902 return "fucompp\n\tfnstsw\t%0";
14904 return "fcompp\n\tfnstsw\t%0";
14909 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
14911 static const char * const alt[16] =
14913 "fcom%Z2\t%y2\n\tfnstsw\t%0",
14914 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
14915 "fucom%Z2\t%y2\n\tfnstsw\t%0",
14916 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
14918 "ficom%Z2\t%y2\n\tfnstsw\t%0",
14919 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
14923 "fcomi\t{%y1, %0|%0, %y1}",
14924 "fcomip\t{%y1, %0|%0, %y1}",
14925 "fucomi\t{%y1, %0|%0, %y1}",
14926 "fucomip\t{%y1, %0|%0, %y1}",
14937 mask = eflags_p << 3;
14938 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
14939 mask |= unordered_p << 1;
14940 mask |= stack_top_dies;
14942 gcc_assert (mask < 16);
14951 ix86_output_addr_vec_elt (FILE *file, int value)
14953 const char *directive = ASM_LONG;
14957 directive = ASM_QUAD;
14959 gcc_assert (!TARGET_64BIT);
14962 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
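/* Illustrative output (added commentary): for value 42 this prints
   e.g. ".long .L42", or ".quad .L42" when 64-bit jump-table entries
   are in use; LPREFIX is the target's local label prefix.  */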
14966 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
14968 const char *directive = ASM_LONG;
14971 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
14972 directive = ASM_QUAD;
14974 gcc_assert (!TARGET_64BIT);
14976 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
14977 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
14978 fprintf (file, "%s%s%d-%s%d\n",
14979 directive, LPREFIX, value, LPREFIX, rel);
14980 else if (HAVE_AS_GOTOFF_IN_DATA)
14981 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
14983 else if (TARGET_MACHO)
14985 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
14986 machopic_output_function_base_name (file);
14991 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
14992 GOT_SYMBOL_NAME, LPREFIX, value);
14995 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate to clear DEST. */
14999 ix86_expand_clear (rtx dest)
15003 /* We play register width games, which are only valid after reload. */
15004 gcc_assert (reload_completed);
15006 /* Avoid HImode and its attendant prefix byte. */
15007 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
15008 dest = gen_rtx_REG (SImode, REGNO (dest));
15009 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
15011 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
15012 if (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ())
15014 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15015 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
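/* Usage sketch (added commentary): after reload,
   ix86_expand_clear (gen_rtx_REG (SImode, AX_REG)) typically emits
   "xorl %eax, %eax" with the flags clobber attached above; the plain
   "movl $0, %eax" form is kept for TARGET_USE_MOV0 targets when the
   insn is not being optimized for speed.  */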
15021 /* X is an unchanging MEM. If it is a constant pool reference, return
15022 the constant pool rtx, else NULL. */
15025 maybe_get_pool_constant (rtx x)
15027 x = ix86_delegitimize_address (XEXP (x, 0));
15029 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
15030 return get_pool_constant (x);
15036 ix86_expand_move (enum machine_mode mode, rtx operands[])
15039 enum tls_model model;
15044 if (GET_CODE (op1) == SYMBOL_REF)
15046 model = SYMBOL_REF_TLS_MODEL (op1);
15049 op1 = legitimize_tls_address (op1, model, true);
15050 op1 = force_operand (op1, op0);
15053 if (GET_MODE (op1) != mode)
15054 op1 = convert_to_mode (mode, op1, 1);
15056 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15057 && SYMBOL_REF_DLLIMPORT_P (op1))
15058 op1 = legitimize_dllimport_symbol (op1, false);
15060 else if (GET_CODE (op1) == CONST
15061 && GET_CODE (XEXP (op1, 0)) == PLUS
15062 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
15064 rtx addend = XEXP (XEXP (op1, 0), 1);
15065 rtx symbol = XEXP (XEXP (op1, 0), 0);
15068 model = SYMBOL_REF_TLS_MODEL (symbol);
15070 tmp = legitimize_tls_address (symbol, model, true);
15071 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15072 && SYMBOL_REF_DLLIMPORT_P (symbol))
15073 tmp = legitimize_dllimport_symbol (symbol, true);
15077 tmp = force_operand (tmp, NULL);
15078 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
15079 op0, 1, OPTAB_DIRECT);
15082 if (GET_MODE (tmp) != mode)
15083 op1 = convert_to_mode (mode, tmp, 1);
15087 if ((flag_pic || MACHOPIC_INDIRECT)
15088 && symbolic_operand (op1, mode))
15090 if (TARGET_MACHO && !TARGET_64BIT)
15093 /* dynamic-no-pic */
15094 if (MACHOPIC_INDIRECT)
15096 rtx temp = ((reload_in_progress
15097 || ((op0 && REG_P (op0))
15099 ? op0 : gen_reg_rtx (Pmode));
15100 op1 = machopic_indirect_data_reference (op1, temp);
15102 op1 = machopic_legitimize_pic_address (op1, mode,
15103 temp == op1 ? 0 : temp);
15105 if (op0 != op1 && GET_CODE (op0) != MEM)
15107 rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
15111 if (GET_CODE (op0) == MEM)
15112 op1 = force_reg (Pmode, op1);
15116 if (GET_CODE (temp) != REG)
15117 temp = gen_reg_rtx (Pmode);
15118 temp = legitimize_pic_address (op1, temp);
15123 /* dynamic-no-pic */
15129 op1 = force_reg (mode, op1);
15130 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
15132 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
15133 op1 = legitimize_pic_address (op1, reg);
15136 if (GET_MODE (op1) != mode)
15137 op1 = convert_to_mode (mode, op1, 1);
15144 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
15145 || !push_operand (op0, mode))
15147 op1 = force_reg (mode, op1);
15149 if (push_operand (op0, mode)
15150 && ! general_no_elim_operand (op1, mode))
15151 op1 = copy_to_mode_reg (mode, op1);
  /* Force large constants in 64bit compilation into a register
     to get them CSEed.  */
15155 if (can_create_pseudo_p ()
15156 && (mode == DImode) && TARGET_64BIT
15157 && immediate_operand (op1, mode)
15158 && !x86_64_zext_immediate_operand (op1, VOIDmode)
15159 && !register_operand (op0, mode)
15161 op1 = copy_to_mode_reg (mode, op1);
15163 if (can_create_pseudo_p ()
15164 && FLOAT_MODE_P (mode)
15165 && GET_CODE (op1) == CONST_DOUBLE)
      /* If we are loading a floating point constant to a register,
	 force the value to memory now, since we'll get better code
	 out of the back end.  */
15171 op1 = validize_mem (force_const_mem (mode, op1));
15172 if (!register_operand (op0, mode))
15174 rtx temp = gen_reg_rtx (mode);
15175 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
15176 emit_move_insn (op0, temp);
15182 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
15186 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
15188 rtx op0 = operands[0], op1 = operands[1];
15189 unsigned int align = GET_MODE_ALIGNMENT (mode);
  /* Force constants other than zero into memory.  We do not know how
     the instructions used to build constants modify the upper 64 bits
     of the register; once we have that information we may be able
     to handle some of them more efficiently.  */
15195 if (can_create_pseudo_p ()
15196 && register_operand (op0, mode)
15197 && (CONSTANT_P (op1)
15198 || (GET_CODE (op1) == SUBREG
15199 && CONSTANT_P (SUBREG_REG (op1))))
15200 && !standard_sse_constant_p (op1))
15201 op1 = validize_mem (force_const_mem (mode, op1));
  /* We need to check memory alignment for SSE mode since attributes
     can make operands unaligned.  */
15205 if (can_create_pseudo_p ()
15206 && SSE_REG_MODE_P (mode)
15207 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
15208 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
15212 /* ix86_expand_vector_move_misalign() does not like constants ... */
15213 if (CONSTANT_P (op1)
15214 || (GET_CODE (op1) == SUBREG
15215 && CONSTANT_P (SUBREG_REG (op1))))
15216 op1 = validize_mem (force_const_mem (mode, op1));
15218 /* ... nor both arguments in memory. */
15219 if (!register_operand (op0, mode)
15220 && !register_operand (op1, mode))
15221 op1 = force_reg (mode, op1);
15223 tmp[0] = op0; tmp[1] = op1;
15224 ix86_expand_vector_move_misalign (mode, tmp);
15228 /* Make operand1 a register if it isn't already. */
15229 if (can_create_pseudo_p ()
15230 && !register_operand (op0, mode)
15231 && !register_operand (op1, mode))
15233 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
15237 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
15240 /* Split 32-byte AVX unaligned load and store if needed. */
15243 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
15246 rtx (*extract) (rtx, rtx, rtx);
15247 rtx (*move_unaligned) (rtx, rtx);
15248 enum machine_mode mode;
15250 switch (GET_MODE (op0))
15253 gcc_unreachable ();
15255 extract = gen_avx_vextractf128v32qi;
15256 move_unaligned = gen_avx_movdqu256;
15260 extract = gen_avx_vextractf128v8sf;
15261 move_unaligned = gen_avx_movups256;
15265 extract = gen_avx_vextractf128v4df;
15266 move_unaligned = gen_avx_movupd256;
15271 if (MEM_P (op1) && TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
15273 rtx r = gen_reg_rtx (mode);
15274 m = adjust_address (op1, mode, 0);
15275 emit_move_insn (r, m);
15276 m = adjust_address (op1, mode, 16);
15277 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
15278 emit_move_insn (op0, r);
15280 else if (MEM_P (op0) && TARGET_AVX256_SPLIT_UNALIGNED_STORE)
15282 m = adjust_address (op0, mode, 0);
15283 emit_insn (extract (m, op1, const0_rtx));
15284 m = adjust_address (op0, mode, 16);
15285 emit_insn (extract (m, op1, const1_rtx));
15288 emit_insn (move_unaligned (op0, op1));
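/* Illustrative shape of the split above (a sketch, not from the
   original sources), for a misaligned 32-byte V4DF load with
   TARGET_AVX256_SPLIT_UNALIGNED_LOAD set:

       vmovupd     (%rax), %xmm0
       vinsertf128 $1, 16(%rax), %ymm0, %ymm0

   The store side is symmetric, extracting the two 16-byte halves
   with vextractf128.  */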
15291 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
15292 straight to ix86_expand_vector_move. */
/* Code generation for scalar reg-reg moves of single and double precision data:
     if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
       movaps reg, reg
     else
       movss reg, reg
     if (x86_sse_partial_reg_dependency == true)
       movapd reg, reg
     else
       movsd reg, reg

   Code generation for scalar loads of double precision data:
     if (x86_sse_split_regs == true)
       movlpd mem, reg      (gas syntax)
     else
       movsd mem, reg

   Code generation for unaligned packed loads of single precision data
   (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
     if (x86_sse_unaligned_move_optimal)
       movups mem, reg

     if (x86_sse_partial_reg_dependency == true)
       {
         xorps  reg, reg
         movlps mem, reg
         movhps mem+8, reg
       }
     else
       {
         movlps mem, reg
         movhps mem+8, reg
       }

   Code generation for unaligned packed loads of double precision data
   (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
     if (x86_sse_unaligned_move_optimal)
       movupd mem, reg

     if (x86_sse_split_regs == true)
       {
         movlpd mem, reg
         movhpd mem+8, reg
       }
     else
       {
         movsd  mem, reg
         movhpd mem+8, reg
       }  */
15344 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
15353 switch (GET_MODE_CLASS (mode))
15355 case MODE_VECTOR_INT:
15357 switch (GET_MODE_SIZE (mode))
15360 /* If we're optimizing for size, movups is the smallest. */
15361 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15363 op0 = gen_lowpart (V4SFmode, op0);
15364 op1 = gen_lowpart (V4SFmode, op1);
15365 emit_insn (gen_sse_movups (op0, op1));
15368 op0 = gen_lowpart (V16QImode, op0);
15369 op1 = gen_lowpart (V16QImode, op1);
15370 emit_insn (gen_sse2_movdqu (op0, op1));
15373 op0 = gen_lowpart (V32QImode, op0);
15374 op1 = gen_lowpart (V32QImode, op1);
15375 ix86_avx256_split_vector_move_misalign (op0, op1);
15378 gcc_unreachable ();
15381 case MODE_VECTOR_FLOAT:
15382 op0 = gen_lowpart (mode, op0);
15383 op1 = gen_lowpart (mode, op1);
15388 emit_insn (gen_sse_movups (op0, op1));
15391 ix86_avx256_split_vector_move_misalign (op0, op1);
15394 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15396 op0 = gen_lowpart (V4SFmode, op0);
15397 op1 = gen_lowpart (V4SFmode, op1);
15398 emit_insn (gen_sse_movups (op0, op1));
15401 emit_insn (gen_sse2_movupd (op0, op1));
15404 ix86_avx256_split_vector_move_misalign (op0, op1);
15407 gcc_unreachable ();
15412 gcc_unreachable ();
15420 /* If we're optimizing for size, movups is the smallest. */
15421 if (optimize_insn_for_size_p ()
15422 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15424 op0 = gen_lowpart (V4SFmode, op0);
15425 op1 = gen_lowpart (V4SFmode, op1);
15426 emit_insn (gen_sse_movups (op0, op1));
	  /* ??? If we have typed data, then it would appear that using
	     movdqu is the only way to get unaligned data loaded with
	     integer type instructions.  */
15433 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
15435 op0 = gen_lowpart (V16QImode, op0);
15436 op1 = gen_lowpart (V16QImode, op1);
15437 emit_insn (gen_sse2_movdqu (op0, op1));
15441 if (TARGET_SSE2 && mode == V2DFmode)
15445 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
15447 op0 = gen_lowpart (V2DFmode, op0);
15448 op1 = gen_lowpart (V2DFmode, op1);
15449 emit_insn (gen_sse2_movupd (op0, op1));
15453 /* When SSE registers are split into halves, we can avoid
15454 writing to the top half twice. */
15455 if (TARGET_SSE_SPLIT_REGS)
15457 emit_clobber (op0);
15462 /* ??? Not sure about the best option for the Intel chips.
15463 The following would seem to satisfy; the register is
15464 entirely cleared, breaking the dependency chain. We
15465 then store to the upper half, with a dependency depth
15466 of one. A rumor has it that Intel recommends two movsd
15467 followed by an unpacklpd, but this is unconfirmed. And
15468 given that the dependency depth of the unpacklpd would
15469 still be one, I'm not sure why this would be better. */
15470 zero = CONST0_RTX (V2DFmode);
15473 m = adjust_address (op1, DFmode, 0);
15474 emit_insn (gen_sse2_loadlpd (op0, zero, m));
15475 m = adjust_address (op1, DFmode, 8);
15476 emit_insn (gen_sse2_loadhpd (op0, op0, m));
15480 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
15482 op0 = gen_lowpart (V4SFmode, op0);
15483 op1 = gen_lowpart (V4SFmode, op1);
15484 emit_insn (gen_sse_movups (op0, op1));
15488 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
15489 emit_move_insn (op0, CONST0_RTX (mode));
15491 emit_clobber (op0);
15493 if (mode != V4SFmode)
15494 op0 = gen_lowpart (V4SFmode, op0);
15495 m = adjust_address (op1, V2SFmode, 0);
15496 emit_insn (gen_sse_loadlps (op0, op0, m));
15497 m = adjust_address (op1, V2SFmode, 8);
15498 emit_insn (gen_sse_loadhps (op0, op0, m));
15501 else if (MEM_P (op0))
15503 /* If we're optimizing for size, movups is the smallest. */
15504 if (optimize_insn_for_size_p ()
15505 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15507 op0 = gen_lowpart (V4SFmode, op0);
15508 op1 = gen_lowpart (V4SFmode, op1);
15509 emit_insn (gen_sse_movups (op0, op1));
15513 /* ??? Similar to above, only less clear because of quote
15514 typeless stores unquote. */
15515 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
15516 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
15518 op0 = gen_lowpart (V16QImode, op0);
15519 op1 = gen_lowpart (V16QImode, op1);
15520 emit_insn (gen_sse2_movdqu (op0, op1));
15524 if (TARGET_SSE2 && mode == V2DFmode)
15526 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
15528 op0 = gen_lowpart (V2DFmode, op0);
15529 op1 = gen_lowpart (V2DFmode, op1);
15530 emit_insn (gen_sse2_movupd (op0, op1));
15534 m = adjust_address (op0, DFmode, 0);
15535 emit_insn (gen_sse2_storelpd (m, op1));
15536 m = adjust_address (op0, DFmode, 8);
15537 emit_insn (gen_sse2_storehpd (m, op1));
15542 if (mode != V4SFmode)
15543 op1 = gen_lowpart (V4SFmode, op1);
15545 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
15547 op0 = gen_lowpart (V4SFmode, op0);
15548 emit_insn (gen_sse_movups (op0, op1));
15552 m = adjust_address (op0, V2SFmode, 0);
15553 emit_insn (gen_sse_storelps (m, op1));
15554 m = adjust_address (op0, V2SFmode, 8);
15555 emit_insn (gen_sse_storehps (m, op1));
15560 gcc_unreachable ();
15563 /* Expand a push in MODE. This is some mode for which we do not support
15564 proper push instructions, at least from the registers that we expect
15565 the value to live in. */
15568 ix86_expand_push (enum machine_mode mode, rtx x)
15572 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
15573 GEN_INT (-GET_MODE_SIZE (mode)),
15574 stack_pointer_rtx, 1, OPTAB_DIRECT);
15575 if (tmp != stack_pointer_rtx)
15576 emit_move_insn (stack_pointer_rtx, tmp);
15578 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
  /* When we push an operand onto stack, it has to be aligned at least
     at the function argument boundary.  However since we don't have
     the argument type, we can't determine the actual argument
     boundary.  */
15584 emit_move_insn (tmp, x);
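/* Illustrative shape of the expansion above (a sketch, not from the
   original sources), pushing an N-byte MODE value X on ia32:

       subl $N, %esp        ; manual stack adjustment
       mov  X, (%esp)       ; ordinary move into the new slot  */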
15587 /* Helper function of ix86_fixup_binary_operands to canonicalize
15588 operand order. Returns true if the operands should be swapped. */
15591 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
15594 rtx dst = operands[0];
15595 rtx src1 = operands[1];
15596 rtx src2 = operands[2];
15598 /* If the operation is not commutative, we can't do anything. */
15599 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
15602 /* Highest priority is that src1 should match dst. */
15603 if (rtx_equal_p (dst, src1))
15605 if (rtx_equal_p (dst, src2))
15608 /* Next highest priority is that immediate constants come second. */
15609 if (immediate_operand (src2, mode))
15611 if (immediate_operand (src1, mode))
15614 /* Lowest priority is that memory references should come second. */
15624 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
15625 destination to use for the operation. If different from the true
15626 destination in operands[0], a copy operation will be required. */
15629 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
15632 rtx dst = operands[0];
15633 rtx src1 = operands[1];
15634 rtx src2 = operands[2];
15636 /* Canonicalize operand order. */
15637 if (ix86_swap_binary_operands_p (code, mode, operands))
15641 /* It is invalid to swap operands of different modes. */
15642 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
15649 /* Both source operands cannot be in memory. */
15650 if (MEM_P (src1) && MEM_P (src2))
15652 /* Optimization: Only read from memory once. */
15653 if (rtx_equal_p (src1, src2))
15655 src2 = force_reg (mode, src2);
15659 src2 = force_reg (mode, src2);
15662 /* If the destination is memory, and we do not have matching source
15663 operands, do things in registers. */
15664 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
15665 dst = gen_reg_rtx (mode);
15667 /* Source 1 cannot be a constant. */
15668 if (CONSTANT_P (src1))
15669 src1 = force_reg (mode, src1);
15671 /* Source 1 cannot be a non-matching memory. */
15672 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
15673 src1 = force_reg (mode, src1);
15675 operands[1] = src1;
15676 operands[2] = src2;
15680 /* Similarly, but assume that the destination has already been
15681 set up properly. */
15684 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
15685 enum machine_mode mode, rtx operands[])
15687 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
15688 gcc_assert (dst == operands[0]);
/* Attempt to expand a binary operator.  Make the expansion closer to the
   actual machine than just general_operand, which will allow 3 separate
   memory references (one output, two input) in a single insn.  */
15696 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
15699 rtx src1, src2, dst, op, clob;
15701 dst = ix86_fixup_binary_operands (code, mode, operands);
15702 src1 = operands[1];
15703 src2 = operands[2];
15705 /* Emit the instruction. */
15707 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
15708 if (reload_in_progress)
15710 /* Reload doesn't know about the flags register, and doesn't know that
15711 it doesn't want to clobber it. We can only do this with PLUS. */
15712 gcc_assert (code == PLUS);
15715 else if (reload_completed
15717 && !rtx_equal_p (dst, src1))
15719 /* This is going to be an LEA; avoid splitting it later. */
15724 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15725 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
15728 /* Fix up the destination if needed. */
15729 if (dst != operands[0])
15730 emit_move_insn (operands[0], dst);
15733 /* Return TRUE or FALSE depending on whether the binary operator meets the
15734 appropriate constraints. */
15737 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
15740 rtx dst = operands[0];
15741 rtx src1 = operands[1];
15742 rtx src2 = operands[2];
15744 /* Both source operands cannot be in memory. */
15745 if (MEM_P (src1) && MEM_P (src2))
15748 /* Canonicalize operand order for commutative operators. */
15749 if (ix86_swap_binary_operands_p (code, mode, operands))
15756 /* If the destination is memory, we must have a matching source operand. */
15757 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
15760 /* Source 1 cannot be a constant. */
15761 if (CONSTANT_P (src1))
15764 /* Source 1 cannot be a non-matching memory. */
15765 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
15767 /* Support "andhi/andsi/anddi" as a zero-extending move. */
15768 return (code == AND
15771 || (TARGET_64BIT && mode == DImode))
15772 && CONST_INT_P (src2)
15773 && (INTVAL (src2) == 0xff
15774 || INTVAL (src2) == 0xffff));
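/* Illustrative example (not from the original sources) of the
   zero-extending AND accepted above: "a = b & 0xff" with B a
   non-matching memory operand is still ok, because the insn can be
   emitted as a zero-extending load, e.g.

       movzbl b, %eax

   instead of requiring a matching and-with-memory form.  */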
/* Attempt to expand a unary operator.  Make the expansion closer to the
   actual machine than just general_operand, which will allow 2 separate
   memory references (one output, one input) in a single insn.  */
15785 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
15788 int matching_memory;
15789 rtx src, dst, op, clob;
15794 /* If the destination is memory, and we do not have matching source
15795 operands, do things in registers. */
15796 matching_memory = 0;
15799 if (rtx_equal_p (dst, src))
15800 matching_memory = 1;
15802 dst = gen_reg_rtx (mode);
15805 /* When source operand is memory, destination must match. */
15806 if (MEM_P (src) && !matching_memory)
15807 src = force_reg (mode, src);
15809 /* Emit the instruction. */
15811 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
15812 if (reload_in_progress || code == NOT)
15814 /* Reload doesn't know about the flags register, and doesn't know that
15815 it doesn't want to clobber it. */
15816 gcc_assert (code == NOT);
15821 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15822 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
15825 /* Fix up the destination if needed. */
15826 if (dst != operands[0])
15827 emit_move_insn (operands[0], dst);
15830 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
15831 divisor are within the range [0-255]. */
15834 ix86_split_idivmod (enum machine_mode mode, rtx operands[],
15837 rtx end_label, qimode_label;
15838 rtx insn, div, mod;
15839 rtx scratch, tmp0, tmp1, tmp2;
15840 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
15841 rtx (*gen_zero_extend) (rtx, rtx);
15842 rtx (*gen_test_ccno_1) (rtx, rtx);
15847 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
15848 gen_test_ccno_1 = gen_testsi_ccno_1;
15849 gen_zero_extend = gen_zero_extendqisi2;
15852 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
15853 gen_test_ccno_1 = gen_testdi_ccno_1;
15854 gen_zero_extend = gen_zero_extendqidi2;
15857 gcc_unreachable ();
15860 end_label = gen_label_rtx ();
15861 qimode_label = gen_label_rtx ();
15863 scratch = gen_reg_rtx (mode);
  /* Use 8bit unsigned divmod if dividend and divisor are within
     the range [0-255].  */
15867 emit_move_insn (scratch, operands[2]);
15868 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
15869 scratch, 1, OPTAB_DIRECT);
15870 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
15871 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
15872 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
15873 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
15874 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
15876 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
15877 predict_jump (REG_BR_PROB_BASE * 50 / 100);
15878 JUMP_LABEL (insn) = qimode_label;
  /* Generate original signed/unsigned divmod.  */
15881 div = gen_divmod4_1 (operands[0], operands[1],
15882 operands[2], operands[3]);
15885 /* Branch to the end. */
15886 emit_jump_insn (gen_jump (end_label));
15889 /* Generate 8bit unsigned divide. */
15890 emit_label (qimode_label);
15891 /* Don't use operands[0] for result of 8bit divide since not all
15892 registers support QImode ZERO_EXTRACT. */
15893 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
15894 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
15895 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
15896 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
15900 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
15901 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
15905 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
15906 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
15909 /* Extract remainder from AH. */
15910 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
15911 if (REG_P (operands[1]))
15912 insn = emit_move_insn (operands[1], tmp1);
	  /* Need a new scratch register since the old one has the result
	     of the 8bit divide.  */
15917 scratch = gen_reg_rtx (mode);
15918 emit_move_insn (scratch, tmp1);
15919 insn = emit_move_insn (operands[1], scratch);
15921 set_unique_reg_note (insn, REG_EQUAL, mod);
15923 /* Zero extend quotient from AL. */
15924 tmp1 = gen_lowpart (QImode, tmp0);
15925 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
15926 set_unique_reg_note (insn, REG_EQUAL, div);
15928 emit_label (end_label);
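/* Illustrative shape of the code emitted above (a sketch, not from
   the original sources), for an unsigned SImode divmod:

       movl  dividend, %ecx
       orl   divisor, %ecx
       testl $-0x100, %ecx      ; both operands within [0-255]?
       je    .Lqimode
       divl  ...                ; full-width divide
       jmp   .Lend
   .Lqimode:
       divb  ...                ; 8bit divide: AL = quotient, AH = remainder
   .Lend:                                                                  */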
15931 #define LEA_SEARCH_THRESHOLD 12
15933 /* Search backward for non-agu definition of register number REGNO1
15934 or register number REGNO2 in INSN's basic block until
15935 1. Pass LEA_SEARCH_THRESHOLD instructions, or
15936 2. Reach BB boundary, or
15937 3. Reach agu definition.
   Returns the distance between the non-agu definition point and INSN.
   If there is no definition point, returns -1.  */
15942 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
15945 basic_block bb = BLOCK_FOR_INSN (insn);
15948 enum attr_type insn_type;
15950 if (insn != BB_HEAD (bb))
15952 rtx prev = PREV_INSN (insn);
15953 while (prev && distance < LEA_SEARCH_THRESHOLD)
15955 if (NONDEBUG_INSN_P (prev))
15958 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
15959 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
15960 && !DF_REF_IS_ARTIFICIAL (*def_rec)
15961 && (regno1 == DF_REF_REGNO (*def_rec)
15962 || regno2 == DF_REF_REGNO (*def_rec)))
15964 insn_type = get_attr_type (prev);
15965 if (insn_type != TYPE_LEA)
15969 if (prev == BB_HEAD (bb))
15971 prev = PREV_INSN (prev);
15975 if (distance < LEA_SEARCH_THRESHOLD)
15979 bool simple_loop = false;
15981 FOR_EACH_EDGE (e, ei, bb->preds)
15984 simple_loop = true;
15990 rtx prev = BB_END (bb);
15993 && distance < LEA_SEARCH_THRESHOLD)
15995 if (NONDEBUG_INSN_P (prev))
15998 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
15999 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
16000 && !DF_REF_IS_ARTIFICIAL (*def_rec)
16001 && (regno1 == DF_REF_REGNO (*def_rec)
16002 || regno2 == DF_REF_REGNO (*def_rec)))
16004 insn_type = get_attr_type (prev);
16005 if (insn_type != TYPE_LEA)
16009 prev = PREV_INSN (prev);
16017 /* get_attr_type may modify recog data. We want to make sure
16018 that recog data is valid for instruction INSN, on which
16019 distance_non_agu_define is called. INSN is unchanged here. */
16020 extract_insn_cached (insn);
/* Return the distance between INSN and the next insn that uses
   register number REGNO0 in a memory address.  Return -1 if no such
   use is found within LEA_SEARCH_THRESHOLD or if REGNO0 is set.  */
16029 distance_agu_use (unsigned int regno0, rtx insn)
16031 basic_block bb = BLOCK_FOR_INSN (insn);
16036 if (insn != BB_END (bb))
16038 rtx next = NEXT_INSN (insn);
16039 while (next && distance < LEA_SEARCH_THRESHOLD)
16041 if (NONDEBUG_INSN_P (next))
16045 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
16046 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
16047 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
16048 && regno0 == DF_REF_REGNO (*use_rec))
16050 /* Return DISTANCE if OP0 is used in memory
16051 address in NEXT. */
16055 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
16056 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
16057 && !DF_REF_IS_ARTIFICIAL (*def_rec)
16058 && regno0 == DF_REF_REGNO (*def_rec))
16060 /* Return -1 if OP0 is set in NEXT. */
16064 if (next == BB_END (bb))
16066 next = NEXT_INSN (next);
16070 if (distance < LEA_SEARCH_THRESHOLD)
16074 bool simple_loop = false;
16076 FOR_EACH_EDGE (e, ei, bb->succs)
16079 simple_loop = true;
16085 rtx next = BB_HEAD (bb);
16088 && distance < LEA_SEARCH_THRESHOLD)
16090 if (NONDEBUG_INSN_P (next))
16094 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
16095 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
16096 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
16097 && regno0 == DF_REF_REGNO (*use_rec))
16099 /* Return DISTANCE if OP0 is used in memory
16100 address in NEXT. */
16104 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
16105 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
16106 && !DF_REF_IS_ARTIFICIAL (*def_rec)
16107 && regno0 == DF_REF_REGNO (*def_rec))
16109 /* Return -1 if OP0 is set in NEXT. */
16114 next = NEXT_INSN (next);
/* Define this macro to tune LEA priority vs ADD; it takes effect when
   there is a choice between LEA and ADD.
   Negative value: ADD is preferred over LEA.
   Zero: Neutral.
   Positive value: LEA is preferred over ADD.  */
16127 #define IX86_LEA_PRIORITY 2
/* Return true if it is ok to optimize an ADD operation to LEA
   operation to avoid flag register consumption.  For most processors,
   ADD is faster than LEA.  For processors like ATOM, if the
   destination register of LEA holds an actual address which will be
   used soon, LEA is better; otherwise ADD is better.  */
16136 ix86_lea_for_add_ok (rtx insn, rtx operands[])
16138 unsigned int regno0 = true_regnum (operands[0]);
16139 unsigned int regno1 = true_regnum (operands[1]);
16140 unsigned int regno2 = true_regnum (operands[2]);
16142 /* If a = b + c, (a!=b && a!=c), must use lea form. */
16143 if (regno0 != regno1 && regno0 != regno2)
16146 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
16150 int dist_define, dist_use;
      /* Return false if REGNO0 isn't used in a memory address.  */
16153 dist_use = distance_agu_use (regno0, insn);
16157 dist_define = distance_non_agu_define (regno1, regno2, insn);
16158 if (dist_define <= 0)
  /* If this insn has both a backward non-agu dependence and a forward
     agu dependence, the one with the shorter distance takes effect.  */
16163 if ((dist_define + IX86_LEA_PRIORITY) < dist_use)
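/* Worked example of the trade-off above (illustrative; the exact
   return values are elided here): with IX86_LEA_PRIORITY == 2, a
   non-AGU definition of the sources 2 insns back (dist_define == 2)
   and an address use 8 insns ahead (dist_use == 8) satisfy 2 + 2 < 8,
   so the nearby backward dependence dominates; were the address use
   only 3 insns away, the forward AGU dependence would win and LEA
   would be preferred.  */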
/* Return true if destination reg of SET_BODY is shift count of
   USE_BODY.  */
16174 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
16180 /* Retrieve destination of SET_BODY. */
16181 switch (GET_CODE (set_body))
16184 set_dest = SET_DEST (set_body);
16185 if (!set_dest || !REG_P (set_dest))
16189 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
16190 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
16198 /* Retrieve shift count of USE_BODY. */
16199 switch (GET_CODE (use_body))
16202 shift_rtx = XEXP (use_body, 1);
16205 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
16206 if (ix86_dep_by_shift_count_body (set_body,
16207 XVECEXP (use_body, 0, i)))
16215 && (GET_CODE (shift_rtx) == ASHIFT
16216 || GET_CODE (shift_rtx) == LSHIFTRT
16217 || GET_CODE (shift_rtx) == ASHIFTRT
16218 || GET_CODE (shift_rtx) == ROTATE
16219 || GET_CODE (shift_rtx) == ROTATERT))
16221 rtx shift_count = XEXP (shift_rtx, 1);
16223 /* Return true if shift count is dest of SET_BODY. */
16224 if (REG_P (shift_count)
16225 && true_regnum (set_dest) == true_regnum (shift_count))
/* Return true if destination reg of SET_INSN is shift count of
   USE_INSN.  */
16236 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
16238 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
16239 PATTERN (use_insn));
16242 /* Return TRUE or FALSE depending on whether the unary operator meets the
16243 appropriate constraints. */
16246 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
16247 enum machine_mode mode ATTRIBUTE_UNUSED,
16248 rtx operands[2] ATTRIBUTE_UNUSED)
  /* If one of the operands is memory, source and destination must match.  */
16251 if ((MEM_P (operands[0])
16252 || MEM_P (operands[1]))
16253 && ! rtx_equal_p (operands[0], operands[1]))
16258 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
16259 are ok, keeping in mind the possible movddup alternative. */
16262 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
16264 if (MEM_P (operands[0]))
16265 return rtx_equal_p (operands[0], operands[1 + high]);
16266 if (MEM_P (operands[1]) && MEM_P (operands[2]))
16267 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
16271 /* Post-reload splitter for converting an SF or DFmode value in an
16272 SSE register into an unsigned SImode. */
16275 ix86_split_convert_uns_si_sse (rtx operands[])
16277 enum machine_mode vecmode;
16278 rtx value, large, zero_or_two31, input, two31, x;
16280 large = operands[1];
16281 zero_or_two31 = operands[2];
16282 input = operands[3];
16283 two31 = operands[4];
16284 vecmode = GET_MODE (large);
16285 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
16287 /* Load up the value into the low element. We must ensure that the other
16288 elements are valid floats -- zero is the easiest such value. */
16291 if (vecmode == V4SFmode)
16292 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
16294 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
16298 input = gen_rtx_REG (vecmode, REGNO (input));
16299 emit_move_insn (value, CONST0_RTX (vecmode));
16300 if (vecmode == V4SFmode)
16301 emit_insn (gen_sse_movss (value, value, input));
16303 emit_insn (gen_sse2_movsd (value, value, input));
16306 emit_move_insn (large, two31);
16307 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
16309 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
16310 emit_insn (gen_rtx_SET (VOIDmode, large, x));
16312 x = gen_rtx_AND (vecmode, zero_or_two31, large);
16313 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
16315 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
16316 emit_insn (gen_rtx_SET (VOIDmode, value, x));
16318 large = gen_rtx_REG (V4SImode, REGNO (large));
16319 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
16321 x = gen_rtx_REG (V4SImode, REGNO (value));
16322 if (vecmode == V4SFmode)
16323 emit_insn (gen_sse2_cvttps2dq (x, value));
16325 emit_insn (gen_sse2_cvttpd2dq (x, value));
16328 emit_insn (gen_xorv4si3 (value, value, large));
16331 /* Convert an unsigned DImode value into a DFmode, using only SSE.
16332 Expects the 64-bit DImode to be supplied in a pair of integral
16333 registers. Requires SSE2; will use SSE3 if available. For x86_32,
16334 -mfpmath=sse, !optimize_size only. */
16337 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
16339 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
16340 rtx int_xmm, fp_xmm;
16341 rtx biases, exponents;
16344 int_xmm = gen_reg_rtx (V4SImode);
16345 if (TARGET_INTER_UNIT_MOVES)
16346 emit_insn (gen_movdi_to_sse (int_xmm, input));
16347 else if (TARGET_SSE_SPLIT_REGS)
16349 emit_clobber (int_xmm);
16350 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
16354 x = gen_reg_rtx (V2DImode);
16355 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
16356 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
16359 x = gen_rtx_CONST_VECTOR (V4SImode,
16360 gen_rtvec (4, GEN_INT (0x43300000UL),
16361 GEN_INT (0x45300000UL),
16362 const0_rtx, const0_rtx));
16363 exponents = validize_mem (force_const_mem (V4SImode, x));
16365 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
16366 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
16368 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
16369 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
16370 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
16371 (0x1.0p84 + double(fp_value_hi_xmm)).
16372 Note these exponents differ by 32. */
16374 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
16376 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
16377 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
16378 real_ldexp (&bias_lo_rvt, &dconst1, 52);
16379 real_ldexp (&bias_hi_rvt, &dconst1, 84);
16380 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
16381 x = const_double_from_real_value (bias_hi_rvt, DFmode);
16382 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
16383 biases = validize_mem (force_const_mem (V2DFmode, biases));
16384 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
16386 /* Add the upper and lower DFmode values together. */
16388 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
16391 x = copy_to_mode_reg (V2DFmode, fp_xmm);
16392 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
16393 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
16396 ix86_expand_vector_extract (false, target, fp_xmm, 0);
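/* Illustrative sketch (not part of the original sources) of the bias
   trick above, written as plain C and assuming little-endian IEEE
   doubles:

       uint64_t lo_bits = ((uint64_t) 0x43300000 << 32) | (uint32_t) v;
       uint64_t hi_bits = ((uint64_t) 0x45300000 << 32) | (uint32_t) (v >> 32);
       double lo, hi;
       memcpy (&lo, &lo_bits, 8);     // == 0x1.0p52 + (v & 0xffffffff)
       memcpy (&hi, &hi_bits, 8);     // == 0x1.0p84 + (v >> 32) * 0x1.0p32
       result = (hi - 0x1.0p84) + (lo - 0x1.0p52);

   Both subtractions are exact, so only the final addition rounds.  */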
16399 /* Not used, but eases macroization of patterns. */
16401 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
16402 rtx input ATTRIBUTE_UNUSED)
16404 gcc_unreachable ();
16407 /* Convert an unsigned SImode value into a DFmode. Only currently used
16408 for SSE, but applicable anywhere. */
16411 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
16413 REAL_VALUE_TYPE TWO31r;
16416 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
16417 NULL, 1, OPTAB_DIRECT);
16419 fp = gen_reg_rtx (DFmode);
16420 emit_insn (gen_floatsidf2 (fp, x));
16422 real_ldexp (&TWO31r, &dconst1, 31);
16423 x = const_double_from_real_value (TWO31r, DFmode);
16425 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
16427 emit_move_insn (target, x);
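/* Worked example (illustrative): for input 0xFFFFFFFF the PLUS above
   wraps to 0x7FFFFFFF, the signed conversion yields 2147483647.0, and
   adding TWO31r back gives 4294967295.0, the correct unsigned
   value.  */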
16430 /* Convert a signed DImode value into a DFmode. Only used for SSE in
16431 32-bit mode; otherwise we have a direct convert instruction. */
16434 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
16436 REAL_VALUE_TYPE TWO32r;
16437 rtx fp_lo, fp_hi, x;
16439 fp_lo = gen_reg_rtx (DFmode);
16440 fp_hi = gen_reg_rtx (DFmode);
16442 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
16444 real_ldexp (&TWO32r, &dconst1, 32);
16445 x = const_double_from_real_value (TWO32r, DFmode);
16446 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
16448 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
16450 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
16453 emit_move_insn (target, x);
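/* Worked example (illustrative): for input -1 (hi == -1, lo ==
   0xFFFFFFFF) we get fp_hi = -1.0 * 0x1.0p32 = -4294967296.0 and
   fp_lo = 4294967295.0, whose sum is -1.0 as required.  */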
16456 /* Convert an unsigned SImode value into a SFmode, using only SSE.
16457 For x86_32, -mfpmath=sse, !optimize_size only. */
16459 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
16461 REAL_VALUE_TYPE ONE16r;
16462 rtx fp_hi, fp_lo, int_hi, int_lo, x;
16464 real_ldexp (&ONE16r, &dconst1, 16);
16465 x = const_double_from_real_value (ONE16r, SFmode);
16466 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
16467 NULL, 0, OPTAB_DIRECT);
16468 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
16469 NULL, 0, OPTAB_DIRECT);
16470 fp_hi = gen_reg_rtx (SFmode);
16471 fp_lo = gen_reg_rtx (SFmode);
16472 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
16473 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
16474 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
16476 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
16478 if (!rtx_equal_p (target, fp_hi))
16479 emit_move_insn (target, fp_hi);
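/* Worked example (illustrative): for input 0x12345678 we split into
   int_hi == 0x1234 and int_lo == 0x5678.  Both halves convert to
   SFmode exactly, the multiply by 0x1.0p16 is exact, and only the
   final addition rounds, giving a correctly rounded result even for
   inputs >= 2^31 that a signed conversion would mishandle.  */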
16482 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
   then replicate the value for all elements of the vector
   register.  */
16487 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
16494 v = gen_rtvec (4, value, value, value, value);
16495 return gen_rtx_CONST_VECTOR (V4SImode, v);
16499 v = gen_rtvec (2, value, value);
16500 return gen_rtx_CONST_VECTOR (V2DImode, v);
16504 v = gen_rtvec (8, value, value, value, value,
16505 value, value, value, value);
16507 v = gen_rtvec (8, value, CONST0_RTX (SFmode),
16508 CONST0_RTX (SFmode), CONST0_RTX (SFmode),
16509 CONST0_RTX (SFmode), CONST0_RTX (SFmode),
16510 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
16511 return gen_rtx_CONST_VECTOR (V8SFmode, v);
16515 v = gen_rtvec (4, value, value, value, value);
16517 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
16518 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
16519 return gen_rtx_CONST_VECTOR (V4SFmode, v);
16523 v = gen_rtvec (4, value, value, value, value);
16525 v = gen_rtvec (4, value, CONST0_RTX (DFmode),
16526 CONST0_RTX (DFmode), CONST0_RTX (DFmode));
16527 return gen_rtx_CONST_VECTOR (V4DFmode, v);
16531 v = gen_rtvec (2, value, value);
16533 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
16534 return gen_rtx_CONST_VECTOR (V2DFmode, v);
16537 gcc_unreachable ();
16541 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
16542 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
16543 for an SSE register. If VECT is true, then replicate the mask for
16544 all elements of the vector register. If INVERT is true, then create
16545 a mask excluding the sign bit. */
16548 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
16550 enum machine_mode vec_mode, imode;
16551 HOST_WIDE_INT hi, lo;
16556 /* Find the sign bit, sign extended to 2*HWI. */
16563 mode = GET_MODE_INNER (mode);
16565 lo = 0x80000000, hi = lo < 0;
16572 mode = GET_MODE_INNER (mode);
16574 if (HOST_BITS_PER_WIDE_INT >= 64)
16575 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
16577 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
16582 vec_mode = VOIDmode;
16583 if (HOST_BITS_PER_WIDE_INT >= 64)
16586 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
16593 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
16597 lo = ~lo, hi = ~hi;
16603 mask = immed_double_const (lo, hi, imode);
16605 vec = gen_rtvec (2, v, mask);
16606 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
16607 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
16614 gcc_unreachable ();
16618 lo = ~lo, hi = ~hi;
16620 /* Force this value into the low part of a fp vector constant. */
16621 mask = immed_double_const (lo, hi, imode);
16622 mask = gen_lowpart (mode, mask);
16624 if (vec_mode == VOIDmode)
16625 return force_reg (mode, mask);
16627 v = ix86_build_const_vector (vec_mode, vect, mask);
16628 return force_reg (vec_mode, v);
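/* Illustrative values (not from the original sources): for DFmode the
   mask built above is 0x8000000000000000, i.e. just the sign bit;
   with INVERT it becomes 0x7FFFFFFFFFFFFFFF, which clears the sign as
   needed for ABS.  For SFmode the per-element mask is 0x80000000.  */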
16631 /* Generate code for floating point ABS or NEG. */
16634 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
16637 rtx mask, set, dst, src;
16638 bool use_sse = false;
16639 bool vector_mode = VECTOR_MODE_P (mode);
16640 enum machine_mode vmode = mode;
16644 else if (mode == TFmode)
16646 else if (TARGET_SSE_MATH)
16648 use_sse = SSE_FLOAT_MODE_P (mode);
16649 if (mode == SFmode)
16651 else if (mode == DFmode)
16655 /* NEG and ABS performed with SSE use bitwise mask operations.
16656 Create the appropriate mask now. */
16658 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
16665 set = gen_rtx_fmt_e (code, mode, src);
16666 set = gen_rtx_SET (VOIDmode, dst, set);
16673 use = gen_rtx_USE (VOIDmode, mask);
16675 par = gen_rtvec (2, set, use);
16678 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16679 par = gen_rtvec (3, set, use, clob);
16681 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
16687 /* Expand a copysign operation. Special case operand 0 being a constant. */
16690 ix86_expand_copysign (rtx operands[])
16692 enum machine_mode mode, vmode;
16693 rtx dest, op0, op1, mask, nmask;
16695 dest = operands[0];
16699 mode = GET_MODE (dest);
16701 if (mode == SFmode)
16703 else if (mode == DFmode)
16708 if (GET_CODE (op0) == CONST_DOUBLE)
16710 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
16712 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
16713 op0 = simplify_unary_operation (ABS, mode, op0, mode);
16715 if (mode == SFmode || mode == DFmode)
16717 if (op0 == CONST0_RTX (mode))
16718 op0 = CONST0_RTX (vmode);
16721 rtx v = ix86_build_const_vector (vmode, false, op0);
16723 op0 = force_reg (vmode, v);
16726 else if (op0 != CONST0_RTX (mode))
16727 op0 = force_reg (mode, op0);
16729 mask = ix86_build_signbit_mask (vmode, 0, 0);
16731 if (mode == SFmode)
16732 copysign_insn = gen_copysignsf3_const;
16733 else if (mode == DFmode)
16734 copysign_insn = gen_copysigndf3_const;
16736 copysign_insn = gen_copysigntf3_const;
16738 emit_insn (copysign_insn (dest, op0, op1, mask));
16742 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
16744 nmask = ix86_build_signbit_mask (vmode, 0, 1);
16745 mask = ix86_build_signbit_mask (vmode, 0, 0);
16747 if (mode == SFmode)
16748 copysign_insn = gen_copysignsf3_var;
16749 else if (mode == DFmode)
16750 copysign_insn = gen_copysigndf3_var;
16752 copysign_insn = gen_copysigntf3_var;
16754 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
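/* For reference (illustrative), the identity the copysign expansions
   above implement, with signmask holding only the sign bit of each
   element:

       copysign (x, y) == (x & ~signmask) | (y & signmask)

   The _const variant pre-computes |x| at expand time, so its splitter
   only needs an AND with the sign mask and an IOR of the constant.  */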
16758 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
16759 be a constant, and so has already been expanded into a vector constant. */
16762 ix86_split_copysign_const (rtx operands[])
16764 enum machine_mode mode, vmode;
16765 rtx dest, op0, mask, x;
16767 dest = operands[0];
16769 mask = operands[3];
16771 mode = GET_MODE (dest);
16772 vmode = GET_MODE (mask);
16774 dest = simplify_gen_subreg (vmode, dest, mode, 0);
16775 x = gen_rtx_AND (vmode, dest, mask);
16776 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16778 if (op0 != CONST0_RTX (vmode))
16780 x = gen_rtx_IOR (vmode, dest, op0);
16781 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16785 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
16786 so we have to do two masks. */
16789 ix86_split_copysign_var (rtx operands[])
16791 enum machine_mode mode, vmode;
16792 rtx dest, scratch, op0, op1, mask, nmask, x;
16794 dest = operands[0];
16795 scratch = operands[1];
16798 nmask = operands[4];
16799 mask = operands[5];
16801 mode = GET_MODE (dest);
16802 vmode = GET_MODE (mask);
16804 if (rtx_equal_p (op0, op1))
16806 /* Shouldn't happen often (it's useless, obviously), but when it does
16807 we'd generate incorrect code if we continue below. */
16808 emit_move_insn (dest, op0);
16812 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
16814 gcc_assert (REGNO (op1) == REGNO (scratch));
16816 x = gen_rtx_AND (vmode, scratch, mask);
16817 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
16820 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
16821 x = gen_rtx_NOT (vmode, dest);
16822 x = gen_rtx_AND (vmode, x, op0);
16823 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16827 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
16829 x = gen_rtx_AND (vmode, scratch, mask);
16831 else /* alternative 2,4 */
16833 gcc_assert (REGNO (mask) == REGNO (scratch));
16834 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
16835 x = gen_rtx_AND (vmode, scratch, op1);
16837 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
16839 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
16841 dest = simplify_gen_subreg (vmode, op0, mode, 0);
16842 x = gen_rtx_AND (vmode, dest, nmask);
16844 else /* alternative 3,4 */
16846 gcc_assert (REGNO (nmask) == REGNO (dest));
16848 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
16849 x = gen_rtx_AND (vmode, dest, op0);
16851 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16854 x = gen_rtx_IOR (vmode, dest, scratch);
16855 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16858 /* Return TRUE or FALSE depending on whether the first SET in INSN
16859 has source and destination with matching CC modes, and that the
16860 CC mode is at least as constrained as REQ_MODE. */
16863 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
16866 enum machine_mode set_mode;
16868 set = PATTERN (insn);
16869 if (GET_CODE (set) == PARALLEL)
16870 set = XVECEXP (set, 0, 0);
16871 gcc_assert (GET_CODE (set) == SET);
16872 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
16874 set_mode = GET_MODE (SET_DEST (set));
16878 if (req_mode != CCNOmode
16879 && (req_mode != CCmode
16880 || XEXP (SET_SRC (set), 1) != const0_rtx))
16884 if (req_mode == CCGCmode)
16888 if (req_mode == CCGOCmode || req_mode == CCNOmode)
16892 if (req_mode == CCZmode)
16902 if (set_mode != req_mode)
16907 gcc_unreachable ();
16910 return GET_MODE (SET_SRC (set)) == set_mode;
16913 /* Generate insn patterns to do an integer compare of OPERANDS. */
16916 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
16918 enum machine_mode cmpmode;
16921 cmpmode = SELECT_CC_MODE (code, op0, op1);
16922 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
16924 /* This is very simple, but making the interface the same as in the
16925 FP case makes the rest of the code easier. */
16926 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
16927 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
16929 /* Return the test that should be put into the flags user, i.e.
16930 the bcc, scc, or cmov instruction. */
16931 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
16934 /* Figure out whether to use ordered or unordered fp comparisons.
16935 Return the appropriate mode to use. */
16938 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
16940 /* ??? In order to make all comparisons reversible, we do all comparisons
16941 non-trapping when compiling for IEEE. Once gcc is able to distinguish
   all forms of trapping and nontrapping comparisons, we can make inequality
16943 comparisons trapping again, since it results in better code when using
16944 FCOM based compares. */
16945 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
16949 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
16951 enum machine_mode mode = GET_MODE (op0);
16953 if (SCALAR_FLOAT_MODE_P (mode))
16955 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
16956 return ix86_fp_compare_mode (code);
16961 /* Only zero flag is needed. */
16962 case EQ: /* ZF=0 */
16963 case NE: /* ZF!=0 */
16965 /* Codes needing carry flag. */
16966 case GEU: /* CF=0 */
16967 case LTU: /* CF=1 */
16968 /* Detect overflow checks. They need just the carry flag. */
16969 if (GET_CODE (op0) == PLUS
16970 && rtx_equal_p (op1, XEXP (op0, 0)))
16974 case GTU: /* CF=0 & ZF=0 */
16975 case LEU: /* CF=1 | ZF=1 */
16976 /* Detect overflow checks. They need just the carry flag. */
16977 if (GET_CODE (op0) == MINUS
16978 && rtx_equal_p (op1, XEXP (op0, 0)))
16982 /* Codes possibly doable only with sign flag when
16983 comparing against zero. */
16984 case GE: /* SF=OF or SF=0 */
16985 case LT: /* SF<>OF or SF=1 */
16986 if (op1 == const0_rtx)
      /* For other cases the carry flag is not required.  */
      /* Codes doable only with the sign flag when comparing
	 against zero, but we miss the jump instruction for it,
	 so we need to use relational tests against overflow
	 that thus needs to be zero.  */
16995 case GT: /* ZF=0 & SF=OF */
16996 case LE: /* ZF=1 | SF<>OF */
16997 if (op1 == const0_rtx)
      /* strcmp patterns do (use flags) and combine may ask us for a proper
	 mode.  */
17006 gcc_unreachable ();
17010 /* Return the fixed registers used for condition codes. */
17013 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
17020 /* If two condition code modes are compatible, return a condition code
   mode which is compatible with both.  Otherwise, return
   VOIDmode.  */
17024 static enum machine_mode
17025 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
17030 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
17033 if ((m1 == CCGCmode && m2 == CCGOCmode)
17034 || (m1 == CCGOCmode && m2 == CCGCmode))
17040 gcc_unreachable ();
      /* These are only compatible with themselves, which we already
	 know.  */
/* Return a comparison we can do that is equivalent to
17078 swap_condition (code) apart possibly from orderedness.
17079 But, never change orderedness if TARGET_IEEE_FP, returning
17080 UNKNOWN in that case if necessary. */
17082 static enum rtx_code
17083 ix86_fp_swap_condition (enum rtx_code code)
17087 case GT: /* GTU - CF=0 & ZF=0 */
17088 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
17089 case GE: /* GEU - CF=0 */
17090 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
17091 case UNLT: /* LTU - CF=1 */
17092 return TARGET_IEEE_FP ? UNKNOWN : GT;
17093 case UNLE: /* LEU - CF=1 | ZF=1 */
17094 return TARGET_IEEE_FP ? UNKNOWN : GE;
17096 return swap_condition (code);
/* Return cost of comparison CODE using the best strategy for performance.
   All following functions use the number of instructions as a cost metric.
   In the future this should be tweaked to compute bytes for optimize_size
   and take into account performance of various instructions on various
   CPUs.  */
17106 ix86_fp_comparison_cost (enum rtx_code code)
17110 /* The cost of code using bit-twiddling on %ah. */
17127 arith_cost = TARGET_IEEE_FP ? 5 : 4;
17131 arith_cost = TARGET_IEEE_FP ? 6 : 4;
17134 gcc_unreachable ();
17137 switch (ix86_fp_comparison_strategy (code))
17139 case IX86_FPCMP_COMI:
17140 return arith_cost > 4 ? 3 : 2;
17141 case IX86_FPCMP_SAHF:
17142 return arith_cost > 4 ? 4 : 3;
/* Return strategy to use for floating-point.  We assume that fcomi is always
   preferable where available, since that is also true when looking at size
   (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test).  */
17152 enum ix86_fpcmp_strategy
17153 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
17155 /* Do fcomi/sahf based test when profitable. */
17158 return IX86_FPCMP_COMI;
17160 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_function_for_size_p (cfun)))
17161 return IX86_FPCMP_SAHF;
17163 return IX86_FPCMP_ARITH;
17166 /* Swap, force into registers, or otherwise massage the two operands
17167 to a fp comparison. The operands are updated in place; the new
17168 comparison code is returned. */
17170 static enum rtx_code
17171 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
17173 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
17174 rtx op0 = *pop0, op1 = *pop1;
17175 enum machine_mode op_mode = GET_MODE (op0);
17176 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
17178 /* All of the unordered compare instructions only work on registers.
17179 The same is true of the fcomi compare instructions. The XFmode
17180 compare instructions require registers except when comparing
     against zero or when converting operand 1 from fixed point to
     floating point.  */
17185 && (fpcmp_mode == CCFPUmode
17186 || (op_mode == XFmode
17187 && ! (standard_80387_constant_p (op0) == 1
17188 || standard_80387_constant_p (op1) == 1)
17189 && GET_CODE (op1) != FLOAT)
17190 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
17192 op0 = force_reg (op_mode, op0);
17193 op1 = force_reg (op_mode, op1);
17197 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
17198 things around if they appear profitable, otherwise force op0
17199 into a register. */
17201 if (standard_80387_constant_p (op0) == 0
17203 && ! (standard_80387_constant_p (op1) == 0
17206 enum rtx_code new_code = ix86_fp_swap_condition (code);
17207 if (new_code != UNKNOWN)
17210 tmp = op0, op0 = op1, op1 = tmp;
17216 op0 = force_reg (op_mode, op0);
17218 if (CONSTANT_P (op1))
17220 int tmp = standard_80387_constant_p (op1);
17222 op1 = validize_mem (force_const_mem (op_mode, op1));
17226 op1 = force_reg (op_mode, op1);
17229 op1 = force_reg (op_mode, op1);
17233 /* Try to rearrange the comparison to make it cheaper. */
17234 if (ix86_fp_comparison_cost (code)
17235 > ix86_fp_comparison_cost (swap_condition (code))
17236 && (REG_P (op1) || can_create_pseudo_p ()))
17239 tmp = op0, op0 = op1, op1 = tmp;
17240 code = swap_condition (code);
17242 op0 = force_reg (op_mode, op0);
17250 /* Convert comparison codes we use to represent FP comparison to integer
   code that will result in proper branch.  Return UNKNOWN if no such code
   is available.  */
17255 ix86_fp_compare_code_to_integer (enum rtx_code code)
17284 /* Generate insn patterns to do a floating point compare of OPERANDS. */
17287 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
17289 enum machine_mode fpcmp_mode, intcmp_mode;
17292 fpcmp_mode = ix86_fp_compare_mode (code);
17293 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
17295 /* Do fcomi/sahf based test when profitable. */
17296 switch (ix86_fp_comparison_strategy (code))
17298 case IX86_FPCMP_COMI:
17299 intcmp_mode = fpcmp_mode;
17300 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
17301 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
17306 case IX86_FPCMP_SAHF:
17307 intcmp_mode = fpcmp_mode;
17308 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
17309 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
17313 scratch = gen_reg_rtx (HImode);
17314 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
17315 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
17318 case IX86_FPCMP_ARITH:
17319 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
17320 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
17321 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
17323 scratch = gen_reg_rtx (HImode);
17324 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
17326 /* In the unordered case, we have to check C2 for NaN's, which
17327 doesn't happen to work out to anything nice combination-wise.
17328 So do some bit twiddling on the value we've got in AH to come
17329 up with an appropriate set of condition codes. */
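/* For reference (illustrative): after fnstsw, the x87 condition bits
   sit in AH as C0 == 0x01, C2 == 0x04 and C3 == 0x40.  The 0x45 masks
   used below therefore select C0|C2|C3, and 0x04 tests C2 alone,
   which is set for unordered results.  */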
17331 intcmp_mode = CCNOmode;
17336 if (code == GT || !TARGET_IEEE_FP)
17338 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
17343 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17344 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
17345 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
17346 intcmp_mode = CCmode;
17352 if (code == LT && TARGET_IEEE_FP)
17354 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17355 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
17356 intcmp_mode = CCmode;
17361 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
17367 if (code == GE || !TARGET_IEEE_FP)
17369 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
17374 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17375 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
17381 if (code == LE && TARGET_IEEE_FP)
17383 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17384 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
17385 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
17386 intcmp_mode = CCmode;
17391 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
17397 if (code == EQ && TARGET_IEEE_FP)
17399 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17400 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
17401 intcmp_mode = CCmode;
17406 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
17412 if (code == NE && TARGET_IEEE_FP)
17414 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17415 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
17421 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
17427 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
17431 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
17436 gcc_unreachable ();
17444 /* Return the test that should be put into the flags user, i.e.
17445 the bcc, scc, or cmov instruction. */
17446 return gen_rtx_fmt_ee (code, VOIDmode,
17447 gen_rtx_REG (intcmp_mode, FLAGS_REG),
17452 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
17456 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
17457 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
17459 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
17461 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
17462 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
17465 ret = ix86_expand_int_compare (code, op0, op1);
17471 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
17473 enum machine_mode mode = GET_MODE (op0);
17485 tmp = ix86_expand_compare (code, op0, op1);
17486 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
17487 gen_rtx_LABEL_REF (VOIDmode, label),
17489 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
17496 /* Expand DImode branch into multiple compare+branch. */
17498 rtx lo[2], hi[2], label2;
17499 enum rtx_code code1, code2, code3;
17500 enum machine_mode submode;
17502 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
17504 tmp = op0, op0 = op1, op1 = tmp;
17505 code = swap_condition (code);
17508 split_double_mode (mode, &op0, 1, lo+0, hi+0);
17509 split_double_mode (mode, &op1, 1, lo+1, hi+1);
17511 submode = mode == DImode ? SImode : DImode;
17513 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
17514 avoid two branches. This costs one extra insn, so disable when
17515 optimizing for size. */
17517 if ((code == EQ || code == NE)
17518 && (!optimize_insn_for_size_p ()
17519 || hi[1] == const0_rtx || lo[1] == const0_rtx))
17524 if (hi[1] != const0_rtx)
17525 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
17526 NULL_RTX, 0, OPTAB_WIDEN);
17529 if (lo[1] != const0_rtx)
17530 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
17531 NULL_RTX, 0, OPTAB_WIDEN);
17533 tmp = expand_binop (submode, ior_optab, xor1, xor0,
17534 NULL_RTX, 0, OPTAB_WIDEN);
17536 ix86_expand_branch (code, tmp, const0_rtx, label);
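/* Illustrative shape of the equality fast path above (a sketch, not
   from the original sources), for a DImode "a == b" compare on ia32:

       movl  hi(a), %eax
       xorl  hi(b), %eax
       movl  lo(a), %edx
       xorl  lo(b), %edx
       orl   %edx, %eax
       je    .Llabel            ; one branch instead of two  */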
17540 /* Otherwise, if we are doing less-than or greater-or-equal-than,
17541 op1 is a constant and the low word is zero, then we can just
17542 examine the high word. Similarly for low word -1 and
17543 less-or-equal-than or greater-than. */
17545 if (CONST_INT_P (hi[1]))
17548 case LT: case LTU: case GE: case GEU:
17549 if (lo[1] == const0_rtx)
17551 ix86_expand_branch (code, hi[0], hi[1], label);
17555 case LE: case LEU: case GT: case GTU:
17556 if (lo[1] == constm1_rtx)
17558 ix86_expand_branch (code, hi[0], hi[1], label);
17566 /* Otherwise, we need two or three jumps. */
17568 label2 = gen_label_rtx ();
17571 code2 = swap_condition (code);
17572 code3 = unsigned_condition (code);
17576 case LT: case GT: case LTU: case GTU:
17579 case LE: code1 = LT; code2 = GT; break;
17580 case GE: code1 = GT; code2 = LT; break;
17581 case LEU: code1 = LTU; code2 = GTU; break;
17582 case GEU: code1 = GTU; code2 = LTU; break;
17584 case EQ: code1 = UNKNOWN; code2 = NE; break;
17585 case NE: code2 = UNKNOWN; break;
17588 gcc_unreachable ();
17593 * if (hi(a) < hi(b)) goto true;
17594 * if (hi(a) > hi(b)) goto false;
17595 * if (lo(a) < lo(b)) goto true;
17599 if (code1 != UNKNOWN)
17600 ix86_expand_branch (code1, hi[0], hi[1], label);
17601 if (code2 != UNKNOWN)
17602 ix86_expand_branch (code2, hi[0], hi[1], label2);
17604 ix86_expand_branch (code3, lo[0], lo[1], label);
17606 if (code2 != UNKNOWN)
17607 emit_label (label2);
17612 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
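/* Editor's illustration (not part of GCC): the hi/lo three-branch scheme
   sketched in the comments above (hi(a) < hi(b) -> true; hi(a) > hi(b) ->
   false; else compare the low words unsigned), shown for a signed 64-bit
   "less than" built from 32-bit halves.  Hypothetical standalone code,
   kept under #if 0.  */
#if 0
#include <stdint.h>

static int
di_less (int64_t a, int64_t b)
{
  int32_t hi0 = (int32_t) (a >> 32), hi1 = (int32_t) (b >> 32);
  uint32_t lo0 = (uint32_t) a, lo1 = (uint32_t) b;

  if (hi0 < hi1)		/* code1: signed LT on the high words.  */
    return 1;
  if (hi0 > hi1)		/* code2: signed GT on the high words.  */
    return 0;
  return lo0 < lo1;		/* code3: unsigned LT on the low words.  */
}
#endif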
17617 /* Split branch based on floating point condition. */
17619 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
17620 rtx target1, rtx target2, rtx tmp, rtx pushed)
17625 if (target2 != pc_rtx)
17628 code = reverse_condition_maybe_unordered (code);
17633 condition = ix86_expand_fp_compare (code, op1, op2,
17636 /* Remove pushed operand from stack. */
17638 ix86_free_from_memory (GET_MODE (pushed));
17640 i = emit_jump_insn (gen_rtx_SET
17642 gen_rtx_IF_THEN_ELSE (VOIDmode,
17643 condition, target1, target2)));
17644 if (split_branch_probability >= 0)
17645 add_reg_note (i, REG_BR_PROB, GEN_INT (split_branch_probability));
17649 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
17653 gcc_assert (GET_MODE (dest) == QImode);
17655 ret = ix86_expand_compare (code, op0, op1);
17656 PUT_MODE (ret, QImode);
17657 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
17660 /* Expand a comparison setting or clearing the carry flag. Return true
17661 when successful, and set POP for the operation. */
17663 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
17665 enum machine_mode mode =
17666 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
17668 /* Do not handle double-mode compares that go through the special path. */
17669 if (mode == (TARGET_64BIT ? TImode : DImode))
17672 if (SCALAR_FLOAT_MODE_P (mode))
17674 rtx compare_op, compare_seq;
17676 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
17678 /* Shortcut: the following common codes never translate
17679 into carry-flag compares. */
17680 if (code == EQ || code == NE || code == UNEQ || code == LTGT
17681 || code == ORDERED || code == UNORDERED)
17684 /* These comparisons require zero flag; swap operands so they won't. */
17685 if ((code == GT || code == UNLE || code == LE || code == UNGT)
17686 && !TARGET_IEEE_FP)
17691 code = swap_condition (code);
17694 /* Try to expand the comparison and verify that we end up with
17695 a carry-flag-based comparison. This fails to be true only when
17696 we decide to expand the comparison using arithmetic, which is
17697 not a common scenario. */
17699 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
17700 compare_seq = get_insns ();
17703 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
17704 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
17705 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
17707 code = GET_CODE (compare_op);
17709 if (code != LTU && code != GEU)
17712 emit_insn (compare_seq);
17717 if (!INTEGRAL_MODE_P (mode))
17726 /* Convert a==0 into (unsigned)a<1. */
17729 if (op1 != const0_rtx)
17732 code = (code == EQ ? LTU : GEU);
17735 /* Convert a>b into b<a or a>=b+1. */
17738 if (CONST_INT_P (op1))
17740 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
17741 /* Bail out on overflow. We could still swap the operands, but that
17742 would force loading the constant into a register. */
17743 if (op1 == const0_rtx
17744 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
17746 code = (code == GTU ? GEU : LTU);
17753 code = (code == GTU ? LTU : GEU);
17757 /* Convert a>=0 into (unsigned)a<0x80000000. */
17760 if (mode == DImode || op1 != const0_rtx)
17762 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
17763 code = (code == LT ? GEU : LTU);
17767 if (mode == DImode || op1 != constm1_rtx)
17769 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
17770 code = (code == LE ? GEU : LTU);
17776 /* Swapping operands may cause a constant to appear as the first operand. */
17777 if (!nonimmediate_operand (op0, VOIDmode))
17779 if (!can_create_pseudo_p ())
17781 op0 = force_reg (mode, op0);
17783 *pop = ix86_expand_compare (code, op0, op1);
17784 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
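/* Editor's illustration (not part of GCC): the three rewrites performed
   above, restated as C identities.  A sketch only, assuming 32-bit
   operands; kept under #if 0.  */
#if 0
#include <assert.h>
#include <stdint.h>

static void
carry_flag_identities (uint32_t a, uint32_t b, int32_t s)
{
  /* a == 0 becomes the carry-setting compare (unsigned) a < 1.  */
  assert ((a == 0) == (a < 1u));

  /* a > b becomes a >= b + 1 when b + 1 does not overflow.  */
  if (b != UINT32_MAX)
    assert ((a > b) == (a >= b + 1));

  /* s >= 0 becomes the unsigned test (unsigned) s < 0x80000000.  */
  assert ((s >= 0) == ((uint32_t) s < 0x80000000u));
}
#endif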
17789 ix86_expand_int_movcc (rtx operands[])
17791 enum rtx_code code = GET_CODE (operands[1]), compare_code;
17792 rtx compare_seq, compare_op;
17793 enum machine_mode mode = GET_MODE (operands[0]);
17794 bool sign_bit_compare_p = false;
17795 rtx op0 = XEXP (operands[1], 0);
17796 rtx op1 = XEXP (operands[1], 1);
17799 compare_op = ix86_expand_compare (code, op0, op1);
17800 compare_seq = get_insns ();
17803 compare_code = GET_CODE (compare_op);
17805 if ((op1 == const0_rtx && (code == GE || code == LT))
17806 || (op1 == constm1_rtx && (code == GT || code == LE)))
17807 sign_bit_compare_p = true;
17809 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
17810 HImode insns, we'd be swallowed in word prefix ops. */
17812 if ((mode != HImode || TARGET_FAST_PREFIX)
17813 && (mode != (TARGET_64BIT ? TImode : DImode))
17814 && CONST_INT_P (operands[2])
17815 && CONST_INT_P (operands[3]))
17817 rtx out = operands[0];
17818 HOST_WIDE_INT ct = INTVAL (operands[2]);
17819 HOST_WIDE_INT cf = INTVAL (operands[3]);
17820 HOST_WIDE_INT diff;
17823 /* Sign bit compares are better done using shifts than by using sbb. */
17825 if (sign_bit_compare_p
17826 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
17828 /* Detect overlap between destination and compare sources. */
17831 if (!sign_bit_compare_p)
17834 bool fpcmp = false;
17836 compare_code = GET_CODE (compare_op);
17838 flags = XEXP (compare_op, 0);
17840 if (GET_MODE (flags) == CCFPmode
17841 || GET_MODE (flags) == CCFPUmode)
17845 = ix86_fp_compare_code_to_integer (compare_code);
17848 /* To simplify the rest of the code, restrict to the GEU case. */
17849 if (compare_code == LTU)
17851 HOST_WIDE_INT tmp = ct;
17854 compare_code = reverse_condition (compare_code);
17855 code = reverse_condition (code);
17860 PUT_CODE (compare_op,
17861 reverse_condition_maybe_unordered
17862 (GET_CODE (compare_op)));
17864 PUT_CODE (compare_op,
17865 reverse_condition (GET_CODE (compare_op)));
17869 if (reg_overlap_mentioned_p (out, op0)
17870 || reg_overlap_mentioned_p (out, op1))
17871 tmp = gen_reg_rtx (mode);
17873 if (mode == DImode)
17874 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
17876 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
17877 flags, compare_op));
17881 if (code == GT || code == GE)
17882 code = reverse_condition (code);
17885 HOST_WIDE_INT tmp = ct;
17890 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
17903 tmp = expand_simple_binop (mode, PLUS,
17905 copy_rtx (tmp), 1, OPTAB_DIRECT);
17916 tmp = expand_simple_binop (mode, IOR,
17918 copy_rtx (tmp), 1, OPTAB_DIRECT);
17920 else if (diff == -1 && ct)
17930 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
17932 tmp = expand_simple_binop (mode, PLUS,
17933 copy_rtx (tmp), GEN_INT (cf),
17934 copy_rtx (tmp), 1, OPTAB_DIRECT);
17942 * andl cf - ct, dest
17952 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
17955 tmp = expand_simple_binop (mode, AND,
17957 gen_int_mode (cf - ct, mode),
17958 copy_rtx (tmp), 1, OPTAB_DIRECT);
17960 tmp = expand_simple_binop (mode, PLUS,
17961 copy_rtx (tmp), GEN_INT (ct),
17962 copy_rtx (tmp), 1, OPTAB_DIRECT);
17965 if (!rtx_equal_p (tmp, out))
17966 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
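/* Editor's illustration (not part of GCC): the sbb-based branchless select
   built above.  After a compare, "sbb reg,reg" materializes the carry as 0
   or -1; masking and adding then picks one of two constants.  Hypothetical
   standalone sketch, kept under #if 0.  */
#if 0
#include <stdint.h>

static uint32_t
select_via_borrow (uint32_t a, uint32_t b, uint32_t ct, uint32_t cf)
{
  /* 0xffffffff when a <u b (carry set), 0 otherwise -- what the
     x86_mov*cc_0_m1 patterns produce from the flags.  */
  uint32_t mask = (a < b) ? 0xffffffffu : 0u;

  /* (mask & (ct - cf)) + cf yields ct when mask is -1, cf when 0.  */
  return (mask & (ct - cf)) + cf;
}
#endif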
17973 enum machine_mode cmp_mode = GET_MODE (op0);
17976 tmp = ct, ct = cf, cf = tmp;
17979 if (SCALAR_FLOAT_MODE_P (cmp_mode))
17981 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
17983 /* We may be reversing an unordered compare to a normal compare, which
17984 is not valid in general (we may convert a non-trapping condition
17985 to a trapping one); however, on i386 we currently emit all
17986 comparisons unordered. */
17987 compare_code = reverse_condition_maybe_unordered (compare_code);
17988 code = reverse_condition_maybe_unordered (code);
17992 compare_code = reverse_condition (compare_code);
17993 code = reverse_condition (code);
17997 compare_code = UNKNOWN;
17998 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
17999 && CONST_INT_P (op1))
18001 if (op1 == const0_rtx
18002 && (code == LT || code == GE))
18003 compare_code = code;
18004 else if (op1 == constm1_rtx)
18008 else if (code == GT)
18013 /* Optimize dest = (op0 < 0) ? -1 : cf. */
18014 if (compare_code != UNKNOWN
18015 && GET_MODE (op0) == GET_MODE (out)
18016 && (cf == -1 || ct == -1))
18018 /* If lea code below could be used, only optimize
18019 if it results in a 2 insn sequence. */
18021 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
18022 || diff == 3 || diff == 5 || diff == 9)
18023 || (compare_code == LT && ct == -1)
18024 || (compare_code == GE && cf == -1))
18027 * notl op1 (if necessary)
18035 code = reverse_condition (code);
18038 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
18040 out = expand_simple_binop (mode, IOR,
18042 out, 1, OPTAB_DIRECT);
18043 if (out != operands[0])
18044 emit_move_insn (operands[0], out);
18051 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
18052 || diff == 3 || diff == 5 || diff == 9)
18053 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
18055 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
18061 * lea cf(dest*(ct-cf)),dest
18065 * This also catches the degenerate setcc-only case.
18071 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
18074 /* On x86_64 the lea instruction operates on Pmode, so we need
18075 to do the arithmetic in the proper mode to match. */
18077 tmp = copy_rtx (out);
18081 out1 = copy_rtx (out);
18082 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
18086 tmp = gen_rtx_PLUS (mode, tmp, out1);
18092 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
18095 if (!rtx_equal_p (tmp, out))
18098 out = force_operand (tmp, copy_rtx (out));
18100 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
18102 if (!rtx_equal_p (out, operands[0]))
18103 emit_move_insn (operands[0], copy_rtx (out));
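/* Editor's illustration (not part of GCC): the lea-based select used
   above.  setcc leaves 0 or 1 in OUT; multiplying by the difference of
   the two constants and adding the base then needs only a single lea
   when the difference is 1, 2, 3, 4, 5, 8 or 9.  Sketch only, kept
   under #if 0.  */
#if 0
#include <stdint.h>

static uint32_t
select_via_lea (int cond, uint32_t ct, uint32_t cf)
{
  uint32_t out = cond ? 1u : 0u;	/* setcc */
  return out * (ct - cf) + cf;		/* lea cf(out*(ct-cf)), dest */
}
#endif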
18109 * General case: Jumpful:
18110 * xorl dest,dest cmpl op1, op2
18111 * cmpl op1, op2 movl ct, dest
18112 * setcc dest jcc 1f
18113 * decl dest movl cf, dest
18114 * andl (cf-ct),dest 1:
18117 * Size 20. Size 14.
18119 * This is reasonably steep, but branch mispredict costs are
18120 * high on modern CPUs, so consider failing only if optimizing for space. */
18124 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
18125 && BRANCH_COST (optimize_insn_for_speed_p (),
18130 enum machine_mode cmp_mode = GET_MODE (op0);
18135 if (SCALAR_FLOAT_MODE_P (cmp_mode))
18137 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
18139 /* We may be reversing an unordered compare to a normal compare,
18140 which is not valid in general (we may convert a non-trapping
18141 condition to a trapping one); however, on i386 we currently
18142 emit all comparisons unordered. */
18143 code = reverse_condition_maybe_unordered (code);
18147 code = reverse_condition (code);
18148 if (compare_code != UNKNOWN)
18149 compare_code = reverse_condition (compare_code);
18153 if (compare_code != UNKNOWN)
18155 /* notl op1 (if needed)
18160 For x < 0 (resp. x <= -1) there will be no notl,
18161 so if possible swap the constants to get rid of the complement.
18163 True/false will be -1/0 while code below (store flag
18164 followed by decrement) is 0/-1, so the constants need
18165 to be exchanged once more. */
18167 if (compare_code == GE || !cf)
18169 code = reverse_condition (code);
18174 HOST_WIDE_INT tmp = cf;
18179 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
18183 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
18185 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
18187 copy_rtx (out), 1, OPTAB_DIRECT);
18190 out = expand_simple_binop (mode, AND, copy_rtx (out),
18191 gen_int_mode (cf - ct, mode),
18192 copy_rtx (out), 1, OPTAB_DIRECT);
18194 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
18195 copy_rtx (out), 1, OPTAB_DIRECT);
18196 if (!rtx_equal_p (out, operands[0]))
18197 emit_move_insn (operands[0], copy_rtx (out));
18203 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
18205 /* Try a few more things with specific constants and a variable. */
18208 rtx var, orig_out, out, tmp;
18210 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
18213 /* If one of the two operands is an interesting constant, load a
18214 constant with the above and mask it in with a logical operation. */
18216 if (CONST_INT_P (operands[2]))
18219 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
18220 operands[3] = constm1_rtx, op = and_optab;
18221 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
18222 operands[3] = const0_rtx, op = ior_optab;
18226 else if (CONST_INT_P (operands[3]))
18229 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
18230 operands[2] = constm1_rtx, op = and_optab;
18231 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
18232 operands[2] = const0_rtx, op = ior_optab;
18239 orig_out = operands[0];
18240 tmp = gen_reg_rtx (mode);
18243 /* Recurse to get the constant loaded. */
18244 if (ix86_expand_int_movcc (operands) == 0)
18247 /* Mask in the interesting variable. */
18248 out = expand_binop (mode, op, var, tmp, orig_out, 0,
18250 if (!rtx_equal_p (out, orig_out))
18251 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
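/* Editor's illustration (not part of GCC): the masking trick above.  Once
   one arm of the conditional is 0 or -1, the conditional move reduces to
   a single AND or OR with the variable arm.  Hypothetical sketch, kept
   under #if 0.  */
#if 0
#include <stdint.h>

static uint32_t
mask_in_variable (int cond, uint32_t var)
{
  uint32_t m = -(uint32_t) (cond != 0);	/* 0 or all ones, via the movcc.  */

  /* AND form: cond ? var : 0.  The OR form, var | ~m, gives
     cond ? var : -1 analogously.  */
  return var & m;
}
#endif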
18257 * For comparison with above,
18267 if (! nonimmediate_operand (operands[2], mode))
18268 operands[2] = force_reg (mode, operands[2]);
18269 if (! nonimmediate_operand (operands[3], mode))
18270 operands[3] = force_reg (mode, operands[3]);
18272 if (! register_operand (operands[2], VOIDmode)
18274 || ! register_operand (operands[3], VOIDmode)))
18275 operands[2] = force_reg (mode, operands[2]);
18278 && ! register_operand (operands[3], VOIDmode))
18279 operands[3] = force_reg (mode, operands[3]);
18281 emit_insn (compare_seq);
18282 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
18283 gen_rtx_IF_THEN_ELSE (mode,
18284 compare_op, operands[2],
18289 /* Swap, force into registers, or otherwise massage the two operands
18290 to an sse comparison with a mask result. Thus we differ a bit from
18291 ix86_prepare_fp_compare_args which expects to produce a flags result.
18293 The DEST operand exists to help determine whether to commute commutative
18294 operators. The POP0/POP1 operands are updated in place. The new
18295 comparison code is returned, or UNKNOWN if not implementable. */
18297 static enum rtx_code
18298 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
18299 rtx *pop0, rtx *pop1)
18307 /* We have no LTGT as an operator. We could implement it with
18308 NE & ORDERED, but this requires an extra temporary. It's
18309 not clear that it's worth it. */
18316 /* These are supported directly. */
18323 /* For commutative operators, try to canonicalize the destination
18324 operand to be first in the comparison - this helps reload to
18325 avoid extra moves. */
18326 if (!dest || !rtx_equal_p (dest, *pop1))
18334 /* These are not supported directly. Swap the comparison operands
18335 to transform into something that is supported. */
18339 code = swap_condition (code);
18343 gcc_unreachable ();
18349 /* Detect conditional moves that exactly match min/max operational
18350 semantics. Note that this is IEEE safe, as long as we don't
18351 interchange the operands.
18353 Returns FALSE if this conditional move doesn't match a MIN/MAX,
18354 and TRUE if the operation is successful and instructions are emitted. */
18357 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
18358 rtx cmp_op1, rtx if_true, rtx if_false)
18360 enum machine_mode mode;
18366 else if (code == UNGE)
18369 if_true = if_false;
18375 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
18377 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
18382 mode = GET_MODE (dest);
18384 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
18385 but MODE may be a vector mode and thus not appropriate. */
18386 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
18388 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
18391 if_true = force_reg (mode, if_true);
18392 v = gen_rtvec (2, if_true, if_false);
18393 tmp = gen_rtx_UNSPEC (mode, v, u);
18397 code = is_min ? SMIN : SMAX;
18398 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
18401 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
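/* Editor's illustration (not part of GCC): why operand order matters for
   the IEEE path above.  minss/minps compute a < b ? a : b, returning the
   second operand for NaNs and for +0.0/-0.0 ties, so the operands must
   not be interchanged.  Scalar sketch only, kept under #if 0.  */
#if 0
static float
sse_min_like (float a, float b)
{
  return a < b ? a : b;	/* NaN compares false, so b is returned.  */
}
#endif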
18405 /* Expand an sse vector comparison. Return the register with the result. */
18408 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
18409 rtx op_true, rtx op_false)
18411 enum machine_mode mode = GET_MODE (dest);
18414 cmp_op0 = force_reg (mode, cmp_op0);
18415 if (!nonimmediate_operand (cmp_op1, mode))
18416 cmp_op1 = force_reg (mode, cmp_op1);
18419 || reg_overlap_mentioned_p (dest, op_true)
18420 || reg_overlap_mentioned_p (dest, op_false))
18421 dest = gen_reg_rtx (mode);
18423 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
18424 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18429 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
18430 operations. This is used for both scalar and vector conditional moves. */
18433 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
18435 enum machine_mode mode = GET_MODE (dest);
18438 if (op_false == CONST0_RTX (mode))
18440 op_true = force_reg (mode, op_true);
18441 x = gen_rtx_AND (mode, cmp, op_true);
18442 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18444 else if (op_true == CONST0_RTX (mode))
18446 op_false = force_reg (mode, op_false);
18447 x = gen_rtx_NOT (mode, cmp);
18448 x = gen_rtx_AND (mode, x, op_false);
18449 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18451 else if (TARGET_XOP)
18453 rtx pcmov = gen_rtx_SET (mode, dest,
18454 gen_rtx_IF_THEN_ELSE (mode, cmp,
18461 op_true = force_reg (mode, op_true);
18462 op_false = force_reg (mode, op_false);
18464 t2 = gen_reg_rtx (mode);
18466 t3 = gen_reg_rtx (mode);
18470 x = gen_rtx_AND (mode, op_true, cmp);
18471 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
18473 x = gen_rtx_NOT (mode, cmp);
18474 x = gen_rtx_AND (mode, x, op_false);
18475 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
18477 x = gen_rtx_IOR (mode, t3, t2);
18478 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
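/* Editor's illustration (not part of GCC): the and/andnot/or blend emitted
   above, with the comparison result as an all-zeros/all-ones lane mask.
   Shown on a single 32-bit lane; hypothetical sketch under #if 0.  */
#if 0
#include <stdint.h>

static uint32_t
sse_movcc_like (uint32_t cmp, uint32_t op_true, uint32_t op_false)
{
  /* t2 = cmp & op_true; t3 = ~cmp & op_false; dest = t3 | t2.  */
  return (cmp & op_true) | (~cmp & op_false);
}
#endif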
18482 /* Expand a floating-point conditional move. Return true if successful. */
18485 ix86_expand_fp_movcc (rtx operands[])
18487 enum machine_mode mode = GET_MODE (operands[0]);
18488 enum rtx_code code = GET_CODE (operands[1]);
18489 rtx tmp, compare_op;
18490 rtx op0 = XEXP (operands[1], 0);
18491 rtx op1 = XEXP (operands[1], 1);
18493 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
18495 enum machine_mode cmode;
18497 /* Since we've no cmove for sse registers, don't force bad register
18498 allocation just to gain access to it. Deny movcc when the
18499 comparison mode doesn't match the move mode. */
18500 cmode = GET_MODE (op0);
18501 if (cmode == VOIDmode)
18502 cmode = GET_MODE (op1);
18506 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
18507 if (code == UNKNOWN)
18510 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
18511 operands[2], operands[3]))
18514 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
18515 operands[2], operands[3]);
18516 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
18520 /* The floating point conditional move instructions don't directly
18521 support conditions resulting from a signed integer comparison. */
18523 compare_op = ix86_expand_compare (code, op0, op1);
18524 if (!fcmov_comparison_operator (compare_op, VOIDmode))
18526 tmp = gen_reg_rtx (QImode);
18527 ix86_expand_setcc (tmp, code, op0, op1);
18529 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
18532 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
18533 gen_rtx_IF_THEN_ELSE (mode, compare_op,
18534 operands[2], operands[3])));
18539 /* Expand a floating-point vector conditional move; a vcond operation
18540 rather than a movcc operation. */
18543 ix86_expand_fp_vcond (rtx operands[])
18545 enum rtx_code code = GET_CODE (operands[3]);
18548 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
18549 &operands[4], &operands[5]);
18550 if (code == UNKNOWN)
18553 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
18554 operands[5], operands[1], operands[2]))
18557 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
18558 operands[1], operands[2]);
18559 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
18563 /* Expand a signed/unsigned integral vector conditional move. */
18566 ix86_expand_int_vcond (rtx operands[])
18568 enum machine_mode mode = GET_MODE (operands[0]);
18569 enum rtx_code code = GET_CODE (operands[3]);
18570 bool negate = false;
18573 cop0 = operands[4];
18574 cop1 = operands[5];
18576 /* XOP supports all of the comparisons on all vector int types. */
18579 /* Canonicalize the comparison to EQ, GT, GTU. */
18590 code = reverse_condition (code);
18596 code = reverse_condition (code);
18602 code = swap_condition (code);
18603 x = cop0, cop0 = cop1, cop1 = x;
18607 gcc_unreachable ();
18610 /* Only SSE4.1/SSE4.2 supports V2DImode. */
18611 if (mode == V2DImode)
18616 /* SSE4.1 supports EQ. */
18617 if (!TARGET_SSE4_1)
18623 /* SSE4.2 supports GT/GTU. */
18624 if (!TARGET_SSE4_2)
18629 gcc_unreachable ();
18633 /* Unsigned parallel compare is not supported by the hardware.
18634 Play some tricks to turn this into a signed comparison against 0. */
18638 cop0 = force_reg (mode, cop0);
18646 rtx (*gen_sub3) (rtx, rtx, rtx);
18648 /* Subtract (-(INT MAX) - 1) from both operands to make them signed. */
18650 mask = ix86_build_signbit_mask (mode, true, false);
18651 gen_sub3 = (mode == V4SImode
18652 ? gen_subv4si3 : gen_subv2di3);
18653 t1 = gen_reg_rtx (mode);
18654 emit_insn (gen_sub3 (t1, cop0, mask));
18656 t2 = gen_reg_rtx (mode);
18657 emit_insn (gen_sub3 (t2, cop1, mask));
18667 /* Perform a parallel unsigned saturating subtraction. */
18668 x = gen_reg_rtx (mode);
18669 emit_insn (gen_rtx_SET (VOIDmode, x,
18670 gen_rtx_US_MINUS (mode, cop0, cop1)));
18673 cop1 = CONST0_RTX (mode);
18679 gcc_unreachable ();
18684 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
18685 operands[1+negate], operands[2-negate]);
18687 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
18688 operands[2-negate]);
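/* Editor's illustration (not part of GCC): the signedness trick above.
   Subtracting (or XORing in) the sign-bit mask biases both operands so
   that the signed pcmpgt the hardware provides answers the unsigned
   question.  Single-lane sketch, kept under #if 0.  */
#if 0
#include <stdint.h>

static int
unsigned_gt_via_bias (uint32_t a, uint32_t b)
{
  /* Subtracting 0x80000000 modulo 2^32 is the same as XORing it in.  */
  int32_t sa = (int32_t) (a ^ 0x80000000u);
  int32_t sb = (int32_t) (b ^ 0x80000000u);
  return sa > sb;		/* equals a >u b */
}
#endif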
18692 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
18693 true if we should do zero extension, else sign extension. HIGH_P is
18694 true if we want the N/2 high elements, else the low elements. */
18697 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
18699 enum machine_mode imode = GET_MODE (operands[1]);
18704 rtx (*unpack)(rtx, rtx);
18710 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
18712 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
18716 unpack = gen_sse4_1_zero_extendv4hiv4si2;
18718 unpack = gen_sse4_1_sign_extendv4hiv4si2;
18722 unpack = gen_sse4_1_zero_extendv2siv2di2;
18724 unpack = gen_sse4_1_sign_extendv2siv2di2;
18727 gcc_unreachable ();
18732 /* Shift higher 8 bytes to lower 8 bytes. */
18733 tmp = gen_reg_rtx (imode);
18734 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, tmp),
18735 gen_lowpart (V1TImode, operands[1]),
18741 emit_insn (unpack (operands[0], tmp));
18745 rtx (*unpack)(rtx, rtx, rtx);
18751 unpack = gen_vec_interleave_highv16qi;
18753 unpack = gen_vec_interleave_lowv16qi;
18757 unpack = gen_vec_interleave_highv8hi;
18759 unpack = gen_vec_interleave_lowv8hi;
18763 unpack = gen_vec_interleave_highv4si;
18765 unpack = gen_vec_interleave_lowv4si;
18768 gcc_unreachable ();
18771 dest = gen_lowpart (imode, operands[0]);
18774 tmp = force_reg (imode, CONST0_RTX (imode));
18776 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
18777 operands[1], pc_rtx, pc_rtx);
18779 emit_insn (unpack (dest, operands[1], tmp));
18783 /* Expand conditional increment or decrement using adc/sbb instructions.
18784 The default case using setcc followed by the conditional move can be
18785 done by generic code. */
18787 ix86_expand_int_addcc (rtx operands[])
18789 enum rtx_code code = GET_CODE (operands[1]);
18791 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
18793 rtx val = const0_rtx;
18794 bool fpcmp = false;
18795 enum machine_mode mode;
18796 rtx op0 = XEXP (operands[1], 0);
18797 rtx op1 = XEXP (operands[1], 1);
18799 if (operands[3] != const1_rtx
18800 && operands[3] != constm1_rtx)
18802 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
18804 code = GET_CODE (compare_op);
18806 flags = XEXP (compare_op, 0);
18808 if (GET_MODE (flags) == CCFPmode
18809 || GET_MODE (flags) == CCFPUmode)
18812 code = ix86_fp_compare_code_to_integer (code);
18819 PUT_CODE (compare_op,
18820 reverse_condition_maybe_unordered
18821 (GET_CODE (compare_op)));
18823 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
18826 mode = GET_MODE (operands[0]);
18828 /* Construct either adc or sbb insn. */
18829 if ((code == LTU) == (operands[3] == constm1_rtx))
18834 insn = gen_subqi3_carry;
18837 insn = gen_subhi3_carry;
18840 insn = gen_subsi3_carry;
18843 insn = gen_subdi3_carry;
18846 gcc_unreachable ();
18854 insn = gen_addqi3_carry;
18857 insn = gen_addhi3_carry;
18860 insn = gen_addsi3_carry;
18863 insn = gen_adddi3_carry;
18866 gcc_unreachable ();
18869 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
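/* Editor's illustration (not part of GCC): the adc/sbb pattern this
   function recognizes.  A compare that leaves its result in the carry
   flag lets a conditional +/-1 fold into one add-with-carry.  Sketch
   only, under #if 0.  */
#if 0
#include <stdint.h>

static uint32_t
cond_increment (uint32_t x, uint32_t a, uint32_t b)
{
  /* cmp b, a ; adc $0, x  -- no setcc, no branch, no cmov.  */
  return x + (a < b);
}
#endif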
18875 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
18876 but works for floating-point parameters and non-offsettable memories.
18877 For pushes, it returns just stack offsets; the values will be saved
18878 in the right order. Maximally four parts are generated. */
18881 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
18886 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
18888 size = (GET_MODE_SIZE (mode) + 4) / 8;
18890 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
18891 gcc_assert (size >= 2 && size <= 4);
18893 /* Optimize constant pool references to immediates. This is used by fp
18894 moves that force all constants to memory to allow combining. */
18895 if (MEM_P (operand) && MEM_READONLY_P (operand))
18897 rtx tmp = maybe_get_pool_constant (operand);
18902 if (MEM_P (operand) && !offsettable_memref_p (operand))
18904 /* The only non-offsettable memories we handle are pushes. */
18905 int ok = push_operand (operand, VOIDmode);
18909 operand = copy_rtx (operand);
18910 PUT_MODE (operand, Pmode);
18911 parts[0] = parts[1] = parts[2] = parts[3] = operand;
18915 if (GET_CODE (operand) == CONST_VECTOR)
18917 enum machine_mode imode = int_mode_for_mode (mode);
18918 /* Caution: if we looked through a constant pool memory above,
18919 the operand may actually have a different mode now. That's
18920 ok, since we want to pun this all the way back to an integer. */
18921 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
18922 gcc_assert (operand != NULL);
18928 if (mode == DImode)
18929 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
18934 if (REG_P (operand))
18936 gcc_assert (reload_completed);
18937 for (i = 0; i < size; i++)
18938 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
18940 else if (offsettable_memref_p (operand))
18942 operand = adjust_address (operand, SImode, 0);
18943 parts[0] = operand;
18944 for (i = 1; i < size; i++)
18945 parts[i] = adjust_address (operand, SImode, 4 * i);
18947 else if (GET_CODE (operand) == CONST_DOUBLE)
18952 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
18956 real_to_target (l, &r, mode);
18957 parts[3] = gen_int_mode (l[3], SImode);
18958 parts[2] = gen_int_mode (l[2], SImode);
18961 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
18962 parts[2] = gen_int_mode (l[2], SImode);
18965 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
18968 gcc_unreachable ();
18970 parts[1] = gen_int_mode (l[1], SImode);
18971 parts[0] = gen_int_mode (l[0], SImode);
18974 gcc_unreachable ();
18979 if (mode == TImode)
18980 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
18981 if (mode == XFmode || mode == TFmode)
18983 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
18984 if (REG_P (operand))
18986 gcc_assert (reload_completed);
18987 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
18988 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
18990 else if (offsettable_memref_p (operand))
18992 operand = adjust_address (operand, DImode, 0);
18993 parts[0] = operand;
18994 parts[1] = adjust_address (operand, upper_mode, 8);
18996 else if (GET_CODE (operand) == CONST_DOUBLE)
19001 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
19002 real_to_target (l, &r, mode);
19004 /* Do not use shift by 32 to avoid warning on 32bit systems. */
19005 if (HOST_BITS_PER_WIDE_INT >= 64)
19008 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
19009 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
19012 parts[0] = immed_double_const (l[0], l[1], DImode);
19014 if (upper_mode == SImode)
19015 parts[1] = gen_int_mode (l[2], SImode);
19016 else if (HOST_BITS_PER_WIDE_INT >= 64)
19019 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
19020 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
19023 parts[1] = immed_double_const (l[2], l[3], DImode);
19026 gcc_unreachable ();
19033 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
19034 Return false when normal moves are needed; true when all required
19035 insns have been emitted. Operands 2-4 contain the input values
19036 in the correct order; operands 5-7 contain the output values. */
19039 ix86_split_long_move (rtx operands[])
19044 int collisions = 0;
19045 enum machine_mode mode = GET_MODE (operands[0]);
19046 bool collisionparts[4];
19048 /* The DFmode expanders may ask us to move a double.
19049 For a 64-bit target this is a single move. By hiding that fact
19050 here we simplify the i386.md splitters. */
19051 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
19053 /* Optimize constant pool references to immediates. This is used by
19054 fp moves that force all constants to memory to allow combining. */
19056 if (MEM_P (operands[1])
19057 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
19058 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
19059 operands[1] = get_pool_constant (XEXP (operands[1], 0));
19060 if (push_operand (operands[0], VOIDmode))
19062 operands[0] = copy_rtx (operands[0]);
19063 PUT_MODE (operands[0], Pmode);
19066 operands[0] = gen_lowpart (DImode, operands[0]);
19067 operands[1] = gen_lowpart (DImode, operands[1]);
19068 emit_move_insn (operands[0], operands[1]);
19072 /* The only non-offsettable memory we handle is a push. */
19073 if (push_operand (operands[0], VOIDmode))
19076 gcc_assert (!MEM_P (operands[0])
19077 || offsettable_memref_p (operands[0]));
19079 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
19080 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
19082 /* When emitting a push, take care with source operands on the stack. */
19083 if (push && MEM_P (operands[1])
19084 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
19086 rtx src_base = XEXP (part[1][nparts - 1], 0);
19088 /* Compensate for the stack decrement by 4. */
19089 if (!TARGET_64BIT && nparts == 3
19090 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
19091 src_base = plus_constant (src_base, 4);
19093 /* src_base refers to the stack pointer and is
19094 automatically decreased by emitted push. */
19095 for (i = 0; i < nparts; i++)
19096 part[1][i] = change_address (part[1][i],
19097 GET_MODE (part[1][i]), src_base);
19100 /* We need to do the copy in the right order in case an address register
19101 of the source overlaps the destination. */
19102 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
19106 for (i = 0; i < nparts; i++)
19109 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
19110 if (collisionparts[i])
19114 /* Collision in the middle part can be handled by reordering. */
19115 if (collisions == 1 && nparts == 3 && collisionparts [1])
19117 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
19118 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
19120 else if (collisions == 1
19122 && (collisionparts [1] || collisionparts [2]))
19124 if (collisionparts [1])
19126 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
19127 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
19131 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
19132 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
19136 /* If there are more collisions, we can't handle them by reordering.
19137 Do an lea to the last part and use only one colliding move. */
19138 else if (collisions > 1)
19144 base = part[0][nparts - 1];
19146 /* Handle the case when the last part isn't valid for lea.
19147 Happens in 64-bit mode storing the 12-byte XFmode. */
19148 if (GET_MODE (base) != Pmode)
19149 base = gen_rtx_REG (Pmode, REGNO (base));
19151 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
19152 part[1][0] = replace_equiv_address (part[1][0], base);
19153 for (i = 1; i < nparts; i++)
19155 tmp = plus_constant (base, UNITS_PER_WORD * i);
19156 part[1][i] = replace_equiv_address (part[1][i], tmp);
19167 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
19168 emit_insn (gen_addsi3 (stack_pointer_rtx,
19169 stack_pointer_rtx, GEN_INT (-4)));
19170 emit_move_insn (part[0][2], part[1][2]);
19172 else if (nparts == 4)
19174 emit_move_insn (part[0][3], part[1][3]);
19175 emit_move_insn (part[0][2], part[1][2]);
19180 /* In 64-bit mode we don't have a 32-bit push available. If this is a
19181 register, that is OK - we will just use the larger counterpart. We also
19182 retype the memory - this comes from an attempt to avoid a REX prefix
19183 on moving the second half of a TFmode value. */
19184 if (GET_MODE (part[1][1]) == SImode)
19186 switch (GET_CODE (part[1][1]))
19189 part[1][1] = adjust_address (part[1][1], DImode, 0);
19193 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
19197 gcc_unreachable ();
19200 if (GET_MODE (part[1][0]) == SImode)
19201 part[1][0] = part[1][1];
19204 emit_move_insn (part[0][1], part[1][1]);
19205 emit_move_insn (part[0][0], part[1][0]);
19209 /* Choose the correct order so as not to overwrite the source before it is copied. */
19210 if ((REG_P (part[0][0])
19211 && REG_P (part[1][1])
19212 && (REGNO (part[0][0]) == REGNO (part[1][1])
19214 && REGNO (part[0][0]) == REGNO (part[1][2]))
19216 && REGNO (part[0][0]) == REGNO (part[1][3]))))
19218 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
19220 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
19222 operands[2 + i] = part[0][j];
19223 operands[6 + i] = part[1][j];
19228 for (i = 0; i < nparts; i++)
19230 operands[2 + i] = part[0][i];
19231 operands[6 + i] = part[1][i];
19235 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
19236 if (optimize_insn_for_size_p ())
19238 for (j = 0; j < nparts - 1; j++)
19239 if (CONST_INT_P (operands[6 + j])
19240 && operands[6 + j] != const0_rtx
19241 && REG_P (operands[2 + j]))
19242 for (i = j; i < nparts - 1; i++)
19243 if (CONST_INT_P (operands[7 + i])
19244 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
19245 operands[7 + i] = operands[2 + j];
19248 for (i = 0; i < nparts; i++)
19249 emit_move_insn (operands[2 + i], operands[6 + i]);
19254 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
19255 left shift by a constant, either using a single shift or
19256 a sequence of add instructions. */
19259 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
19261 rtx (*insn)(rtx, rtx, rtx);
19264 || (count * ix86_cost->add <= ix86_cost->shift_const
19265 && !optimize_insn_for_size_p ()))
19267 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
19268 while (count-- > 0)
19269 emit_insn (insn (operand, operand, operand));
19273 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
19274 emit_insn (insn (operand, operand, GEN_INT (count)));
19279 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
19281 rtx (*gen_ashl3)(rtx, rtx, rtx);
19282 rtx (*gen_shld)(rtx, rtx, rtx);
19283 int half_width = GET_MODE_BITSIZE (mode) >> 1;
19285 rtx low[2], high[2];
19288 if (CONST_INT_P (operands[2]))
19290 split_double_mode (mode, operands, 2, low, high);
19291 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
19293 if (count >= half_width)
19295 emit_move_insn (high[0], low[1]);
19296 emit_move_insn (low[0], const0_rtx);
19298 if (count > half_width)
19299 ix86_expand_ashl_const (high[0], count - half_width, mode);
19303 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
19305 if (!rtx_equal_p (operands[0], operands[1]))
19306 emit_move_insn (operands[0], operands[1]);
19308 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
19309 ix86_expand_ashl_const (low[0], count, mode);
19314 split_double_mode (mode, operands, 1, low, high);
19316 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
19318 if (operands[1] == const1_rtx)
19320 /* Assuming we've chosen QImode-capable registers, 1 << N
19321 can be done with two 32/64-bit shifts, no branches, no cmoves. */
19322 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
19324 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
19326 ix86_expand_clear (low[0]);
19327 ix86_expand_clear (high[0]);
19328 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
19330 d = gen_lowpart (QImode, low[0]);
19331 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
19332 s = gen_rtx_EQ (QImode, flags, const0_rtx);
19333 emit_insn (gen_rtx_SET (VOIDmode, d, s));
19335 d = gen_lowpart (QImode, high[0]);
19336 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
19337 s = gen_rtx_NE (QImode, flags, const0_rtx);
19338 emit_insn (gen_rtx_SET (VOIDmode, d, s));
19341 /* Otherwise, we can get the same results by manually performing
19342 a bit extract operation on bit 5/6, and then performing the two
19343 shifts. The two methods of getting 0/1 into low/high are exactly
19344 the same size. Avoiding the shift in the bit extract case helps
19345 pentium4 a bit; no one else seems to care much either way. */
19348 enum machine_mode half_mode;
19349 rtx (*gen_lshr3)(rtx, rtx, rtx);
19350 rtx (*gen_and3)(rtx, rtx, rtx);
19351 rtx (*gen_xor3)(rtx, rtx, rtx);
19352 HOST_WIDE_INT bits;
19355 if (mode == DImode)
19357 half_mode = SImode;
19358 gen_lshr3 = gen_lshrsi3;
19359 gen_and3 = gen_andsi3;
19360 gen_xor3 = gen_xorsi3;
19365 half_mode = DImode;
19366 gen_lshr3 = gen_lshrdi3;
19367 gen_and3 = gen_anddi3;
19368 gen_xor3 = gen_xordi3;
19372 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
19373 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
19375 x = gen_lowpart (half_mode, operands[2]);
19376 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
19378 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
19379 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
19380 emit_move_insn (low[0], high[0]);
19381 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
19384 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
19385 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
19389 if (operands[1] == constm1_rtx)
19391 /* For -1 << N, we can avoid the shld instruction, because we
19392 know that we're shifting 0...31/63 ones into a -1. */
19393 emit_move_insn (low[0], constm1_rtx);
19394 if (optimize_insn_for_size_p ())
19395 emit_move_insn (high[0], low[0]);
19397 emit_move_insn (high[0], constm1_rtx);
19401 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
19403 if (!rtx_equal_p (operands[0], operands[1]))
19404 emit_move_insn (operands[0], operands[1]);
19406 split_double_mode (mode, operands, 1, low, high);
19407 emit_insn (gen_shld (high[0], low[0], operands[2]));
19410 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
19412 if (TARGET_CMOVE && scratch)
19414 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
19415 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
19417 ix86_expand_clear (scratch);
19418 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
19422 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
19423 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
19425 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
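/* Editor's illustration (not part of GCC): the double-word left shift
   being split above, for a 64-bit value in 32-bit halves.  The shld
   instruction implements the (hi << n) | (lo >> (32 - n)) step in one
   insn.  Hypothetical standalone sketch, kept under #if 0.  */
#if 0
#include <stdint.h>

static void
di_shl (uint32_t *lo, uint32_t *hi, unsigned n)	/* 0 <= n < 64 */
{
  if (n >= 32)
    {				/* count >= half_width: low word goes to 0.  */
      *hi = *lo << (n - 32);
      *lo = 0;
    }
  else if (n > 0)
    {				/* shld high,low,n then shl low,n.  */
      *hi = (*hi << n) | (*lo >> (32 - n));
      *lo <<= n;
    }
}
#endif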
19430 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
19432 rtx (*gen_ashr3)(rtx, rtx, rtx)
19433 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
19434 rtx (*gen_shrd)(rtx, rtx, rtx);
19435 int half_width = GET_MODE_BITSIZE (mode) >> 1;
19437 rtx low[2], high[2];
19440 if (CONST_INT_P (operands[2]))
19442 split_double_mode (mode, operands, 2, low, high);
19443 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
19445 if (count == GET_MODE_BITSIZE (mode) - 1)
19447 emit_move_insn (high[0], high[1]);
19448 emit_insn (gen_ashr3 (high[0], high[0],
19449 GEN_INT (half_width - 1)));
19450 emit_move_insn (low[0], high[0]);
19453 else if (count >= half_width)
19455 emit_move_insn (low[0], high[1]);
19456 emit_move_insn (high[0], low[0]);
19457 emit_insn (gen_ashr3 (high[0], high[0],
19458 GEN_INT (half_width - 1)));
19460 if (count > half_width)
19461 emit_insn (gen_ashr3 (low[0], low[0],
19462 GEN_INT (count - half_width)));
19466 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19468 if (!rtx_equal_p (operands[0], operands[1]))
19469 emit_move_insn (operands[0], operands[1]);
19471 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
19472 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
19477 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19479 if (!rtx_equal_p (operands[0], operands[1]))
19480 emit_move_insn (operands[0], operands[1]);
19482 split_double_mode (mode, operands, 1, low, high);
19484 emit_insn (gen_shrd (low[0], high[0], operands[2]));
19485 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
19487 if (TARGET_CMOVE && scratch)
19489 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
19490 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
19492 emit_move_insn (scratch, high[0]);
19493 emit_insn (gen_ashr3 (scratch, scratch,
19494 GEN_INT (half_width - 1)));
19495 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
19500 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
19501 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
19503 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
19509 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
19511 rtx (*gen_lshr3)(rtx, rtx, rtx)
19512 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
19513 rtx (*gen_shrd)(rtx, rtx, rtx);
19514 int half_width = GET_MODE_BITSIZE (mode) >> 1;
19516 rtx low[2], high[2];
19519 if (CONST_INT_P (operands[2]))
19521 split_double_mode (mode, operands, 2, low, high);
19522 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
19524 if (count >= half_width)
19526 emit_move_insn (low[0], high[1]);
19527 ix86_expand_clear (high[0]);
19529 if (count > half_width)
19530 emit_insn (gen_lshr3 (low[0], low[0],
19531 GEN_INT (count - half_width)));
19535 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19537 if (!rtx_equal_p (operands[0], operands[1]))
19538 emit_move_insn (operands[0], operands[1]);
19540 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
19541 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
19546 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19548 if (!rtx_equal_p (operands[0], operands[1]))
19549 emit_move_insn (operands[0], operands[1]);
19551 split_double_mode (mode, operands, 1, low, high);
19553 emit_insn (gen_shrd (low[0], high[0], operands[2]));
19554 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
19556 if (TARGET_CMOVE && scratch)
19558 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
19559 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
19561 ix86_expand_clear (scratch);
19562 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
19567 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
19568 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
19570 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
19575 /* Predict the just-emitted jump instruction to be taken with probability PROB. */
19577 predict_jump (int prob)
19579 rtx insn = get_last_insn ();
19580 gcc_assert (JUMP_P (insn));
19581 add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
19584 /* Helper function for the string operations below. Test VARIABLE for
19585 whether it is aligned to VALUE bytes. If true, jump to the returned label. */
19587 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
19589 rtx label = gen_label_rtx ();
19590 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
19591 if (GET_MODE (variable) == DImode)
19592 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
19594 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
19595 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
19598 predict_jump (REG_BR_PROB_BASE * 50 / 100);
19600 predict_jump (REG_BR_PROB_BASE * 90 / 100);
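/* Editor's illustration (not part of GCC): the test emitted above ANDs
   VARIABLE with VALUE and branches when the result is zero.  Sketch of
   the predicate, assuming VALUE is a single bit; kept under #if 0.  */
#if 0
#include <stdint.h>

static int
value_bit_clear_p (uintptr_t variable, unsigned value)
{
  /* and tmp, variable, value ; jz label  */
  return (variable & value) == 0;
}
#endif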
19604 /* Adjust COUNTREG by VALUE. */
19606 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
19608 rtx (*gen_add)(rtx, rtx, rtx)
19609 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
19611 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
19614 /* Zero-extend the possibly-SImode EXP to a Pmode register. */
19616 ix86_zero_extend_to_Pmode (rtx exp)
19619 if (GET_MODE (exp) == VOIDmode)
19620 return force_reg (Pmode, exp);
19621 if (GET_MODE (exp) == Pmode)
19622 return copy_to_mode_reg (Pmode, exp);
19623 r = gen_reg_rtx (Pmode);
19624 emit_insn (gen_zero_extendsidi2 (r, exp));
19628 /* Divide COUNTREG by SCALE. */
19630 scale_counter (rtx countreg, int scale)
19636 if (CONST_INT_P (countreg))
19637 return GEN_INT (INTVAL (countreg) / scale);
19638 gcc_assert (REG_P (countreg));
19640 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
19641 GEN_INT (exact_log2 (scale)),
19642 NULL, 1, OPTAB_DIRECT);
19646 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
19647 DImode for constant loop counts. */
19649 static enum machine_mode
19650 counter_mode (rtx count_exp)
19652 if (GET_MODE (count_exp) != VOIDmode)
19653 return GET_MODE (count_exp);
19654 if (!CONST_INT_P (count_exp))
19656 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
19661 /* When SRCPTR is non-NULL, output a simple loop that moves the memory
19662 pointed to by SRCPTR to DESTPTR in chunks of MODE, unrolled UNROLL times;
19663 the overall size is COUNT, specified in bytes. When SRCPTR is NULL, output
19664 the equivalent loop to set memory by VALUE (supposed to be in MODE).
19666 The size is rounded down to a whole number of chunks moved at once.
19667 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
19671 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
19672 rtx destptr, rtx srcptr, rtx value,
19673 rtx count, enum machine_mode mode, int unroll,
19676 rtx out_label, top_label, iter, tmp;
19677 enum machine_mode iter_mode = counter_mode (count);
19678 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
19679 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
19685 top_label = gen_label_rtx ();
19686 out_label = gen_label_rtx ();
19687 iter = gen_reg_rtx (iter_mode);
19689 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
19690 NULL, 1, OPTAB_DIRECT);
19691 /* Those two should combine. */
19692 if (piece_size == const1_rtx)
19694 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
19696 predict_jump (REG_BR_PROB_BASE * 10 / 100);
19698 emit_move_insn (iter, const0_rtx);
19700 emit_label (top_label);
19702 tmp = convert_modes (Pmode, iter_mode, iter, true);
19703 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
19704 destmem = change_address (destmem, mode, x_addr);
19708 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
19709 srcmem = change_address (srcmem, mode, y_addr);
19711 /* When unrolling for chips that reorder memory reads and writes,
19712 we can save registers by using a single temporary.
19713 Also, using 4 temporaries is overkill in 32-bit mode. */
19714 if (!TARGET_64BIT && 0)
19716 for (i = 0; i < unroll; i++)
19721 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
19723 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
19725 emit_move_insn (destmem, srcmem);
19731 gcc_assert (unroll <= 4);
19732 for (i = 0; i < unroll; i++)
19734 tmpreg[i] = gen_reg_rtx (mode);
19738 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
19740 emit_move_insn (tmpreg[i], srcmem);
19742 for (i = 0; i < unroll; i++)
19747 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
19749 emit_move_insn (destmem, tmpreg[i]);
19754 for (i = 0; i < unroll; i++)
19758 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
19759 emit_move_insn (destmem, value);
19762 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
19763 true, OPTAB_LIB_WIDEN);
19765 emit_move_insn (iter, tmp);
19767 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
19769 if (expected_size != -1)
19771 expected_size /= GET_MODE_SIZE (mode) * unroll;
19772 if (expected_size == 0)
19774 else if (expected_size > REG_BR_PROB_BASE)
19775 predict_jump (REG_BR_PROB_BASE - 1);
19777 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
19780 predict_jump (REG_BR_PROB_BASE * 80 / 100);
19781 iter = ix86_zero_extend_to_Pmode (iter);
19782 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
19783 true, OPTAB_LIB_WIDEN);
19784 if (tmp != destptr)
19785 emit_move_insn (destptr, tmp);
19788 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
19789 true, OPTAB_LIB_WIDEN);
19791 emit_move_insn (srcptr, tmp);
19793 emit_label (out_label);
19796 /* Output "rep; mov" instruction.
19797 Arguments have the same meaning as for the previous function. */
19799 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
19800 rtx destptr, rtx srcptr,
19802 enum machine_mode mode)
19807 HOST_WIDE_INT rounded_count;
19809 /* If the size is known, it is shorter to use rep movs. */
19810 if (mode == QImode && CONST_INT_P (count)
19811 && !(INTVAL (count) & 3))
19814 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
19815 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
19816 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
19817 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
19818 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
19819 if (mode != QImode)
19821 destexp = gen_rtx_ASHIFT (Pmode, countreg,
19822 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
19823 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
19824 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
19825 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
19826 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
19830 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
19831 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
19833 if (CONST_INT_P (count))
19835 rounded_count = (INTVAL (count)
19836 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
19837 destmem = shallow_copy_rtx (destmem);
19838 srcmem = shallow_copy_rtx (srcmem);
19839 set_mem_size (destmem, rounded_count);
19840 set_mem_size (srcmem, rounded_count);
19844 if (MEM_SIZE_KNOWN_P (destmem))
19845 clear_mem_size (destmem);
19846 if (MEM_SIZE_KNOWN_P (srcmem))
19847 clear_mem_size (srcmem);
19849 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
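/* Editor's illustration (not part of GCC): what the emitted "rep; mov"
   amounts to at user level, written with GNU C extended asm.  Byte
   variant only; a hypothetical standalone sketch kept under #if 0.  */
#if 0
#include <stddef.h>

static void
rep_movsb (void *dest, const void *src, size_t count)
{
  /* rcx/ecx holds the count, rsi/rdi the pointers; all three are
     updated by the instruction, hence the "+" constraints.  */
  __asm__ volatile ("rep movsb"
		    : "+D" (dest), "+S" (src), "+c" (count)
		    :
		    : "memory");
}
#endif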
19853 /* Output "rep; stos" instruction.
19854 Arguments have the same meaning as for the previous function. */
19856 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
19857 rtx count, enum machine_mode mode,
19862 HOST_WIDE_INT rounded_count;
19864 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
19865 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
19866 value = force_reg (mode, gen_lowpart (mode, value));
19867 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
19868 if (mode != QImode)
19870 destexp = gen_rtx_ASHIFT (Pmode, countreg,
19871 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
19872 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
19875 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
19876 if (orig_value == const0_rtx && CONST_INT_P (count))
19878 rounded_count = (INTVAL (count)
19879 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
19880 destmem = shallow_copy_rtx (destmem);
19881 set_mem_size (destmem, rounded_count);
19883 else if (MEM_SIZE_KNOWN_P (destmem))
19884 clear_mem_size (destmem);
19885 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
19889 emit_strmov (rtx destmem, rtx srcmem,
19890 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
19892 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
19893 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
19894 emit_insn (gen_strmov (destptr, dest, srcptr, src));
19897 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
19899 expand_movmem_epilogue (rtx destmem, rtx srcmem,
19900 rtx destptr, rtx srcptr, rtx count, int max_size)
19903 if (CONST_INT_P (count))
19905 HOST_WIDE_INT countval = INTVAL (count);
19908 if ((countval & 0x10) && max_size > 16)
19912 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
19913 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
19916 gcc_unreachable ();
19919 if ((countval & 0x08) && max_size > 8)
19922 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
19925 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
19926 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
19930 if ((countval & 0x04) && max_size > 4)
19932 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
19935 if ((countval & 0x02) && max_size > 2)
19937 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
19940 if ((countval & 0x01) && max_size > 1)
19942 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
19949 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
19950 count, 1, OPTAB_DIRECT);
19951 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
19952 count, QImode, 1, 4);
19956 /* When there are stringops, we can cheaply increase dest and src pointers.
19957 Otherwise we save code size by maintaining offset (zero is readily
19958 available from preceding rep operation) and using x86 addressing modes. */
19960 if (TARGET_SINGLE_STRINGOP)
19964 rtx label = ix86_expand_aligntest (count, 4, true);
19965 src = change_address (srcmem, SImode, srcptr);
19966 dest = change_address (destmem, SImode, destptr);
19967 emit_insn (gen_strmov (destptr, dest, srcptr, src));
19968 emit_label (label);
19969 LABEL_NUSES (label) = 1;
19973 rtx label = ix86_expand_aligntest (count, 2, true);
19974 src = change_address (srcmem, HImode, srcptr);
19975 dest = change_address (destmem, HImode, destptr);
19976 emit_insn (gen_strmov (destptr, dest, srcptr, src));
19977 emit_label (label);
19978 LABEL_NUSES (label) = 1;
19982 rtx label = ix86_expand_aligntest (count, 1, true);
19983 src = change_address (srcmem, QImode, srcptr);
19984 dest = change_address (destmem, QImode, destptr);
19985 emit_insn (gen_strmov (destptr, dest, srcptr, src));
19986 emit_label (label);
19987 LABEL_NUSES (label) = 1;
19992 rtx offset = force_reg (Pmode, const0_rtx);
19997 rtx label = ix86_expand_aligntest (count, 4, true);
19998 src = change_address (srcmem, SImode, srcptr);
19999 dest = change_address (destmem, SImode, destptr);
20000 emit_move_insn (dest, src);
20001 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
20002 true, OPTAB_LIB_WIDEN);
20004 emit_move_insn (offset, tmp);
20005 emit_label (label);
20006 LABEL_NUSES (label) = 1;
20010 rtx label = ix86_expand_aligntest (count, 2, true);
20011 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
20012 src = change_address (srcmem, HImode, tmp);
20013 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
20014 dest = change_address (destmem, HImode, tmp);
20015 emit_move_insn (dest, src);
20016 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
20017 true, OPTAB_LIB_WIDEN);
20019 emit_move_insn (offset, tmp);
20020 emit_label (label);
20021 LABEL_NUSES (label) = 1;
20025 rtx label = ix86_expand_aligntest (count, 1, true);
20026 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
20027 src = change_address (srcmem, QImode, tmp);
20028 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
20029 dest = change_address (destmem, QImode, tmp);
20030 emit_move_insn (dest, src);
20031 emit_label (label);
20032 LABEL_NUSES (label) = 1;
/* Output code to set at most count & (max_size - 1) bytes starting at DEST.  */
20039 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
20040 rtx count, int max_size)
20043 expand_simple_binop (counter_mode (count), AND, count,
20044 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
20045 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
20046 gen_lowpart (QImode, value), count, QImode,
/* Output code to set at most count & (max_size - 1) bytes starting at DEST.  */
20052 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
20056 if (CONST_INT_P (count))
20058 HOST_WIDE_INT countval = INTVAL (count);
20061 if ((countval & 0x10) && max_size > 16)
20065 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
20066 emit_insn (gen_strset (destptr, dest, value));
20067 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
20068 emit_insn (gen_strset (destptr, dest, value));
20071 gcc_unreachable ();
20074 if ((countval & 0x08) && max_size > 8)
20078 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
20079 emit_insn (gen_strset (destptr, dest, value));
20083 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
20084 emit_insn (gen_strset (destptr, dest, value));
20085 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
20086 emit_insn (gen_strset (destptr, dest, value));
20090 if ((countval & 0x04) && max_size > 4)
20092 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
20093 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
20096 if ((countval & 0x02) && max_size > 2)
20098 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
20099 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
20102 if ((countval & 0x01) && max_size > 1)
20104 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
20105 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
20112 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
20117 rtx label = ix86_expand_aligntest (count, 16, true);
20120 dest = change_address (destmem, DImode, destptr);
20121 emit_insn (gen_strset (destptr, dest, value));
20122 emit_insn (gen_strset (destptr, dest, value));
20126 dest = change_address (destmem, SImode, destptr);
20127 emit_insn (gen_strset (destptr, dest, value));
20128 emit_insn (gen_strset (destptr, dest, value));
20129 emit_insn (gen_strset (destptr, dest, value));
20130 emit_insn (gen_strset (destptr, dest, value));
20132 emit_label (label);
20133 LABEL_NUSES (label) = 1;
20137 rtx label = ix86_expand_aligntest (count, 8, true);
20140 dest = change_address (destmem, DImode, destptr);
20141 emit_insn (gen_strset (destptr, dest, value));
20145 dest = change_address (destmem, SImode, destptr);
20146 emit_insn (gen_strset (destptr, dest, value));
20147 emit_insn (gen_strset (destptr, dest, value));
20149 emit_label (label);
20150 LABEL_NUSES (label) = 1;
20154 rtx label = ix86_expand_aligntest (count, 4, true);
20155 dest = change_address (destmem, SImode, destptr);
20156 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
20157 emit_label (label);
20158 LABEL_NUSES (label) = 1;
20162 rtx label = ix86_expand_aligntest (count, 2, true);
20163 dest = change_address (destmem, HImode, destptr);
20164 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
20165 emit_label (label);
20166 LABEL_NUSES (label) = 1;
20170 rtx label = ix86_expand_aligntest (count, 1, true);
20171 dest = change_address (destmem, QImode, destptr);
20172 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
20173 emit_label (label);
20174 LABEL_NUSES (label) = 1;
/* Copy enough bytes from SRC to DEST to align DEST, which is known to
   be aligned by ALIGN, to DESIRED_ALIGNMENT.  */
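/* For ALIGN == 1 and DESIRED_ALIGNMENT == 8 the emitted prologue
   behaves roughly like:

     if (dst & 1) { copy 1 byte;  count -= 1; }
     if (dst & 2) { copy 2 bytes; count -= 2; }
     if (dst & 4) { copy 4 bytes; count -= 4; }

   where each test is a forward branch around the move.  */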
20181 expand_movmem_prologue (rtx destmem, rtx srcmem,
20182 rtx destptr, rtx srcptr, rtx count,
20183 int align, int desired_alignment)
20185 if (align <= 1 && desired_alignment > 1)
20187 rtx label = ix86_expand_aligntest (destptr, 1, false);
20188 srcmem = change_address (srcmem, QImode, srcptr);
20189 destmem = change_address (destmem, QImode, destptr);
20190 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
20191 ix86_adjust_counter (count, 1);
20192 emit_label (label);
20193 LABEL_NUSES (label) = 1;
20195 if (align <= 2 && desired_alignment > 2)
20197 rtx label = ix86_expand_aligntest (destptr, 2, false);
20198 srcmem = change_address (srcmem, HImode, srcptr);
20199 destmem = change_address (destmem, HImode, destptr);
20200 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
20201 ix86_adjust_counter (count, 2);
20202 emit_label (label);
20203 LABEL_NUSES (label) = 1;
20205 if (align <= 4 && desired_alignment > 4)
20207 rtx label = ix86_expand_aligntest (destptr, 4, false);
20208 srcmem = change_address (srcmem, SImode, srcptr);
20209 destmem = change_address (destmem, SImode, destptr);
20210 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
20211 ix86_adjust_counter (count, 4);
20212 emit_label (label);
20213 LABEL_NUSES (label) = 1;
20215 gcc_assert (desired_alignment <= 8);
/* Copy enough bytes from SRC to DST to align DST to DESIRED_ALIGN.
   ALIGN_BYTES is how many bytes need to be copied.  */
20221 expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
20222 int desired_align, int align_bytes)
20225 rtx orig_dst = dst;
20226 rtx orig_src = src;
20228 int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
20229 if (src_align_bytes >= 0)
20230 src_align_bytes = desired_align - src_align_bytes;
20231 if (align_bytes & 1)
20233 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
20234 src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
20236 emit_insn (gen_strmov (destreg, dst, srcreg, src));
20238 if (align_bytes & 2)
20240 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
20241 src = adjust_automodify_address_nv (src, HImode, srcreg, off);
20242 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
20243 set_mem_align (dst, 2 * BITS_PER_UNIT);
20244 if (src_align_bytes >= 0
20245 && (src_align_bytes & 1) == (align_bytes & 1)
20246 && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
20247 set_mem_align (src, 2 * BITS_PER_UNIT);
20249 emit_insn (gen_strmov (destreg, dst, srcreg, src));
20251 if (align_bytes & 4)
20253 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
20254 src = adjust_automodify_address_nv (src, SImode, srcreg, off);
20255 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
20256 set_mem_align (dst, 4 * BITS_PER_UNIT);
20257 if (src_align_bytes >= 0)
20259 unsigned int src_align = 0;
20260 if ((src_align_bytes & 3) == (align_bytes & 3))
20262 else if ((src_align_bytes & 1) == (align_bytes & 1))
20264 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
20265 set_mem_align (src, src_align * BITS_PER_UNIT);
20268 emit_insn (gen_strmov (destreg, dst, srcreg, src));
20270 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
20271 src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
20272 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
20273 set_mem_align (dst, desired_align * BITS_PER_UNIT);
20274 if (src_align_bytes >= 0)
20276 unsigned int src_align = 0;
20277 if ((src_align_bytes & 7) == (align_bytes & 7))
20279 else if ((src_align_bytes & 3) == (align_bytes & 3))
20281 else if ((src_align_bytes & 1) == (align_bytes & 1))
20283 if (src_align > (unsigned int) desired_align)
20284 src_align = desired_align;
20285 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
20286 set_mem_align (src, src_align * BITS_PER_UNIT);
20288 if (MEM_SIZE_KNOWN_P (orig_dst))
20289 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
20290 if (MEM_SIZE_KNOWN_P (orig_src))
20291 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
/* Store enough bytes at DEST, which is known to be aligned by ALIGN,
   to align it to DESIRED_ALIGNMENT.  */
20299 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
20300 int align, int desired_alignment)
20302 if (align <= 1 && desired_alignment > 1)
20304 rtx label = ix86_expand_aligntest (destptr, 1, false);
20305 destmem = change_address (destmem, QImode, destptr);
20306 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
20307 ix86_adjust_counter (count, 1);
20308 emit_label (label);
20309 LABEL_NUSES (label) = 1;
20311 if (align <= 2 && desired_alignment > 2)
20313 rtx label = ix86_expand_aligntest (destptr, 2, false);
20314 destmem = change_address (destmem, HImode, destptr);
20315 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
20316 ix86_adjust_counter (count, 2);
20317 emit_label (label);
20318 LABEL_NUSES (label) = 1;
20320 if (align <= 4 && desired_alignment > 4)
20322 rtx label = ix86_expand_aligntest (destptr, 4, false);
20323 destmem = change_address (destmem, SImode, destptr);
20324 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
20325 ix86_adjust_counter (count, 4);
20326 emit_label (label);
20327 LABEL_NUSES (label) = 1;
20329 gcc_assert (desired_alignment <= 8);
/* Store enough bytes at DST, which is known to be aligned by ALIGN, to
   align it to DESIRED_ALIGN.  ALIGN_BYTES is how many bytes need to be
   stored.  */
20335 expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
20336 int desired_align, int align_bytes)
20339 rtx orig_dst = dst;
20340 if (align_bytes & 1)
20342 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
20344 emit_insn (gen_strset (destreg, dst,
20345 gen_lowpart (QImode, value)));
20347 if (align_bytes & 2)
20349 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
20350 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
20351 set_mem_align (dst, 2 * BITS_PER_UNIT);
20353 emit_insn (gen_strset (destreg, dst,
20354 gen_lowpart (HImode, value)));
20356 if (align_bytes & 4)
20358 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
20359 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
20360 set_mem_align (dst, 4 * BITS_PER_UNIT);
20362 emit_insn (gen_strset (destreg, dst,
20363 gen_lowpart (SImode, value)));
20365 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
20366 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
20367 set_mem_align (dst, desired_align * BITS_PER_UNIT);
20368 if (MEM_SIZE_KNOWN_P (orig_dst))
20369 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
20373 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
20374 static enum stringop_alg
20375 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
20376 int *dynamic_check)
20378 const struct stringop_algs * algs;
20379 bool optimize_for_speed;
20380 /* Algorithms using the rep prefix want at least edi and ecx;
20381 additionally, memset wants eax and memcpy wants esi. Don't
20382 consider such algorithms if the user has appropriated those
20383 registers for their own purposes. */
20384 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
20386 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
20388 #define ALG_USABLE_P(alg) (rep_prefix_usable \
20389 || (alg != rep_prefix_1_byte \
20390 && alg != rep_prefix_4_byte \
20391 && alg != rep_prefix_8_byte))
20392 const struct processor_costs *cost;
20394 /* Even if the string operation call is cold, we still might spend a lot
20395 of time processing large blocks. */
20396 if (optimize_function_for_size_p (cfun)
20397 || (optimize_insn_for_size_p ()
20398 && expected_size != -1 && expected_size < 256))
20399 optimize_for_speed = false;
20401 optimize_for_speed = true;
20403 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
20405 *dynamic_check = -1;
20407 algs = &cost->memset[TARGET_64BIT != 0];
20409 algs = &cost->memcpy[TARGET_64BIT != 0];
20410 if (ix86_stringop_alg != no_stringop && ALG_USABLE_P (ix86_stringop_alg))
20411 return ix86_stringop_alg;
20412 /* rep; movq or rep; movl is the smallest variant. */
20413 else if (!optimize_for_speed)
20415 if (!count || (count & 3))
20416 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
20418 return rep_prefix_usable ? rep_prefix_4_byte : loop;
/* Very tiny blocks are best handled via the loop; REP is expensive
   to set up.  */
20422 else if (expected_size != -1 && expected_size < 4)
20423 return loop_1_byte;
20424 else if (expected_size != -1)
20427 enum stringop_alg alg = libcall;
20428 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
20430 /* We get here if the algorithms that were not libcall-based
20431 were rep-prefix based and we are unable to use rep prefixes
20432 based on global register usage. Break out of the loop and
20433 use the heuristic below. */
20434 if (algs->size[i].max == 0)
20436 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
20438 enum stringop_alg candidate = algs->size[i].alg;
20440 if (candidate != libcall && ALG_USABLE_P (candidate))
20442 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
20443 last non-libcall inline algorithm. */
20444 if (TARGET_INLINE_ALL_STRINGOPS)
/* When the current size is best copied by a libcall, but we
   are still forced to inline, run the heuristic below that
   will pick code for medium-sized blocks.  */
20449 if (alg != libcall)
20453 else if (ALG_USABLE_P (candidate))
20457 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
/* When asked to inline the call anyway, try to pick a meaningful choice.
   We look for the maximal size of block that is faster to copy by hand
   and take blocks of at most that size, guessing that the average size
   will be roughly half of the maximum.
20464 If this turns out to be bad, we might simply specify the preferred
20465 choice in ix86_costs. */
20466 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
20467 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
20470 enum stringop_alg alg;
20472 bool any_alg_usable_p = true;
20474 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
20476 enum stringop_alg candidate = algs->size[i].alg;
20477 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
20479 if (candidate != libcall && candidate
20480 && ALG_USABLE_P (candidate))
20481 max = algs->size[i].max;
20483 /* If there aren't any usable algorithms, then recursing on
20484 smaller sizes isn't going to find anything. Just return the
20485 simple byte-at-a-time copy loop. */
20486 if (!any_alg_usable_p)
20488 /* Pick something reasonable. */
20489 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
20490 *dynamic_check = 128;
20491 return loop_1_byte;
20495 alg = decide_alg (count, max / 2, memset, dynamic_check);
20496 gcc_assert (*dynamic_check == -1);
20497 gcc_assert (alg != libcall);
20498 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
20499 *dynamic_check = max;
20502 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
20503 #undef ALG_USABLE_P
20506 /* Decide on alignment. We know that the operand is already aligned to ALIGN
20507 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
20509 decide_alignment (int align,
20510 enum stringop_alg alg,
20513 int desired_align = 0;
20517 gcc_unreachable ();
20519 case unrolled_loop:
20520 desired_align = GET_MODE_SIZE (Pmode);
20522 case rep_prefix_8_byte:
20525 case rep_prefix_4_byte:
/* PentiumPro has special logic that triggers for 8-byte-aligned blocks,
   copying a whole cache line at once.  */
20528 if (TARGET_PENTIUMPRO)
20533 case rep_prefix_1_byte:
/* PentiumPro has special logic that triggers for 8-byte-aligned blocks,
   copying a whole cache line at once.  */
20536 if (TARGET_PENTIUMPRO)
20550 if (desired_align < align)
20551 desired_align = align;
20552 if (expected_size != -1 && expected_size < 4)
20553 desired_align = align;
20554 return desired_align;
/* Return the smallest power of 2 strictly greater than VAL
   (e.g. 5 -> 8 and 8 -> 16).  */
20559 smallest_pow2_greater_than (int val)
20567 /* Expand string move (memcpy) operation. Use i386 string operations
20568 when profitable. expand_setmem contains similar code. The code
20569 depends upon architecture, block size and alignment, but always has
20570 the same overall structure:
1) Prologue guard: Conditional that jumps up to the epilogue for small
   blocks that can be handled by the epilogue alone.  This is faster
   but also needed for correctness, since the prologue assumes the
   block is larger than the desired alignment.

   An optional dynamic check for size and a libcall for large
   blocks is emitted here too, with -minline-stringops-dynamically.

2) Prologue: copy the first few bytes in order to get the destination
   aligned to DESIRED_ALIGN.  It is emitted only when ALIGN is less
   than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
   copied.  We emit either a jump tree (on power-of-two-sized blocks)
   or a byte loop.

3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
   with the specified algorithm.

4) Epilogue: code copying the tail of the block that is too small to
   be handled by the main body (or up to size guarded by the prologue
   guard).  */
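/* As a rough C-level sketch (hypothetical helper names, assuming a
   rep-prefix main body), the emitted layout is:

     if (count < epilogue_size_needed)   // 1) prologue guard
       goto epilogue;
     while (dst % desired_align)         // 2) alignment prologue
       { *dst++ = *src++; count--; }
     rep_move_chunks (dst, src, count / size_needed);   // 3) main body
   epilogue:                             // 4) copy the tail
     copy_tail (dst, src, count & (epilogue_size_needed - 1));
*/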
20593 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
20594 rtx expected_align_exp, rtx expected_size_exp)
20600 rtx jump_around_label = NULL;
20601 HOST_WIDE_INT align = 1;
20602 unsigned HOST_WIDE_INT count = 0;
20603 HOST_WIDE_INT expected_size = -1;
20604 int size_needed = 0, epilogue_size_needed;
20605 int desired_align = 0, align_bytes = 0;
20606 enum stringop_alg alg;
20608 bool need_zero_guard = false;
20610 if (CONST_INT_P (align_exp))
20611 align = INTVAL (align_exp);
/* i386 can do misaligned access at a reasonably increased cost.  */
20613 if (CONST_INT_P (expected_align_exp)
20614 && INTVAL (expected_align_exp) > align)
20615 align = INTVAL (expected_align_exp);
20616 /* ALIGN is the minimum of destination and source alignment, but we care here
20617 just about destination alignment. */
20618 else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
20619 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
20621 if (CONST_INT_P (count_exp))
20622 count = expected_size = INTVAL (count_exp);
20623 if (CONST_INT_P (expected_size_exp) && count == 0)
20624 expected_size = INTVAL (expected_size_exp);
20626 /* Make sure we don't need to care about overflow later on. */
20627 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
20630 /* Step 0: Decide on preferred algorithm, desired alignment and
20631 size of chunks to be copied by main loop. */
20633 alg = decide_alg (count, expected_size, false, &dynamic_check);
20634 desired_align = decide_alignment (align, alg, expected_size);
20636 if (!TARGET_ALIGN_STRINGOPS)
20637 align = desired_align;
20639 if (alg == libcall)
20641 gcc_assert (alg != no_stringop);
20643 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
20644 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
20645 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
20650 gcc_unreachable ();
20652 need_zero_guard = true;
20653 size_needed = GET_MODE_SIZE (Pmode);
20655 case unrolled_loop:
20656 need_zero_guard = true;
20657 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
20659 case rep_prefix_8_byte:
20662 case rep_prefix_4_byte:
20665 case rep_prefix_1_byte:
20669 need_zero_guard = true;
20674 epilogue_size_needed = size_needed;
20676 /* Step 1: Prologue guard. */
20678 /* Alignment code needs count to be in register. */
20679 if (CONST_INT_P (count_exp) && desired_align > align)
20681 if (INTVAL (count_exp) > desired_align
20682 && INTVAL (count_exp) > size_needed)
20685 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
20686 if (align_bytes <= 0)
20689 align_bytes = desired_align - align_bytes;
20691 if (align_bytes == 0)
20692 count_exp = force_reg (counter_mode (count_exp), count_exp);
20694 gcc_assert (desired_align >= 1 && align >= 1);
20696 /* Ensure that alignment prologue won't copy past end of block. */
20697 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
20699 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
/* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
   Make sure it is a power of 2.  */
20702 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
20706 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
20708 /* If main algorithm works on QImode, no epilogue is needed.
20709 For small sizes just don't align anything. */
20710 if (size_needed == 1)
20711 desired_align = align;
20718 label = gen_label_rtx ();
20719 emit_cmp_and_jump_insns (count_exp,
20720 GEN_INT (epilogue_size_needed),
20721 LTU, 0, counter_mode (count_exp), 1, label);
20722 if (expected_size == -1 || expected_size < epilogue_size_needed)
20723 predict_jump (REG_BR_PROB_BASE * 60 / 100);
20725 predict_jump (REG_BR_PROB_BASE * 20 / 100);
/* Emit code to decide at runtime whether a library call or inline code
   should be used.  */
20731 if (dynamic_check != -1)
20733 if (CONST_INT_P (count_exp))
20735 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
20737 emit_block_move_via_libcall (dst, src, count_exp, false);
20738 count_exp = const0_rtx;
20744 rtx hot_label = gen_label_rtx ();
20745 jump_around_label = gen_label_rtx ();
20746 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
20747 LEU, 0, GET_MODE (count_exp), 1, hot_label);
20748 predict_jump (REG_BR_PROB_BASE * 90 / 100);
20749 emit_block_move_via_libcall (dst, src, count_exp, false);
20750 emit_jump (jump_around_label);
20751 emit_label (hot_label);
20755 /* Step 2: Alignment prologue. */
20757 if (desired_align > align)
20759 if (align_bytes == 0)
/* Except for the first move in the epilogue, we no longer know
   the constant offset in the aliasing info.  It doesn't seem worth
   the pain to maintain it for the first move, so throw away
   the info early.  */
20765 src = change_address (src, BLKmode, srcreg);
20766 dst = change_address (dst, BLKmode, destreg);
20767 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
20772 /* If we know how many bytes need to be stored before dst is
20773 sufficiently aligned, maintain aliasing info accurately. */
20774 dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
20775 desired_align, align_bytes);
20776 count_exp = plus_constant (count_exp, -align_bytes);
20777 count -= align_bytes;
20779 if (need_zero_guard
20780 && (count < (unsigned HOST_WIDE_INT) size_needed
20781 || (align_bytes == 0
20782 && count < ((unsigned HOST_WIDE_INT) size_needed
20783 + desired_align - align))))
/* It is possible that we copied enough so the main loop will not
   execute.  */
20787 gcc_assert (size_needed > 1);
20788 if (label == NULL_RTX)
20789 label = gen_label_rtx ();
20790 emit_cmp_and_jump_insns (count_exp,
20791 GEN_INT (size_needed),
20792 LTU, 0, counter_mode (count_exp), 1, label);
20793 if (expected_size == -1
20794 || expected_size < (desired_align - align) / 2 + size_needed)
20795 predict_jump (REG_BR_PROB_BASE * 20 / 100);
20797 predict_jump (REG_BR_PROB_BASE * 60 / 100);
20800 if (label && size_needed == 1)
20802 emit_label (label);
20803 LABEL_NUSES (label) = 1;
20805 epilogue_size_needed = 1;
20807 else if (label == NULL_RTX)
20808 epilogue_size_needed = size_needed;
20810 /* Step 3: Main loop. */
20816 gcc_unreachable ();
20818 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
20819 count_exp, QImode, 1, expected_size);
20822 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
20823 count_exp, Pmode, 1, expected_size);
20825 case unrolled_loop:
/* Unroll only by a factor of 2 in 32-bit mode, since we don't have
   enough registers for 4 temporaries anyway.  */
20828 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
20829 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
20832 case rep_prefix_8_byte:
20833 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
20836 case rep_prefix_4_byte:
20837 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
20840 case rep_prefix_1_byte:
20841 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
/* Properly adjust the offsets of the src and dest memory for aliasing.  */
20846 if (CONST_INT_P (count_exp))
20848 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
20849 (count / size_needed) * size_needed);
20850 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
20851 (count / size_needed) * size_needed);
20855 src = change_address (src, BLKmode, srcreg);
20856 dst = change_address (dst, BLKmode, destreg);
20859 /* Step 4: Epilogue to copy the remaining bytes. */
/* When the main loop is done, COUNT_EXP might hold the original count,
   while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
   Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
   bytes.  Compensate if needed.  */
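/* E.g. with SIZE_NEEDED == 4 and EPILOGUE_SIZE_NEEDED == 8, a count of
   14 leaves 14 & 3 == 2 bytes after the main loop, but the epilogue
   mask would yield 14 & 7 == 6; masking COUNT_EXP down with
   SIZE_NEEDED - 1 first keeps the two consistent.  */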
20868 if (size_needed < epilogue_size_needed)
20871 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
20872 GEN_INT (size_needed - 1), count_exp, 1,
20874 if (tmp != count_exp)
20875 emit_move_insn (count_exp, tmp);
20877 emit_label (label);
20878 LABEL_NUSES (label) = 1;
20881 if (count_exp != const0_rtx && epilogue_size_needed > 1)
20882 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
20883 epilogue_size_needed);
20884 if (jump_around_label)
20885 emit_label (jump_around_label);
/* Helper function for memset.  For QImode value 0xXY produce
   0xXYXYXYXY of the width specified by MODE.  This is essentially
   a * 0x01010101, but we can do slightly better than
   synth_mult by unwinding the sequence by hand on CPUs with
   slow multiply.  */
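/* For instance, promoting 0xAB to SImode via the shift/IOR sequence
   goes 0x000000AB -> 0x0000ABAB -> 0xABABABAB, the same value that
   0xAB * 0x01010101 produces.  */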
20895 promote_duplicated_reg (enum machine_mode mode, rtx val)
20897 enum machine_mode valmode = GET_MODE (val);
20899 int nops = mode == DImode ? 3 : 2;
20901 gcc_assert (mode == SImode || mode == DImode);
20902 if (val == const0_rtx)
20903 return copy_to_mode_reg (mode, const0_rtx);
20904 if (CONST_INT_P (val))
20906 HOST_WIDE_INT v = INTVAL (val) & 255;
20910 if (mode == DImode)
20911 v |= (v << 16) << 16;
20912 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
20915 if (valmode == VOIDmode)
20917 if (valmode != QImode)
20918 val = gen_lowpart (QImode, val);
20919 if (mode == QImode)
20921 if (!TARGET_PARTIAL_REG_STALL)
20923 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
20924 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
20925 <= (ix86_cost->shift_const + ix86_cost->add) * nops
20926 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
20928 rtx reg = convert_modes (mode, QImode, val, true);
20929 tmp = promote_duplicated_reg (mode, const1_rtx);
20930 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
20935 rtx reg = convert_modes (mode, QImode, val, true);
20937 if (!TARGET_PARTIAL_REG_STALL)
20938 if (mode == SImode)
20939 emit_insn (gen_movsi_insv_1 (reg, reg));
20941 emit_insn (gen_movdi_insv_1 (reg, reg));
20944 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
20945 NULL, 1, OPTAB_DIRECT);
20947 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
20949 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
20950 NULL, 1, OPTAB_DIRECT);
20951 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
20952 if (mode == SImode)
20954 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
20955 NULL, 1, OPTAB_DIRECT);
20956 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
/* Duplicate value VAL using promote_duplicated_reg into the maximal size
   that will be needed by the main loop copying SIZE_NEEDED chunks and by
   the prologue getting alignment from ALIGN to DESIRED_ALIGN.  */
20965 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
20970 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
20971 promoted_val = promote_duplicated_reg (DImode, val);
20972 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
20973 promoted_val = promote_duplicated_reg (SImode, val);
20974 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
20975 promoted_val = promote_duplicated_reg (HImode, val);
20977 promoted_val = val;
20979 return promoted_val;
/* Expand a string set operation (memset; bzero when VAL is zero).  Use
   i386 string operations when profitable.  See the expand_movmem comment
   for an explanation of the individual steps performed.  */
20986 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
20987 rtx expected_align_exp, rtx expected_size_exp)
20992 rtx jump_around_label = NULL;
20993 HOST_WIDE_INT align = 1;
20994 unsigned HOST_WIDE_INT count = 0;
20995 HOST_WIDE_INT expected_size = -1;
20996 int size_needed = 0, epilogue_size_needed;
20997 int desired_align = 0, align_bytes = 0;
20998 enum stringop_alg alg;
20999 rtx promoted_val = NULL;
21000 bool force_loopy_epilogue = false;
21002 bool need_zero_guard = false;
21004 if (CONST_INT_P (align_exp))
21005 align = INTVAL (align_exp);
/* i386 can do misaligned access at a reasonably increased cost.  */
21007 if (CONST_INT_P (expected_align_exp)
21008 && INTVAL (expected_align_exp) > align)
21009 align = INTVAL (expected_align_exp);
21010 if (CONST_INT_P (count_exp))
21011 count = expected_size = INTVAL (count_exp);
21012 if (CONST_INT_P (expected_size_exp) && count == 0)
21013 expected_size = INTVAL (expected_size_exp);
21015 /* Make sure we don't need to care about overflow later on. */
21016 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
21019 /* Step 0: Decide on preferred algorithm, desired alignment and
21020 size of chunks to be copied by main loop. */
21022 alg = decide_alg (count, expected_size, true, &dynamic_check);
21023 desired_align = decide_alignment (align, alg, expected_size);
21025 if (!TARGET_ALIGN_STRINGOPS)
21026 align = desired_align;
21028 if (alg == libcall)
21030 gcc_assert (alg != no_stringop);
21032 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
21033 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
21038 gcc_unreachable ();
21040 need_zero_guard = true;
21041 size_needed = GET_MODE_SIZE (Pmode);
21043 case unrolled_loop:
21044 need_zero_guard = true;
21045 size_needed = GET_MODE_SIZE (Pmode) * 4;
21047 case rep_prefix_8_byte:
21050 case rep_prefix_4_byte:
21053 case rep_prefix_1_byte:
21057 need_zero_guard = true;
21061 epilogue_size_needed = size_needed;
21063 /* Step 1: Prologue guard. */
21065 /* Alignment code needs count to be in register. */
21066 if (CONST_INT_P (count_exp) && desired_align > align)
21068 if (INTVAL (count_exp) > desired_align
21069 && INTVAL (count_exp) > size_needed)
21072 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
21073 if (align_bytes <= 0)
21076 align_bytes = desired_align - align_bytes;
21078 if (align_bytes == 0)
21080 enum machine_mode mode = SImode;
21081 if (TARGET_64BIT && (count & ~0xffffffff))
21083 count_exp = force_reg (mode, count_exp);
/* Do the cheap promotion to allow better CSE across the
   main loop and epilogue (i.e., one load of the big constant in
   front of all the code).  */
21089 if (CONST_INT_P (val_exp))
21090 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
21091 desired_align, align);
21092 /* Ensure that alignment prologue won't copy past end of block. */
21093 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
21095 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
21096 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
Make sure it is a power of 2.  */
21098 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
/* To improve performance of small blocks, we jump around the VAL
   promoting mode.  This means that if the promoted VAL is not constant,
   we might not use it in the epilogue and have to fall back to the
   byte loop variant.  */
21104 if (epilogue_size_needed > 2 && !promoted_val)
21105 force_loopy_epilogue = true;
21108 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
21110 /* If main algorithm works on QImode, no epilogue is needed.
21111 For small sizes just don't align anything. */
21112 if (size_needed == 1)
21113 desired_align = align;
21120 label = gen_label_rtx ();
21121 emit_cmp_and_jump_insns (count_exp,
21122 GEN_INT (epilogue_size_needed),
21123 LTU, 0, counter_mode (count_exp), 1, label);
21124 if (expected_size == -1 || expected_size <= epilogue_size_needed)
21125 predict_jump (REG_BR_PROB_BASE * 60 / 100);
21127 predict_jump (REG_BR_PROB_BASE * 20 / 100);
21130 if (dynamic_check != -1)
21132 rtx hot_label = gen_label_rtx ();
21133 jump_around_label = gen_label_rtx ();
21134 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
21135 LEU, 0, counter_mode (count_exp), 1, hot_label);
21136 predict_jump (REG_BR_PROB_BASE * 90 / 100);
21137 set_storage_via_libcall (dst, count_exp, val_exp, false);
21138 emit_jump (jump_around_label);
21139 emit_label (hot_label);
21142 /* Step 2: Alignment prologue. */
21144 /* Do the expensive promotion once we branched off the small blocks. */
21146 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
21147 desired_align, align);
21148 gcc_assert (desired_align >= 1 && align >= 1);
21150 if (desired_align > align)
21152 if (align_bytes == 0)
/* Except for the first move in the epilogue, we no longer know
   the constant offset in the aliasing info.  It doesn't seem worth
   the pain to maintain it for the first move, so throw away
   the info early.  */
21158 dst = change_address (dst, BLKmode, destreg);
21159 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
21164 /* If we know how many bytes need to be stored before dst is
21165 sufficiently aligned, maintain aliasing info accurately. */
21166 dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
21167 desired_align, align_bytes);
21168 count_exp = plus_constant (count_exp, -align_bytes);
21169 count -= align_bytes;
21171 if (need_zero_guard
21172 && (count < (unsigned HOST_WIDE_INT) size_needed
21173 || (align_bytes == 0
21174 && count < ((unsigned HOST_WIDE_INT) size_needed
21175 + desired_align - align))))
/* It is possible that we stored enough so the main loop will not
   execute.  */
21179 gcc_assert (size_needed > 1);
21180 if (label == NULL_RTX)
21181 label = gen_label_rtx ();
21182 emit_cmp_and_jump_insns (count_exp,
21183 GEN_INT (size_needed),
21184 LTU, 0, counter_mode (count_exp), 1, label);
21185 if (expected_size == -1
21186 || expected_size < (desired_align - align) / 2 + size_needed)
21187 predict_jump (REG_BR_PROB_BASE * 20 / 100);
21189 predict_jump (REG_BR_PROB_BASE * 60 / 100);
21192 if (label && size_needed == 1)
21194 emit_label (label);
21195 LABEL_NUSES (label) = 1;
21197 promoted_val = val_exp;
21198 epilogue_size_needed = 1;
21200 else if (label == NULL_RTX)
21201 epilogue_size_needed = size_needed;
21203 /* Step 3: Main loop. */
21209 gcc_unreachable ();
21211 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
21212 count_exp, QImode, 1, expected_size);
21215 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
21216 count_exp, Pmode, 1, expected_size);
21218 case unrolled_loop:
21219 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
21220 count_exp, Pmode, 4, expected_size);
21222 case rep_prefix_8_byte:
21223 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
21226 case rep_prefix_4_byte:
21227 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
21230 case rep_prefix_1_byte:
21231 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
/* Properly adjust the offset of the dest memory for aliasing.  */
21236 if (CONST_INT_P (count_exp))
21237 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
21238 (count / size_needed) * size_needed);
21240 dst = change_address (dst, BLKmode, destreg);
21242 /* Step 4: Epilogue to copy the remaining bytes. */
/* When the main loop is done, COUNT_EXP might hold the original count,
   while we want to set only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
   Epilogue code will actually set COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
   bytes.  Compensate if needed.  */
21251 if (size_needed < epilogue_size_needed)
21254 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
21255 GEN_INT (size_needed - 1), count_exp, 1,
21257 if (tmp != count_exp)
21258 emit_move_insn (count_exp, tmp);
21260 emit_label (label);
21261 LABEL_NUSES (label) = 1;
21264 if (count_exp != const0_rtx && epilogue_size_needed > 1)
21266 if (force_loopy_epilogue)
21267 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
21268 epilogue_size_needed);
21270 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
21271 epilogue_size_needed);
21273 if (jump_around_label)
21274 emit_label (jump_around_label);
/* Expand the appropriate insns for doing strlen if not just doing
   repnz; scasb

   out = result, initialized with the start address
   align_rtx = alignment of the address.
   scratch = scratch register, initialized with the start address when
             not aligned, otherwise undefined

21286 This is just the body. It needs the initializations mentioned above and
21287 some address computing at the end. These things are done in i386.md. */
21290 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
21294 rtx align_2_label = NULL_RTX;
21295 rtx align_3_label = NULL_RTX;
21296 rtx align_4_label = gen_label_rtx ();
21297 rtx end_0_label = gen_label_rtx ();
21299 rtx tmpreg = gen_reg_rtx (SImode);
21300 rtx scratch = gen_reg_rtx (SImode);
21304 if (CONST_INT_P (align_rtx))
21305 align = INTVAL (align_rtx);
21307 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
21309 /* Is there a known alignment and is it less than 4? */
21312 rtx scratch1 = gen_reg_rtx (Pmode);
21313 emit_move_insn (scratch1, out);
21314 /* Is there a known alignment and is it not 2? */
21317 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
21318 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
21320 /* Leave just the 3 lower bits. */
21321 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
21322 NULL_RTX, 0, OPTAB_WIDEN);
21324 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
21325 Pmode, 1, align_4_label);
21326 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
21327 Pmode, 1, align_2_label);
21328 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
21329 Pmode, 1, align_3_label);
/* Since the alignment is 2, we have to check 2 or 0 bytes;
   check whether it is aligned to a 4-byte boundary.  */
21336 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
21337 NULL_RTX, 0, OPTAB_WIDEN);
21339 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
21340 Pmode, 1, align_4_label);
21343 mem = change_address (src, QImode, out);
21345 /* Now compare the bytes. */
/* Compare the first n unaligned bytes on a byte-by-byte basis.  */
21348 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
21349 QImode, 1, end_0_label);
21351 /* Increment the address. */
21352 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
/* Not needed with an alignment of 2.  */
21357 emit_label (align_2_label);
21359 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
21362 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
21364 emit_label (align_3_label);
21367 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
21370 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
/* Generate a loop to check 4 bytes at a time.  It is not a good idea
   to align this loop; that only enlarges the code without making it
   any faster.  */
21376 emit_label (align_4_label);
21378 mem = change_address (src, SImode, out);
21379 emit_move_insn (scratch, mem);
21380 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
/* This formula yields a nonzero result iff one of the bytes is zero.
   This saves three branches inside the loop and many cycles.  */
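/* The computed value is (scratch - 0x01010101) & ~scratch & 0x80808080.
   E.g. for scratch == 0x12003456:
     0x12003456 - 0x01010101 == 0x10ff3355
     ~0x12003456             == 0xedffcba9
     ANDing both gives 0x00ff0301, and masking with 0x80808080 leaves
     0x00800000 -- nonzero precisely because one byte was zero.  */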
21385 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
21386 emit_insn (gen_one_cmplsi2 (scratch, scratch));
21387 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
21388 emit_insn (gen_andsi3 (tmpreg, tmpreg,
21389 gen_int_mode (0x80808080, SImode)));
21390 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
21395 rtx reg = gen_reg_rtx (SImode);
21396 rtx reg2 = gen_reg_rtx (Pmode);
21397 emit_move_insn (reg, tmpreg);
21398 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
21400 /* If zero is not in the first two bytes, move two bytes forward. */
21401 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
21402 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
21403 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
21404 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
21405 gen_rtx_IF_THEN_ELSE (SImode, tmp,
21408 /* Emit lea manually to avoid clobbering of flags. */
21409 emit_insn (gen_rtx_SET (SImode, reg2,
21410 gen_rtx_PLUS (Pmode, out, const2_rtx)));
21412 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
21413 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
21414 emit_insn (gen_rtx_SET (VOIDmode, out,
21415 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
21421 rtx end_2_label = gen_label_rtx ();
21422 /* Is zero in the first two bytes? */
21424 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
21425 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
21426 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
21427 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
21428 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
21430 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
21431 JUMP_LABEL (tmp) = end_2_label;
21433 /* Not in the first two. Move two bytes forward. */
21434 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
21435 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
21437 emit_label (end_2_label);
/* Avoid a branch in fixing up the byte.  */
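/* Adding the low byte to itself moves its bit 7 -- set iff the zero
   byte was the first of the remaining pair -- into the carry flag, so
   the borrow-propagating subtract below adjusts OUT by 4 or by 3
   without a branch.  */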
21442 tmpreg = gen_lowpart (QImode, tmpreg);
21443 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
21444 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
21445 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
21446 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
21448 emit_label (end_0_label);
21451 /* Expand strlen. */
21454 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
21456 rtx addr, scratch1, scratch2, scratch3, scratch4;
/* The generic case of the strlen expander is long.  Avoid expanding
   it unless TARGET_INLINE_ALL_STRINGOPS.  */
21461 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
21462 && !TARGET_INLINE_ALL_STRINGOPS
21463 && !optimize_insn_for_size_p ()
21464 && (!CONST_INT_P (align) || INTVAL (align) < 4))
21467 addr = force_reg (Pmode, XEXP (src, 0));
21468 scratch1 = gen_reg_rtx (Pmode);
21470 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
21471 && !optimize_insn_for_size_p ())
21473 /* Well it seems that some optimizer does not combine a call like
21474 foo(strlen(bar), strlen(bar));
when the move and the subtraction are done here.  It does calculate
the length just once when these instructions are done inside of
output_strlen_unroll().  But since &bar[strlen(bar)] is often used
and this uses one fewer register for the lifetime of
output_strlen_unroll(), this is better.  */
21481 emit_move_insn (out, addr);
21483 ix86_expand_strlensi_unroll_1 (out, src, align);
21485 /* strlensi_unroll_1 returns the address of the zero at the end of
21486 the string, like memchr(), so compute the length by subtracting
21487 the start address. */
21488 emit_insn (ix86_gen_sub3 (out, out, addr));
21494 /* Can't use this if the user has appropriated eax, ecx, or edi. */
21495 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
21498 scratch2 = gen_reg_rtx (Pmode);
21499 scratch3 = gen_reg_rtx (Pmode);
21500 scratch4 = force_reg (Pmode, constm1_rtx);
21502 emit_move_insn (scratch3, addr);
21503 eoschar = force_reg (QImode, eoschar);
21505 src = replace_equiv_address_nv (src, scratch3);
21507 /* If .md starts supporting :P, this can be done in .md. */
21508 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
21509 scratch4), UNSPEC_SCAS);
21510 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
21511 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
21512 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
/* For a given symbol (function), construct code to compute the address
   of its PLT entry in the large x86-64 PIC model.  */
21520 construct_plt_address (rtx symbol)
21522 rtx tmp = gen_reg_rtx (Pmode);
21523 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
21525 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
21526 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
21528 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
21529 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
21534 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
21536 rtx pop, bool sibcall)
/* We need to represent that SI and DI registers are clobbered
   by SYSV calls.  */
21540 static int clobbered_registers[] = {
21541 XMM6_REG, XMM7_REG, XMM8_REG,
21542 XMM9_REG, XMM10_REG, XMM11_REG,
21543 XMM12_REG, XMM13_REG, XMM14_REG,
21544 XMM15_REG, SI_REG, DI_REG
21546 rtx vec[ARRAY_SIZE (clobbered_registers) + 3];
21547 rtx use = NULL, call;
21548 unsigned int vec_len;
21550 if (pop == const0_rtx)
21552 gcc_assert (!TARGET_64BIT || !pop);
21554 if (TARGET_MACHO && !TARGET_64BIT)
21557 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
21558 fnaddr = machopic_indirect_call_target (fnaddr);
21563 /* Static functions and indirect calls don't need the pic register. */
21564 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
21565 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
21566 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
21567 use_reg (&use, pic_offset_table_rtx);
21570 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
21572 rtx al = gen_rtx_REG (QImode, AX_REG);
21573 emit_move_insn (al, callarg2);
21574 use_reg (&use, al);
21577 if (ix86_cmodel == CM_LARGE_PIC
21579 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
21580 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
21581 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
21583 ? !sibcall_insn_operand (XEXP (fnaddr, 0), Pmode)
21584 : !call_insn_operand (XEXP (fnaddr, 0), Pmode))
21586 fnaddr = XEXP (fnaddr, 0);
21587 if (GET_MODE (fnaddr) != Pmode)
21588 fnaddr = convert_to_mode (Pmode, fnaddr, 1);
21589 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (Pmode, fnaddr));
21593 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
21595 call = gen_rtx_SET (VOIDmode, retval, call);
21596 vec[vec_len++] = call;
21600 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
21601 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
21602 vec[vec_len++] = pop;
21605 if (TARGET_64BIT_MS_ABI
21606 && (!callarg2 || INTVAL (callarg2) != -2))
21610 vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
21611 UNSPEC_MS_TO_SYSV_CALL);
21613 for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
21615 = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
21617 gen_rtx_REG (SSE_REGNO_P (clobbered_registers[i])
21619 clobbered_registers[i]));
21622 /* Add UNSPEC_CALL_NEEDS_VZEROUPPER decoration. */
21623 if (TARGET_VZEROUPPER)
21626 if (cfun->machine->callee_pass_avx256_p)
21628 if (cfun->machine->callee_return_avx256_p)
21629 avx256 = callee_return_pass_avx256;
21631 avx256 = callee_pass_avx256;
21633 else if (cfun->machine->callee_return_avx256_p)
21634 avx256 = callee_return_avx256;
21636 avx256 = call_no_avx256;
21638 if (reload_completed)
21639 emit_insn (gen_avx_vzeroupper (GEN_INT (avx256)));
21641 vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode,
21642 gen_rtvec (1, GEN_INT (avx256)),
21643 UNSPEC_CALL_NEEDS_VZEROUPPER);
21647 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
21648 call = emit_call_insn (call);
21650 CALL_INSN_FUNCTION_USAGE (call) = use;
21656 ix86_split_call_vzeroupper (rtx insn, rtx vzeroupper)
21658 rtx pat = PATTERN (insn);
21659 rtvec vec = XVEC (pat, 0);
21660 int len = GET_NUM_ELEM (vec) - 1;
21662 /* Strip off the last entry of the parallel. */
21663 gcc_assert (GET_CODE (RTVEC_ELT (vec, len)) == UNSPEC);
21664 gcc_assert (XINT (RTVEC_ELT (vec, len), 1) == UNSPEC_CALL_NEEDS_VZEROUPPER);
21666 pat = RTVEC_ELT (vec, 0);
21668 pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (len, &RTVEC_ELT (vec, 0)));
21670 emit_insn (gen_avx_vzeroupper (vzeroupper));
21671 emit_call_insn (pat);
21674 /* Output the assembly for a call instruction. */
21677 ix86_output_call_insn (rtx insn, rtx call_op)
21679 bool direct_p = constant_call_address_operand (call_op, Pmode);
21680 bool seh_nop_p = false;
21683 if (SIBLING_CALL_P (insn))
21687 /* SEH epilogue detection requires the indirect branch case
21688 to include REX.W. */
21689 else if (TARGET_SEH)
21690 xasm = "rex.W jmp %A0";
21694 output_asm_insn (xasm, &call_op);
21698 /* SEH unwinding can require an extra nop to be emitted in several
21699 circumstances. Determine if we have one of those. */
21704 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
21706 /* If we get to another real insn, we don't need the nop. */
21710 /* If we get to the epilogue note, prevent a catch region from
21711 being adjacent to the standard epilogue sequence. If non-
21712 call-exceptions, we'll have done this during epilogue emission. */
21713 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
21714 && !flag_non_call_exceptions
21715 && !can_throw_internal (insn))
21722 /* If we didn't find a real insn following the call, prevent the
21723 unwinder from looking into the next function. */
21729 xasm = "call\t%P0";
21731 xasm = "call\t%A0";
21733 output_asm_insn (xasm, &call_op);
/* Clear stack slot assignments remembered from previous functions.
   This is called from INIT_EXPANDERS once before RTL is emitted for each
   function.  */
21745 static struct machine_function *
21746 ix86_init_machine_status (void)
21748 struct machine_function *f;
21750 f = ggc_alloc_cleared_machine_function ();
21751 f->use_fast_prologue_epilogue_nregs = -1;
21752 f->tls_descriptor_call_expanded_p = 0;
21753 f->call_abi = ix86_abi;
21758 /* Return a MEM corresponding to a stack slot with mode MODE.
21759 Allocate a new slot if necessary.
21761 The RTL for a function can have several slots available: N is
21762 which slot to use. */
21765 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
21767 struct stack_local_entry *s;
21769 gcc_assert (n < MAX_386_STACK_LOCALS);
21771 /* Virtual slot is valid only before vregs are instantiated. */
21772 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
21774 for (s = ix86_stack_locals; s; s = s->next)
21775 if (s->mode == mode && s->n == n)
21776 return copy_rtx (s->rtl);
21778 s = ggc_alloc_stack_local_entry ();
21781 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
21783 s->next = ix86_stack_locals;
21784 ix86_stack_locals = s;
21788 /* Calculate the length of the memory address in the instruction
21789 encoding. Includes addr32 prefix, does not include the one-byte modrm,
21790 opcode, or other prefixes. */
21793 memory_address_length (rtx addr)
21795 struct ix86_address parts;
21796 rtx base, index, disp;
21800 if (GET_CODE (addr) == PRE_DEC
21801 || GET_CODE (addr) == POST_INC
21802 || GET_CODE (addr) == PRE_MODIFY
21803 || GET_CODE (addr) == POST_MODIFY)
21806 ok = ix86_decompose_address (addr, &parts);
21809 if (parts.base && GET_CODE (parts.base) == SUBREG)
21810 parts.base = SUBREG_REG (parts.base);
21811 if (parts.index && GET_CODE (parts.index) == SUBREG)
21812 parts.index = SUBREG_REG (parts.index);
21815 index = parts.index;
21818 /* Add length of addr32 prefix. */
21819 len = (GET_CODE (addr) == ZERO_EXTEND);
/* Rule of thumb:
     - esp as the base always wants an index,
     - ebp as the base always wants a displacement,
     - r12 as the base always wants an index,
     - r13 as the base always wants a displacement.  */
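/* E.g. "mov (%esp), %eax" must be encoded with a SIB byte, because
   modrm r/m = 4 selects the SIB form, while "mov (%ebp), %eax" is
   encoded as 0(%ebp) with a one-byte displacement, because mod 00
   r/m = 5 means bare disp32 instead.  */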
21827 /* Register Indirect. */
21828 if (base && !index && !disp)
/* esp (for its index) and ebp (for its displacement) need
   the two-byte modrm form.  Similarly for r12 and r13 in
   64-bit mode.  */
21834 && (addr == arg_pointer_rtx
21835 || addr == frame_pointer_rtx
21836 || REGNO (addr) == SP_REG
21837 || REGNO (addr) == BP_REG
21838 || REGNO (addr) == R12_REG
21839 || REGNO (addr) == R13_REG))
/* Direct Addressing.  In 64-bit mode mod 00 r/m 5
   is not disp32, but disp32(%rip), so for disp32 a
   SIB byte is needed, unless print_operand_address
   optimizes it into disp32(%rip) or (%rip) is implied
   by UNSPEC.  */
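/* E.g. in 64-bit code an absolute "mov foo, %eax" has to be encoded
   through the base-less, index-less SIB form, since the plain mod 00
   r/m 5 encoding now means foo(%rip).  */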
21848 else if (disp && !base && !index)
21855 if (GET_CODE (disp) == CONST)
21856 symbol = XEXP (disp, 0);
21857 if (GET_CODE (symbol) == PLUS
21858 && CONST_INT_P (XEXP (symbol, 1)))
21859 symbol = XEXP (symbol, 0);
21861 if (GET_CODE (symbol) != LABEL_REF
21862 && (GET_CODE (symbol) != SYMBOL_REF
21863 || SYMBOL_REF_TLS_MODEL (symbol) != 0)
21864 && (GET_CODE (symbol) != UNSPEC
21865 || (XINT (symbol, 1) != UNSPEC_GOTPCREL
21866 && XINT (symbol, 1) != UNSPEC_PCREL
21867 && XINT (symbol, 1) != UNSPEC_GOTNTPOFF)))
21874 /* Find the length of the displacement constant. */
21877 if (base && satisfies_constraint_K (disp))
21882 /* ebp always wants a displacement. Similarly r13. */
21883 else if (base && REG_P (base)
21884 && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
21887 /* An index requires the two-byte modrm form.... */
21889 /* ...like esp (or r12), which always wants an index. */
21890 || base == arg_pointer_rtx
21891 || base == frame_pointer_rtx
21892 || (base && REG_P (base)
21893 && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
/* Compute the default value for the "length_immediate" attribute.  When
   SHORTFORM is set, expect that the insn has an 8-bit immediate
   alternative.  */
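/* E.g. "add $4, %ebx" has both an imm32 form (opcode 81 /0 id) and,
   because 4 fits in a signed byte, the short imm8 form (83 /0 ib)
   that this function accounts for.  */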
21913 ix86_attr_length_immediate_default (rtx insn, bool shortform)
21917 extract_insn_cached (insn);
21918 for (i = recog_data.n_operands - 1; i >= 0; --i)
21919 if (CONSTANT_P (recog_data.operand[i]))
21921 enum attr_mode mode = get_attr_mode (insn);
21924 if (shortform && CONST_INT_P (recog_data.operand[i]))
21926 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
21933 ival = trunc_int_for_mode (ival, HImode);
21936 ival = trunc_int_for_mode (ival, SImode);
21941 if (IN_RANGE (ival, -128, 127))
/* Immediates for DImode instructions are encoded as 32-bit sign-extended
   values.  */
21963 fatal_insn ("unknown insn mode", insn);
21968 /* Compute default value for "length_address" attribute. */
21970 ix86_attr_length_address_default (rtx insn)
21974 if (get_attr_type (insn) == TYPE_LEA)
21976 rtx set = PATTERN (insn), addr;
21978 if (GET_CODE (set) == PARALLEL)
21979 set = XVECEXP (set, 0, 0);
21981 gcc_assert (GET_CODE (set) == SET);
21983 addr = SET_SRC (set);
21984 if (TARGET_64BIT && get_attr_mode (insn) == MODE_SI)
21986 if (GET_CODE (addr) == ZERO_EXTEND)
21987 addr = XEXP (addr, 0);
21988 if (GET_CODE (addr) == SUBREG)
21989 addr = SUBREG_REG (addr);
21992 return memory_address_length (addr);
21995 extract_insn_cached (insn);
21996 for (i = recog_data.n_operands - 1; i >= 0; --i)
21997 if (MEM_P (recog_data.operand[i]))
21999 constrain_operands_cached (reload_completed);
22000 if (which_alternative != -1)
22002 const char *constraints = recog_data.constraints[i];
22003 int alt = which_alternative;
22005 while (*constraints == '=' || *constraints == '+')
22008 while (*constraints++ != ',')
22010 /* Skip ignored operands. */
22011 if (*constraints == 'X')
22014 return memory_address_length (XEXP (recog_data.operand[i], 0));
/* Compute the default value for the "length_vex" attribute.  It includes
   the 2- or 3-byte VEX prefix and 1 opcode byte.  */
22023 ix86_attr_length_vex_default (rtx insn, bool has_0f_opcode, bool has_vex_w)
/* Only the 0f opcode map can use the 2-byte VEX prefix; the VEX.W bit
   requires the 3-byte VEX prefix.  */
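/* E.g. "vaddps %xmm1, %xmm2, %xmm3" fits the 2-byte (C5) prefix,
   while an insn needing VEX.W, an opcode map other than 0f, or the
   REX.X/REX.B equivalents must use the 3-byte (C4) prefix.  */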
22029 if (!has_0f_opcode || has_vex_w)
/* We can always use the 2-byte VEX prefix in 32-bit mode.  */
22036 extract_insn_cached (insn);
22038 for (i = recog_data.n_operands - 1; i >= 0; --i)
22039 if (REG_P (recog_data.operand[i]))
22041 /* REX.W bit uses 3 byte VEX prefix. */
22042 if (GET_MODE (recog_data.operand[i]) == DImode
22043 && GENERAL_REG_P (recog_data.operand[i]))
22048 /* REX.X or REX.B bits use 3 byte VEX prefix. */
22049 if (MEM_P (recog_data.operand[i])
22050 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
22057 /* Return the maximum number of instructions a cpu can issue. */
22060 ix86_issue_rate (void)
22064 case PROCESSOR_PENTIUM:
22065 case PROCESSOR_ATOM:
22069 case PROCESSOR_PENTIUMPRO:
22070 case PROCESSOR_PENTIUM4:
22071 case PROCESSOR_CORE2_32:
22072 case PROCESSOR_CORE2_64:
22073 case PROCESSOR_COREI7_32:
22074 case PROCESSOR_COREI7_64:
22075 case PROCESSOR_ATHLON:
22077 case PROCESSOR_AMDFAM10:
22078 case PROCESSOR_NOCONA:
22079 case PROCESSOR_GENERIC32:
22080 case PROCESSOR_GENERIC64:
22081 case PROCESSOR_BDVER1:
22082 case PROCESSOR_BDVER2:
22083 case PROCESSOR_BTVER1:
22091 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
22092    by DEP_INSN and nothing else set by DEP_INSN.  */
22095 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
22099 /* Simplify the test for uninteresting insns. */
22100 if (insn_type != TYPE_SETCC
22101 && insn_type != TYPE_ICMOV
22102 && insn_type != TYPE_FCMOV
22103 && insn_type != TYPE_IBR)
22106 if ((set = single_set (dep_insn)) != 0)
22108 set = SET_DEST (set);
22111 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
22112 && XVECLEN (PATTERN (dep_insn), 0) == 2
22113 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
22114 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
22116 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
22117       set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
22122 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
22125 /* This test is true if the dependent insn reads the flags but
22126 not any other potentially set register. */
22127 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
22130 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
22136 /* Return true iff USE_INSN has a memory address with operands set by SET_INSN.  */
22140 ix86_agi_dependent (rtx set_insn, rtx use_insn)
22143 extract_insn_cached (use_insn);
22144 for (i = recog_data.n_operands - 1; i >= 0; --i)
22145 if (MEM_P (recog_data.operand[i]))
22147 rtx addr = XEXP (recog_data.operand[i], 0);
22148 return modified_in_p (addr, set_insn) != 0;
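      /* Illustrative example (not part of the original source): with
	 "addl $8, %ebx" as SET_INSN and "movl (%ebx), %eax" as USE_INSN,
	 the load address uses %ebx, which SET_INSN modifies, so this
	 function returns true and e.g. the Pentium AGI stall below
	 applies.  */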
22154 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
22156 enum attr_type insn_type, dep_insn_type;
22157 enum attr_memory memory;
22159 int dep_insn_code_number;
22161 /* Anti and output dependencies have zero cost on all CPUs. */
22162 if (REG_NOTE_KIND (link) != 0)
22165 dep_insn_code_number = recog_memoized (dep_insn);
22167 /* If we can't recognize the insns, we can't really do anything. */
22168 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
22171 insn_type = get_attr_type (insn);
22172 dep_insn_type = get_attr_type (dep_insn);
22176 case PROCESSOR_PENTIUM:
22177 /* Address Generation Interlock adds a cycle of latency. */
22178 if (insn_type == TYPE_LEA)
22180 rtx addr = PATTERN (insn);
22182 if (GET_CODE (addr) == PARALLEL)
22183 addr = XVECEXP (addr, 0, 0);
22185 gcc_assert (GET_CODE (addr) == SET);
22187 addr = SET_SRC (addr);
22188 if (modified_in_p (addr, dep_insn))
22191 else if (ix86_agi_dependent (dep_insn, insn))
22194 /* ??? Compares pair with jump/setcc. */
22195 if (ix86_flags_dependent (insn, dep_insn, insn_type))
22198       /* Floating point stores require the value to be ready one cycle earlier.  */
22199 if (insn_type == TYPE_FMOV
22200 && get_attr_memory (insn) == MEMORY_STORE
22201 && !ix86_agi_dependent (dep_insn, insn))
22205 case PROCESSOR_PENTIUMPRO:
22206 memory = get_attr_memory (insn);
22208 /* INT->FP conversion is expensive. */
22209 if (get_attr_fp_int_src (dep_insn))
22212 /* There is one cycle extra latency between an FP op and a store. */
22213 if (insn_type == TYPE_FMOV
22214 && (set = single_set (dep_insn)) != NULL_RTX
22215 && (set2 = single_set (insn)) != NULL_RTX
22216 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
22217 && MEM_P (SET_DEST (set2)))
22220       /* Show the ability of the reorder buffer to hide the latency of a
22221	  load by executing it in parallel with the previous instruction,
22222	  when the previous instruction is not needed to compute the address.  */
22223 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
22224 && !ix86_agi_dependent (dep_insn, insn))
22226	  /* Claim moves to take one cycle, as the core can issue one load
22227	     at a time and the next load can start a cycle later.  */
22228 if (dep_insn_type == TYPE_IMOV
22229 || dep_insn_type == TYPE_FMOV)
22237 memory = get_attr_memory (insn);
22239       /* The esp dependency is resolved before the instruction is really finished.  */
22241 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
22242 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
22245 /* INT->FP conversion is expensive. */
22246 if (get_attr_fp_int_src (dep_insn))
22249       /* Show the ability of the reorder buffer to hide the latency of a
22250	  load by executing it in parallel with the previous instruction,
22251	  when the previous instruction is not needed to compute the address.  */
22252 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
22253 && !ix86_agi_dependent (dep_insn, insn))
22255	  /* Claim moves to take one cycle, as the core can issue one load
22256	     at a time and the next load can start a cycle later.  */
22257 if (dep_insn_type == TYPE_IMOV
22258 || dep_insn_type == TYPE_FMOV)
22267 case PROCESSOR_ATHLON:
22269 case PROCESSOR_AMDFAM10:
22270 case PROCESSOR_BDVER1:
22271 case PROCESSOR_BDVER2:
22272 case PROCESSOR_BTVER1:
22273 case PROCESSOR_ATOM:
22274 case PROCESSOR_GENERIC32:
22275 case PROCESSOR_GENERIC64:
22276 memory = get_attr_memory (insn);
22278       /* Show the ability of the reorder buffer to hide the latency of a
22279	  load by executing it in parallel with the previous instruction,
22280	  when the previous instruction is not needed to compute the address.  */
22281 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
22282 && !ix86_agi_dependent (dep_insn, insn))
22284 enum attr_unit unit = get_attr_unit (insn);
22287 /* Because of the difference between the length of integer and
22288 floating unit pipeline preparation stages, the memory operands
22289 for floating point are cheaper.
22291	     ??? For Athlon the difference is most probably 2.  */
22292 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
22295 loadcost = TARGET_ATHLON ? 2 : 0;
22297 if (cost >= loadcost)
22310 /* How many alternative schedules to try. This should be as wide as the
22311 scheduling freedom in the DFA, but no wider. Making this value too
22312    large results in extra work for the scheduler.  */
22315 ia32_multipass_dfa_lookahead (void)
22319 case PROCESSOR_PENTIUM:
22322 case PROCESSOR_PENTIUMPRO:
22326 case PROCESSOR_CORE2_32:
22327 case PROCESSOR_CORE2_64:
22328 case PROCESSOR_COREI7_32:
22329 case PROCESSOR_COREI7_64:
22330       /* Generally, we want haifa-sched:max_issue() to look ahead as far
22331	  as the number of instructions that can be executed in a cycle, i.e.,
22332 issue_rate. I wonder why tuning for many CPUs does not do this. */
22333 return ix86_issue_rate ();
22342 /* Model decoder of Core 2/i7.
22343 Below hooks for multipass scheduling (see haifa-sched.c:max_issue)
22344 track the instruction fetch block boundaries and make sure that long
22345 (9+ bytes) instructions are assigned to D0. */
22347 /* Maximum length of an insn that can be handled by
22348 a secondary decoder unit. '8' for Core 2/i7. */
22349 static int core2i7_secondary_decoder_max_insn_size;
22351 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
22352 '16' for Core 2/i7. */
22353 static int core2i7_ifetch_block_size;
22355 /* Maximum number of instructions decoder can handle per cycle.
22356 '6' for Core 2/i7. */
22357 static int core2i7_ifetch_block_max_insns;
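/* Illustrative example (not part of the original source): with a 16 byte
   ifetch block and a 6 insn decode limit, six 2 byte insns decode in one
   cycle, but a 9+ byte insn is too long for the secondary decoders
   (8 byte limit) and is only issued as the first insn of a cycle, i.e.,
   on decoder D0.  */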
22359 typedef struct ix86_first_cycle_multipass_data_ *
22360 ix86_first_cycle_multipass_data_t;
22361 typedef const struct ix86_first_cycle_multipass_data_ *
22362 const_ix86_first_cycle_multipass_data_t;
22364 /* A variable to store target state across calls to max_issue within one cycle.  */
22366 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
22367 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
22369 /* Initialize DATA. */
22371 core2i7_first_cycle_multipass_init (void *_data)
22373 ix86_first_cycle_multipass_data_t data
22374 = (ix86_first_cycle_multipass_data_t) _data;
22376 data->ifetch_block_len = 0;
22377 data->ifetch_block_n_insns = 0;
22378 data->ready_try_change = NULL;
22379 data->ready_try_change_size = 0;
22382 /* Advancing the cycle; reset ifetch block counts. */
22384 core2i7_dfa_post_advance_cycle (void)
22386 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
22388 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
22390 data->ifetch_block_len = 0;
22391 data->ifetch_block_n_insns = 0;
22394 static int min_insn_size (rtx);
22396 /* Filter out insns from ready_try that the core will not be able to issue
22397    on the current cycle due to decoder restrictions.  */
22399 core2i7_first_cycle_multipass_filter_ready_try
22400 (const_ix86_first_cycle_multipass_data_t data,
22401 char *ready_try, int n_ready, bool first_cycle_insn_p)
22408 if (ready_try[n_ready])
22411 insn = get_ready_element (n_ready);
22412 insn_size = min_insn_size (insn);
22414       if (/* If this insn is too long for a secondary decoder ... */
22415 (!first_cycle_insn_p
22416 && insn_size > core2i7_secondary_decoder_max_insn_size)
22417 /* ... or it would not fit into the ifetch block ... */
22418 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
22419 /* ... or the decoder is full already ... */
22420 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
22421 /* ... mask the insn out. */
22423 ready_try[n_ready] = 1;
22425 if (data->ready_try_change)
22426 SET_BIT (data->ready_try_change, n_ready);
22431 /* Prepare for a new round of multipass lookahead scheduling. */
22433 core2i7_first_cycle_multipass_begin (void *_data, char *ready_try, int n_ready,
22434 bool first_cycle_insn_p)
22436 ix86_first_cycle_multipass_data_t data
22437 = (ix86_first_cycle_multipass_data_t) _data;
22438 const_ix86_first_cycle_multipass_data_t prev_data
22439 = ix86_first_cycle_multipass_data;
22441 /* Restore the state from the end of the previous round. */
22442 data->ifetch_block_len = prev_data->ifetch_block_len;
22443 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
22445 /* Filter instructions that cannot be issued on current cycle due to
22446 decoder restrictions. */
22447 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
22448 first_cycle_insn_p);
22451 /* INSN is being issued in current solution. Account for its impact on
22452 the decoder model. */
22454 core2i7_first_cycle_multipass_issue (void *_data, char *ready_try, int n_ready,
22455 rtx insn, const void *_prev_data)
22457 ix86_first_cycle_multipass_data_t data
22458 = (ix86_first_cycle_multipass_data_t) _data;
22459 const_ix86_first_cycle_multipass_data_t prev_data
22460 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
22462 int insn_size = min_insn_size (insn);
22464 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
22465 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
22466 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
22467 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
22469 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
22470 if (!data->ready_try_change)
22472 data->ready_try_change = sbitmap_alloc (n_ready);
22473 data->ready_try_change_size = n_ready;
22475 else if (data->ready_try_change_size < n_ready)
22477 data->ready_try_change = sbitmap_resize (data->ready_try_change,
22479 data->ready_try_change_size = n_ready;
22481 sbitmap_zero (data->ready_try_change);
22483 /* Filter out insns from ready_try that the core will not be able to issue
22484      on the current cycle due to decoder restrictions.  */
22485 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
22489 /* Revert the effect on ready_try. */
22491 core2i7_first_cycle_multipass_backtrack (const void *_data,
22493 int n_ready ATTRIBUTE_UNUSED)
22495 const_ix86_first_cycle_multipass_data_t data
22496 = (const_ix86_first_cycle_multipass_data_t) _data;
22497 unsigned int i = 0;
22498 sbitmap_iterator sbi;
22500 gcc_assert (sbitmap_last_set_bit (data->ready_try_change) < n_ready);
22501 EXECUTE_IF_SET_IN_SBITMAP (data->ready_try_change, 0, i, sbi)
22507 /* Save the result of multipass lookahead scheduling for the next round. */
22509 core2i7_first_cycle_multipass_end (const void *_data)
22511 const_ix86_first_cycle_multipass_data_t data
22512 = (const_ix86_first_cycle_multipass_data_t) _data;
22513 ix86_first_cycle_multipass_data_t next_data
22514 = ix86_first_cycle_multipass_data;
22518 next_data->ifetch_block_len = data->ifetch_block_len;
22519 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
22523 /* Deallocate target data. */
22525 core2i7_first_cycle_multipass_fini (void *_data)
22527 ix86_first_cycle_multipass_data_t data
22528 = (ix86_first_cycle_multipass_data_t) _data;
22530 if (data->ready_try_change)
22532 sbitmap_free (data->ready_try_change);
22533 data->ready_try_change = NULL;
22534 data->ready_try_change_size = 0;
22538 /* Prepare for scheduling pass. */
22540 ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
22541 int verbose ATTRIBUTE_UNUSED,
22542 int max_uid ATTRIBUTE_UNUSED)
22544 /* Install scheduling hooks for current CPU. Some of these hooks are used
22545 in time-critical parts of the scheduler, so we only set them up when
22546 they are actually used. */
22549 case PROCESSOR_CORE2_32:
22550 case PROCESSOR_CORE2_64:
22551 case PROCESSOR_COREI7_32:
22552 case PROCESSOR_COREI7_64:
22553 targetm.sched.dfa_post_advance_cycle
22554 = core2i7_dfa_post_advance_cycle;
22555 targetm.sched.first_cycle_multipass_init
22556 = core2i7_first_cycle_multipass_init;
22557 targetm.sched.first_cycle_multipass_begin
22558 = core2i7_first_cycle_multipass_begin;
22559 targetm.sched.first_cycle_multipass_issue
22560 = core2i7_first_cycle_multipass_issue;
22561 targetm.sched.first_cycle_multipass_backtrack
22562 = core2i7_first_cycle_multipass_backtrack;
22563 targetm.sched.first_cycle_multipass_end
22564 = core2i7_first_cycle_multipass_end;
22565 targetm.sched.first_cycle_multipass_fini
22566 = core2i7_first_cycle_multipass_fini;
22568 /* Set decoder parameters. */
22569 core2i7_secondary_decoder_max_insn_size = 8;
22570 core2i7_ifetch_block_size = 16;
22571 core2i7_ifetch_block_max_insns = 6;
22575 targetm.sched.dfa_post_advance_cycle = NULL;
22576 targetm.sched.first_cycle_multipass_init = NULL;
22577 targetm.sched.first_cycle_multipass_begin = NULL;
22578 targetm.sched.first_cycle_multipass_issue = NULL;
22579 targetm.sched.first_cycle_multipass_backtrack = NULL;
22580 targetm.sched.first_cycle_multipass_end = NULL;
22581 targetm.sched.first_cycle_multipass_fini = NULL;
22587 /* Compute the alignment given to a constant that is being placed in memory.
22588    EXP is the constant and ALIGN is the alignment that the object would ordinarily have.
22590    The value of this function is used instead of that alignment to align the object.  */
22594 ix86_constant_alignment (tree exp, int align)
22596 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
22597 || TREE_CODE (exp) == INTEGER_CST)
22599 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
22601 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
22604 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
22605 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
22606 return BITS_PER_WORD;
22611 /* Compute the alignment for a static variable.
22612 TYPE is the data type, and ALIGN is the alignment that
22613 the object would ordinarily have. The value of this function is used
22614 instead of that alignment to align the object. */
22617 ix86_data_alignment (tree type, int align)
22619 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
22621 if (AGGREGATE_TYPE_P (type)
22622 && TYPE_SIZE (type)
22623 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
22624 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
22625 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
22626 && align < max_align)
22629 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
22630    to a 16-byte boundary.  */
22633 if (AGGREGATE_TYPE_P (type)
22634 && TYPE_SIZE (type)
22635 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
22636 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
22637 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
22641 if (TREE_CODE (type) == ARRAY_TYPE)
22643 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
22645 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
22648 else if (TREE_CODE (type) == COMPLEX_TYPE)
22651 if (TYPE_MODE (type) == DCmode && align < 64)
22653 if ((TYPE_MODE (type) == XCmode
22654 || TYPE_MODE (type) == TCmode) && align < 128)
22657 else if ((TREE_CODE (type) == RECORD_TYPE
22658 || TREE_CODE (type) == UNION_TYPE
22659 || TREE_CODE (type) == QUAL_UNION_TYPE)
22660 && TYPE_FIELDS (type))
22662 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
22664 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
22667 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
22668 || TREE_CODE (type) == INTEGER_TYPE)
22670 if (TYPE_MODE (type) == DFmode && align < 64)
22672 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
22679 /* Compute the alignment for a local variable or a stack slot. EXP is
22680 the data type or decl itself, MODE is the widest mode available and
22681 ALIGN is the alignment that the object would ordinarily have. The
22682    value of this macro is used instead of that alignment to align the object.  */
22686 ix86_local_alignment (tree exp, enum machine_mode mode,
22687 unsigned int align)
22691 if (exp && DECL_P (exp))
22693 type = TREE_TYPE (exp);
22702 /* Don't do dynamic stack realignment for long long objects with
22703 -mpreferred-stack-boundary=2. */
22706 && ix86_preferred_stack_boundary < 64
22707 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
22708 && (!type || !TYPE_USER_ALIGN (type))
22709 && (!decl || !DECL_USER_ALIGN (decl)))
22712 /* If TYPE is NULL, we are allocating a stack slot for caller-save
22713      register in MODE.  We will return the largest alignment of XF and DF.  */
22717 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
22718 align = GET_MODE_ALIGNMENT (DFmode);
22722 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
22723      to a 16-byte boundary.  The exact wording is:
22725 An array uses the same alignment as its elements, except that a local or
22726 global array variable of length at least 16 bytes or
22727 a C99 variable-length array variable always has alignment of at least 16 bytes.
22729      This was added to allow use of aligned SSE instructions on arrays.  This
22730      rule is meant for static storage (where the compiler cannot do the analysis
22731      by itself).  We follow it for automatic variables only when convenient.
22732      We fully control everything in the function being compiled, and functions
22733      from other units cannot rely on the alignment.

22735      Exclude the va_list type.  It is the common case of a local array where
22736      we cannot benefit from the alignment.  */
22737 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
22740 if (AGGREGATE_TYPE_P (type)
22741 && (va_list_type_node == NULL_TREE
22742 || (TYPE_MAIN_VARIANT (type)
22743 != TYPE_MAIN_VARIANT (va_list_type_node)))
22744 && TYPE_SIZE (type)
22745 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
22746 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
22747 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
22750 if (TREE_CODE (type) == ARRAY_TYPE)
22752 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
22754 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
22757 else if (TREE_CODE (type) == COMPLEX_TYPE)
22759 if (TYPE_MODE (type) == DCmode && align < 64)
22761 if ((TYPE_MODE (type) == XCmode
22762 || TYPE_MODE (type) == TCmode) && align < 128)
22765 else if ((TREE_CODE (type) == RECORD_TYPE
22766 || TREE_CODE (type) == UNION_TYPE
22767 || TREE_CODE (type) == QUAL_UNION_TYPE)
22768 && TYPE_FIELDS (type))
22770 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
22772 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
22775 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
22776 || TREE_CODE (type) == INTEGER_TYPE)
22779 if (TYPE_MODE (type) == DFmode && align < 64)
22781 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
22787 /* Compute the minimum required alignment for dynamic stack realignment
22788 purposes for a local variable, parameter or a stack slot. EXP is
22789 the data type or decl itself, MODE is its mode and ALIGN is the
22790 alignment that the object would ordinarily have. */
22793 ix86_minimum_alignment (tree exp, enum machine_mode mode,
22794 unsigned int align)
22798 if (exp && DECL_P (exp))
22800 type = TREE_TYPE (exp);
22809 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
22812 /* Don't do dynamic stack realignment for long long objects with
22813 -mpreferred-stack-boundary=2. */
22814 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
22815 && (!type || !TYPE_USER_ALIGN (type))
22816 && (!decl || !DECL_USER_ALIGN (decl)))
22822 /* Find a location for the static chain incoming to a nested function.
22823 This is a register, unless all free registers are used by arguments. */
22826 ix86_static_chain (const_tree fndecl, bool incoming_p)
22830 if (!DECL_STATIC_CHAIN (fndecl))
22835 /* We always use R10 in 64-bit mode. */
22843 /* By default in 32-bit mode we use ECX to pass the static chain. */
22846 fntype = TREE_TYPE (fndecl);
22847 ccvt = ix86_get_callcvt (fntype);
22848 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) != 0)
22850 /* Fastcall functions use ecx/edx for arguments, which leaves
22851 us with EAX for the static chain.
22852 Thiscall functions use ecx for arguments, which also
22853 leaves us with EAX for the static chain. */
22856 else if (ix86_function_regparm (fntype, fndecl) == 3)
22858 /* For regparm 3, we have no free call-clobbered registers in
22859 which to store the static chain. In order to implement this,
22860 we have the trampoline push the static chain to the stack.
22861 However, we can't push a value below the return address when
22862 we call the nested function directly, so we have to use an
22863 alternate entry point. For this we use ESI, and have the
22864 alternate entry point push ESI, so that things appear the
22865 same once we're executing the nested function. */
22868 if (fndecl == current_function_decl)
22869 ix86_static_chain_on_stack = true;
22870 return gen_frame_mem (SImode,
22871 plus_constant (arg_pointer_rtx, -8));
22877 return gen_rtx_REG (Pmode, regno);
22880 /* Emit RTL insns to initialize the variable parts of a trampoline.
22881 FNDECL is the decl of the target address; M_TRAMP is a MEM for
22882 the trampoline, and CHAIN_VALUE is an RTX for the static chain
22883 to be passed to the target function. */
22886 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
22892 fnaddr = XEXP (DECL_RTL (fndecl), 0);
22898 /* Load the function address to r11. Try to load address using
22899 the shorter movl instead of movabs. We may want to support
22900	 movq for kernel mode, but the kernel does not use trampolines at the moment.  */
22902 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
22904 fnaddr = copy_to_mode_reg (DImode, fnaddr);
22906 mem = adjust_address (m_tramp, HImode, offset);
22907 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
22909 mem = adjust_address (m_tramp, SImode, offset + 2);
22910 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
22915 mem = adjust_address (m_tramp, HImode, offset);
22916 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
22918 mem = adjust_address (m_tramp, DImode, offset + 2);
22919 emit_move_insn (mem, fnaddr);
22923 /* Load static chain using movabs to r10. Use the
22924 shorter movl instead of movabs for x32. */
22936 mem = adjust_address (m_tramp, HImode, offset);
22937 emit_move_insn (mem, gen_int_mode (opcode, HImode));
22939 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
22940 emit_move_insn (mem, chain_value);
22943 /* Jump to r11; the last (unused) byte is a nop, only there to
22944 pad the write out to a single 32-bit store. */
22945 mem = adjust_address (m_tramp, SImode, offset);
22946 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
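	  /* For reference (an illustrative reconstruction from the stores
	     above, not part of the original source), the 64-bit trampoline
	     ends up as:
	       49 bb <imm64>  movabs $fnaddr, %r11  (or 41 bb <imm32> movl)
	       49 ba <imm64>  movabs $chain, %r10   (or the shorter movl)
	       49 ff e3       jmp *%r11
	       90             nop (padding from the single 32-bit store)  */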
22953 /* Depending on the static chain location, either load a register
22954 with a constant, or push the constant to the stack. All of the
22955 instructions are the same size. */
22956 chain = ix86_static_chain (fndecl, true);
22959 switch (REGNO (chain))
22962 opcode = 0xb8; break;
22964 opcode = 0xb9; break;
22966 gcc_unreachable ();
22972 mem = adjust_address (m_tramp, QImode, offset);
22973 emit_move_insn (mem, gen_int_mode (opcode, QImode));
22975 mem = adjust_address (m_tramp, SImode, offset + 1);
22976 emit_move_insn (mem, chain_value);
22979 mem = adjust_address (m_tramp, QImode, offset);
22980 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
22982 mem = adjust_address (m_tramp, SImode, offset + 1);
22984 /* Compute offset from the end of the jmp to the target function.
22985 In the case in which the trampoline stores the static chain on
22986 the stack, we need to skip the first insn which pushes the
22987 (call-saved) register static chain; this push is 1 byte. */
22989 disp = expand_binop (SImode, sub_optab, fnaddr,
22990 plus_constant (XEXP (m_tramp, 0),
22991 offset - (MEM_P (chain) ? 1 : 0)),
22992 NULL_RTX, 1, OPTAB_DIRECT);
22993 emit_move_insn (mem, disp);
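      /* For reference (an illustrative reconstruction, not part of the
	 original source), the 32-bit trampoline is:
	   b8/b9 <imm32>  movl $chain, %eax/%ecx  (or a same-sized push of
						   the constant when CHAIN
						   is a MEM)
	   e9 <rel32>     jmp to the target function
	 with <rel32> aimed one byte past the function start when the
	 trampoline itself pushed the static chain, so the alternate
	 entry's one-byte push runs only on direct calls.  */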
22996 gcc_assert (offset <= TRAMPOLINE_SIZE);
22998 #ifdef HAVE_ENABLE_EXECUTE_STACK
22999 #ifdef CHECK_EXECUTE_STACK_ENABLED
23000 if (CHECK_EXECUTE_STACK_ENABLED)
23002 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
23003 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
23007 /* The following file contains several enumerations and data structures
23008 built from the definitions in i386-builtin-types.def. */
23010 #include "i386-builtin-types.inc"
23012 /* Table for the ix86 builtin non-function types. */
23013 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
23015 /* Retrieve an element from the above table, building some of
23016 the types lazily. */
23019 ix86_get_builtin_type (enum ix86_builtin_type tcode)
23021 unsigned int index;
23024 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
23026 type = ix86_builtin_type_tab[(int) tcode];
23030 gcc_assert (tcode > IX86_BT_LAST_PRIM);
23031 if (tcode <= IX86_BT_LAST_VECT)
23033 enum machine_mode mode;
23035 index = tcode - IX86_BT_LAST_PRIM - 1;
23036 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
23037 mode = ix86_builtin_type_vect_mode[index];
23039 type = build_vector_type_for_mode (itype, mode);
23045 index = tcode - IX86_BT_LAST_VECT - 1;
23046 if (tcode <= IX86_BT_LAST_PTR)
23047 quals = TYPE_UNQUALIFIED;
23049 quals = TYPE_QUAL_CONST;
23051 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
23052 if (quals != TYPE_UNQUALIFIED)
23053 itype = build_qualified_type (itype, quals);
23055 type = build_pointer_type (itype);
23058 ix86_builtin_type_tab[(int) tcode] = type;
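/* Illustrative example (not part of the original source): the first call
   to ix86_get_builtin_type (IX86_BT_V4SF) builds the V4SFmode vector
   type from its element type and caches it in ix86_builtin_type_tab;
   subsequent calls return the cached tree.  (IX86_BT_V4SF is one of the
   enumerators generated into i386-builtin-types.inc.)  */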
23062 /* Table for the ix86 builtin function types. */
23063 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
23065 /* Retrieve an element from the above table, building some of
23066 the types lazily. */
23069 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
23073 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
23075 type = ix86_builtin_func_type_tab[(int) tcode];
23079 if (tcode <= IX86_BT_LAST_FUNC)
23081 unsigned start = ix86_builtin_func_start[(int) tcode];
23082 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
23083 tree rtype, atype, args = void_list_node;
23086 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
23087 for (i = after - 1; i > start; --i)
23089 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
23090 args = tree_cons (NULL, atype, args);
23093 type = build_function_type (rtype, args);
23097 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
23098 enum ix86_builtin_func_type icode;
23100 icode = ix86_builtin_func_alias_base[index];
23101 type = ix86_get_builtin_func_type (icode);
23104 ix86_builtin_func_type_tab[(int) tcode] = type;
23109 /* Codes for all the SSE/MMX builtins. */
23112 IX86_BUILTIN_ADDPS,
23113 IX86_BUILTIN_ADDSS,
23114 IX86_BUILTIN_DIVPS,
23115 IX86_BUILTIN_DIVSS,
23116 IX86_BUILTIN_MULPS,
23117 IX86_BUILTIN_MULSS,
23118 IX86_BUILTIN_SUBPS,
23119 IX86_BUILTIN_SUBSS,
23121 IX86_BUILTIN_CMPEQPS,
23122 IX86_BUILTIN_CMPLTPS,
23123 IX86_BUILTIN_CMPLEPS,
23124 IX86_BUILTIN_CMPGTPS,
23125 IX86_BUILTIN_CMPGEPS,
23126 IX86_BUILTIN_CMPNEQPS,
23127 IX86_BUILTIN_CMPNLTPS,
23128 IX86_BUILTIN_CMPNLEPS,
23129 IX86_BUILTIN_CMPNGTPS,
23130 IX86_BUILTIN_CMPNGEPS,
23131 IX86_BUILTIN_CMPORDPS,
23132 IX86_BUILTIN_CMPUNORDPS,
23133 IX86_BUILTIN_CMPEQSS,
23134 IX86_BUILTIN_CMPLTSS,
23135 IX86_BUILTIN_CMPLESS,
23136 IX86_BUILTIN_CMPNEQSS,
23137 IX86_BUILTIN_CMPNLTSS,
23138 IX86_BUILTIN_CMPNLESS,
23139 IX86_BUILTIN_CMPNGTSS,
23140 IX86_BUILTIN_CMPNGESS,
23141 IX86_BUILTIN_CMPORDSS,
23142 IX86_BUILTIN_CMPUNORDSS,
23144 IX86_BUILTIN_COMIEQSS,
23145 IX86_BUILTIN_COMILTSS,
23146 IX86_BUILTIN_COMILESS,
23147 IX86_BUILTIN_COMIGTSS,
23148 IX86_BUILTIN_COMIGESS,
23149 IX86_BUILTIN_COMINEQSS,
23150 IX86_BUILTIN_UCOMIEQSS,
23151 IX86_BUILTIN_UCOMILTSS,
23152 IX86_BUILTIN_UCOMILESS,
23153 IX86_BUILTIN_UCOMIGTSS,
23154 IX86_BUILTIN_UCOMIGESS,
23155 IX86_BUILTIN_UCOMINEQSS,
23157 IX86_BUILTIN_CVTPI2PS,
23158 IX86_BUILTIN_CVTPS2PI,
23159 IX86_BUILTIN_CVTSI2SS,
23160 IX86_BUILTIN_CVTSI642SS,
23161 IX86_BUILTIN_CVTSS2SI,
23162 IX86_BUILTIN_CVTSS2SI64,
23163 IX86_BUILTIN_CVTTPS2PI,
23164 IX86_BUILTIN_CVTTSS2SI,
23165 IX86_BUILTIN_CVTTSS2SI64,
23167 IX86_BUILTIN_MAXPS,
23168 IX86_BUILTIN_MAXSS,
23169 IX86_BUILTIN_MINPS,
23170 IX86_BUILTIN_MINSS,
23172 IX86_BUILTIN_LOADUPS,
23173 IX86_BUILTIN_STOREUPS,
23174 IX86_BUILTIN_MOVSS,
23176 IX86_BUILTIN_MOVHLPS,
23177 IX86_BUILTIN_MOVLHPS,
23178 IX86_BUILTIN_LOADHPS,
23179 IX86_BUILTIN_LOADLPS,
23180 IX86_BUILTIN_STOREHPS,
23181 IX86_BUILTIN_STORELPS,
23183 IX86_BUILTIN_MASKMOVQ,
23184 IX86_BUILTIN_MOVMSKPS,
23185 IX86_BUILTIN_PMOVMSKB,
23187 IX86_BUILTIN_MOVNTPS,
23188 IX86_BUILTIN_MOVNTQ,
23190 IX86_BUILTIN_LOADDQU,
23191 IX86_BUILTIN_STOREDQU,
23193 IX86_BUILTIN_PACKSSWB,
23194 IX86_BUILTIN_PACKSSDW,
23195 IX86_BUILTIN_PACKUSWB,
23197 IX86_BUILTIN_PADDB,
23198 IX86_BUILTIN_PADDW,
23199 IX86_BUILTIN_PADDD,
23200 IX86_BUILTIN_PADDQ,
23201 IX86_BUILTIN_PADDSB,
23202 IX86_BUILTIN_PADDSW,
23203 IX86_BUILTIN_PADDUSB,
23204 IX86_BUILTIN_PADDUSW,
23205 IX86_BUILTIN_PSUBB,
23206 IX86_BUILTIN_PSUBW,
23207 IX86_BUILTIN_PSUBD,
23208 IX86_BUILTIN_PSUBQ,
23209 IX86_BUILTIN_PSUBSB,
23210 IX86_BUILTIN_PSUBSW,
23211 IX86_BUILTIN_PSUBUSB,
23212 IX86_BUILTIN_PSUBUSW,
23215 IX86_BUILTIN_PANDN,
23219 IX86_BUILTIN_PAVGB,
23220 IX86_BUILTIN_PAVGW,
23222 IX86_BUILTIN_PCMPEQB,
23223 IX86_BUILTIN_PCMPEQW,
23224 IX86_BUILTIN_PCMPEQD,
23225 IX86_BUILTIN_PCMPGTB,
23226 IX86_BUILTIN_PCMPGTW,
23227 IX86_BUILTIN_PCMPGTD,
23229 IX86_BUILTIN_PMADDWD,
23231 IX86_BUILTIN_PMAXSW,
23232 IX86_BUILTIN_PMAXUB,
23233 IX86_BUILTIN_PMINSW,
23234 IX86_BUILTIN_PMINUB,
23236 IX86_BUILTIN_PMULHUW,
23237 IX86_BUILTIN_PMULHW,
23238 IX86_BUILTIN_PMULLW,
23240 IX86_BUILTIN_PSADBW,
23241 IX86_BUILTIN_PSHUFW,
23243 IX86_BUILTIN_PSLLW,
23244 IX86_BUILTIN_PSLLD,
23245 IX86_BUILTIN_PSLLQ,
23246 IX86_BUILTIN_PSRAW,
23247 IX86_BUILTIN_PSRAD,
23248 IX86_BUILTIN_PSRLW,
23249 IX86_BUILTIN_PSRLD,
23250 IX86_BUILTIN_PSRLQ,
23251 IX86_BUILTIN_PSLLWI,
23252 IX86_BUILTIN_PSLLDI,
23253 IX86_BUILTIN_PSLLQI,
23254 IX86_BUILTIN_PSRAWI,
23255 IX86_BUILTIN_PSRADI,
23256 IX86_BUILTIN_PSRLWI,
23257 IX86_BUILTIN_PSRLDI,
23258 IX86_BUILTIN_PSRLQI,
23260 IX86_BUILTIN_PUNPCKHBW,
23261 IX86_BUILTIN_PUNPCKHWD,
23262 IX86_BUILTIN_PUNPCKHDQ,
23263 IX86_BUILTIN_PUNPCKLBW,
23264 IX86_BUILTIN_PUNPCKLWD,
23265 IX86_BUILTIN_PUNPCKLDQ,
23267 IX86_BUILTIN_SHUFPS,
23269 IX86_BUILTIN_RCPPS,
23270 IX86_BUILTIN_RCPSS,
23271 IX86_BUILTIN_RSQRTPS,
23272 IX86_BUILTIN_RSQRTPS_NR,
23273 IX86_BUILTIN_RSQRTSS,
23274 IX86_BUILTIN_RSQRTF,
23275 IX86_BUILTIN_SQRTPS,
23276 IX86_BUILTIN_SQRTPS_NR,
23277 IX86_BUILTIN_SQRTSS,
23279 IX86_BUILTIN_UNPCKHPS,
23280 IX86_BUILTIN_UNPCKLPS,
23282 IX86_BUILTIN_ANDPS,
23283 IX86_BUILTIN_ANDNPS,
23285 IX86_BUILTIN_XORPS,
23288 IX86_BUILTIN_LDMXCSR,
23289 IX86_BUILTIN_STMXCSR,
23290 IX86_BUILTIN_SFENCE,
23292 /* 3DNow! Original */
23293 IX86_BUILTIN_FEMMS,
23294 IX86_BUILTIN_PAVGUSB,
23295 IX86_BUILTIN_PF2ID,
23296 IX86_BUILTIN_PFACC,
23297 IX86_BUILTIN_PFADD,
23298 IX86_BUILTIN_PFCMPEQ,
23299 IX86_BUILTIN_PFCMPGE,
23300 IX86_BUILTIN_PFCMPGT,
23301 IX86_BUILTIN_PFMAX,
23302 IX86_BUILTIN_PFMIN,
23303 IX86_BUILTIN_PFMUL,
23304 IX86_BUILTIN_PFRCP,
23305 IX86_BUILTIN_PFRCPIT1,
23306 IX86_BUILTIN_PFRCPIT2,
23307 IX86_BUILTIN_PFRSQIT1,
23308 IX86_BUILTIN_PFRSQRT,
23309 IX86_BUILTIN_PFSUB,
23310 IX86_BUILTIN_PFSUBR,
23311 IX86_BUILTIN_PI2FD,
23312 IX86_BUILTIN_PMULHRW,
23314 /* 3DNow! Athlon Extensions */
23315 IX86_BUILTIN_PF2IW,
23316 IX86_BUILTIN_PFNACC,
23317 IX86_BUILTIN_PFPNACC,
23318 IX86_BUILTIN_PI2FW,
23319 IX86_BUILTIN_PSWAPDSI,
23320 IX86_BUILTIN_PSWAPDSF,
23323 IX86_BUILTIN_ADDPD,
23324 IX86_BUILTIN_ADDSD,
23325 IX86_BUILTIN_DIVPD,
23326 IX86_BUILTIN_DIVSD,
23327 IX86_BUILTIN_MULPD,
23328 IX86_BUILTIN_MULSD,
23329 IX86_BUILTIN_SUBPD,
23330 IX86_BUILTIN_SUBSD,
23332 IX86_BUILTIN_CMPEQPD,
23333 IX86_BUILTIN_CMPLTPD,
23334 IX86_BUILTIN_CMPLEPD,
23335 IX86_BUILTIN_CMPGTPD,
23336 IX86_BUILTIN_CMPGEPD,
23337 IX86_BUILTIN_CMPNEQPD,
23338 IX86_BUILTIN_CMPNLTPD,
23339 IX86_BUILTIN_CMPNLEPD,
23340 IX86_BUILTIN_CMPNGTPD,
23341 IX86_BUILTIN_CMPNGEPD,
23342 IX86_BUILTIN_CMPORDPD,
23343 IX86_BUILTIN_CMPUNORDPD,
23344 IX86_BUILTIN_CMPEQSD,
23345 IX86_BUILTIN_CMPLTSD,
23346 IX86_BUILTIN_CMPLESD,
23347 IX86_BUILTIN_CMPNEQSD,
23348 IX86_BUILTIN_CMPNLTSD,
23349 IX86_BUILTIN_CMPNLESD,
23350 IX86_BUILTIN_CMPORDSD,
23351 IX86_BUILTIN_CMPUNORDSD,
23353 IX86_BUILTIN_COMIEQSD,
23354 IX86_BUILTIN_COMILTSD,
23355 IX86_BUILTIN_COMILESD,
23356 IX86_BUILTIN_COMIGTSD,
23357 IX86_BUILTIN_COMIGESD,
23358 IX86_BUILTIN_COMINEQSD,
23359 IX86_BUILTIN_UCOMIEQSD,
23360 IX86_BUILTIN_UCOMILTSD,
23361 IX86_BUILTIN_UCOMILESD,
23362 IX86_BUILTIN_UCOMIGTSD,
23363 IX86_BUILTIN_UCOMIGESD,
23364 IX86_BUILTIN_UCOMINEQSD,
23366 IX86_BUILTIN_MAXPD,
23367 IX86_BUILTIN_MAXSD,
23368 IX86_BUILTIN_MINPD,
23369 IX86_BUILTIN_MINSD,
23371 IX86_BUILTIN_ANDPD,
23372 IX86_BUILTIN_ANDNPD,
23374 IX86_BUILTIN_XORPD,
23376 IX86_BUILTIN_SQRTPD,
23377 IX86_BUILTIN_SQRTSD,
23379 IX86_BUILTIN_UNPCKHPD,
23380 IX86_BUILTIN_UNPCKLPD,
23382 IX86_BUILTIN_SHUFPD,
23384 IX86_BUILTIN_LOADUPD,
23385 IX86_BUILTIN_STOREUPD,
23386 IX86_BUILTIN_MOVSD,
23388 IX86_BUILTIN_LOADHPD,
23389 IX86_BUILTIN_LOADLPD,
23391 IX86_BUILTIN_CVTDQ2PD,
23392 IX86_BUILTIN_CVTDQ2PS,
23394 IX86_BUILTIN_CVTPD2DQ,
23395 IX86_BUILTIN_CVTPD2PI,
23396 IX86_BUILTIN_CVTPD2PS,
23397 IX86_BUILTIN_CVTTPD2DQ,
23398 IX86_BUILTIN_CVTTPD2PI,
23400 IX86_BUILTIN_CVTPI2PD,
23401 IX86_BUILTIN_CVTSI2SD,
23402 IX86_BUILTIN_CVTSI642SD,
23404 IX86_BUILTIN_CVTSD2SI,
23405 IX86_BUILTIN_CVTSD2SI64,
23406 IX86_BUILTIN_CVTSD2SS,
23407 IX86_BUILTIN_CVTSS2SD,
23408 IX86_BUILTIN_CVTTSD2SI,
23409 IX86_BUILTIN_CVTTSD2SI64,
23411 IX86_BUILTIN_CVTPS2DQ,
23412 IX86_BUILTIN_CVTPS2PD,
23413 IX86_BUILTIN_CVTTPS2DQ,
23415 IX86_BUILTIN_MOVNTI,
23416 IX86_BUILTIN_MOVNTPD,
23417 IX86_BUILTIN_MOVNTDQ,
23419 IX86_BUILTIN_MOVQ128,
23422 IX86_BUILTIN_MASKMOVDQU,
23423 IX86_BUILTIN_MOVMSKPD,
23424 IX86_BUILTIN_PMOVMSKB128,
23426 IX86_BUILTIN_PACKSSWB128,
23427 IX86_BUILTIN_PACKSSDW128,
23428 IX86_BUILTIN_PACKUSWB128,
23430 IX86_BUILTIN_PADDB128,
23431 IX86_BUILTIN_PADDW128,
23432 IX86_BUILTIN_PADDD128,
23433 IX86_BUILTIN_PADDQ128,
23434 IX86_BUILTIN_PADDSB128,
23435 IX86_BUILTIN_PADDSW128,
23436 IX86_BUILTIN_PADDUSB128,
23437 IX86_BUILTIN_PADDUSW128,
23438 IX86_BUILTIN_PSUBB128,
23439 IX86_BUILTIN_PSUBW128,
23440 IX86_BUILTIN_PSUBD128,
23441 IX86_BUILTIN_PSUBQ128,
23442 IX86_BUILTIN_PSUBSB128,
23443 IX86_BUILTIN_PSUBSW128,
23444 IX86_BUILTIN_PSUBUSB128,
23445 IX86_BUILTIN_PSUBUSW128,
23447 IX86_BUILTIN_PAND128,
23448 IX86_BUILTIN_PANDN128,
23449 IX86_BUILTIN_POR128,
23450 IX86_BUILTIN_PXOR128,
23452 IX86_BUILTIN_PAVGB128,
23453 IX86_BUILTIN_PAVGW128,
23455 IX86_BUILTIN_PCMPEQB128,
23456 IX86_BUILTIN_PCMPEQW128,
23457 IX86_BUILTIN_PCMPEQD128,
23458 IX86_BUILTIN_PCMPGTB128,
23459 IX86_BUILTIN_PCMPGTW128,
23460 IX86_BUILTIN_PCMPGTD128,
23462 IX86_BUILTIN_PMADDWD128,
23464 IX86_BUILTIN_PMAXSW128,
23465 IX86_BUILTIN_PMAXUB128,
23466 IX86_BUILTIN_PMINSW128,
23467 IX86_BUILTIN_PMINUB128,
23469 IX86_BUILTIN_PMULUDQ,
23470 IX86_BUILTIN_PMULUDQ128,
23471 IX86_BUILTIN_PMULHUW128,
23472 IX86_BUILTIN_PMULHW128,
23473 IX86_BUILTIN_PMULLW128,
23475 IX86_BUILTIN_PSADBW128,
23476 IX86_BUILTIN_PSHUFHW,
23477 IX86_BUILTIN_PSHUFLW,
23478 IX86_BUILTIN_PSHUFD,
23480 IX86_BUILTIN_PSLLDQI128,
23481 IX86_BUILTIN_PSLLWI128,
23482 IX86_BUILTIN_PSLLDI128,
23483 IX86_BUILTIN_PSLLQI128,
23484 IX86_BUILTIN_PSRAWI128,
23485 IX86_BUILTIN_PSRADI128,
23486 IX86_BUILTIN_PSRLDQI128,
23487 IX86_BUILTIN_PSRLWI128,
23488 IX86_BUILTIN_PSRLDI128,
23489 IX86_BUILTIN_PSRLQI128,
23491 IX86_BUILTIN_PSLLDQ128,
23492 IX86_BUILTIN_PSLLW128,
23493 IX86_BUILTIN_PSLLD128,
23494 IX86_BUILTIN_PSLLQ128,
23495 IX86_BUILTIN_PSRAW128,
23496 IX86_BUILTIN_PSRAD128,
23497 IX86_BUILTIN_PSRLW128,
23498 IX86_BUILTIN_PSRLD128,
23499 IX86_BUILTIN_PSRLQ128,
23501 IX86_BUILTIN_PUNPCKHBW128,
23502 IX86_BUILTIN_PUNPCKHWD128,
23503 IX86_BUILTIN_PUNPCKHDQ128,
23504 IX86_BUILTIN_PUNPCKHQDQ128,
23505 IX86_BUILTIN_PUNPCKLBW128,
23506 IX86_BUILTIN_PUNPCKLWD128,
23507 IX86_BUILTIN_PUNPCKLDQ128,
23508 IX86_BUILTIN_PUNPCKLQDQ128,
23510 IX86_BUILTIN_CLFLUSH,
23511 IX86_BUILTIN_MFENCE,
23512 IX86_BUILTIN_LFENCE,
23513 IX86_BUILTIN_PAUSE,
23515 IX86_BUILTIN_BSRSI,
23516 IX86_BUILTIN_BSRDI,
23517 IX86_BUILTIN_RDPMC,
23518 IX86_BUILTIN_RDTSC,
23519 IX86_BUILTIN_RDTSCP,
23520 IX86_BUILTIN_ROLQI,
23521 IX86_BUILTIN_ROLHI,
23522 IX86_BUILTIN_RORQI,
23523 IX86_BUILTIN_RORHI,
23526 IX86_BUILTIN_ADDSUBPS,
23527 IX86_BUILTIN_HADDPS,
23528 IX86_BUILTIN_HSUBPS,
23529 IX86_BUILTIN_MOVSHDUP,
23530 IX86_BUILTIN_MOVSLDUP,
23531 IX86_BUILTIN_ADDSUBPD,
23532 IX86_BUILTIN_HADDPD,
23533 IX86_BUILTIN_HSUBPD,
23534 IX86_BUILTIN_LDDQU,
23536 IX86_BUILTIN_MONITOR,
23537 IX86_BUILTIN_MWAIT,
23540 IX86_BUILTIN_PHADDW,
23541 IX86_BUILTIN_PHADDD,
23542 IX86_BUILTIN_PHADDSW,
23543 IX86_BUILTIN_PHSUBW,
23544 IX86_BUILTIN_PHSUBD,
23545 IX86_BUILTIN_PHSUBSW,
23546 IX86_BUILTIN_PMADDUBSW,
23547 IX86_BUILTIN_PMULHRSW,
23548 IX86_BUILTIN_PSHUFB,
23549 IX86_BUILTIN_PSIGNB,
23550 IX86_BUILTIN_PSIGNW,
23551 IX86_BUILTIN_PSIGND,
23552 IX86_BUILTIN_PALIGNR,
23553 IX86_BUILTIN_PABSB,
23554 IX86_BUILTIN_PABSW,
23555 IX86_BUILTIN_PABSD,
23557 IX86_BUILTIN_PHADDW128,
23558 IX86_BUILTIN_PHADDD128,
23559 IX86_BUILTIN_PHADDSW128,
23560 IX86_BUILTIN_PHSUBW128,
23561 IX86_BUILTIN_PHSUBD128,
23562 IX86_BUILTIN_PHSUBSW128,
23563 IX86_BUILTIN_PMADDUBSW128,
23564 IX86_BUILTIN_PMULHRSW128,
23565 IX86_BUILTIN_PSHUFB128,
23566 IX86_BUILTIN_PSIGNB128,
23567 IX86_BUILTIN_PSIGNW128,
23568 IX86_BUILTIN_PSIGND128,
23569 IX86_BUILTIN_PALIGNR128,
23570 IX86_BUILTIN_PABSB128,
23571 IX86_BUILTIN_PABSW128,
23572 IX86_BUILTIN_PABSD128,
23574 /* AMDFAM10 - SSE4A New Instructions. */
23575 IX86_BUILTIN_MOVNTSD,
23576 IX86_BUILTIN_MOVNTSS,
23577 IX86_BUILTIN_EXTRQI,
23578 IX86_BUILTIN_EXTRQ,
23579 IX86_BUILTIN_INSERTQI,
23580 IX86_BUILTIN_INSERTQ,
23583 IX86_BUILTIN_BLENDPD,
23584 IX86_BUILTIN_BLENDPS,
23585 IX86_BUILTIN_BLENDVPD,
23586 IX86_BUILTIN_BLENDVPS,
23587 IX86_BUILTIN_PBLENDVB128,
23588 IX86_BUILTIN_PBLENDW128,
23593 IX86_BUILTIN_INSERTPS128,
23595 IX86_BUILTIN_MOVNTDQA,
23596 IX86_BUILTIN_MPSADBW128,
23597 IX86_BUILTIN_PACKUSDW128,
23598 IX86_BUILTIN_PCMPEQQ,
23599 IX86_BUILTIN_PHMINPOSUW128,
23601 IX86_BUILTIN_PMAXSB128,
23602 IX86_BUILTIN_PMAXSD128,
23603 IX86_BUILTIN_PMAXUD128,
23604 IX86_BUILTIN_PMAXUW128,
23606 IX86_BUILTIN_PMINSB128,
23607 IX86_BUILTIN_PMINSD128,
23608 IX86_BUILTIN_PMINUD128,
23609 IX86_BUILTIN_PMINUW128,
23611 IX86_BUILTIN_PMOVSXBW128,
23612 IX86_BUILTIN_PMOVSXBD128,
23613 IX86_BUILTIN_PMOVSXBQ128,
23614 IX86_BUILTIN_PMOVSXWD128,
23615 IX86_BUILTIN_PMOVSXWQ128,
23616 IX86_BUILTIN_PMOVSXDQ128,
23618 IX86_BUILTIN_PMOVZXBW128,
23619 IX86_BUILTIN_PMOVZXBD128,
23620 IX86_BUILTIN_PMOVZXBQ128,
23621 IX86_BUILTIN_PMOVZXWD128,
23622 IX86_BUILTIN_PMOVZXWQ128,
23623 IX86_BUILTIN_PMOVZXDQ128,
23625 IX86_BUILTIN_PMULDQ128,
23626 IX86_BUILTIN_PMULLD128,
23628 IX86_BUILTIN_ROUNDPD,
23629 IX86_BUILTIN_ROUNDPS,
23630 IX86_BUILTIN_ROUNDSD,
23631 IX86_BUILTIN_ROUNDSS,
23633 IX86_BUILTIN_FLOORPD,
23634 IX86_BUILTIN_CEILPD,
23635 IX86_BUILTIN_TRUNCPD,
23636 IX86_BUILTIN_RINTPD,
23637 IX86_BUILTIN_FLOORPS,
23638 IX86_BUILTIN_CEILPS,
23639 IX86_BUILTIN_TRUNCPS,
23640 IX86_BUILTIN_RINTPS,
23642 IX86_BUILTIN_PTESTZ,
23643 IX86_BUILTIN_PTESTC,
23644 IX86_BUILTIN_PTESTNZC,
23646 IX86_BUILTIN_VEC_INIT_V2SI,
23647 IX86_BUILTIN_VEC_INIT_V4HI,
23648 IX86_BUILTIN_VEC_INIT_V8QI,
23649 IX86_BUILTIN_VEC_EXT_V2DF,
23650 IX86_BUILTIN_VEC_EXT_V2DI,
23651 IX86_BUILTIN_VEC_EXT_V4SF,
23652 IX86_BUILTIN_VEC_EXT_V4SI,
23653 IX86_BUILTIN_VEC_EXT_V8HI,
23654 IX86_BUILTIN_VEC_EXT_V2SI,
23655 IX86_BUILTIN_VEC_EXT_V4HI,
23656 IX86_BUILTIN_VEC_EXT_V16QI,
23657 IX86_BUILTIN_VEC_SET_V2DI,
23658 IX86_BUILTIN_VEC_SET_V4SF,
23659 IX86_BUILTIN_VEC_SET_V4SI,
23660 IX86_BUILTIN_VEC_SET_V8HI,
23661 IX86_BUILTIN_VEC_SET_V4HI,
23662 IX86_BUILTIN_VEC_SET_V16QI,
23664 IX86_BUILTIN_VEC_PACK_SFIX,
23667 IX86_BUILTIN_CRC32QI,
23668 IX86_BUILTIN_CRC32HI,
23669 IX86_BUILTIN_CRC32SI,
23670 IX86_BUILTIN_CRC32DI,
23672 IX86_BUILTIN_PCMPESTRI128,
23673 IX86_BUILTIN_PCMPESTRM128,
23674 IX86_BUILTIN_PCMPESTRA128,
23675 IX86_BUILTIN_PCMPESTRC128,
23676 IX86_BUILTIN_PCMPESTRO128,
23677 IX86_BUILTIN_PCMPESTRS128,
23678 IX86_BUILTIN_PCMPESTRZ128,
23679 IX86_BUILTIN_PCMPISTRI128,
23680 IX86_BUILTIN_PCMPISTRM128,
23681 IX86_BUILTIN_PCMPISTRA128,
23682 IX86_BUILTIN_PCMPISTRC128,
23683 IX86_BUILTIN_PCMPISTRO128,
23684 IX86_BUILTIN_PCMPISTRS128,
23685 IX86_BUILTIN_PCMPISTRZ128,
23687 IX86_BUILTIN_PCMPGTQ,
23689 /* AES instructions */
23690 IX86_BUILTIN_AESENC128,
23691 IX86_BUILTIN_AESENCLAST128,
23692 IX86_BUILTIN_AESDEC128,
23693 IX86_BUILTIN_AESDECLAST128,
23694 IX86_BUILTIN_AESIMC128,
23695 IX86_BUILTIN_AESKEYGENASSIST128,
23697 /* PCLMUL instruction */
23698 IX86_BUILTIN_PCLMULQDQ128,
23701 IX86_BUILTIN_ADDPD256,
23702 IX86_BUILTIN_ADDPS256,
23703 IX86_BUILTIN_ADDSUBPD256,
23704 IX86_BUILTIN_ADDSUBPS256,
23705 IX86_BUILTIN_ANDPD256,
23706 IX86_BUILTIN_ANDPS256,
23707 IX86_BUILTIN_ANDNPD256,
23708 IX86_BUILTIN_ANDNPS256,
23709 IX86_BUILTIN_BLENDPD256,
23710 IX86_BUILTIN_BLENDPS256,
23711 IX86_BUILTIN_BLENDVPD256,
23712 IX86_BUILTIN_BLENDVPS256,
23713 IX86_BUILTIN_DIVPD256,
23714 IX86_BUILTIN_DIVPS256,
23715 IX86_BUILTIN_DPPS256,
23716 IX86_BUILTIN_HADDPD256,
23717 IX86_BUILTIN_HADDPS256,
23718 IX86_BUILTIN_HSUBPD256,
23719 IX86_BUILTIN_HSUBPS256,
23720 IX86_BUILTIN_MAXPD256,
23721 IX86_BUILTIN_MAXPS256,
23722 IX86_BUILTIN_MINPD256,
23723 IX86_BUILTIN_MINPS256,
23724 IX86_BUILTIN_MULPD256,
23725 IX86_BUILTIN_MULPS256,
23726 IX86_BUILTIN_ORPD256,
23727 IX86_BUILTIN_ORPS256,
23728 IX86_BUILTIN_SHUFPD256,
23729 IX86_BUILTIN_SHUFPS256,
23730 IX86_BUILTIN_SUBPD256,
23731 IX86_BUILTIN_SUBPS256,
23732 IX86_BUILTIN_XORPD256,
23733 IX86_BUILTIN_XORPS256,
23734 IX86_BUILTIN_CMPSD,
23735 IX86_BUILTIN_CMPSS,
23736 IX86_BUILTIN_CMPPD,
23737 IX86_BUILTIN_CMPPS,
23738 IX86_BUILTIN_CMPPD256,
23739 IX86_BUILTIN_CMPPS256,
23740 IX86_BUILTIN_CVTDQ2PD256,
23741 IX86_BUILTIN_CVTDQ2PS256,
23742 IX86_BUILTIN_CVTPD2PS256,
23743 IX86_BUILTIN_CVTPS2DQ256,
23744 IX86_BUILTIN_CVTPS2PD256,
23745 IX86_BUILTIN_CVTTPD2DQ256,
23746 IX86_BUILTIN_CVTPD2DQ256,
23747 IX86_BUILTIN_CVTTPS2DQ256,
23748 IX86_BUILTIN_EXTRACTF128PD256,
23749 IX86_BUILTIN_EXTRACTF128PS256,
23750 IX86_BUILTIN_EXTRACTF128SI256,
23751 IX86_BUILTIN_VZEROALL,
23752 IX86_BUILTIN_VZEROUPPER,
23753 IX86_BUILTIN_VPERMILVARPD,
23754 IX86_BUILTIN_VPERMILVARPS,
23755 IX86_BUILTIN_VPERMILVARPD256,
23756 IX86_BUILTIN_VPERMILVARPS256,
23757 IX86_BUILTIN_VPERMILPD,
23758 IX86_BUILTIN_VPERMILPS,
23759 IX86_BUILTIN_VPERMILPD256,
23760 IX86_BUILTIN_VPERMILPS256,
23761 IX86_BUILTIN_VPERMIL2PD,
23762 IX86_BUILTIN_VPERMIL2PS,
23763 IX86_BUILTIN_VPERMIL2PD256,
23764 IX86_BUILTIN_VPERMIL2PS256,
23765 IX86_BUILTIN_VPERM2F128PD256,
23766 IX86_BUILTIN_VPERM2F128PS256,
23767 IX86_BUILTIN_VPERM2F128SI256,
23768 IX86_BUILTIN_VBROADCASTSS,
23769 IX86_BUILTIN_VBROADCASTSD256,
23770 IX86_BUILTIN_VBROADCASTSS256,
23771 IX86_BUILTIN_VBROADCASTPD256,
23772 IX86_BUILTIN_VBROADCASTPS256,
23773 IX86_BUILTIN_VINSERTF128PD256,
23774 IX86_BUILTIN_VINSERTF128PS256,
23775 IX86_BUILTIN_VINSERTF128SI256,
23776 IX86_BUILTIN_LOADUPD256,
23777 IX86_BUILTIN_LOADUPS256,
23778 IX86_BUILTIN_STOREUPD256,
23779 IX86_BUILTIN_STOREUPS256,
23780 IX86_BUILTIN_LDDQU256,
23781 IX86_BUILTIN_MOVNTDQ256,
23782 IX86_BUILTIN_MOVNTPD256,
23783 IX86_BUILTIN_MOVNTPS256,
23784 IX86_BUILTIN_LOADDQU256,
23785 IX86_BUILTIN_STOREDQU256,
23786 IX86_BUILTIN_MASKLOADPD,
23787 IX86_BUILTIN_MASKLOADPS,
23788 IX86_BUILTIN_MASKSTOREPD,
23789 IX86_BUILTIN_MASKSTOREPS,
23790 IX86_BUILTIN_MASKLOADPD256,
23791 IX86_BUILTIN_MASKLOADPS256,
23792 IX86_BUILTIN_MASKSTOREPD256,
23793 IX86_BUILTIN_MASKSTOREPS256,
23794 IX86_BUILTIN_MOVSHDUP256,
23795 IX86_BUILTIN_MOVSLDUP256,
23796 IX86_BUILTIN_MOVDDUP256,
23798 IX86_BUILTIN_SQRTPD256,
23799 IX86_BUILTIN_SQRTPS256,
23800 IX86_BUILTIN_SQRTPS_NR256,
23801 IX86_BUILTIN_RSQRTPS256,
23802 IX86_BUILTIN_RSQRTPS_NR256,
23804 IX86_BUILTIN_RCPPS256,
23806 IX86_BUILTIN_ROUNDPD256,
23807 IX86_BUILTIN_ROUNDPS256,
23809 IX86_BUILTIN_FLOORPD256,
23810 IX86_BUILTIN_CEILPD256,
23811 IX86_BUILTIN_TRUNCPD256,
23812 IX86_BUILTIN_RINTPD256,
23813 IX86_BUILTIN_FLOORPS256,
23814 IX86_BUILTIN_CEILPS256,
23815 IX86_BUILTIN_TRUNCPS256,
23816 IX86_BUILTIN_RINTPS256,
23818 IX86_BUILTIN_UNPCKHPD256,
23819 IX86_BUILTIN_UNPCKLPD256,
23820 IX86_BUILTIN_UNPCKHPS256,
23821 IX86_BUILTIN_UNPCKLPS256,
23823 IX86_BUILTIN_SI256_SI,
23824 IX86_BUILTIN_PS256_PS,
23825 IX86_BUILTIN_PD256_PD,
23826 IX86_BUILTIN_SI_SI256,
23827 IX86_BUILTIN_PS_PS256,
23828 IX86_BUILTIN_PD_PD256,
23830 IX86_BUILTIN_VTESTZPD,
23831 IX86_BUILTIN_VTESTCPD,
23832 IX86_BUILTIN_VTESTNZCPD,
23833 IX86_BUILTIN_VTESTZPS,
23834 IX86_BUILTIN_VTESTCPS,
23835 IX86_BUILTIN_VTESTNZCPS,
23836 IX86_BUILTIN_VTESTZPD256,
23837 IX86_BUILTIN_VTESTCPD256,
23838 IX86_BUILTIN_VTESTNZCPD256,
23839 IX86_BUILTIN_VTESTZPS256,
23840 IX86_BUILTIN_VTESTCPS256,
23841 IX86_BUILTIN_VTESTNZCPS256,
23842 IX86_BUILTIN_PTESTZ256,
23843 IX86_BUILTIN_PTESTC256,
23844 IX86_BUILTIN_PTESTNZC256,
23846 IX86_BUILTIN_MOVMSKPD256,
23847 IX86_BUILTIN_MOVMSKPS256,
23849 /* TFmode support builtins. */
23851 IX86_BUILTIN_HUGE_VALQ,
23852 IX86_BUILTIN_FABSQ,
23853 IX86_BUILTIN_COPYSIGNQ,
23855 /* Vectorizer support builtins. */
23856 IX86_BUILTIN_CPYSGNPS,
23857 IX86_BUILTIN_CPYSGNPD,
23858 IX86_BUILTIN_CPYSGNPS256,
23859 IX86_BUILTIN_CPYSGNPD256,
23861 IX86_BUILTIN_CVTUDQ2PS,
23863 IX86_BUILTIN_VEC_PERM_V2DF,
23864 IX86_BUILTIN_VEC_PERM_V4SF,
23865 IX86_BUILTIN_VEC_PERM_V2DI,
23866 IX86_BUILTIN_VEC_PERM_V4SI,
23867 IX86_BUILTIN_VEC_PERM_V8HI,
23868 IX86_BUILTIN_VEC_PERM_V16QI,
23869 IX86_BUILTIN_VEC_PERM_V2DI_U,
23870 IX86_BUILTIN_VEC_PERM_V4SI_U,
23871 IX86_BUILTIN_VEC_PERM_V8HI_U,
23872 IX86_BUILTIN_VEC_PERM_V16QI_U,
23873 IX86_BUILTIN_VEC_PERM_V4DF,
23874 IX86_BUILTIN_VEC_PERM_V8SF,
23876 /* FMA4 and XOP instructions. */
23877 IX86_BUILTIN_VFMADDSS,
23878 IX86_BUILTIN_VFMADDSD,
23879 IX86_BUILTIN_VFMADDPS,
23880 IX86_BUILTIN_VFMADDPD,
23881 IX86_BUILTIN_VFMADDPS256,
23882 IX86_BUILTIN_VFMADDPD256,
23883 IX86_BUILTIN_VFMADDSUBPS,
23884 IX86_BUILTIN_VFMADDSUBPD,
23885 IX86_BUILTIN_VFMADDSUBPS256,
23886 IX86_BUILTIN_VFMADDSUBPD256,
23888 IX86_BUILTIN_VPCMOV,
23889 IX86_BUILTIN_VPCMOV_V2DI,
23890 IX86_BUILTIN_VPCMOV_V4SI,
23891 IX86_BUILTIN_VPCMOV_V8HI,
23892 IX86_BUILTIN_VPCMOV_V16QI,
23893 IX86_BUILTIN_VPCMOV_V4SF,
23894 IX86_BUILTIN_VPCMOV_V2DF,
23895 IX86_BUILTIN_VPCMOV256,
23896 IX86_BUILTIN_VPCMOV_V4DI256,
23897 IX86_BUILTIN_VPCMOV_V8SI256,
23898 IX86_BUILTIN_VPCMOV_V16HI256,
23899 IX86_BUILTIN_VPCMOV_V32QI256,
23900 IX86_BUILTIN_VPCMOV_V8SF256,
23901 IX86_BUILTIN_VPCMOV_V4DF256,
23903 IX86_BUILTIN_VPPERM,
23905 IX86_BUILTIN_VPMACSSWW,
23906 IX86_BUILTIN_VPMACSWW,
23907 IX86_BUILTIN_VPMACSSWD,
23908 IX86_BUILTIN_VPMACSWD,
23909 IX86_BUILTIN_VPMACSSDD,
23910 IX86_BUILTIN_VPMACSDD,
23911 IX86_BUILTIN_VPMACSSDQL,
23912 IX86_BUILTIN_VPMACSSDQH,
23913 IX86_BUILTIN_VPMACSDQL,
23914 IX86_BUILTIN_VPMACSDQH,
23915 IX86_BUILTIN_VPMADCSSWD,
23916 IX86_BUILTIN_VPMADCSWD,
23918 IX86_BUILTIN_VPHADDBW,
23919 IX86_BUILTIN_VPHADDBD,
23920 IX86_BUILTIN_VPHADDBQ,
23921 IX86_BUILTIN_VPHADDWD,
23922 IX86_BUILTIN_VPHADDWQ,
23923 IX86_BUILTIN_VPHADDDQ,
23924 IX86_BUILTIN_VPHADDUBW,
23925 IX86_BUILTIN_VPHADDUBD,
23926 IX86_BUILTIN_VPHADDUBQ,
23927 IX86_BUILTIN_VPHADDUWD,
23928 IX86_BUILTIN_VPHADDUWQ,
23929 IX86_BUILTIN_VPHADDUDQ,
23930 IX86_BUILTIN_VPHSUBBW,
23931 IX86_BUILTIN_VPHSUBWD,
23932 IX86_BUILTIN_VPHSUBDQ,
23934 IX86_BUILTIN_VPROTB,
23935 IX86_BUILTIN_VPROTW,
23936 IX86_BUILTIN_VPROTD,
23937 IX86_BUILTIN_VPROTQ,
23938 IX86_BUILTIN_VPROTB_IMM,
23939 IX86_BUILTIN_VPROTW_IMM,
23940 IX86_BUILTIN_VPROTD_IMM,
23941 IX86_BUILTIN_VPROTQ_IMM,
23943 IX86_BUILTIN_VPSHLB,
23944 IX86_BUILTIN_VPSHLW,
23945 IX86_BUILTIN_VPSHLD,
23946 IX86_BUILTIN_VPSHLQ,
23947 IX86_BUILTIN_VPSHAB,
23948 IX86_BUILTIN_VPSHAW,
23949 IX86_BUILTIN_VPSHAD,
23950 IX86_BUILTIN_VPSHAQ,
23952 IX86_BUILTIN_VFRCZSS,
23953 IX86_BUILTIN_VFRCZSD,
23954 IX86_BUILTIN_VFRCZPS,
23955 IX86_BUILTIN_VFRCZPD,
23956 IX86_BUILTIN_VFRCZPS256,
23957 IX86_BUILTIN_VFRCZPD256,
23959 IX86_BUILTIN_VPCOMEQUB,
23960 IX86_BUILTIN_VPCOMNEUB,
23961 IX86_BUILTIN_VPCOMLTUB,
23962 IX86_BUILTIN_VPCOMLEUB,
23963 IX86_BUILTIN_VPCOMGTUB,
23964 IX86_BUILTIN_VPCOMGEUB,
23965 IX86_BUILTIN_VPCOMFALSEUB,
23966 IX86_BUILTIN_VPCOMTRUEUB,
23968 IX86_BUILTIN_VPCOMEQUW,
23969 IX86_BUILTIN_VPCOMNEUW,
23970 IX86_BUILTIN_VPCOMLTUW,
23971 IX86_BUILTIN_VPCOMLEUW,
23972 IX86_BUILTIN_VPCOMGTUW,
23973 IX86_BUILTIN_VPCOMGEUW,
23974 IX86_BUILTIN_VPCOMFALSEUW,
23975 IX86_BUILTIN_VPCOMTRUEUW,
23977 IX86_BUILTIN_VPCOMEQUD,
23978 IX86_BUILTIN_VPCOMNEUD,
23979 IX86_BUILTIN_VPCOMLTUD,
23980 IX86_BUILTIN_VPCOMLEUD,
23981 IX86_BUILTIN_VPCOMGTUD,
23982 IX86_BUILTIN_VPCOMGEUD,
23983 IX86_BUILTIN_VPCOMFALSEUD,
23984 IX86_BUILTIN_VPCOMTRUEUD,
23986 IX86_BUILTIN_VPCOMEQUQ,
23987 IX86_BUILTIN_VPCOMNEUQ,
23988 IX86_BUILTIN_VPCOMLTUQ,
23989 IX86_BUILTIN_VPCOMLEUQ,
23990 IX86_BUILTIN_VPCOMGTUQ,
23991 IX86_BUILTIN_VPCOMGEUQ,
23992 IX86_BUILTIN_VPCOMFALSEUQ,
23993 IX86_BUILTIN_VPCOMTRUEUQ,
23995 IX86_BUILTIN_VPCOMEQB,
23996 IX86_BUILTIN_VPCOMNEB,
23997 IX86_BUILTIN_VPCOMLTB,
23998 IX86_BUILTIN_VPCOMLEB,
23999 IX86_BUILTIN_VPCOMGTB,
24000 IX86_BUILTIN_VPCOMGEB,
24001 IX86_BUILTIN_VPCOMFALSEB,
24002 IX86_BUILTIN_VPCOMTRUEB,
24004 IX86_BUILTIN_VPCOMEQW,
24005 IX86_BUILTIN_VPCOMNEW,
24006 IX86_BUILTIN_VPCOMLTW,
24007 IX86_BUILTIN_VPCOMLEW,
24008 IX86_BUILTIN_VPCOMGTW,
24009 IX86_BUILTIN_VPCOMGEW,
24010 IX86_BUILTIN_VPCOMFALSEW,
24011 IX86_BUILTIN_VPCOMTRUEW,
24013 IX86_BUILTIN_VPCOMEQD,
24014 IX86_BUILTIN_VPCOMNED,
24015 IX86_BUILTIN_VPCOMLTD,
24016 IX86_BUILTIN_VPCOMLED,
24017 IX86_BUILTIN_VPCOMGTD,
24018 IX86_BUILTIN_VPCOMGED,
24019 IX86_BUILTIN_VPCOMFALSED,
24020 IX86_BUILTIN_VPCOMTRUED,
24022 IX86_BUILTIN_VPCOMEQQ,
24023 IX86_BUILTIN_VPCOMNEQ,
24024 IX86_BUILTIN_VPCOMLTQ,
24025 IX86_BUILTIN_VPCOMLEQ,
24026 IX86_BUILTIN_VPCOMGTQ,
24027 IX86_BUILTIN_VPCOMGEQ,
24028 IX86_BUILTIN_VPCOMFALSEQ,
24029 IX86_BUILTIN_VPCOMTRUEQ,
24031 /* LWP instructions. */
24032 IX86_BUILTIN_LLWPCB,
24033 IX86_BUILTIN_SLWPCB,
24034 IX86_BUILTIN_LWPVAL32,
24035 IX86_BUILTIN_LWPVAL64,
24036 IX86_BUILTIN_LWPINS32,
24037 IX86_BUILTIN_LWPINS64,
24041 /* BMI instructions. */
24042 IX86_BUILTIN_BEXTR32,
24043 IX86_BUILTIN_BEXTR64,
24046 /* TBM instructions. */
24047 IX86_BUILTIN_BEXTRI32,
24048 IX86_BUILTIN_BEXTRI64,
24051 /* FSGSBASE instructions. */
24052 IX86_BUILTIN_RDFSBASE32,
24053 IX86_BUILTIN_RDFSBASE64,
24054 IX86_BUILTIN_RDGSBASE32,
24055 IX86_BUILTIN_RDGSBASE64,
24056 IX86_BUILTIN_WRFSBASE32,
24057 IX86_BUILTIN_WRFSBASE64,
24058 IX86_BUILTIN_WRGSBASE32,
24059 IX86_BUILTIN_WRGSBASE64,
24061 /* RDRND instructions. */
24062 IX86_BUILTIN_RDRAND16_STEP,
24063 IX86_BUILTIN_RDRAND32_STEP,
24064 IX86_BUILTIN_RDRAND64_STEP,
24066 /* F16C instructions. */
24067 IX86_BUILTIN_CVTPH2PS,
24068 IX86_BUILTIN_CVTPH2PS256,
24069 IX86_BUILTIN_CVTPS2PH,
24070 IX86_BUILTIN_CVTPS2PH256,
24072 /* CFString built-in for darwin */
24073 IX86_BUILTIN_CFSTRING,
24078 /* Table for the ix86 builtin decls. */
24079 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
24081 /* Table of all of the builtin functions that are possible with different ISAs
24082    but are waiting to be built until a function is declared to use that ISA.  */
24084 struct builtin_isa {
24085 const char *name; /* function name */
24086 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
24087 int isa; /* isa_flags this builtin is defined for */
24088 bool const_p; /* true if the declaration is constant */
24089   bool set_and_not_built_p;	/* true if the builtin was recorded but its decl is deferred and not yet built */
24092 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
24095 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
24096    of which isa_flags to use in the ix86_builtins_isa array.  Store the
24097    function decl in the ix86_builtins array.  Return the function decl or
24098    NULL_TREE if the builtin was not added.
24100 If the front end has a special hook for builtin functions, delay adding
24101 builtin functions that aren't in the current ISA until the ISA is changed
24102    with function specific optimization.  Doing so can save about 300K for the
24103    default compiler.  When the builtin is expanded, check at that time whether it is valid.
24106 If the front end doesn't have a special hook, record all builtins, even if
24107    the builtin isn't in the current ISA, in case the user uses
24108 function specific options for a different ISA, so that we don't get scope
24109 errors if a builtin is added in the middle of a function scope. */
24112 def_builtin (int mask, const char *name, enum ix86_builtin_func_type tcode,
24113 enum ix86_builtins code)
24115 tree decl = NULL_TREE;
24117 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
24119 ix86_builtins_isa[(int) code].isa = mask;
24121 mask &= ~OPTION_MASK_ISA_64BIT;
24123 || (mask & ix86_isa_flags) != 0
24124 || (lang_hooks.builtin_function
24125 == lang_hooks.builtin_function_ext_scope))
24128 tree type = ix86_get_builtin_func_type (tcode);
24129 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
24131 ix86_builtins[(int) code] = decl;
24132 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
24136 ix86_builtins[(int) code] = NULL_TREE;
24137 ix86_builtins_isa[(int) code].tcode = tcode;
24138 ix86_builtins_isa[(int) code].name = name;
24139 ix86_builtins_isa[(int) code].const_p = false;
24140 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
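/* Usage sketch (illustrative; the actual registrations appear later in
   this file), e.g.:

     def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
		  VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);

   When SSE is not in ix86_isa_flags and the front end has no ext-scope
   builtin hook, only the name/type/isa are recorded above, and the decl
   is built later by ix86_add_new_builtins.  */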
24147 /* Like def_builtin, but also marks the function decl "const". */
24150 def_builtin_const (int mask, const char *name,
24151 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
24153 tree decl = def_builtin (mask, name, tcode, code);
24155 TREE_READONLY (decl) = 1;
24157 ix86_builtins_isa[(int) code].const_p = true;
24162 /* Add any new builtin functions for a given ISA that may not have been
24163 declared. This saves a bit of space compared to adding all of the
24164 declarations to the tree, even if we didn't use them. */
24167 ix86_add_new_builtins (int isa)
24171 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
24173 if ((ix86_builtins_isa[i].isa & isa) != 0
24174 && ix86_builtins_isa[i].set_and_not_built_p)
24178 /* Don't define the builtin again. */
24179 ix86_builtins_isa[i].set_and_not_built_p = false;
24181 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
24182 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
24183 type, i, BUILT_IN_MD, NULL,
24186 ix86_builtins[i] = decl;
24187 if (ix86_builtins_isa[i].const_p)
24188 TREE_READONLY (decl) = 1;
24193 /* Bits for builtin_description.flag. */
24195 /* Set when we don't support the comparison natively, and should
24196 swap_comparison in order to support it. */
24197 #define BUILTIN_DESC_SWAP_OPERANDS 1
24199 struct builtin_description
24201 const unsigned int mask;
24202 const enum insn_code icode;
24203 const char *const name;
24204 const enum ix86_builtins code;
24205 const enum rtx_code comparison;
static const struct builtin_description bdesc_comi[] =
{
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
};
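
/* Example of the user-level view (illustrative): <xmmintrin.h> implements
   _mm_comieq_ss in terms of the first entry above, roughly

     int eq = __builtin_ia32_comieq ((__v4sf) a, (__v4sf) b);

   returning the comparison result as an int flag.  */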
static const struct builtin_description bdesc_pcmpestr[] =
{
  /* SSE4.2 */
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
};
static const struct builtin_description bdesc_pcmpistr[] =
{
  /* SSE4.2 */
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
};
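
/* These back the SSE4.2 string-compare intrinsics in <smmintrin.h>; e.g.
   _mm_cmpistri expands to something like (the mode constant here is
   illustrative):

     int idx = __builtin_ia32_pcmpistri128 ((__v16qi) a, (__v16qi) b, 0x0c);
*/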
/* Special builtins with variable number of arguments.  */
static const struct builtin_description bdesc_special_args[] =
{
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtsc, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtscp, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },

  /* MMX */
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },

  /* 3DNow! */
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },

  /* SSE */
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },

  /* SSE or 3DNow!A */
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },

  /* SSE2 */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },

  /* SSE3 */
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },

  /* SSE4.1 */
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },

  /* SSE4A */
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },

  /* AVX */
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },

  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },

  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },

  /* FSGSBASE */
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
};
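
/* Illustrative use of a "special" builtin: the first row above gives
   __builtin_ia32_rdtsc the UINT64_FTYPE_VOID signature, so user code can
   simply write

     unsigned long long t = __builtin_ia32_rdtsc ();
*/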
/* Builtins with variable number of arguments.  */
static const struct builtin_description bdesc_args[] =
{
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
  { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdpmc, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },

  /* MMX */
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },

  /* 3DNow! */
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },

  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  /* 3DNow!A */
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },

  /* SSE */
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },

  { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },

  /* SSE MMX or 3DNow!A */
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },

  /* SSE2 */
24558 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
24560 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2df", IX86_BUILTIN_VEC_PERM_V2DF, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI },
24561 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4sf", IX86_BUILTIN_VEC_PERM_V4SF, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI },
24562 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di", IX86_BUILTIN_VEC_PERM_V2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI },
24563 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si", IX86_BUILTIN_VEC_PERM_V4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
24564 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi", IX86_BUILTIN_VEC_PERM_V8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI },
24565 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi", IX86_BUILTIN_VEC_PERM_V16QI, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
24566 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di_u", IX86_BUILTIN_VEC_PERM_V2DI_U, UNKNOWN, (int) V2UDI_FTYPE_V2UDI_V2UDI_V2UDI },
24567 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si_u", IX86_BUILTIN_VEC_PERM_V4SI_U, UNKNOWN, (int) V4USI_FTYPE_V4USI_V4USI_V4USI },
24568 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi_u", IX86_BUILTIN_VEC_PERM_V8HI_U, UNKNOWN, (int) V8UHI_FTYPE_V8UHI_V8UHI_V8UHI },
24569 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi_u", IX86_BUILTIN_VEC_PERM_V16QI_U, UNKNOWN, (int) V16UQI_FTYPE_V16UQI_V16UQI_V16UQI },
24570 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4df", IX86_BUILTIN_VEC_PERM_V4DF, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI },
24571 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8sf", IX86_BUILTIN_VEC_PERM_V8SF, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI },
24573 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
24574 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
24575 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
24576 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
24577 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
24578 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtudq2ps, "__builtin_ia32_cvtudq2ps", IX86_BUILTIN_CVTUDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
24580 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
24581 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
24582 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
24583 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
24584 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
24586 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
24588 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
24589 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
24590 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
24591 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
24593 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
24594 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
24595 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
24597 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24598 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24599 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24600 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24601 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24602 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24603 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24604 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24606 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
24607 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
24608 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
24609 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
24610 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP},
24611 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
24612 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
24613 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
24614 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
24615 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
24616 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
24617 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
24618 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
24619 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
24620 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
24621 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
24622 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
24623 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
24624 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
24625 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
24627 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24628 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24629 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24630 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24632 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24633 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24634 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24635 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24637 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24639 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24640 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24641 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24643 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
24645 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24646 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24647 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24648 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24649 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24650 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24651 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24652 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24654 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24655 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24656 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24657 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24658 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24659 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24660 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24661 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24663 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24664 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN,(int) V8HI_FTYPE_V8HI_V8HI },
24666 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24667 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24668 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24669 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24671 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24672 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24674 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24675 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24676 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24677 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24678 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24679 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24681 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24682 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24683 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24684 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24686 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24687 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24688 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24689 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24690 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
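
  /* SSE2 MMX: SSE2 instructions operating on the MMX registers.  */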
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
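
  /* SSE3 */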
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
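
  /* SSSE3 */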
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },

  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
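
  /* SSE4.1 */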
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
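
  /* SSE4.2 */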
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
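
  /* SSE4A */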
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
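
  /* AES */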
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
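
  /* PCLMUL */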
  { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
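
  /* AVX */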
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
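
  /* LZCNT */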
  { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
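
  /* BMI */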
  { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
  { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
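
  /* TBM */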
  { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
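
  /* F16C */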
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
};

/* FMA4 and XOP. */
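/* Shorthand for the FTYPE codes used by bdesc_multi_arg below:
   MULTI_ARG_<N>_<M> is an N-operand builtin on vectors of element
   mode M (a pair such as SI_DI means SI sources widened to a DI
   result); the "2" suffix marks the 256-bit variants, _IMM an
   immediate count operand, and _CMP and _TF the vpcom comparison
   forms.  */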
#define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
#define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
#define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
#define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
#define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
#define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
#define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
#define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
#define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
#define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
#define MULTI_ARG_3_SI_DI V2DI_FTYPE_V4SI_V4SI_V2DI
#define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
#define MULTI_ARG_3_HI_SI V4SI_FTYPE_V8HI_V8HI_V4SI
#define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
#define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
#define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
#define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
#define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
#define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
#define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
#define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
#define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
#define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
#define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
#define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
#define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
#define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
#define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
#define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
#define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
#define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
#define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
#define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
#define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
#define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
#define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
#define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
#define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
#define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
#define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
#define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
#define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
#define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
#define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
#define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
#define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
#define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
#define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
#define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
#define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
#define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
#define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
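
/* In each entry below, the comparison field holds the rtx code (or
   the PCOM_FALSE/PCOM_TRUE pseudo-codes) passed to the expander, and
   the flag field one of the MULTI_ARG_* signatures above.  */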
static const struct builtin_description bdesc_multi_arg[] =
{
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
    "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
    UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
    "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
    UNKNOWN, (int)MULTI_ARG_3_DF },

  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
    "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
    UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
    "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
    UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
    "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
    UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
    "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
    UNKNOWN, (int)MULTI_ARG_3_DF2 },

  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
    "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
    UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
    "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
    UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
    "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
    UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
    "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
    UNKNOWN, (int)MULTI_ARG_3_DF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi",IX86_BUILTIN_VPCMOV_V16QI,UNKNOWN, (int)MULTI_ARG_3_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
25270 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
25274 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
25275 in the current target ISA to allow the user to compile particular modules
25276 with different target specific options that differ from the command line
25279 ix86_init_mmx_sse_builtins (void)
25281 const struct builtin_description * d;
25282 enum ix86_builtin_func_type ftype;
25285 /* Add all special builtins with variable number of operands. */
25286 for (i = 0, d = bdesc_special_args;
25287 i < ARRAY_SIZE (bdesc_special_args);
25293 ftype = (enum ix86_builtin_func_type) d->flag;
25294 def_builtin (d->mask, d->name, ftype, d->code);
25297 /* Add all builtins with variable number of operands. */
25298 for (i = 0, d = bdesc_args;
25299 i < ARRAY_SIZE (bdesc_args);
25305 ftype = (enum ix86_builtin_func_type) d->flag;
25306 def_builtin_const (d->mask, d->name, ftype, d->code);
25309 /* pcmpestr[im] insns. */
25310 for (i = 0, d = bdesc_pcmpestr;
25311 i < ARRAY_SIZE (bdesc_pcmpestr);
25314 if (d->code == IX86_BUILTIN_PCMPESTRM128)
25315 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
25317 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
25318 def_builtin_const (d->mask, d->name, ftype, d->code);
25321 /* pcmpistr[im] insns. */
25322 for (i = 0, d = bdesc_pcmpistr;
25323 i < ARRAY_SIZE (bdesc_pcmpistr);
25326 if (d->code == IX86_BUILTIN_PCMPISTRM128)
25327 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
25329 ftype = INT_FTYPE_V16QI_V16QI_INT;
25330 def_builtin_const (d->mask, d->name, ftype, d->code);
25333 /* comi/ucomi insns. */
25334 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
25336 if (d->mask == OPTION_MASK_ISA_SSE2)
25337 ftype = INT_FTYPE_V2DF_V2DF;
25339 ftype = INT_FTYPE_V4SF_V4SF;
25340 def_builtin_const (d->mask, d->name, ftype, d->code);
25344 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
25345 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
25346 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
25347 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
25349 /* SSE or 3DNow!A */
25350 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
25351 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
25352 IX86_BUILTIN_MASKMOVQ);
25355 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
25356 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
25358 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
25359 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
25360 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
25361 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
25364 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
25365 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
25366 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
25367 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
25370 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
25371 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
25372 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
25373 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
25374 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
25375 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
25376 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
25377 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
25378 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
25379 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
25380 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
25381 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
25384 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
25385 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
25388 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
25389 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
25390 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
25391 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
25392 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
25393 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
25394 IX86_BUILTIN_RDRAND64_STEP);
25396 /* MMX access to the vec_init patterns. */
25397 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
25398 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
25400 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
25401 V4HI_FTYPE_HI_HI_HI_HI,
25402 IX86_BUILTIN_VEC_INIT_V4HI);
25404 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
25405 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
25406 IX86_BUILTIN_VEC_INIT_V8QI);
25408 /* Access to the vec_extract patterns. */
25409 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
25410 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
25411 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
25412 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
25413 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
25414 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
25415 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
25416 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
25417 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
25418 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
25420 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
25421 "__builtin_ia32_vec_ext_v4hi",
25422 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
25424 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
25425 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
25427 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
25428 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
25430 /* Access to the vec_set patterns. */
25431 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
25432 "__builtin_ia32_vec_set_v2di",
25433 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
25435 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
25436 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
25438 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
25439 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
25441 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
25442 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
25444 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
25445 "__builtin_ia32_vec_set_v4hi",
25446 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
25448 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
25449 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
25451 /* Add FMA4 multi-arg argument instructions */
25452 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
25457 ftype = (enum ix86_builtin_func_type) d->flag;
25458 def_builtin_const (d->mask, d->name, ftype, d->code);
25462 /* Internal method for ix86_init_builtins. */
25465 ix86_init_builtins_va_builtins_abi (void)
25467 tree ms_va_ref, sysv_va_ref;
25468 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
25469 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
25470 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
25471 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
25475 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
25476 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
25477 ms_va_ref = build_reference_type (ms_va_list_type_node);
25479 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
25482 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
25483 fnvoid_va_start_ms =
25484 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
25485 fnvoid_va_end_sysv =
25486 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
25487 fnvoid_va_start_sysv =
25488 build_varargs_function_type_list (void_type_node, sysv_va_ref,
25490 fnvoid_va_copy_ms =
25491 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
25493 fnvoid_va_copy_sysv =
25494 build_function_type_list (void_type_node, sysv_va_ref,
25495 sysv_va_ref, NULL_TREE);
25497 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
25498 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
25499 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
25500 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
25501 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
25502 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
25503 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
25504 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
25505 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
25506 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
25507 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
25508 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
25512 ix86_init_builtin_types (void)
25514 tree float128_type_node, float80_type_node;
25516 /* The __float80 type. */
25517 float80_type_node = long_double_type_node;
25518 if (TYPE_MODE (float80_type_node) != XFmode)
25520 /* The __float80 type. */
25521 float80_type_node = make_node (REAL_TYPE);
25523 TYPE_PRECISION (float80_type_node) = 80;
25524 layout_type (float80_type_node);
25526 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
25528 /* The __float128 type. */
25529 float128_type_node = make_node (REAL_TYPE);
25530 TYPE_PRECISION (float128_type_node) = 128;
25531 layout_type (float128_type_node);
25532 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
25534 /* This macro is built by i386-builtin-types.awk. */
25535 DEFINE_BUILTIN_PRIMITIVE_TYPES;
25539 ix86_init_builtins (void)
25543 ix86_init_builtin_types ();
25545 /* TFmode support builtins. */
25546 def_builtin_const (0, "__builtin_infq",
25547 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
25548 def_builtin_const (0, "__builtin_huge_valq",
25549 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
25551 /* We will expand them to normal call if SSE2 isn't available since
25552 they are used by libgcc. */
25553 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
25554 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
25555 BUILT_IN_MD, "__fabstf2", NULL_TREE);
25556 TREE_READONLY (t) = 1;
25557 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
25559 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
25560 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
25561 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
25562 TREE_READONLY (t) = 1;
25563 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
25565 ix86_init_mmx_sse_builtins ();
25568 ix86_init_builtins_va_builtins_abi ();
25570 #ifdef SUBTARGET_INIT_BUILTINS
25571 SUBTARGET_INIT_BUILTINS;
25575 /* Return the ix86 builtin for CODE. */
25578 ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
25580 if (code >= IX86_BUILTIN_MAX)
25581 return error_mark_node;
25583 return ix86_builtins[code];
25586 /* Errors in the source file can cause expand_expr to return const0_rtx
25587 where we expect a vector. To avoid crashing, use one of the vector
25588 clear instructions. */
25590 safe_vector_operand (rtx x, enum machine_mode mode)
25592 if (x == const0_rtx)
25593 x = CONST0_RTX (mode);
25597 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
25600 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
25603 tree arg0 = CALL_EXPR_ARG (exp, 0);
25604 tree arg1 = CALL_EXPR_ARG (exp, 1);
25605 rtx op0 = expand_normal (arg0);
25606 rtx op1 = expand_normal (arg1);
25607 enum machine_mode tmode = insn_data[icode].operand[0].mode;
25608 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
25609 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
25611 if (VECTOR_MODE_P (mode0))
25612 op0 = safe_vector_operand (op0, mode0);
25613 if (VECTOR_MODE_P (mode1))
25614 op1 = safe_vector_operand (op1, mode1);
25616 if (optimize || !target
25617 || GET_MODE (target) != tmode
25618 || !insn_data[icode].operand[0].predicate (target, tmode))
25619 target = gen_reg_rtx (tmode);
25621 if (GET_MODE (op1) == SImode && mode1 == TImode)
25623 rtx x = gen_reg_rtx (V4SImode);
25624 emit_insn (gen_sse2_loadd (x, op1));
25625 op1 = gen_lowpart (TImode, x);
25628 if (!insn_data[icode].operand[1].predicate (op0, mode0))
25629 op0 = copy_to_mode_reg (mode0, op0);
25630 if (!insn_data[icode].operand[2].predicate (op1, mode1))
25631 op1 = copy_to_mode_reg (mode1, op1);
25633 pat = GEN_FCN (icode) (target, op0, op1);
25642 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
25645 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
25646 enum ix86_builtin_func_type m_type,
25647 enum rtx_code sub_code)
25652 bool comparison_p = false;
25654 bool last_arg_constant = false;
25655 int num_memory = 0;
25658 enum machine_mode mode;
25661 enum machine_mode tmode = insn_data[icode].operand[0].mode;
25665 case MULTI_ARG_4_DF2_DI_I:
25666 case MULTI_ARG_4_DF2_DI_I1:
25667 case MULTI_ARG_4_SF2_SI_I:
25668 case MULTI_ARG_4_SF2_SI_I1:
25670 last_arg_constant = true;
25673 case MULTI_ARG_3_SF:
25674 case MULTI_ARG_3_DF:
25675 case MULTI_ARG_3_SF2:
25676 case MULTI_ARG_3_DF2:
25677 case MULTI_ARG_3_DI:
25678 case MULTI_ARG_3_SI:
25679 case MULTI_ARG_3_SI_DI:
25680 case MULTI_ARG_3_HI:
25681 case MULTI_ARG_3_HI_SI:
25682 case MULTI_ARG_3_QI:
25683 case MULTI_ARG_3_DI2:
25684 case MULTI_ARG_3_SI2:
25685 case MULTI_ARG_3_HI2:
25686 case MULTI_ARG_3_QI2:
25690 case MULTI_ARG_2_SF:
25691 case MULTI_ARG_2_DF:
25692 case MULTI_ARG_2_DI:
25693 case MULTI_ARG_2_SI:
25694 case MULTI_ARG_2_HI:
25695 case MULTI_ARG_2_QI:
25699 case MULTI_ARG_2_DI_IMM:
25700 case MULTI_ARG_2_SI_IMM:
25701 case MULTI_ARG_2_HI_IMM:
25702 case MULTI_ARG_2_QI_IMM:
25704 last_arg_constant = true;
25707 case MULTI_ARG_1_SF:
25708 case MULTI_ARG_1_DF:
25709 case MULTI_ARG_1_SF2:
25710 case MULTI_ARG_1_DF2:
25711 case MULTI_ARG_1_DI:
25712 case MULTI_ARG_1_SI:
25713 case MULTI_ARG_1_HI:
25714 case MULTI_ARG_1_QI:
25715 case MULTI_ARG_1_SI_DI:
25716 case MULTI_ARG_1_HI_DI:
25717 case MULTI_ARG_1_HI_SI:
25718 case MULTI_ARG_1_QI_DI:
25719 case MULTI_ARG_1_QI_SI:
25720 case MULTI_ARG_1_QI_HI:
25724 case MULTI_ARG_2_DI_CMP:
25725 case MULTI_ARG_2_SI_CMP:
25726 case MULTI_ARG_2_HI_CMP:
25727 case MULTI_ARG_2_QI_CMP:
25729 comparison_p = true;
25732 case MULTI_ARG_2_SF_TF:
25733 case MULTI_ARG_2_DF_TF:
25734 case MULTI_ARG_2_DI_TF:
25735 case MULTI_ARG_2_SI_TF:
25736 case MULTI_ARG_2_HI_TF:
25737 case MULTI_ARG_2_QI_TF:
25743 gcc_unreachable ();
25746 if (optimize || !target
25747 || GET_MODE (target) != tmode
25748 || !insn_data[icode].operand[0].predicate (target, tmode))
25749 target = gen_reg_rtx (tmode);
25751 gcc_assert (nargs <= 4);
25753 for (i = 0; i < nargs; i++)
25755 tree arg = CALL_EXPR_ARG (exp, i);
25756 rtx op = expand_normal (arg);
25757 int adjust = (comparison_p) ? 1 : 0;
25758 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
25760 if (last_arg_constant && i == nargs - 1)
25762 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
25764 enum insn_code new_icode = icode;
25767 case CODE_FOR_xop_vpermil2v2df3:
25768 case CODE_FOR_xop_vpermil2v4sf3:
25769 case CODE_FOR_xop_vpermil2v4df3:
25770 case CODE_FOR_xop_vpermil2v8sf3:
25771 error ("the last argument must be a 2-bit immediate");
25772 return gen_reg_rtx (tmode);
25773 case CODE_FOR_xop_rotlv2di3:
25774 new_icode = CODE_FOR_rotlv2di3;
25776 case CODE_FOR_xop_rotlv4si3:
25777 new_icode = CODE_FOR_rotlv4si3;
25779 case CODE_FOR_xop_rotlv8hi3:
25780 new_icode = CODE_FOR_rotlv8hi3;
25782 case CODE_FOR_xop_rotlv16qi3:
25783 new_icode = CODE_FOR_rotlv16qi3;
25785 if (CONST_INT_P (op))
25787 int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
25788 op = GEN_INT (INTVAL (op) & mask);
25789 gcc_checking_assert
25790 (insn_data[icode].operand[i + 1].predicate (op, mode));
25794 gcc_checking_assert
25796 && insn_data[new_icode].operand[0].mode == tmode
25797 && insn_data[new_icode].operand[1].mode == tmode
25798 && insn_data[new_icode].operand[2].mode == mode
25799 && insn_data[new_icode].operand[0].predicate
25800 == insn_data[icode].operand[0].predicate
25801 && insn_data[new_icode].operand[1].predicate
25802 == insn_data[icode].operand[1].predicate);
25808 gcc_unreachable ();
25815 if (VECTOR_MODE_P (mode))
25816 op = safe_vector_operand (op, mode);
25818 /* If we aren't optimizing, only allow one memory operand to be
25820 if (memory_operand (op, mode))
25823 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
25826 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
25828 op = force_reg (mode, op);
25832 args[i].mode = mode;
25838 pat = GEN_FCN (icode) (target, args[0].op);
25843 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
25844 GEN_INT ((int)sub_code));
25845 else if (! comparison_p)
25846 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
25849 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
25853 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
25858 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
25862 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
25866 gcc_unreachable ();
25876 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
25877 insns with vec_merge. */
25880 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
25884 tree arg0 = CALL_EXPR_ARG (exp, 0);
25885 rtx op1, op0 = expand_normal (arg0);
25886 enum machine_mode tmode = insn_data[icode].operand[0].mode;
25887 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
25889 if (optimize || !target
25890 || GET_MODE (target) != tmode
25891 || !insn_data[icode].operand[0].predicate (target, tmode))
25892 target = gen_reg_rtx (tmode);
25894 if (VECTOR_MODE_P (mode0))
25895 op0 = safe_vector_operand (op0, mode0);
25897 if ((optimize && !register_operand (op0, mode0))
25898 || !insn_data[icode].operand[1].predicate (op0, mode0))
25899 op0 = copy_to_mode_reg (mode0, op0);
25902 if (!insn_data[icode].operand[2].predicate (op1, mode0))
25903 op1 = copy_to_mode_reg (mode0, op1);
25905 pat = GEN_FCN (icode) (target, op0, op1);
25912 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
25915 ix86_expand_sse_compare (const struct builtin_description *d,
25916 tree exp, rtx target, bool swap)
25919 tree arg0 = CALL_EXPR_ARG (exp, 0);
25920 tree arg1 = CALL_EXPR_ARG (exp, 1);
25921 rtx op0 = expand_normal (arg0);
25922 rtx op1 = expand_normal (arg1);
25924 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
25925 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
25926 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
25927 enum rtx_code comparison = d->comparison;
25929 if (VECTOR_MODE_P (mode0))
25930 op0 = safe_vector_operand (op0, mode0);
25931 if (VECTOR_MODE_P (mode1))
25932 op1 = safe_vector_operand (op1, mode1);
25934 /* Swap operands if we have a comparison that isn't available in
25938 rtx tmp = gen_reg_rtx (mode1);
25939 emit_move_insn (tmp, op1);
25944 if (optimize || !target
25945 || GET_MODE (target) != tmode
25946 || !insn_data[d->icode].operand[0].predicate (target, tmode))
25947 target = gen_reg_rtx (tmode);
25949 if ((optimize && !register_operand (op0, mode0))
25950 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
25951 op0 = copy_to_mode_reg (mode0, op0);
25952 if ((optimize && !register_operand (op1, mode1))
25953 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
25954 op1 = copy_to_mode_reg (mode1, op1);
25956 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
25957 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
25964 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
25967 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
25971 tree arg0 = CALL_EXPR_ARG (exp, 0);
25972 tree arg1 = CALL_EXPR_ARG (exp, 1);
25973 rtx op0 = expand_normal (arg0);
25974 rtx op1 = expand_normal (arg1);
25975 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
25976 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
25977 enum rtx_code comparison = d->comparison;
25979 if (VECTOR_MODE_P (mode0))
25980 op0 = safe_vector_operand (op0, mode0);
25981 if (VECTOR_MODE_P (mode1))
25982 op1 = safe_vector_operand (op1, mode1);
25984 /* Swap operands if we have a comparison that isn't available in
25986 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
25993 target = gen_reg_rtx (SImode);
25994 emit_move_insn (target, const0_rtx);
25995 target = gen_rtx_SUBREG (QImode, target, 0);
25997 if ((optimize && !register_operand (op0, mode0))
25998 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
25999 op0 = copy_to_mode_reg (mode0, op0);
26000 if ((optimize && !register_operand (op1, mode1))
26001 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
26002 op1 = copy_to_mode_reg (mode1, op1);
26004 pat = GEN_FCN (d->icode) (op0, op1);
26008 emit_insn (gen_rtx_SET (VOIDmode,
26009 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
26010 gen_rtx_fmt_ee (comparison, QImode,
26014 return SUBREG_REG (target);
26017 /* Subroutine of ix86_expand_args_builtin to take care of round insns. */
26020 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
26024 tree arg0 = CALL_EXPR_ARG (exp, 0);
26025 rtx op1, op0 = expand_normal (arg0);
26026 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
26027 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
26029 if (optimize || target == 0
26030 || GET_MODE (target) != tmode
26031 || !insn_data[d->icode].operand[0].predicate (target, tmode))
26032 target = gen_reg_rtx (tmode);
26034 if (VECTOR_MODE_P (mode0))
26035 op0 = safe_vector_operand (op0, mode0);
26037 if ((optimize && !register_operand (op0, mode0))
26038 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
26039 op0 = copy_to_mode_reg (mode0, op0);
26041 op1 = GEN_INT (d->comparison);
26043 pat = GEN_FCN (d->icode) (target, op0, op1);
26050 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
26053 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
26057 tree arg0 = CALL_EXPR_ARG (exp, 0);
26058 tree arg1 = CALL_EXPR_ARG (exp, 1);
26059 rtx op0 = expand_normal (arg0);
26060 rtx op1 = expand_normal (arg1);
26061 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
26062 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
26063 enum rtx_code comparison = d->comparison;
26065 if (VECTOR_MODE_P (mode0))
26066 op0 = safe_vector_operand (op0, mode0);
26067 if (VECTOR_MODE_P (mode1))
26068 op1 = safe_vector_operand (op1, mode1);
26070 target = gen_reg_rtx (SImode);
26071 emit_move_insn (target, const0_rtx);
26072 target = gen_rtx_SUBREG (QImode, target, 0);
26074 if ((optimize && !register_operand (op0, mode0))
26075 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
26076 op0 = copy_to_mode_reg (mode0, op0);
26077 if ((optimize && !register_operand (op1, mode1))
26078 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
26079 op1 = copy_to_mode_reg (mode1, op1);
26081 pat = GEN_FCN (d->icode) (op0, op1);
26085 emit_insn (gen_rtx_SET (VOIDmode,
26086 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
26087 gen_rtx_fmt_ee (comparison, QImode,
26091 return SUBREG_REG (target);
26094 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
26097 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
26098 tree exp, rtx target)
26101 tree arg0 = CALL_EXPR_ARG (exp, 0);
26102 tree arg1 = CALL_EXPR_ARG (exp, 1);
26103 tree arg2 = CALL_EXPR_ARG (exp, 2);
26104 tree arg3 = CALL_EXPR_ARG (exp, 3);
26105 tree arg4 = CALL_EXPR_ARG (exp, 4);
26106 rtx scratch0, scratch1;
26107 rtx op0 = expand_normal (arg0);
26108 rtx op1 = expand_normal (arg1);
26109 rtx op2 = expand_normal (arg2);
26110 rtx op3 = expand_normal (arg3);
26111 rtx op4 = expand_normal (arg4);
26112 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
26114 tmode0 = insn_data[d->icode].operand[0].mode;
26115 tmode1 = insn_data[d->icode].operand[1].mode;
26116 modev2 = insn_data[d->icode].operand[2].mode;
26117 modei3 = insn_data[d->icode].operand[3].mode;
26118 modev4 = insn_data[d->icode].operand[4].mode;
26119 modei5 = insn_data[d->icode].operand[5].mode;
26120 modeimm = insn_data[d->icode].operand[6].mode;
26122 if (VECTOR_MODE_P (modev2))
26123 op0 = safe_vector_operand (op0, modev2);
26124 if (VECTOR_MODE_P (modev4))
26125 op2 = safe_vector_operand (op2, modev4);
26127 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
26128 op0 = copy_to_mode_reg (modev2, op0);
26129 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
26130 op1 = copy_to_mode_reg (modei3, op1);
26131 if ((optimize && !register_operand (op2, modev4))
26132 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
26133 op2 = copy_to_mode_reg (modev4, op2);
26134 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
26135 op3 = copy_to_mode_reg (modei5, op3);
26137 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
26139 error ("the fifth argument must be an 8-bit immediate");
26143 if (d->code == IX86_BUILTIN_PCMPESTRI128)
26145 if (optimize || !target
26146 || GET_MODE (target) != tmode0
26147 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
26148 target = gen_reg_rtx (tmode0);
26150 scratch1 = gen_reg_rtx (tmode1);
26152 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
26154 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
26156 if (optimize || !target
26157 || GET_MODE (target) != tmode1
26158 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
26159 target = gen_reg_rtx (tmode1);
26161 scratch0 = gen_reg_rtx (tmode0);
26163 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
26167 gcc_assert (d->flag);
26169 scratch0 = gen_reg_rtx (tmode0);
26170 scratch1 = gen_reg_rtx (tmode1);
26172 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
26182 target = gen_reg_rtx (SImode);
26183 emit_move_insn (target, const0_rtx);
26184 target = gen_rtx_SUBREG (QImode, target, 0);
26187 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
26188 gen_rtx_fmt_ee (EQ, QImode,
26189 gen_rtx_REG ((enum machine_mode) d->flag,
26192 return SUBREG_REG (target);
26199 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
26202 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
26203 tree exp, rtx target)
26206 tree arg0 = CALL_EXPR_ARG (exp, 0);
26207 tree arg1 = CALL_EXPR_ARG (exp, 1);
26208 tree arg2 = CALL_EXPR_ARG (exp, 2);
26209 rtx scratch0, scratch1;
26210 rtx op0 = expand_normal (arg0);
26211 rtx op1 = expand_normal (arg1);
26212 rtx op2 = expand_normal (arg2);
26213 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
26215 tmode0 = insn_data[d->icode].operand[0].mode;
26216 tmode1 = insn_data[d->icode].operand[1].mode;
26217 modev2 = insn_data[d->icode].operand[2].mode;
26218 modev3 = insn_data[d->icode].operand[3].mode;
26219 modeimm = insn_data[d->icode].operand[4].mode;
26221 if (VECTOR_MODE_P (modev2))
26222 op0 = safe_vector_operand (op0, modev2);
26223 if (VECTOR_MODE_P (modev3))
26224 op1 = safe_vector_operand (op1, modev3);
26226 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
26227 op0 = copy_to_mode_reg (modev2, op0);
26228 if ((optimize && !register_operand (op1, modev3))
26229 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
26230 op1 = copy_to_mode_reg (modev3, op1);
26232 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
26234 error ("the third argument must be an 8-bit immediate");
26238 if (d->code == IX86_BUILTIN_PCMPISTRI128)
26240 if (optimize || !target
26241 || GET_MODE (target) != tmode0
26242 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
26243 target = gen_reg_rtx (tmode0);
26245 scratch1 = gen_reg_rtx (tmode1);
26247 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
26249 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
26251 if (optimize || !target
26252 || GET_MODE (target) != tmode1
26253 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
26254 target = gen_reg_rtx (tmode1);
26256 scratch0 = gen_reg_rtx (tmode0);
26258 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
26262 gcc_assert (d->flag);
26264 scratch0 = gen_reg_rtx (tmode0);
26265 scratch1 = gen_reg_rtx (tmode1);
26267 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
26277 target = gen_reg_rtx (SImode);
26278 emit_move_insn (target, const0_rtx);
26279 target = gen_rtx_SUBREG (QImode, target, 0);
26282 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
26283 gen_rtx_fmt_ee (EQ, QImode,
26284 gen_rtx_REG ((enum machine_mode) d->flag,
26287 return SUBREG_REG (target);
26293 /* Subroutine of ix86_expand_builtin to take care of insns with
26294 variable number of operands. */
26297 ix86_expand_args_builtin (const struct builtin_description *d,
26298 tree exp, rtx target)
26300 rtx pat, real_target;
26301 unsigned int i, nargs;
26302 unsigned int nargs_constant = 0;
26303 int num_memory = 0;
26307 enum machine_mode mode;
26309 bool last_arg_count = false;
26310 enum insn_code icode = d->icode;
26311 const struct insn_data_d *insn_p = &insn_data[icode];
26312 enum machine_mode tmode = insn_p->operand[0].mode;
26313 enum machine_mode rmode = VOIDmode;
26315 enum rtx_code comparison = d->comparison;
26317 switch ((enum ix86_builtin_func_type) d->flag)
26319 case V2DF_FTYPE_V2DF_ROUND:
26320 case V4DF_FTYPE_V4DF_ROUND:
26321 case V4SF_FTYPE_V4SF_ROUND:
26322 case V8SF_FTYPE_V8SF_ROUND:
26323 return ix86_expand_sse_round (d, exp, target);
26324 case INT_FTYPE_V8SF_V8SF_PTEST:
26325 case INT_FTYPE_V4DI_V4DI_PTEST:
26326 case INT_FTYPE_V4DF_V4DF_PTEST:
26327 case INT_FTYPE_V4SF_V4SF_PTEST:
26328 case INT_FTYPE_V2DI_V2DI_PTEST:
26329 case INT_FTYPE_V2DF_V2DF_PTEST:
26330 return ix86_expand_sse_ptest (d, exp, target);
26331 case FLOAT128_FTYPE_FLOAT128:
26332 case FLOAT_FTYPE_FLOAT:
26333 case INT_FTYPE_INT:
26334 case UINT64_FTYPE_INT:
26335 case UINT16_FTYPE_UINT16:
26336 case INT64_FTYPE_INT64:
26337 case INT64_FTYPE_V4SF:
26338 case INT64_FTYPE_V2DF:
26339 case INT_FTYPE_V16QI:
26340 case INT_FTYPE_V8QI:
26341 case INT_FTYPE_V8SF:
26342 case INT_FTYPE_V4DF:
26343 case INT_FTYPE_V4SF:
26344 case INT_FTYPE_V2DF:
26345 case V16QI_FTYPE_V16QI:
26346 case V8SI_FTYPE_V8SF:
26347 case V8SI_FTYPE_V4SI:
26348 case V8HI_FTYPE_V8HI:
26349 case V8HI_FTYPE_V16QI:
26350 case V8QI_FTYPE_V8QI:
26351 case V8SF_FTYPE_V8SF:
26352 case V8SF_FTYPE_V8SI:
26353 case V8SF_FTYPE_V4SF:
26354 case V8SF_FTYPE_V8HI:
26355 case V4SI_FTYPE_V4SI:
26356 case V4SI_FTYPE_V16QI:
26357 case V4SI_FTYPE_V4SF:
26358 case V4SI_FTYPE_V8SI:
26359 case V4SI_FTYPE_V8HI:
26360 case V4SI_FTYPE_V4DF:
26361 case V4SI_FTYPE_V2DF:
26362 case V4HI_FTYPE_V4HI:
26363 case V4DF_FTYPE_V4DF:
26364 case V4DF_FTYPE_V4SI:
26365 case V4DF_FTYPE_V4SF:
26366 case V4DF_FTYPE_V2DF:
26367 case V4SF_FTYPE_V4SF:
26368 case V4SF_FTYPE_V4SI:
26369 case V4SF_FTYPE_V8SF:
26370 case V4SF_FTYPE_V4DF:
26371 case V4SF_FTYPE_V8HI:
26372 case V4SF_FTYPE_V2DF:
26373 case V2DI_FTYPE_V2DI:
26374 case V2DI_FTYPE_V16QI:
26375 case V2DI_FTYPE_V8HI:
26376 case V2DI_FTYPE_V4SI:
26377 case V2DF_FTYPE_V2DF:
26378 case V2DF_FTYPE_V4SI:
26379 case V2DF_FTYPE_V4DF:
26380 case V2DF_FTYPE_V4SF:
26381 case V2DF_FTYPE_V2SI:
26382 case V2SI_FTYPE_V2SI:
26383 case V2SI_FTYPE_V4SF:
26384 case V2SI_FTYPE_V2SF:
26385 case V2SI_FTYPE_V2DF:
26386 case V2SF_FTYPE_V2SF:
26387 case V2SF_FTYPE_V2SI:
26390 case V4SF_FTYPE_V4SF_VEC_MERGE:
26391 case V2DF_FTYPE_V2DF_VEC_MERGE:
26392 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
26393 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
26394 case V16QI_FTYPE_V16QI_V16QI:
26395 case V16QI_FTYPE_V8HI_V8HI:
26396 case V8QI_FTYPE_V8QI_V8QI:
26397 case V8QI_FTYPE_V4HI_V4HI:
26398 case V8HI_FTYPE_V8HI_V8HI:
26399 case V8HI_FTYPE_V16QI_V16QI:
26400 case V8HI_FTYPE_V4SI_V4SI:
26401 case V8SF_FTYPE_V8SF_V8SF:
26402 case V8SF_FTYPE_V8SF_V8SI:
26403 case V4SI_FTYPE_V4SI_V4SI:
26404 case V4SI_FTYPE_V8HI_V8HI:
26405 case V4SI_FTYPE_V4SF_V4SF:
26406 case V4SI_FTYPE_V2DF_V2DF:
26407 case V4HI_FTYPE_V4HI_V4HI:
26408 case V4HI_FTYPE_V8QI_V8QI:
26409 case V4HI_FTYPE_V2SI_V2SI:
26410 case V4DF_FTYPE_V4DF_V4DF:
26411 case V4DF_FTYPE_V4DF_V4DI:
26412 case V4SF_FTYPE_V4SF_V4SF:
26413 case V4SF_FTYPE_V4SF_V4SI:
26414 case V4SF_FTYPE_V4SF_V2SI:
26415 case V4SF_FTYPE_V4SF_V2DF:
26416 case V4SF_FTYPE_V4SF_DI:
26417 case V4SF_FTYPE_V4SF_SI:
26418 case V2DI_FTYPE_V2DI_V2DI:
26419 case V2DI_FTYPE_V16QI_V16QI:
26420 case V2DI_FTYPE_V4SI_V4SI:
26421 case V2DI_FTYPE_V2DI_V16QI:
26422 case V2DI_FTYPE_V2DF_V2DF:
26423 case V2SI_FTYPE_V2SI_V2SI:
26424 case V2SI_FTYPE_V4HI_V4HI:
26425 case V2SI_FTYPE_V2SF_V2SF:
26426 case V2DF_FTYPE_V2DF_V2DF:
26427 case V2DF_FTYPE_V2DF_V4SF:
26428 case V2DF_FTYPE_V2DF_V2DI:
26429 case V2DF_FTYPE_V2DF_DI:
26430 case V2DF_FTYPE_V2DF_SI:
26431 case V2SF_FTYPE_V2SF_V2SF:
26432 case V1DI_FTYPE_V1DI_V1DI:
26433 case V1DI_FTYPE_V8QI_V8QI:
26434 case V1DI_FTYPE_V2SI_V2SI:
26435 if (comparison == UNKNOWN)
26436 return ix86_expand_binop_builtin (icode, exp, target);
26439 case V4SF_FTYPE_V4SF_V4SF_SWAP:
26440 case V2DF_FTYPE_V2DF_V2DF_SWAP:
26441 gcc_assert (comparison != UNKNOWN);
26445 case V8HI_FTYPE_V8HI_V8HI_COUNT:
26446 case V8HI_FTYPE_V8HI_SI_COUNT:
26447 case V4SI_FTYPE_V4SI_V4SI_COUNT:
26448 case V4SI_FTYPE_V4SI_SI_COUNT:
26449 case V4HI_FTYPE_V4HI_V4HI_COUNT:
26450 case V4HI_FTYPE_V4HI_SI_COUNT:
26451 case V2DI_FTYPE_V2DI_V2DI_COUNT:
26452 case V2DI_FTYPE_V2DI_SI_COUNT:
26453 case V2SI_FTYPE_V2SI_V2SI_COUNT:
26454 case V2SI_FTYPE_V2SI_SI_COUNT:
26455 case V1DI_FTYPE_V1DI_V1DI_COUNT:
26456 case V1DI_FTYPE_V1DI_SI_COUNT:
26458 last_arg_count = true;
26460 case UINT64_FTYPE_UINT64_UINT64:
26461 case UINT_FTYPE_UINT_UINT:
26462 case UINT_FTYPE_UINT_USHORT:
26463 case UINT_FTYPE_UINT_UCHAR:
26464 case UINT16_FTYPE_UINT16_INT:
26465 case UINT8_FTYPE_UINT8_INT:
26468 case V2DI_FTYPE_V2DI_INT_CONVERT:
26471 nargs_constant = 1;
26473 case V8HI_FTYPE_V8HI_INT:
26474 case V8HI_FTYPE_V8SF_INT:
26475 case V8HI_FTYPE_V4SF_INT:
26476 case V8SF_FTYPE_V8SF_INT:
26477 case V4SI_FTYPE_V4SI_INT:
26478 case V4SI_FTYPE_V8SI_INT:
26479 case V4HI_FTYPE_V4HI_INT:
26480 case V4DF_FTYPE_V4DF_INT:
26481 case V4SF_FTYPE_V4SF_INT:
26482 case V4SF_FTYPE_V8SF_INT:
26483 case V2DI_FTYPE_V2DI_INT:
26484 case V2DF_FTYPE_V2DF_INT:
26485 case V2DF_FTYPE_V4DF_INT:
26487 nargs_constant = 1;
26489 case V16QI_FTYPE_V16QI_V16QI_V16QI:
26490 case V8SF_FTYPE_V8SF_V8SF_V8SF:
26491 case V4DF_FTYPE_V4DF_V4DF_V4DF:
26492 case V4SF_FTYPE_V4SF_V4SF_V4SF:
26493 case V2DF_FTYPE_V2DF_V2DF_V2DF:
26496 case V16QI_FTYPE_V16QI_V16QI_INT:
26497 case V8HI_FTYPE_V8HI_V8HI_INT:
26498 case V8SI_FTYPE_V8SI_V8SI_INT:
26499 case V8SI_FTYPE_V8SI_V4SI_INT:
26500 case V8SF_FTYPE_V8SF_V8SF_INT:
26501 case V8SF_FTYPE_V8SF_V4SF_INT:
26502 case V4SI_FTYPE_V4SI_V4SI_INT:
26503 case V4DF_FTYPE_V4DF_V4DF_INT:
26504 case V4DF_FTYPE_V4DF_V2DF_INT:
26505 case V4SF_FTYPE_V4SF_V4SF_INT:
26506 case V2DI_FTYPE_V2DI_V2DI_INT:
26507 case V2DF_FTYPE_V2DF_V2DF_INT:
26509 nargs_constant = 1;
26511 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
26514 nargs_constant = 1;
26516 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
26519 nargs_constant = 1;
26521 case V2DI_FTYPE_V2DI_UINT_UINT:
26523 nargs_constant = 2;
26525 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
26526 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
26527 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
26528 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
26530 nargs_constant = 1;
26532 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
26534 nargs_constant = 2;
26537 gcc_unreachable ();
26540 gcc_assert (nargs <= ARRAY_SIZE (args));
26542 if (comparison != UNKNOWN)
26544 gcc_assert (nargs == 2);
26545 return ix86_expand_sse_compare (d, exp, target, swap);
26548 if (rmode == VOIDmode || rmode == tmode)
26552 || GET_MODE (target) != tmode
26553 || !insn_p->operand[0].predicate (target, tmode))
26554 target = gen_reg_rtx (tmode);
26555 real_target = target;
26559 target = gen_reg_rtx (rmode);
26560 real_target = simplify_gen_subreg (tmode, target, rmode, 0);
26563 for (i = 0; i < nargs; i++)
26565 tree arg = CALL_EXPR_ARG (exp, i);
26566 rtx op = expand_normal (arg);
26567 enum machine_mode mode = insn_p->operand[i + 1].mode;
26568 bool match = insn_p->operand[i + 1].predicate (op, mode);
26570 if (last_arg_count && (i + 1) == nargs)
26572 /* SIMD shift insns take either an 8-bit immediate or
26573 register as count. But builtin functions take int as
26574 count. If count doesn't match, we put it in register. */
26577 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
26578 if (!insn_p->operand[i + 1].predicate (op, mode))
26579 op = copy_to_reg (op);
26582 else if ((nargs - i) <= nargs_constant)
26587 case CODE_FOR_sse4_1_roundpd:
26588 case CODE_FOR_sse4_1_roundps:
26589 case CODE_FOR_sse4_1_roundsd:
26590 case CODE_FOR_sse4_1_roundss:
26591 case CODE_FOR_sse4_1_blendps:
26592 case CODE_FOR_avx_blendpd256:
26593 case CODE_FOR_avx_vpermilv4df:
26594 case CODE_FOR_avx_roundpd256:
26595 case CODE_FOR_avx_roundps256:
26596 error ("the last argument must be a 4-bit immediate");
26599 case CODE_FOR_sse4_1_blendpd:
26600 case CODE_FOR_avx_vpermilv2df:
26601 case CODE_FOR_xop_vpermil2v2df3:
26602 case CODE_FOR_xop_vpermil2v4sf3:
26603 case CODE_FOR_xop_vpermil2v4df3:
26604 case CODE_FOR_xop_vpermil2v8sf3:
26605 error ("the last argument must be a 2-bit immediate");
26608 case CODE_FOR_avx_vextractf128v4df:
26609 case CODE_FOR_avx_vextractf128v8sf:
26610 case CODE_FOR_avx_vextractf128v8si:
26611 case CODE_FOR_avx_vinsertf128v4df:
26612 case CODE_FOR_avx_vinsertf128v8sf:
26613 case CODE_FOR_avx_vinsertf128v8si:
26614 error ("the last argument must be a 1-bit immediate");
26617 case CODE_FOR_avx_vmcmpv2df3:
26618 case CODE_FOR_avx_vmcmpv4sf3:
26619 case CODE_FOR_avx_cmpv2df3:
26620 case CODE_FOR_avx_cmpv4sf3:
26621 case CODE_FOR_avx_cmpv4df3:
26622 case CODE_FOR_avx_cmpv8sf3:
26623 error ("the last argument must be a 5-bit immediate");
26627 switch (nargs_constant)
26630 if ((nargs - i) == nargs_constant)
26632 error ("the next to last argument must be an 8-bit immediate");
26636 error ("the last argument must be an 8-bit immediate");
26639 gcc_unreachable ();
26646 if (VECTOR_MODE_P (mode))
26647 op = safe_vector_operand (op, mode);
26649 /* If we aren't optimizing, only allow one memory operand to
26651 if (memory_operand (op, mode))
26654 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
26656 if (optimize || !match || num_memory > 1)
26657 op = copy_to_mode_reg (mode, op);
26661 op = copy_to_reg (op);
26662 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
26667 args[i].mode = mode;
26673 pat = GEN_FCN (icode) (real_target, args[0].op);
26676 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
26679 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
26683 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
26684 args[2].op, args[3].op);
26687 gcc_unreachable ();
26697 /* Subroutine of ix86_expand_builtin to take care of special insns
26698 with variable number of operands. */
26701 ix86_expand_special_args_builtin (const struct builtin_description *d,
26702 tree exp, rtx target)
26706 unsigned int i, nargs, arg_adjust, memory;
26710 enum machine_mode mode;
26712 enum insn_code icode = d->icode;
26713 bool last_arg_constant = false;
26714 const struct insn_data_d *insn_p = &insn_data[icode];
26715 enum machine_mode tmode = insn_p->operand[0].mode;
26716 enum { load, store } klass;
26718 switch ((enum ix86_builtin_func_type) d->flag)
26720 case VOID_FTYPE_VOID:
26721 if (icode == CODE_FOR_avx_vzeroupper)
26722 target = GEN_INT (vzeroupper_intrinsic);
26723 emit_insn (GEN_FCN (icode) (target));
26725 case VOID_FTYPE_UINT64:
26726 case VOID_FTYPE_UNSIGNED:
26732 case UINT64_FTYPE_VOID:
26733 case UNSIGNED_FTYPE_VOID:
26738 case UINT64_FTYPE_PUNSIGNED:
26739 case V2DI_FTYPE_PV2DI:
26740 case V32QI_FTYPE_PCCHAR:
26741 case V16QI_FTYPE_PCCHAR:
26742 case V8SF_FTYPE_PCV4SF:
26743 case V8SF_FTYPE_PCFLOAT:
26744 case V4SF_FTYPE_PCFLOAT:
26745 case V4DF_FTYPE_PCV2DF:
26746 case V4DF_FTYPE_PCDOUBLE:
26747 case V2DF_FTYPE_PCDOUBLE:
26748 case VOID_FTYPE_PVOID:
26753 case VOID_FTYPE_PV2SF_V4SF:
26754 case VOID_FTYPE_PV4DI_V4DI:
26755 case VOID_FTYPE_PV2DI_V2DI:
26756 case VOID_FTYPE_PCHAR_V32QI:
26757 case VOID_FTYPE_PCHAR_V16QI:
26758 case VOID_FTYPE_PFLOAT_V8SF:
26759 case VOID_FTYPE_PFLOAT_V4SF:
26760 case VOID_FTYPE_PDOUBLE_V4DF:
26761 case VOID_FTYPE_PDOUBLE_V2DF:
26762 case VOID_FTYPE_PULONGLONG_ULONGLONG:
26763 case VOID_FTYPE_PINT_INT:
26766 /* Reserve memory operand for target. */
26767 memory = ARRAY_SIZE (args);
26769 case V4SF_FTYPE_V4SF_PCV2SF:
26770 case V2DF_FTYPE_V2DF_PCDOUBLE:
26775 case V8SF_FTYPE_PCV8SF_V8SI:
26776 case V4DF_FTYPE_PCV4DF_V4DI:
26777 case V4SF_FTYPE_PCV4SF_V4SI:
26778 case V2DF_FTYPE_PCV2DF_V2DI:
26783 case VOID_FTYPE_PV8SF_V8SI_V8SF:
26784 case VOID_FTYPE_PV4DF_V4DI_V4DF:
26785 case VOID_FTYPE_PV4SF_V4SI_V4SF:
26786 case VOID_FTYPE_PV2DF_V2DI_V2DF:
26789 /* Reserve memory operand for target. */
26790 memory = ARRAY_SIZE (args);
26792 case VOID_FTYPE_UINT_UINT_UINT:
26793 case VOID_FTYPE_UINT64_UINT_UINT:
26794 case UCHAR_FTYPE_UINT_UINT_UINT:
26795 case UCHAR_FTYPE_UINT64_UINT_UINT:
26798 memory = ARRAY_SIZE (args);
26799 last_arg_constant = true;
26802 gcc_unreachable ();
26805 gcc_assert (nargs <= ARRAY_SIZE (args));
26807 if (klass == store)
26809 arg = CALL_EXPR_ARG (exp, 0);
26810 op = expand_normal (arg);
26811 gcc_assert (target == 0);
26814 if (GET_MODE (op) != Pmode)
26815 op = convert_to_mode (Pmode, op, 1);
26816 target = gen_rtx_MEM (tmode, force_reg (Pmode, op));
26819 target = force_reg (tmode, op);
26827 || GET_MODE (target) != tmode
26828 || !insn_p->operand[0].predicate (target, tmode))
26829 target = gen_reg_rtx (tmode);
26832 for (i = 0; i < nargs; i++)
26834 enum machine_mode mode = insn_p->operand[i + 1].mode;
26837 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
26838 op = expand_normal (arg);
26839 match = insn_p->operand[i + 1].predicate (op, mode);
26841 if (last_arg_constant && (i + 1) == nargs)
26845 if (icode == CODE_FOR_lwp_lwpvalsi3
26846 || icode == CODE_FOR_lwp_lwpinssi3
26847 || icode == CODE_FOR_lwp_lwpvaldi3
26848 || icode == CODE_FOR_lwp_lwpinsdi3)
26849 error ("the last argument must be a 32-bit immediate");
26851 error ("the last argument must be an 8-bit immediate");
26859 /* This must be the memory operand. */
26860 if (GET_MODE (op) != Pmode)
26861 op = convert_to_mode (Pmode, op, 1);
26862 op = gen_rtx_MEM (mode, force_reg (Pmode, op));
26863 gcc_assert (GET_MODE (op) == mode
26864 || GET_MODE (op) == VOIDmode);
26868 /* This must be register. */
26869 if (VECTOR_MODE_P (mode))
26870 op = safe_vector_operand (op, mode);
26872 gcc_assert (GET_MODE (op) == mode
26873 || GET_MODE (op) == VOIDmode);
26874 op = copy_to_mode_reg (mode, op);
26879 args[i].mode = mode;
26885 pat = GEN_FCN (icode) (target);
26888 pat = GEN_FCN (icode) (target, args[0].op);
26891 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
26894 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
26897 gcc_unreachable ();
26903 return klass == store ? 0 : target;
26906 /* Return the integer constant in ARG. Constrain it to be in the range
26907 of the subparts of VEC_TYPE; issue an error if not. */
26910 get_element_number (tree vec_type, tree arg)
26912 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
26914 if (!host_integerp (arg, 1)
26915 || (elt = tree_low_cst (arg, 1), elt > max))
26917 error ("selector must be an integer constant in the range 0..%wi", max);
26924 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
26925 ix86_expand_vector_init. We DO have language-level syntax for this, in
26926 the form of (type){ init-list }. Except that since we can't place emms
26927 instructions from inside the compiler, we can't allow the use of MMX
26928 registers unless the user explicitly asks for it. So we do *not* define
26929 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
26930 we have builtins invoked by mmintrin.h that gives us license to emit
26931 these sorts of instructions. */
26934 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
26936 enum machine_mode tmode = TYPE_MODE (type);
26937 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
26938 int i, n_elt = GET_MODE_NUNITS (tmode);
26939 rtvec v = rtvec_alloc (n_elt);
26941 gcc_assert (VECTOR_MODE_P (tmode));
26942 gcc_assert (call_expr_nargs (exp) == n_elt);
26944 for (i = 0; i < n_elt; ++i)
26946 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
26947 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
26950 if (!target || !register_operand (target, tmode))
26951 target = gen_reg_rtx (tmode);
26953 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
26957 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
26958 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
26959 had a language-level syntax for referencing vector elements. */
26962 ix86_expand_vec_ext_builtin (tree exp, rtx target)
26964 enum machine_mode tmode, mode0;
26969 arg0 = CALL_EXPR_ARG (exp, 0);
26970 arg1 = CALL_EXPR_ARG (exp, 1);
26972 op0 = expand_normal (arg0);
26973 elt = get_element_number (TREE_TYPE (arg0), arg1);
26975 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
26976 mode0 = TYPE_MODE (TREE_TYPE (arg0));
26977 gcc_assert (VECTOR_MODE_P (mode0));
26979 op0 = force_reg (mode0, op0);
26981 if (optimize || !target || !register_operand (target, tmode))
26982 target = gen_reg_rtx (tmode);
26984 ix86_expand_vector_extract (true, target, op0, elt);
26989 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
26990 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
26991 a language-level syntax for referencing vector elements. */
26994 ix86_expand_vec_set_builtin (tree exp)
26996 enum machine_mode tmode, mode1;
26997 tree arg0, arg1, arg2;
26999 rtx op0, op1, target;
27001 arg0 = CALL_EXPR_ARG (exp, 0);
27002 arg1 = CALL_EXPR_ARG (exp, 1);
27003 arg2 = CALL_EXPR_ARG (exp, 2);
27005 tmode = TYPE_MODE (TREE_TYPE (arg0));
27006 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
27007 gcc_assert (VECTOR_MODE_P (tmode));
27009 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
27010 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
27011 elt = get_element_number (TREE_TYPE (arg0), arg2);
27013 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
27014 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
27016 op0 = force_reg (tmode, op0);
27017 op1 = force_reg (mode1, op1);
27019 /* OP0 is the source of these builtin functions and shouldn't be
27020 modified. Create a copy, use it and return it as target. */
27021 target = gen_reg_rtx (tmode);
27022 emit_move_insn (target, op0);
27023 ix86_expand_vector_set (true, target, op1, elt);
27028 /* Expand an expression EXP that calls a built-in function,
27029 with result going to TARGET if that's convenient
27030 (and in mode MODE if that's convenient).
27031 SUBTARGET may be used as the target for computing one of EXP's operands.
27032 IGNORE is nonzero if the value is to be ignored. */
27035 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
27036 enum machine_mode mode ATTRIBUTE_UNUSED,
27037 int ignore ATTRIBUTE_UNUSED)
27039 const struct builtin_description *d;
27041 enum insn_code icode;
27042 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
27043 tree arg0, arg1, arg2;
27044 rtx op0, op1, op2, pat;
27045 enum machine_mode mode0, mode1, mode2;
27046 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
27048 /* Determine whether the builtin function is available under the current ISA.
27049 Originally the builtin was not created if it wasn't applicable to the
27050 current ISA based on the command line switches. With function specific
27051 options, we need to check in the context of the function making the call
27052 whether it is supported. */
27053 if (ix86_builtins_isa[fcode].isa
27054 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
27056 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
27057 NULL, (enum fpmath_unit) 0, false);
27060 error ("%qE needs unknown isa option", fndecl);
27063 gcc_assert (opts != NULL);
27064 error ("%qE needs isa option %s", fndecl, opts);
27072 case IX86_BUILTIN_MASKMOVQ:
27073 case IX86_BUILTIN_MASKMOVDQU:
27074 icode = (fcode == IX86_BUILTIN_MASKMOVQ
27075 ? CODE_FOR_mmx_maskmovq
27076 : CODE_FOR_sse2_maskmovdqu);
27077 /* Note the arg order is different from the operand order. */
27078 arg1 = CALL_EXPR_ARG (exp, 0);
27079 arg2 = CALL_EXPR_ARG (exp, 1);
27080 arg0 = CALL_EXPR_ARG (exp, 2);
27081 op0 = expand_normal (arg0);
27082 op1 = expand_normal (arg1);
27083 op2 = expand_normal (arg2);
27084 mode0 = insn_data[icode].operand[0].mode;
27085 mode1 = insn_data[icode].operand[1].mode;
27086 mode2 = insn_data[icode].operand[2].mode;
27088 if (GET_MODE (op0) != Pmode)
27089 op0 = convert_to_mode (Pmode, op0, 1);
27090 op0 = gen_rtx_MEM (mode1, force_reg (Pmode, op0));
27092 if (!insn_data[icode].operand[0].predicate (op0, mode0))
27093 op0 = copy_to_mode_reg (mode0, op0);
27094 if (!insn_data[icode].operand[1].predicate (op1, mode1))
27095 op1 = copy_to_mode_reg (mode1, op1);
27096 if (!insn_data[icode].operand[2].predicate (op2, mode2))
27097 op2 = copy_to_mode_reg (mode2, op2);
27098 pat = GEN_FCN (icode) (op0, op1, op2);
27104 case IX86_BUILTIN_LDMXCSR:
27105 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
27106 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
27107 emit_move_insn (target, op0);
27108 emit_insn (gen_sse_ldmxcsr (target));
27111 case IX86_BUILTIN_STMXCSR:
27112 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
27113 emit_insn (gen_sse_stmxcsr (target));
27114 return copy_to_mode_reg (SImode, target);
27116 case IX86_BUILTIN_CLFLUSH:
27117 arg0 = CALL_EXPR_ARG (exp, 0);
27118 op0 = expand_normal (arg0);
27119 icode = CODE_FOR_sse2_clflush;
27120 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
27122 if (GET_MODE (op0) != Pmode)
27123 op0 = convert_to_mode (Pmode, op0, 1);
27124 op0 = force_reg (Pmode, op0);
27127 emit_insn (gen_sse2_clflush (op0));
27130 case IX86_BUILTIN_MONITOR:
27131 arg0 = CALL_EXPR_ARG (exp, 0);
27132 arg1 = CALL_EXPR_ARG (exp, 1);
27133 arg2 = CALL_EXPR_ARG (exp, 2);
27134 op0 = expand_normal (arg0);
27135 op1 = expand_normal (arg1);
27136 op2 = expand_normal (arg2);
27139 if (GET_MODE (op0) != Pmode)
27140 op0 = convert_to_mode (Pmode, op0, 1);
27141 op0 = force_reg (Pmode, op0);
27144 op1 = copy_to_mode_reg (SImode, op1);
27146 op2 = copy_to_mode_reg (SImode, op2);
27147 emit_insn (ix86_gen_monitor (op0, op1, op2));
27150 case IX86_BUILTIN_MWAIT:
27151 arg0 = CALL_EXPR_ARG (exp, 0);
27152 arg1 = CALL_EXPR_ARG (exp, 1);
27153 op0 = expand_normal (arg0);
27154 op1 = expand_normal (arg1);
27156 op0 = copy_to_mode_reg (SImode, op0);
27158 op1 = copy_to_mode_reg (SImode, op1);
27159 emit_insn (gen_sse3_mwait (op0, op1));
27162 case IX86_BUILTIN_VEC_INIT_V2SI:
27163 case IX86_BUILTIN_VEC_INIT_V4HI:
27164 case IX86_BUILTIN_VEC_INIT_V8QI:
27165 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
27167 case IX86_BUILTIN_VEC_EXT_V2DF:
27168 case IX86_BUILTIN_VEC_EXT_V2DI:
27169 case IX86_BUILTIN_VEC_EXT_V4SF:
27170 case IX86_BUILTIN_VEC_EXT_V4SI:
27171 case IX86_BUILTIN_VEC_EXT_V8HI:
27172 case IX86_BUILTIN_VEC_EXT_V2SI:
27173 case IX86_BUILTIN_VEC_EXT_V4HI:
27174 case IX86_BUILTIN_VEC_EXT_V16QI:
27175 return ix86_expand_vec_ext_builtin (exp, target);
27177 case IX86_BUILTIN_VEC_SET_V2DI:
27178 case IX86_BUILTIN_VEC_SET_V4SF:
27179 case IX86_BUILTIN_VEC_SET_V4SI:
27180 case IX86_BUILTIN_VEC_SET_V8HI:
27181 case IX86_BUILTIN_VEC_SET_V4HI:
27182 case IX86_BUILTIN_VEC_SET_V16QI:
27183 return ix86_expand_vec_set_builtin (exp);
27185 case IX86_BUILTIN_VEC_PERM_V2DF:
27186 case IX86_BUILTIN_VEC_PERM_V4SF:
27187 case IX86_BUILTIN_VEC_PERM_V2DI:
27188 case IX86_BUILTIN_VEC_PERM_V4SI:
27189 case IX86_BUILTIN_VEC_PERM_V8HI:
27190 case IX86_BUILTIN_VEC_PERM_V16QI:
27191 case IX86_BUILTIN_VEC_PERM_V2DI_U:
27192 case IX86_BUILTIN_VEC_PERM_V4SI_U:
27193 case IX86_BUILTIN_VEC_PERM_V8HI_U:
27194 case IX86_BUILTIN_VEC_PERM_V16QI_U:
27195 case IX86_BUILTIN_VEC_PERM_V4DF:
27196 case IX86_BUILTIN_VEC_PERM_V8SF:
27197 return ix86_expand_vec_perm_builtin (exp);
27199 case IX86_BUILTIN_INFQ:
27200 case IX86_BUILTIN_HUGE_VALQ:
27202 REAL_VALUE_TYPE inf;
27206 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
27208 tmp = validize_mem (force_const_mem (mode, tmp));
27211 target = gen_reg_rtx (mode);
27213 emit_move_insn (target, tmp);
27217 case IX86_BUILTIN_LLWPCB:
27218 arg0 = CALL_EXPR_ARG (exp, 0);
27219 op0 = expand_normal (arg0);
27220 icode = CODE_FOR_lwp_llwpcb;
27221 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
27223 if (GET_MODE (op0) != Pmode)
27224 op0 = convert_to_mode (Pmode, op0, 1);
27225 op0 = force_reg (Pmode, op0);
27227 emit_insn (gen_lwp_llwpcb (op0));
27230 case IX86_BUILTIN_SLWPCB:
27231 icode = CODE_FOR_lwp_slwpcb;
27233 || !insn_data[icode].operand[0].predicate (target, Pmode))
27234 target = gen_reg_rtx (Pmode);
27235 emit_insn (gen_lwp_slwpcb (target));
27238 case IX86_BUILTIN_BEXTRI32:
27239 case IX86_BUILTIN_BEXTRI64:
27240 arg0 = CALL_EXPR_ARG (exp, 0);
27241 arg1 = CALL_EXPR_ARG (exp, 1);
27242 op0 = expand_normal (arg0);
27243 op1 = expand_normal (arg1);
27244 icode = (fcode == IX86_BUILTIN_BEXTRI32
27245 ? CODE_FOR_tbm_bextri_si
27246 : CODE_FOR_tbm_bextri_di);
27247 if (!CONST_INT_P (op1))
27249 error ("last argument must be an immediate");
27254 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
27255 unsigned char lsb_index = INTVAL (op1) & 0xFF;
27256 op1 = GEN_INT (length);
27257 op2 = GEN_INT (lsb_index);
27258 pat = GEN_FCN (icode) (target, op0, op1, op2);
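/* For example (expository sketch): __builtin_ia32_bextri_u32 (x, 0x0804)
   is split above into lsb_index = 0x04 and length = 0x08, i.e. it
   extracts the eight bits x[11:4].  The second argument must be a
   compile-time constant because TBM's BEXTRI encodes both fields
   directly in the instruction's immediate.  */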
27264 case IX86_BUILTIN_RDRAND16_STEP:
27265 icode = CODE_FOR_rdrandhi_1;
27269 case IX86_BUILTIN_RDRAND32_STEP:
27270 icode = CODE_FOR_rdrandsi_1;
27274 case IX86_BUILTIN_RDRAND64_STEP:
27275 icode = CODE_FOR_rdranddi_1;
27279 op0 = gen_reg_rtx (mode0);
27280 emit_insn (GEN_FCN (icode) (op0));
27282 arg0 = CALL_EXPR_ARG (exp, 0);
27283 op1 = expand_normal (arg0);
27284 if (!address_operand (op1, VOIDmode))
27286 op1 = convert_memory_address (Pmode, op1);
27287 op1 = copy_addr_to_reg (op1);
27289 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
27291 op1 = gen_reg_rtx (SImode);
27292 emit_move_insn (op1, CONST1_RTX (SImode));
27294 /* Emit SImode conditional move. */
27295 if (mode0 == HImode)
27297 op2 = gen_reg_rtx (SImode);
27298 emit_insn (gen_zero_extendhisi2 (op2, op0));
27300 else if (mode0 == SImode)
27303 op2 = gen_rtx_SUBREG (SImode, op0, 0);
27306 target = gen_reg_rtx (SImode);
27308 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
27310 emit_insn (gen_rtx_SET (VOIDmode, target,
27311 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
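/* Illustrative usage sketch (expository, not original GCC text): the
   step builtins return nonzero exactly when the hardware had entropy
   available:

     unsigned int v;
     while (!__builtin_ia32_rdrand32_step (&v))
       ;

   RDRAND zeroes its destination register on failure, so the
   conditional move built above can return the copied result (zero)
   when the carry flag is clear, and the constant 1 when it is set.  */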
27318 for (i = 0, d = bdesc_special_args;
27319 i < ARRAY_SIZE (bdesc_special_args);
27321 if (d->code == fcode)
27322 return ix86_expand_special_args_builtin (d, exp, target);
27324 for (i = 0, d = bdesc_args;
27325 i < ARRAY_SIZE (bdesc_args);
27327 if (d->code == fcode)
27330 case IX86_BUILTIN_FABSQ:
27331 case IX86_BUILTIN_COPYSIGNQ:
27333 /* Emit a normal call if SSE2 isn't available. */
27334 return expand_call (exp, target, ignore);
27336 return ix86_expand_args_builtin (d, exp, target);
27339 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
27340 if (d->code == fcode)
27341 return ix86_expand_sse_comi (d, exp, target);
27343 for (i = 0, d = bdesc_pcmpestr;
27344 i < ARRAY_SIZE (bdesc_pcmpestr);
27346 if (d->code == fcode)
27347 return ix86_expand_sse_pcmpestr (d, exp, target);
27349 for (i = 0, d = bdesc_pcmpistr;
27350 i < ARRAY_SIZE (bdesc_pcmpistr);
27352 if (d->code == fcode)
27353 return ix86_expand_sse_pcmpistr (d, exp, target);
27355 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
27356 if (d->code == fcode)
27357 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
27358 (enum ix86_builtin_func_type)
27359 d->flag, d->comparison);
27361 gcc_unreachable ();
27364 /* Returns a function decl for a vectorized version of the builtin function
27365 with builtin function code FN and the result vector type TYPE, or NULL_TREE
27366 if it is not available. */
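/* For example (expository note): when vectorizing a loop over doubles
   that calls sqrt, the vectorizer asks for BUILT_IN_SQRT with V2DF as
   both TYPE_OUT and TYPE_IN and receives the decl for
   IX86_BUILTIN_SQRTPD, i.e. one sqrtpd per two elements; with AVX and
   V4DF it receives IX86_BUILTIN_SQRTPD256 instead.  */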
27369 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
27372 enum machine_mode in_mode, out_mode;
27374 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
27376 if (TREE_CODE (type_out) != VECTOR_TYPE
27377 || TREE_CODE (type_in) != VECTOR_TYPE
27378 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
27381 out_mode = TYPE_MODE (TREE_TYPE (type_out));
27382 out_n = TYPE_VECTOR_SUBPARTS (type_out);
27383 in_mode = TYPE_MODE (TREE_TYPE (type_in));
27384 in_n = TYPE_VECTOR_SUBPARTS (type_in);
27388 case BUILT_IN_SQRT:
27389 if (out_mode == DFmode && in_mode == DFmode)
27391 if (out_n == 2 && in_n == 2)
27392 return ix86_builtins[IX86_BUILTIN_SQRTPD];
27393 else if (out_n == 4 && in_n == 4)
27394 return ix86_builtins[IX86_BUILTIN_SQRTPD256];
27398 case BUILT_IN_SQRTF:
27399 if (out_mode == SFmode && in_mode == SFmode)
27401 if (out_n == 4 && in_n == 4)
27402 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
27403 else if (out_n == 8 && in_n == 8)
27404 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR256];
27408 case BUILT_IN_LRINT:
27409 if (out_mode == SImode && out_n == 4
27410 && in_mode == DFmode && in_n == 2)
27411 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
27414 case BUILT_IN_LRINTF:
27415 if (out_mode == SImode && in_mode == SFmode)
27417 if (out_n == 4 && in_n == 4)
27418 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
27419 else if (out_n == 8 && in_n == 8)
27420 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ256];
27424 case BUILT_IN_COPYSIGN:
27425 if (out_mode == DFmode && in_mode == DFmode)
27427 if (out_n == 2 && in_n == 2)
27428 return ix86_builtins[IX86_BUILTIN_CPYSGNPD];
27429 else if (out_n == 4 && in_n == 4)
27430 return ix86_builtins[IX86_BUILTIN_CPYSGNPD256];
27434 case BUILT_IN_COPYSIGNF:
27435 if (out_mode == SFmode && in_mode == SFmode)
27437 if (out_n == 4 && in_n == 4)
27438 return ix86_builtins[IX86_BUILTIN_CPYSGNPS];
27439 else if (out_n == 8 && in_n == 8)
27440 return ix86_builtins[IX86_BUILTIN_CPYSGNPS256];
27444 case BUILT_IN_FLOOR:
27445 /* The round insn does not trap on denormals. */
27446 if (flag_trapping_math || !TARGET_ROUND)
27449 if (out_mode == DFmode && in_mode == DFmode)
27451 if (out_n == 2 && in_n == 2)
27452 return ix86_builtins[IX86_BUILTIN_FLOORPD];
27453 else if (out_n == 4 && in_n == 4)
27454 return ix86_builtins[IX86_BUILTIN_FLOORPD256];
27458 case BUILT_IN_FLOORF:
27459 /* The round insn does not trap on denormals. */
27460 if (flag_trapping_math || !TARGET_ROUND)
27463 if (out_mode == SFmode && in_mode == SFmode)
27465 if (out_n == 4 && in_n == 4)
27466 return ix86_builtins[IX86_BUILTIN_FLOORPS];
27467 else if (out_n == 8 && in_n == 8)
27468 return ix86_builtins[IX86_BUILTIN_FLOORPS256];
27472 case BUILT_IN_CEIL:
27473 /* The round insn does not trap on denormals. */
27474 if (flag_trapping_math || !TARGET_ROUND)
27477 if (out_mode == DFmode && in_mode == DFmode)
27479 if (out_n == 2 && in_n == 2)
27480 return ix86_builtins[IX86_BUILTIN_CEILPD];
27481 else if (out_n == 4 && in_n == 4)
27482 return ix86_builtins[IX86_BUILTIN_CEILPD256];
27486 case BUILT_IN_CEILF:
27487 /* The round insn does not trap on denormals. */
27488 if (flag_trapping_math || !TARGET_ROUND)
27491 if (out_mode == SFmode && in_mode == SFmode)
27493 if (out_n == 4 && in_n == 4)
27494 return ix86_builtins[IX86_BUILTIN_CEILPS];
27495 else if (out_n == 8 && in_n == 8)
27496 return ix86_builtins[IX86_BUILTIN_CEILPS256];
27500 case BUILT_IN_TRUNC:
27501 /* The round insn does not trap on denormals. */
27502 if (flag_trapping_math || !TARGET_ROUND)
27505 if (out_mode == DFmode && in_mode == DFmode)
27507 if (out_n == 2 && in_n == 2)
27508 return ix86_builtins[IX86_BUILTIN_TRUNCPD];
27509 else if (out_n == 4 && in_n == 4)
27510 return ix86_builtins[IX86_BUILTIN_TRUNCPD256];
27514 case BUILT_IN_TRUNCF:
27515 /* The round insn does not trap on denormals. */
27516 if (flag_trapping_math || !TARGET_ROUND)
27519 if (out_mode == SFmode && in_mode == SFmode)
27521 if (out_n == 4 && in_n == 4)
27522 return ix86_builtins[IX86_BUILTIN_TRUNCPS];
27523 else if (out_n == 8 && in_n == 8)
27524 return ix86_builtins[IX86_BUILTIN_TRUNCPS256];
27528 case BUILT_IN_RINT:
27529 /* The round insn does not trap on denormals. */
27530 if (flag_trapping_math || !TARGET_ROUND)
27533 if (out_mode == DFmode && in_mode == DFmode)
27535 if (out_n == 2 && in_n == 2)
27536 return ix86_builtins[IX86_BUILTIN_RINTPD];
27537 else if (out_n == 4 && in_n == 4)
27538 return ix86_builtins[IX86_BUILTIN_RINTPD256];
27542 case BUILT_IN_RINTF:
27543 /* The round insn does not trap on denormals. */
27544 if (flag_trapping_math || !TARGET_ROUND)
27547 if (out_mode == SFmode && in_mode == SFmode)
27549 if (out_n == 4 && in_n == 4)
27550 return ix86_builtins[IX86_BUILTIN_RINTPS];
27551 else if (out_n == 8 && in_n == 8)
27552 return ix86_builtins[IX86_BUILTIN_RINTPS256];
27557 if (out_mode == DFmode && in_mode == DFmode)
27559 if (out_n == 2 && in_n == 2)
27560 return ix86_builtins[IX86_BUILTIN_VFMADDPD];
27561 if (out_n == 4 && in_n == 4)
27562 return ix86_builtins[IX86_BUILTIN_VFMADDPD256];
27566 case BUILT_IN_FMAF:
27567 if (out_mode == SFmode && in_mode == SFmode)
27569 if (out_n == 4 && in_n == 4)
27570 return ix86_builtins[IX86_BUILTIN_VFMADDPS];
27571 if (out_n == 8 && in_n == 8)
27572 return ix86_builtins[IX86_BUILTIN_VFMADDPS256];
27580 /* Dispatch to a handler for a vectorization library. */
27581 if (ix86_veclib_handler)
27582 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
27588 /* Handler for an SVML-style interface to
27589 a library with vectorized intrinsics. */
27592 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
27595 tree fntype, new_fndecl, args;
27598 enum machine_mode el_mode, in_mode;
27601 /* The SVML is suitable for unsafe math only. */
27602 if (!flag_unsafe_math_optimizations)
27605 el_mode = TYPE_MODE (TREE_TYPE (type_out));
27606 n = TYPE_VECTOR_SUBPARTS (type_out);
27607 in_mode = TYPE_MODE (TREE_TYPE (type_in));
27608 in_n = TYPE_VECTOR_SUBPARTS (type_in);
27609 if (el_mode != in_mode
27617 case BUILT_IN_LOG10:
27619 case BUILT_IN_TANH:
27621 case BUILT_IN_ATAN:
27622 case BUILT_IN_ATAN2:
27623 case BUILT_IN_ATANH:
27624 case BUILT_IN_CBRT:
27625 case BUILT_IN_SINH:
27627 case BUILT_IN_ASINH:
27628 case BUILT_IN_ASIN:
27629 case BUILT_IN_COSH:
27631 case BUILT_IN_ACOSH:
27632 case BUILT_IN_ACOS:
27633 if (el_mode != DFmode || n != 2)
27637 case BUILT_IN_EXPF:
27638 case BUILT_IN_LOGF:
27639 case BUILT_IN_LOG10F:
27640 case BUILT_IN_POWF:
27641 case BUILT_IN_TANHF:
27642 case BUILT_IN_TANF:
27643 case BUILT_IN_ATANF:
27644 case BUILT_IN_ATAN2F:
27645 case BUILT_IN_ATANHF:
27646 case BUILT_IN_CBRTF:
27647 case BUILT_IN_SINHF:
27648 case BUILT_IN_SINF:
27649 case BUILT_IN_ASINHF:
27650 case BUILT_IN_ASINF:
27651 case BUILT_IN_COSHF:
27652 case BUILT_IN_COSF:
27653 case BUILT_IN_ACOSHF:
27654 case BUILT_IN_ACOSF:
27655 if (el_mode != SFmode || n != 4)
27663 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
27665 if (fn == BUILT_IN_LOGF)
27666 strcpy (name, "vmlsLn4");
27667 else if (fn == BUILT_IN_LOG)
27668 strcpy (name, "vmldLn2");
27671 sprintf (name, "vmls%s", bname + 10);
27672 name[strlen (name) - 1] = '4';
27675 sprintf (name, "vmld%s2", bname + 10);
27677 /* Convert to uppercase. */
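/* For example (expository, per the mangling above): for BUILT_IN_SINF
   the builtin name "__builtin_sinf" yields "vmlssinf", the trailing
   'f' is overwritten with '4', and the first letter of the scalar
   name is uppercased, giving "vmlsSin4"; BUILT_IN_SIN likewise
   becomes "vmldSin2".  The log cases are special-cased above to
   SVML's "Ln" spelling.  */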
27681 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
27682 args = TREE_CHAIN (args))
27686 fntype = build_function_type_list (type_out, type_in, NULL);
27688 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
27690 /* Build a function declaration for the vectorized function. */
27691 new_fndecl = build_decl (BUILTINS_LOCATION,
27692 FUNCTION_DECL, get_identifier (name), fntype);
27693 TREE_PUBLIC (new_fndecl) = 1;
27694 DECL_EXTERNAL (new_fndecl) = 1;
27695 DECL_IS_NOVOPS (new_fndecl) = 1;
27696 TREE_READONLY (new_fndecl) = 1;
27701 /* Handler for an ACML-style interface to
27702 a library with vectorized intrinsics. */
27705 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
27707 char name[20] = "__vr.._";
27708 tree fntype, new_fndecl, args;
27711 enum machine_mode el_mode, in_mode;
27714 /* The ACML is 64-bit only and suitable for unsafe math only, as
27715 it does not correctly support parts of IEEE with the required
27716 precision such as denormals. */
27718 || !flag_unsafe_math_optimizations)
27721 el_mode = TYPE_MODE (TREE_TYPE (type_out));
27722 n = TYPE_VECTOR_SUBPARTS (type_out);
27723 in_mode = TYPE_MODE (TREE_TYPE (type_in));
27724 in_n = TYPE_VECTOR_SUBPARTS (type_in);
27725 if (el_mode != in_mode
27735 case BUILT_IN_LOG2:
27736 case BUILT_IN_LOG10:
27739 if (el_mode != DFmode
27744 case BUILT_IN_SINF:
27745 case BUILT_IN_COSF:
27746 case BUILT_IN_EXPF:
27747 case BUILT_IN_POWF:
27748 case BUILT_IN_LOGF:
27749 case BUILT_IN_LOG2F:
27750 case BUILT_IN_LOG10F:
27753 if (el_mode != SFmode
27762 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
27763 sprintf (name + 7, "%s", bname+10);
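/* For example (expository): the two dots of the "__vr.._" template
   are filled in with the element-mode letter and the vector width,
   and the scalar name past the "__builtin_" prefix is appended, so
   BUILT_IN_SIN becomes "__vrd2_sin" and BUILT_IN_SINF becomes
   "__vrs4_sinf".  */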
27766 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
27767 args = TREE_CHAIN (args))
27771 fntype = build_function_type_list (type_out, type_in, NULL);
27773 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
27775 /* Build a function declaration for the vectorized function. */
27776 new_fndecl = build_decl (BUILTINS_LOCATION,
27777 FUNCTION_DECL, get_identifier (name), fntype);
27778 TREE_PUBLIC (new_fndecl) = 1;
27779 DECL_EXTERNAL (new_fndecl) = 1;
27780 DECL_IS_NOVOPS (new_fndecl) = 1;
27781 TREE_READONLY (new_fndecl) = 1;
27787 /* Returns a decl of a function that implements conversion of an integer vector
27788 into a floating-point vector, or vice-versa. DEST_TYPE and SRC_TYPE
27789 are the types involved when converting according to CODE.
27790 Return NULL_TREE if it is not available. */
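/* For example (expository note): a FLOAT_EXPR from V4SI to V4SF is
   vectorized through IX86_BUILTIN_CVTDQ2PS (a single cvtdq2ps), the
   unsigned variant through IX86_BUILTIN_CVTUDQ2PS, and the
   FIX_TRUNC_EXPR direction maps onto the cvttps2dq family below.  */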
27793 ix86_vectorize_builtin_conversion (unsigned int code,
27794 tree dest_type, tree src_type)
27802 switch (TYPE_MODE (src_type))
27805 switch (TYPE_MODE (dest_type))
27808 return (TYPE_UNSIGNED (src_type)
27809 ? ix86_builtins[IX86_BUILTIN_CVTUDQ2PS]
27810 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS]);
27812 return (TYPE_UNSIGNED (src_type)
27814 : ix86_builtins[IX86_BUILTIN_CVTDQ2PD256]);
27820 switch (TYPE_MODE (dest_type))
27823 return (TYPE_UNSIGNED (src_type)
27825 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS256]);
27834 case FIX_TRUNC_EXPR:
27835 switch (TYPE_MODE (dest_type))
27838 switch (TYPE_MODE (src_type))
27841 return (TYPE_UNSIGNED (dest_type)
27843 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ]);
27845 return (TYPE_UNSIGNED (dest_type)
27847 : ix86_builtins[IX86_BUILTIN_CVTTPD2DQ256]);
27854 switch (TYPE_MODE (src_type))
27857 return (TYPE_UNSIGNED (dest_type)
27859 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ256]);
27876 /* Returns a code for a target-specific builtin that implements the
27877 reciprocal of the function, or NULL_TREE if not available. */
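/* For example (expository sketch): with -ffast-math, which implies
   all of the flags tested below, a scalar 1.0f / sqrtf (x) is
   rewritten to use IX86_BUILTIN_RSQRTF, i.e. an rsqrtss estimate
   refined by a Newton-Raphson step, instead of a full-latency sqrtss
   followed by divss.  */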
27880 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
27881 bool sqrt ATTRIBUTE_UNUSED)
27883 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
27884 && flag_finite_math_only && !flag_trapping_math
27885 && flag_unsafe_math_optimizations))
27889 /* Machine dependent builtins. */
27892 /* Vectorized version of sqrt to rsqrt conversion. */
27893 case IX86_BUILTIN_SQRTPS_NR:
27894 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
27896 case IX86_BUILTIN_SQRTPS_NR256:
27897 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR256];
27903 /* Normal builtins. */
27906 /* Sqrt to rsqrt conversion. */
27907 case BUILT_IN_SQRTF:
27908 return ix86_builtins[IX86_BUILTIN_RSQRTF];
27915 /* Helper for avx_vpermilps256_operand et al. This is also used by
27916 the expansion functions to turn the parallel back into a mask.
27917 The return value is 0 for no match and the imm8+1 for a match. */
27920 avx_vpermilp_parallel (rtx par, enum machine_mode mode)
27922 unsigned i, nelt = GET_MODE_NUNITS (mode);
27924 unsigned char ipar[8];
27926 if (XVECLEN (par, 0) != (int) nelt)
27929 /* Validate that all of the elements are constants, and not totally
27930 out of range. Copy the data into an integral array to make the
27931 subsequent checks easier. */
27932 for (i = 0; i < nelt; ++i)
27934 rtx er = XVECEXP (par, 0, i);
27935 unsigned HOST_WIDE_INT ei;
27937 if (!CONST_INT_P (er))
27948 /* In the 256-bit DFmode case, we can only move elements within a 128-bit lane. */
27950 for (i = 0; i < 2; ++i)
27954 mask |= ipar[i] << i;
27956 for (i = 2; i < 4; ++i)
27960 mask |= (ipar[i] - 2) << i;
27965 /* In the 256-bit SFmode case, we have full freedom of movement
27966 within the low 128-bit lane, but the high 128-bit lane must
27967 mirror the exact same pattern. */
27968 for (i = 0; i < 4; ++i)
27969 if (ipar[i] + 4 != ipar[i + 4])
27976 /* In the 128-bit case, we have full freedom in the placement of
27977 the elements from the source operand. */
27978 for (i = 0; i < nelt; ++i)
27979 mask |= ipar[i] << (i * (nelt / 2));
27983 gcc_unreachable ();
27986 /* Make sure success has a non-zero value by adding one. */
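/* For example (V4SFmode, expository): the parallel [1 0 3 2], i.e.
   swap the elements of each pair, contributes two bits per element:
   mask = 1<<0 | 0<<2 | 3<<4 | 2<<6 = 0xb1, returned as 0xb2.  */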
27990 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
27991 the expansion functions to turn the parallel back into a mask.
27992 The return value is 0 for no match and the imm8+1 for a match. */
27995 avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
27997 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
27999 unsigned char ipar[8];
28001 if (XVECLEN (par, 0) != (int) nelt)
28004 /* Validate that all of the elements are constants, and not totally
28005 out of range. Copy the data into an integral array to make the
28006 subsequent checks easier. */
28007 for (i = 0; i < nelt; ++i)
28009 rtx er = XVECEXP (par, 0, i);
28010 unsigned HOST_WIDE_INT ei;
28012 if (!CONST_INT_P (er))
28015 if (ei >= 2 * nelt)
28020 /* Validate that the halves of the permute are halves. */
28021 for (i = 0; i < nelt2 - 1; ++i)
28022 if (ipar[i] + 1 != ipar[i + 1])
28024 for (i = nelt2; i < nelt - 1; ++i)
28025 if (ipar[i] + 1 != ipar[i + 1])
28028 /* Reconstruct the mask. */
28029 for (i = 0; i < 2; ++i)
28031 unsigned e = ipar[i * nelt2];
28035 mask |= e << (i * 4);
28038 /* Make sure success has a non-zero value by adding one. */
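/* For example (V4DFmode, expository): the parallel [2 3 6 7] selects
   the high lane of each input; both halves pass the contiguity check,
   and the lane numbers 2/2 = 1 and 6/2 = 3 give
   mask = 1 | 3 << 4 = 0x31, returned as 0x32.  */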
28043 /* Store OPERAND to memory after reload is completed. This means
28044 that we can't easily use assign_stack_local. */
28046 ix86_force_to_memory (enum machine_mode mode, rtx operand)
28050 gcc_assert (reload_completed);
28051 if (ix86_using_red_zone ())
28053 result = gen_rtx_MEM (mode,
28054 gen_rtx_PLUS (Pmode,
28056 GEN_INT (-RED_ZONE_SIZE)));
28057 emit_move_insn (result, operand);
28059 else if (TARGET_64BIT)
28065 operand = gen_lowpart (DImode, operand);
28069 gen_rtx_SET (VOIDmode,
28070 gen_rtx_MEM (DImode,
28071 gen_rtx_PRE_DEC (DImode,
28072 stack_pointer_rtx)),
28076 gcc_unreachable ();
28078 result = gen_rtx_MEM (mode, stack_pointer_rtx);
28087 split_double_mode (mode, &operand, 1, operands, operands + 1);
28089 gen_rtx_SET (VOIDmode,
28090 gen_rtx_MEM (SImode,
28091 gen_rtx_PRE_DEC (Pmode,
28092 stack_pointer_rtx)),
28095 gen_rtx_SET (VOIDmode,
28096 gen_rtx_MEM (SImode,
28097 gen_rtx_PRE_DEC (Pmode,
28098 stack_pointer_rtx)),
28103 /* Store HImodes as SImodes. */
28104 operand = gen_lowpart (SImode, operand);
28108 gen_rtx_SET (VOIDmode,
28109 gen_rtx_MEM (GET_MODE (operand),
28110 gen_rtx_PRE_DEC (SImode,
28111 stack_pointer_rtx)),
28115 gcc_unreachable ();
28117 result = gen_rtx_MEM (mode, stack_pointer_rtx);
28122 /* Free the operand from memory. */
28124 ix86_free_from_memory (enum machine_mode mode)
28126 if (!ix86_using_red_zone ())
28130 if (mode == DImode || TARGET_64BIT)
28134 /* Use LEA to deallocate stack space. In peephole2 it will be converted
28135 to a pop or add instruction if registers are available. */
28136 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
28137 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
28142 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
28144 Put float CONST_DOUBLE in the constant pool instead of fp regs.
28145 QImode must go into class Q_REGS.
28146 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
28147 movdf to do mem-to-mem moves through integer regs. */
28150 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
28152 enum machine_mode mode = GET_MODE (x);
28154 /* We're only allowed to return a subclass of CLASS. Many of the
28155 following checks fail for NO_REGS, so eliminate that early. */
28156 if (regclass == NO_REGS)
28159 /* All classes can load zeros. */
28160 if (x == CONST0_RTX (mode))
28163 /* Force constants into memory if we are loading a (nonzero) constant into
28164 an MMX or SSE register. This is because there are no MMX/SSE instructions
28165 to load from a constant. */
28167 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
28170 /* Prefer SSE regs only, if we can use them for math. */
28171 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
28172 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
28174 /* Floating-point constants need more complex checks. */
28175 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
28177 /* General regs can load everything. */
28178 if (reg_class_subset_p (regclass, GENERAL_REGS))
28181 /* Floats can load 0 and 1 plus some others. Note that we eliminated
28182 zero above. We only want to wind up preferring 80387 registers if
28183 we plan on doing computation with them. */
28185 && standard_80387_constant_p (x) > 0)
28187 /* Limit class to non-sse. */
28188 if (regclass == FLOAT_SSE_REGS)
28190 if (regclass == FP_TOP_SSE_REGS)
28192 if (regclass == FP_SECOND_SSE_REGS)
28193 return FP_SECOND_REG;
28194 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
28201 /* Generally when we see PLUS here, it's the function invariant
28202 (plus soft-fp const_int), which can only be computed into general
28203 regs. */
28204 if (GET_CODE (x) == PLUS)
28205 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
28207 /* QImode constants are easy to load, but non-constant QImode data
28208 must go into Q_REGS. */
28209 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
28211 if (reg_class_subset_p (regclass, Q_REGS))
28213 if (reg_class_subset_p (Q_REGS, regclass))
28221 /* Discourage putting floating-point values in SSE registers unless
28222 SSE math is being used, and likewise for the 387 registers. */
28224 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
28226 enum machine_mode mode = GET_MODE (x);
28228 /* Restrict the output reload class to the register bank that we are doing
28229 math on. If we would like not to return a subset of CLASS, reject this
28230 alternative: if reload cannot do this, it will still use its choice. */
28231 mode = GET_MODE (x);
28232 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
28233 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
28235 if (X87_FLOAT_MODE_P (mode))
28237 if (regclass == FP_TOP_SSE_REGS)
28239 else if (regclass == FP_SECOND_SSE_REGS)
28240 return FP_SECOND_REG;
28242 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
28249 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
28250 enum machine_mode mode,
28251 secondary_reload_info *sri ATTRIBUTE_UNUSED)
28253 /* Double-word spills from general registers to non-offsettable memory
28254 references (zero-extended addresses) go through an XMM register. */
28257 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
28258 && rclass == GENERAL_REGS
28259 && !offsettable_memref_p (x))
28262 /* QImode spills from non-QI registers require
28263 an intermediate register on 32bit targets. */
28265 && !in_p && mode == QImode
28266 && (rclass == GENERAL_REGS
28267 || rclass == LEGACY_REGS
28268 || rclass == INDEX_REGS))
28277 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
28278 regno = true_regnum (x);
28280 /* Return Q_REGS if the operand is in memory. */
28285 /* This condition handles the corner case where an expression involving
28286 pointers gets vectorized. We're trying to use the address of a
28287 stack slot as a vector initializer.
28289 (set (reg:V2DI 74 [ vect_cst_.2 ])
28290 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
28292 Eventually frame gets turned into sp+offset like this:
28294 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
28295 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
28296 (const_int 392 [0x188]))))
28298 That later gets turned into:
28300 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
28301 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
28302 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
28304 We'll have the following reload recorded:
28306 Reload 0: reload_in (DI) =
28307 (plus:DI (reg/f:DI 7 sp)
28308 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
28309 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
28310 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
28311 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
28312 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
28313 reload_reg_rtx: (reg:V2DI 22 xmm1)
28315 Which isn't going to work since SSE instructions can't handle scalar
28316 additions. Returning GENERAL_REGS forces the addition into integer
28317 register and reload can handle subsequent reloads without problems. */
28319 if (in_p && GET_CODE (x) == PLUS
28320 && SSE_CLASS_P (rclass)
28321 && SCALAR_INT_MODE_P (mode))
28322 return GENERAL_REGS;
28327 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
28330 ix86_class_likely_spilled_p (reg_class_t rclass)
28341 case SSE_FIRST_REG:
28343 case FP_SECOND_REG:
28353 /* If we are copying between general and FP registers, we need a memory
28354 location. The same is true for SSE and MMX registers.
28356 To optimize register_move_cost performance, allow inline variant.
28358 The macro can't work reliably when one of the CLASSES is a class containing
28359 registers from multiple units (SSE, MMX, integer). We avoid this by never
28360 combining those units in a single alternative in the machine description.
28361 Ensure that this constraint holds to avoid unexpected surprises.
28363 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
28364 enforce these sanity checks. */
28367 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
28368 enum machine_mode mode, int strict)
28370 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
28371 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
28372 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
28373 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
28374 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
28375 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
28377 gcc_assert (!strict);
28381 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
28384 /* ??? This is a lie. We do have moves between mmx/general, and for
28385 mmx/sse2. But by saying we need secondary memory we discourage the
28386 register allocator from using the mmx registers unless needed. */
28387 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
28390 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
28392 /* SSE1 doesn't have any direct moves from other classes. */
28396 /* If the target says that inter-unit moves are more expensive
28397 than moving through memory, then don't generate them. */
28398 if (!TARGET_INTER_UNIT_MOVES)
28401 /* Between SSE and general, we have moves no larger than word size. */
28402 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
28410 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
28411 enum machine_mode mode, int strict)
28413 return inline_secondary_memory_needed (class1, class2, mode, strict);
28416 /* Implement the TARGET_CLASS_MAX_NREGS hook.
28418 On the 80386, this is the size of MODE in words,
28419 except in the FP regs, where a single reg is always enough. */
28421 static unsigned char
28422 ix86_class_max_nregs (reg_class_t rclass, enum machine_mode mode)
28424 if (MAYBE_INTEGER_CLASS_P (rclass))
28426 if (mode == XFmode)
28427 return (TARGET_64BIT ? 2 : 3);
28428 else if (mode == XCmode)
28429 return (TARGET_64BIT ? 4 : 6);
28431 return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
28435 if (COMPLEX_MODE_P (mode))
28442 /* Return true if the registers in CLASS cannot represent the change from
28443 modes FROM to TO. */
28446 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
28447 enum reg_class regclass)
28452 /* x87 registers can't do subreg at all, as all values are reformatted
28453 to extended precision. */
28454 if (MAYBE_FLOAT_CLASS_P (regclass))
28457 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
28459 /* Vector registers do not support QI or HImode loads. If we don't
28460 disallow a change to these modes, reload will assume it's ok to
28461 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
28462 the vec_dupv4hi pattern. */
28463 if (GET_MODE_SIZE (from) < 4)
28466 /* Vector registers do not support subreg with nonzero offsets, which
28467 are otherwise valid for integer registers. Since we can't see
28468 whether we have a nonzero offset from here, prohibit all
28469 nonparadoxical subregs changing size. */
28470 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
28477 /* Return the cost of moving data of mode M between a
28478 register and memory. A value of 2 is the default; this cost is
28479 relative to those in `REGISTER_MOVE_COST'.
28481 This function is used extensively by register_move_cost, which is used to
28482 build tables at startup. Make it inline in this case.
28483 When IN is 2, return the maximum of the in and out move costs.
28485 If moving between registers and memory is more expensive than
28486 between two registers, you should define this macro to express the
28489 Also model the increased cost of moving QImode registers in non Q_REGS classes. */
28493 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
28497 if (FLOAT_CLASS_P (regclass))
28515 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
28516 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
28518 if (SSE_CLASS_P (regclass))
28521 switch (GET_MODE_SIZE (mode))
28536 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
28537 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
28539 if (MMX_CLASS_P (regclass))
28542 switch (GET_MODE_SIZE (mode))
28554 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
28555 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
28557 switch (GET_MODE_SIZE (mode))
28560 if (Q_CLASS_P (regclass) || TARGET_64BIT)
28563 return ix86_cost->int_store[0];
28564 if (TARGET_PARTIAL_REG_DEPENDENCY
28565 && optimize_function_for_speed_p (cfun))
28566 cost = ix86_cost->movzbl_load;
28568 cost = ix86_cost->int_load[0];
28570 return MAX (cost, ix86_cost->int_store[0]);
28576 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
28578 return ix86_cost->movzbl_load;
28580 return ix86_cost->int_store[0] + 4;
28585 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
28586 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
28588 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
28589 if (mode == TFmode)
28592 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
28594 cost = ix86_cost->int_load[2];
28596 cost = ix86_cost->int_store[2];
28597 return (cost * (((int) GET_MODE_SIZE (mode)
28598 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
28603 ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
28606 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
28610 /* Return the cost of moving data from a register in class CLASS1 to
28611 one in class CLASS2.
28613 It is not required that the cost always equal 2 when FROM is the same as TO;
28614 on some machines it is expensive to move between registers if they are not
28615 general registers. */
28618 ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
28619 reg_class_t class2_i)
28621 enum reg_class class1 = (enum reg_class) class1_i;
28622 enum reg_class class2 = (enum reg_class) class2_i;
28624 /* In case we require secondary memory, compute the cost of the store
28625 followed by the load. In order to avoid bad register allocation choices,
28626 we need this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
28628 if (inline_secondary_memory_needed (class1, class2, mode, 0))
28632 cost += inline_memory_move_cost (mode, class1, 2);
28633 cost += inline_memory_move_cost (mode, class2, 2);
28635 /* In case of copying from a general purpose register we may emit multiple
28636 stores followed by a single load, causing a memory size mismatch stall.
28637 Count this as an arbitrarily high cost of 20. */
28638 if (targetm.class_max_nregs (class1, mode)
28639 > targetm.class_max_nregs (class2, mode))
28642 /* In the case of FP/MMX moves, the registers actually overlap, and we
28643 have to switch modes in order to treat them differently. */
28644 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
28645 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
28651 /* Moves between SSE/MMX and integer unit are expensive. */
28652 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
28653 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
28655 /* ??? By keeping the returned value relatively high, we limit the number
28656 of moves between integer and MMX/SSE registers for all targets.
28657 Additionally, a high value prevents problems with x86_modes_tieable_p(),
28658 where integer modes in MMX/SSE registers are not tieable
28659 because of missing QImode and HImode moves to, from or between
28660 MMX/SSE registers. */
28661 return MAX (8, ix86_cost->mmxsse_to_integer);
28663 if (MAYBE_FLOAT_CLASS_P (class1))
28664 return ix86_cost->fp_move;
28665 if (MAYBE_SSE_CLASS_P (class1))
28666 return ix86_cost->sse_move;
28667 if (MAYBE_MMX_CLASS_P (class1))
28668 return ix86_cost->mmx_move;
28672 /* Return TRUE if hard register REGNO can hold a value of machine-mode MODE. */
28676 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
28678 /* Flags and only flags can hold CCmode values. */
28679 if (CC_REGNO_P (regno))
28680 return GET_MODE_CLASS (mode) == MODE_CC;
28681 if (GET_MODE_CLASS (mode) == MODE_CC
28682 || GET_MODE_CLASS (mode) == MODE_RANDOM
28683 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
28685 if (FP_REGNO_P (regno))
28686 return VALID_FP_MODE_P (mode);
28687 if (SSE_REGNO_P (regno))
28689 /* We implement the move patterns for all vector modes into and
28690 out of SSE registers, even when no operation instructions
28691 are available. OImode move is available only when AVX is enabled. */
28693 return ((TARGET_AVX && mode == OImode)
28694 || VALID_AVX256_REG_MODE (mode)
28695 || VALID_SSE_REG_MODE (mode)
28696 || VALID_SSE2_REG_MODE (mode)
28697 || VALID_MMX_REG_MODE (mode)
28698 || VALID_MMX_REG_MODE_3DNOW (mode));
28700 if (MMX_REGNO_P (regno))
28702 /* We implement the move patterns for 3DNOW modes even in MMX mode,
28703 so if the register is available at all, then we can move data of
28704 the given mode into or out of it. */
28705 return (VALID_MMX_REG_MODE (mode)
28706 || VALID_MMX_REG_MODE_3DNOW (mode));
28709 if (mode == QImode)
28711 /* Take care with QImode values - they can be in non-QI regs,
28712 but then they do cause partial register stalls. */
28713 if (regno <= BX_REG || TARGET_64BIT)
28715 if (!TARGET_PARTIAL_REG_STALL)
28717 return !can_create_pseudo_p ();
28719 /* We handle both integers and floats in the general purpose registers. */
28720 else if (VALID_INT_MODE_P (mode))
28722 else if (VALID_FP_MODE_P (mode))
28724 else if (VALID_DFP_MODE_P (mode))
28726 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
28727 on to use that value in smaller contexts, this can easily force a
28728 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
28729 supporting DImode, allow it. */
28730 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
28736 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
28737 tieable integer mode. */
28740 ix86_tieable_integer_mode_p (enum machine_mode mode)
28749 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
28752 return TARGET_64BIT;
28759 /* Return true if MODE1 is accessible in a register that can hold MODE2
28760 without copying. That is, all register classes that can hold MODE2
28761 can also hold MODE1. */
28764 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
28766 if (mode1 == mode2)
28769 if (ix86_tieable_integer_mode_p (mode1)
28770 && ix86_tieable_integer_mode_p (mode2))
28773 /* MODE2 being XFmode implies fp stack or general regs, which means we
28774 can tie any smaller floating point modes to it. Note that we do not
28775 tie this with TFmode. */
28776 if (mode2 == XFmode)
28777 return mode1 == SFmode || mode1 == DFmode;
28779 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
28780 that we can tie it with SFmode. */
28781 if (mode2 == DFmode)
28782 return mode1 == SFmode;
28784 /* If MODE2 is only appropriate for an SSE register, then tie with
28785 any other mode acceptable to SSE registers. */
28786 if (GET_MODE_SIZE (mode2) == 16
28787 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
28788 return (GET_MODE_SIZE (mode1) == 16
28789 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
28791 /* If MODE2 is appropriate for an MMX register, then tie
28792 with any other mode acceptable to MMX registers. */
28793 if (GET_MODE_SIZE (mode2) == 8
28794 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
28795 return (GET_MODE_SIZE (mode1) == 8
28796 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
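/* For example (expository summary): HImode and SImode tie with each
   other unconditionally, while QImode joins them only on 64-bit
   targets or when partial register stalls are not modeled; DFmode
   ties only SFmode, since every class that can hold DFmode can hold
   SFmode as well.  */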
28801 /* Compute a (partial) cost for rtx X. Return true if the complete
28802 cost has been computed, and false if subexpressions should be
28803 scanned. In either case, *TOTAL contains the cost result. */
28806 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed)
28808 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
28809 enum machine_mode mode = GET_MODE (x);
28810 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
28818 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
28820 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
28822 else if (flag_pic && SYMBOLIC_CONST (x)
28824 || (GET_CODE (x) != LABEL_REF
28825 && (GET_CODE (x) != SYMBOL_REF
28826 || !SYMBOL_REF_LOCAL_P (x)))))
28833 if (mode == VOIDmode)
28836 switch (standard_80387_constant_p (x))
28841 default: /* Other constants */
28846 /* Start with (MEM (SYMBOL_REF)), since that's where
28847 it'll probably end up. Add a penalty for size. */
28848 *total = (COSTS_N_INSNS (1)
28849 + (flag_pic != 0 && !TARGET_64BIT)
28850 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
28856 /* Zero extension is often completely free on x86_64, so make
28857 it as cheap as possible. */
28858 if (TARGET_64BIT && mode == DImode
28859 && GET_MODE (XEXP (x, 0)) == SImode)
28861 else if (TARGET_ZERO_EXTEND_WITH_AND)
28862 *total = cost->add;
28864 *total = cost->movzx;
28868 *total = cost->movsx;
28872 if (CONST_INT_P (XEXP (x, 1))
28873 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
28875 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
28878 *total = cost->add;
28881 if ((value == 2 || value == 3)
28882 && cost->lea <= cost->shift_const)
28884 *total = cost->lea;
28894 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
28896 if (CONST_INT_P (XEXP (x, 1)))
28898 if (INTVAL (XEXP (x, 1)) > 32)
28899 *total = cost->shift_const + COSTS_N_INSNS (2);
28901 *total = cost->shift_const * 2;
28905 if (GET_CODE (XEXP (x, 1)) == AND)
28906 *total = cost->shift_var * 2;
28908 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
28913 if (CONST_INT_P (XEXP (x, 1)))
28914 *total = cost->shift_const;
28916 *total = cost->shift_var;
28924 gcc_assert (FLOAT_MODE_P (mode));
28925 gcc_assert (TARGET_FMA || TARGET_FMA4);
28927 /* ??? SSE scalar/vector cost should be used here. */
28928 /* ??? Bald assumption that fma has the same cost as fmul. */
28929 *total = cost->fmul;
28930 *total += rtx_cost (XEXP (x, 1), FMA, speed);
28932 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
28934 if (GET_CODE (sub) == NEG)
28935 sub = XEXP (sub, 0);
28936 *total += rtx_cost (sub, FMA, speed);
28939 if (GET_CODE (sub) == NEG)
28940 sub = XEXP (sub, 0);
28941 *total += rtx_cost (sub, FMA, speed);
28946 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
28948 /* ??? SSE scalar cost should be used here. */
28949 *total = cost->fmul;
28952 else if (X87_FLOAT_MODE_P (mode))
28954 *total = cost->fmul;
28957 else if (FLOAT_MODE_P (mode))
28959 /* ??? SSE vector cost should be used here. */
28960 *total = cost->fmul;
28965 rtx op0 = XEXP (x, 0);
28966 rtx op1 = XEXP (x, 1);
28968 if (CONST_INT_P (XEXP (x, 1)))
28970 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
28971 for (nbits = 0; value != 0; value &= value - 1)
28975 /* This is arbitrary. */
28978 /* Compute costs correctly for widening multiplication. */
28979 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
28980 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
28981 == GET_MODE_SIZE (mode))
28983 int is_mulwiden = 0;
28984 enum machine_mode inner_mode = GET_MODE (op0);
28986 if (GET_CODE (op0) == GET_CODE (op1))
28987 is_mulwiden = 1, op1 = XEXP (op1, 0);
28988 else if (CONST_INT_P (op1))
28990 if (GET_CODE (op0) == SIGN_EXTEND)
28991 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
28994 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
28998 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
29001 *total = (cost->mult_init[MODE_INDEX (mode)]
29002 + nbits * cost->mult_bit
29003 + rtx_cost (op0, outer_code, speed) + rtx_cost (op1, outer_code, speed));
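/* For example (expository):
   (mult:DI (sign_extend:DI r1:SI) (sign_extend:DI r2:SI)) is a single
   widening imull, so it is costed above in SImode and the extensions
   themselves are not charged again.  */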
29012 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
29013 /* ??? SSE cost should be used here. */
29014 *total = cost->fdiv;
29015 else if (X87_FLOAT_MODE_P (mode))
29016 *total = cost->fdiv;
29017 else if (FLOAT_MODE_P (mode))
29018 /* ??? SSE vector cost should be used here. */
29019 *total = cost->fdiv;
29021 *total = cost->divide[MODE_INDEX (mode)];
29025 if (GET_MODE_CLASS (mode) == MODE_INT
29026 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
29028 if (GET_CODE (XEXP (x, 0)) == PLUS
29029 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
29030 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
29031 && CONSTANT_P (XEXP (x, 1)))
29033 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
29034 if (val == 2 || val == 4 || val == 8)
29036 *total = cost->lea;
29037 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
29038 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
29039 outer_code, speed);
29040 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
29044 else if (GET_CODE (XEXP (x, 0)) == MULT
29045 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
29047 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
29048 if (val == 2 || val == 4 || val == 8)
29050 *total = cost->lea;
29051 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
29052 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
29056 else if (GET_CODE (XEXP (x, 0)) == PLUS)
29058 *total = cost->lea;
29059 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
29060 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
29061 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
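/* For example (expository): x = a + b*4 + 16 matches the first
   pattern above and is costed as a single lea, e.g.
   "leal 16(%eax,%ebx,4), %ecx", plus its operand costs, rather than
   as a shift and two additions.  */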
29068 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
29070 /* ??? SSE cost should be used here. */
29071 *total = cost->fadd;
29074 else if (X87_FLOAT_MODE_P (mode))
29076 *total = cost->fadd;
29079 else if (FLOAT_MODE_P (mode))
29081 /* ??? SSE vector cost should be used here. */
29082 *total = cost->fadd;
29090 if (!TARGET_64BIT && mode == DImode)
29092 *total = (cost->add * 2
29093 + (rtx_cost (XEXP (x, 0), outer_code, speed)
29094 << (GET_MODE (XEXP (x, 0)) != DImode))
29095 + (rtx_cost (XEXP (x, 1), outer_code, speed)
29096 << (GET_MODE (XEXP (x, 1)) != DImode)));
29102 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
29104 /* ??? SSE cost should be used here. */
29105 *total = cost->fchs;
29108 else if (X87_FLOAT_MODE_P (mode))
29110 *total = cost->fchs;
29113 else if (FLOAT_MODE_P (mode))
29115 /* ??? SSE vector cost should be used here. */
29116 *total = cost->fchs;
29122 if (!TARGET_64BIT && mode == DImode)
29123 *total = cost->add * 2;
29125 *total = cost->add;
29129 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
29130 && XEXP (XEXP (x, 0), 1) == const1_rtx
29131 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
29132 && XEXP (x, 1) == const0_rtx)
29134 /* This kind of construct is implemented using test[bwl].
29135 Treat it as if we had an AND. */
29136 *total = (cost->add
29137 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed)
29138 + rtx_cost (const1_rtx, outer_code, speed));
29144 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
29149 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
29150 /* ??? SSE cost should be used here. */
29151 *total = cost->fabs;
29152 else if (X87_FLOAT_MODE_P (mode))
29153 *total = cost->fabs;
29154 else if (FLOAT_MODE_P (mode))
29155 /* ??? SSE vector cost should be used here. */
29156 *total = cost->fabs;
29160 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
29161 /* ??? SSE cost should be used here. */
29162 *total = cost->fsqrt;
29163 else if (X87_FLOAT_MODE_P (mode))
29164 *total = cost->fsqrt;
29165 else if (FLOAT_MODE_P (mode))
29166 /* ??? SSE vector cost should be used here. */
29167 *total = cost->fsqrt;
29171 if (XINT (x, 1) == UNSPEC_TP)
29178 case VEC_DUPLICATE:
29179 /* ??? Assume all of these vector manipulation patterns are
29180 recognizable, in which case they all pretty much have the same cost. */
29182 *total = COSTS_N_INSNS (1);
29192 static int current_machopic_label_num;
29194 /* Given a symbol name and its associated stub, write out the
29195 definition of the stub. */
29198 machopic_output_stub (FILE *file, const char *symb, const char *stub)
29200 unsigned int length;
29201 char *binder_name, *symbol_name, lazy_ptr_name[32];
29202 int label = ++current_machopic_label_num;
29204 /* For 64-bit we shouldn't get here. */
29205 gcc_assert (!TARGET_64BIT);
29207 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
29208 symb = targetm.strip_name_encoding (symb);
29210 length = strlen (stub);
29211 binder_name = XALLOCAVEC (char, length + 32);
29212 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
29214 length = strlen (symb);
29215 symbol_name = XALLOCAVEC (char, length + 32);
29216 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
29218 sprintf (lazy_ptr_name, "L%d$lz", label);
29220 if (MACHOPIC_ATT_STUB)
29221 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
29222 else if (MACHOPIC_PURE)
29223 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
29225 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
29227 fprintf (file, "%s:\n", stub);
29228 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
29230 if (MACHOPIC_ATT_STUB)
29232 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
29234 else if (MACHOPIC_PURE)
29237 /* 25-byte PIC stub using "CALL get_pc_thunk". */
29238 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
29239 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
29240 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
29241 label, lazy_ptr_name, label);
29242 fprintf (file, "\tjmp\t*%%ecx\n");
29245 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
29247 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
29248 it needs no stub-binding-helper. */
29249 if (MACHOPIC_ATT_STUB)
29252 fprintf (file, "%s:\n", binder_name);
29256 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
29257 fprintf (file, "\tpushl\t%%ecx\n");
29260 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
29262 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
29264 /* N.B. Keep the correspondence of these
29265 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
29266 old-pic/new-pic/non-pic stubs; altering this will break
29267 compatibility with existing dylibs. */
29270 /* 25-byte PIC stub using "CALL get_pc_thunk". */
29271 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
29274 /* 16-byte -mdynamic-no-pic stub. */
29275 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr3_section]);
29277 fprintf (file, "%s:\n", lazy_ptr_name);
29278 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
29279 fprintf (file, ASM_LONG "%s\n", binder_name);
29281 #endif /* TARGET_MACHO */
29283 /* Order the registers for the register allocator. */
29286 x86_order_regs_for_local_alloc (void)
29291 /* First allocate the local general purpose registers. */
29292 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
29293 if (GENERAL_REGNO_P (i) && call_used_regs[i])
29294 reg_alloc_order [pos++] = i;
29296 /* Global general purpose registers. */
29297 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
29298 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
29299 reg_alloc_order [pos++] = i;
29301 /* x87 registers come first in case we are doing FP math using them. */
29303 if (!TARGET_SSE_MATH)
29304 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
29305 reg_alloc_order [pos++] = i;
29307 /* SSE registers. */
29308 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
29309 reg_alloc_order [pos++] = i;
29310 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
29311 reg_alloc_order [pos++] = i;
29313 /* x87 registers. */
29314 if (TARGET_SSE_MATH)
29315 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
29316 reg_alloc_order [pos++] = i;
29318 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
29319 reg_alloc_order [pos++] = i;
29321 /* Initialize the rest of the array, as we do not allocate some registers at all. */
29323 while (pos < FIRST_PSEUDO_REGISTER)
29324 reg_alloc_order [pos++] = 0;
29327 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
29328 in struct attribute_spec.handler. */
29330 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
29332 int flags ATTRIBUTE_UNUSED,
29333 bool *no_add_attrs)
29335 if (TREE_CODE (*node) != FUNCTION_TYPE
29336 && TREE_CODE (*node) != METHOD_TYPE
29337 && TREE_CODE (*node) != FIELD_DECL
29338 && TREE_CODE (*node) != TYPE_DECL)
29340 warning (OPT_Wattributes, "%qE attribute only applies to functions",
29342 *no_add_attrs = true;
29347 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
29349 *no_add_attrs = true;
29352 if (is_attribute_p ("callee_pop_aggregate_return", name))
29356 cst = TREE_VALUE (args);
29357 if (TREE_CODE (cst) != INTEGER_CST)
29359 warning (OPT_Wattributes,
29360 "%qE attribute requires an integer constant argument",
29362 *no_add_attrs = true;
29364 else if (compare_tree_int (cst, 0) != 0
29365 && compare_tree_int (cst, 1) != 0)
29367 warning (OPT_Wattributes,
29368 "argument to %qE attribute is neither zero, nor one",
29370 *no_add_attrs = true;
29379 /* Handle a "ms_abi" or "sysv_abi" attribute; arguments as in
29380 struct attribute_spec.handler. */
29382 ix86_handle_abi_attribute (tree *node, tree name,
29383 tree args ATTRIBUTE_UNUSED,
29384 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
29386 if (TREE_CODE (*node) != FUNCTION_TYPE
29387 && TREE_CODE (*node) != METHOD_TYPE
29388 && TREE_CODE (*node) != FIELD_DECL
29389 && TREE_CODE (*node) != TYPE_DECL)
29391 warning (OPT_Wattributes, "%qE attribute only applies to functions",
29393 *no_add_attrs = true;
29397 /* Can combine regparm with all attributes but fastcall. */
29398 if (is_attribute_p ("ms_abi", name))
29400 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
29402 error ("ms_abi and sysv_abi attributes are not compatible");
29407 else if (is_attribute_p ("sysv_abi", name))
29409 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
29411 error ("ms_abi and sysv_abi attributes are not compatible");
29420 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
29421 struct attribute_spec.handler. */
29423 ix86_handle_struct_attribute (tree *node, tree name,
29424 tree args ATTRIBUTE_UNUSED,
29425 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
29428 if (DECL_P (*node))
29430 if (TREE_CODE (*node) == TYPE_DECL)
29431 type = &TREE_TYPE (*node);
29436 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
29437 || TREE_CODE (*type) == UNION_TYPE)))
29439 warning (OPT_Wattributes, "%qE attribute ignored",
29441 *no_add_attrs = true;
29444 else if ((is_attribute_p ("ms_struct", name)
29445 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
29446 || ((is_attribute_p ("gcc_struct", name)
29447 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
29449 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
29451 *no_add_attrs = true;
29458 ix86_handle_fndecl_attribute (tree *node, tree name,
29459 tree args ATTRIBUTE_UNUSED,
29460 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
29462 if (TREE_CODE (*node) != FUNCTION_DECL)
29464 warning (OPT_Wattributes, "%qE attribute only applies to functions",
29466 *no_add_attrs = true;
29472 ix86_ms_bitfield_layout_p (const_tree record_type)
29474 return ((TARGET_MS_BITFIELD_LAYOUT
29475 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
29476 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
29479 /* Returns an expression indicating where the this parameter is
29480 located on entry to the FUNCTION. */
29483 x86_this_parameter (tree function)
29485 tree type = TREE_TYPE (function);
29486 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
29491 const int *parm_regs;
29493 if (ix86_function_type_abi (type) == MS_ABI)
29494 parm_regs = x86_64_ms_abi_int_parameter_registers;
29496 parm_regs = x86_64_int_parameter_registers;
29497 return gen_rtx_REG (DImode, parm_regs[aggr]);
29500 nregs = ix86_function_regparm (type, function);
29502 if (nregs > 0 && !stdarg_p (type))
29505 unsigned int ccvt = ix86_get_callcvt (type);
29507 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
29508 regno = aggr ? DX_REG : CX_REG;
29509 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
29513 return gen_rtx_MEM (SImode,
29514 plus_constant (stack_pointer_rtx, 4));
29523 return gen_rtx_MEM (SImode,
29524 plus_constant (stack_pointer_rtx, 4));
29527 return gen_rtx_REG (SImode, regno);
29530 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
29533 /* Determine whether x86_output_mi_thunk can succeed. */
29536 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
29537 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
29538 HOST_WIDE_INT vcall_offset, const_tree function)
29540 /* 64-bit can handle anything. */
29544 /* For 32-bit, everything's fine if we have one free register. */
29545 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
29548 /* Need a free register for vcall_offset. */
29552 /* Need a free register for GOT references. */
29553 if (flag_pic && !targetm.binds_local_p (function))
29556 /* Otherwise ok. */
29560 /* Output the assembler code for a thunk function. THUNK_DECL is the
29561 declaration for the thunk function itself, FUNCTION is the decl for
29562 the target function. DELTA is an immediate constant offset to be
29563 added to THIS. If VCALL_OFFSET is nonzero, the word at
29564 *(*this + vcall_offset) should be added to THIS. */
29567 x86_output_mi_thunk (FILE *file,
29568 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
29569 HOST_WIDE_INT vcall_offset, tree function)
29571 rtx this_param = x86_this_parameter (function);
29572 rtx this_reg, tmp, fnaddr;
29574 emit_note (NOTE_INSN_PROLOGUE_END);
29576 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
29577 pull it in now and let DELTA benefit. */
29578 if (REG_P (this_param))
29579 this_reg = this_param;
29580 else if (vcall_offset)
29582 /* Put the this parameter into %eax. */
29583 this_reg = gen_rtx_REG (Pmode, AX_REG);
29584 emit_move_insn (this_reg, this_param);
29587 this_reg = NULL_RTX;
29589 /* Adjust the this parameter by a fixed constant. */
29592 rtx delta_rtx = GEN_INT (delta);
29593 rtx delta_dst = this_reg ? this_reg : this_param;
29597 if (!x86_64_general_operand (delta_rtx, Pmode))
29599 tmp = gen_rtx_REG (Pmode, R10_REG);
29600 emit_move_insn (tmp, delta_rtx);
29605 emit_insn (ix86_gen_add3 (delta_dst, delta_dst, delta_rtx));
29608 /* Adjust the this parameter by a value stored in the vtable. */
29611 rtx vcall_addr, vcall_mem, this_mem;
29612 unsigned int tmp_regno;
29615 tmp_regno = R10_REG;
29618 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
29619 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) != 0)
29620 tmp_regno = AX_REG;
29622 tmp_regno = CX_REG;
29624 tmp = gen_rtx_REG (Pmode, tmp_regno);
29626 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
29627 if (Pmode != ptr_mode)
29628 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
29629 emit_move_insn (tmp, this_mem);
29631 /* Adjust the this parameter. */
29632 vcall_addr = plus_constant (tmp, vcall_offset);
29634 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
29636 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
29637 emit_move_insn (tmp2, GEN_INT (vcall_offset));
29638 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
29641 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
29642 if (Pmode != ptr_mode)
29643 emit_insn (gen_addsi_1_zext (this_reg,
29644 gen_rtx_REG (ptr_mode,
29648 emit_insn (ix86_gen_add3 (this_reg, this_reg, vcall_mem));
29651 /* If necessary, drop THIS back to its stack slot. */
29652 if (this_reg && this_reg != this_param)
29653 emit_move_insn (this_param, this_reg);
29655 fnaddr = XEXP (DECL_RTL (function), 0);
29658 if (!flag_pic || targetm.binds_local_p (function)
29659 || cfun->machine->call_abi == MS_ABI)
29663 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
29664 tmp = gen_rtx_CONST (Pmode, tmp);
29665 fnaddr = gen_rtx_MEM (Pmode, tmp);
29670 if (!flag_pic || targetm.binds_local_p (function))
29673 else if (TARGET_MACHO)
29675 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
29676 fnaddr = XEXP (fnaddr, 0);
29678 #endif /* TARGET_MACHO */
29681 tmp = gen_rtx_REG (Pmode, CX_REG);
29682 output_set_got (tmp, NULL_RTX);
29684 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
29685 fnaddr = gen_rtx_PLUS (Pmode, fnaddr, tmp);
29686 fnaddr = gen_rtx_MEM (Pmode, fnaddr);
29690 /* Our sibling call patterns do not allow memories, because we have no
29691 predicate that can distinguish between frame and non-frame memory.
29692 For our purposes here, we can get away with (ab)using a jump pattern,
29693 because we're going to do no optimization. */
29694 if (MEM_P (fnaddr))
29695 emit_jump_insn (gen_indirect_jump (fnaddr));
29698 tmp = gen_rtx_MEM (QImode, fnaddr);
29699 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
29700 tmp = emit_call_insn (tmp);
29701 SIBLING_CALL_P (tmp) = 1;
29705 /* Emit just enough of rest_of_compilation to get the insns emitted.
29706 Note that use_thunk calls assemble_start_function et al. */
29707 tmp = get_insns ();
29708 insn_locators_alloc ();
29709 shorten_branches (tmp);
29710 final_start_function (tmp, file, 1);
29711 final (tmp, file, 1);
29712 final_end_function ();
29716 x86_file_start (void)
29718 default_file_start ();
29720 darwin_file_start ();
29722 if (X86_FILE_START_VERSION_DIRECTIVE)
29723 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
29724 if (X86_FILE_START_FLTUSED)
29725 fputs ("\t.global\t__fltused\n", asm_out_file);
29726 if (ix86_asm_dialect == ASM_INTEL)
29727 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
29731 x86_field_alignment (tree field, int computed)
29733 enum machine_mode mode;
29734 tree type = TREE_TYPE (field);
29736 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
29738 mode = TYPE_MODE (strip_array_types (type));
29739 if (mode == DFmode || mode == DCmode
29740 || GET_MODE_CLASS (mode) == MODE_INT
29741 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
29742 return MIN (32, computed);
29746 /* Output assembler code to FILE to increment profiler label # LABELNO
29747 for profiling a function entry. */
29749 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
29751 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
29756 #ifndef NO_PROFILE_COUNTERS
29757 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
29760 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
29761 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
29763 fprintf (file, "\tcall\t%s\n", mcount_name);
29767 #ifndef NO_PROFILE_COUNTERS
29768 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
29771 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
29775 #ifndef NO_PROFILE_COUNTERS
29776 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
29779 fprintf (file, "\tcall\t%s\n", mcount_name);
29783 /* We don't have exact information about the insn sizes, but we may assume
29784 quite safely that we are informed about all 1 byte insns and memory
29785 address sizes. This is enough to eliminate unnecessary padding in
29789 min_insn_size (rtx insn)
29793 if (!INSN_P (insn) || !active_insn_p (insn))
29796 /* Discard alignments we've emitted and jump instructions. */
29797 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
29798 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
29800 if (JUMP_TABLE_DATA_P (insn))
29803 /* Important case - calls are always 5 bytes.
29804 It is common to have many calls in a row. */
29806 && symbolic_reference_mentioned_p (PATTERN (insn))
29807 && !SIBLING_CALL_P (insn))
29809 len = get_attr_length (insn);
29813 /* For normal instructions we rely on get_attr_length being exact,
29814 with a few exceptions. */
29815 if (!JUMP_P (insn))
29817 enum attr_type type = get_attr_type (insn);
29822 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
29823 || asm_noperands (PATTERN (insn)) >= 0)
29830 /* Otherwise trust get_attr_length. */
29834 l = get_attr_length_address (insn);
29835 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
29844 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
29846 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
29850 ix86_avoid_jump_mispredicts (void)
29852 rtx insn, start = get_insns ();
29853 int nbytes = 0, njumps = 0;
29856 /* Look for all minimal intervals of instructions containing 4 jumps.
29857 The intervals are bounded by START and INSN. NBYTES is the total
29858 size of instructions in the interval including INSN and not including
29859 START. When NBYTES is smaller than 16, it is possible
29860 that the end of START and the end of INSN fall in the same 16-byte page.
29862 The smallest offset in the page at which INSN can start is the case where
29863 START ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
29864 We emit a p2align into the 16-byte window with max_skip 15 - NBYTES + sizeof (INSN).
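/* Worked example (illustrative): with three jumps already in the
   window, NBYTES == 12 and sizeof (INSN) == 2, the worst case has INSN
   starting at offset 10 of the page, so a pad of up to
   15 - 12 + 2 == 5 bytes (a .p2align 4 with that max-skip) is enough
   to push the fourth jump out of the shared 16-byte window.  */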
29866 for (insn = start; insn; insn = NEXT_INSN (insn))
29870 if (LABEL_P (insn))
29872 int align = label_to_alignment (insn);
29873 int max_skip = label_to_max_skip (insn);
29877 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
29878 already in the current 16 byte page, because otherwise
29879 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
29880 bytes to reach 16 byte boundary. */
29882 || (align <= 3 && max_skip != (1 << align) - 1))
29885 fprintf (dump_file, "Label %i with max_skip %i\n",
29886 INSN_UID (insn), max_skip);
29889 while (nbytes + max_skip >= 16)
29891 start = NEXT_INSN (start);
29892 if ((JUMP_P (start)
29893 && GET_CODE (PATTERN (start)) != ADDR_VEC
29894 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
29896 njumps--, isjump = 1;
29899 nbytes -= min_insn_size (start);
29905 min_size = min_insn_size (insn);
29906 nbytes += min_size;
29908 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
29909 INSN_UID (insn), min_size);
29911 && GET_CODE (PATTERN (insn)) != ADDR_VEC
29912 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
29920 start = NEXT_INSN (start);
29921 if ((JUMP_P (start)
29922 && GET_CODE (PATTERN (start)) != ADDR_VEC
29923 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
29925 njumps--, isjump = 1;
29928 nbytes -= min_insn_size (start);
29930 gcc_assert (njumps >= 0);
29932 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
29933 INSN_UID (start), INSN_UID (insn), nbytes);
29935 if (njumps == 3 && isjump && nbytes < 16)
29937 int padsize = 15 - nbytes + min_insn_size (insn);
29940 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
29941 INSN_UID (insn), padsize);
29942 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
29948 /* AMD Athlon works faster
29949 when RET is not the destination of a conditional jump or directly preceded
29950 by another jump instruction. We avoid the penalty by inserting a NOP just
29951 before the RET instructions in such cases. */
29953 ix86_pad_returns (void)
29958 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
29960 basic_block bb = e->src;
29961 rtx ret = BB_END (bb);
29963 bool replace = false;
29965 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
29966 || optimize_bb_for_size_p (bb))
29968 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
29969 if (active_insn_p (prev) || LABEL_P (prev))
29971 if (prev && LABEL_P (prev))
29976 FOR_EACH_EDGE (e, ei, bb->preds)
29977 if (EDGE_FREQUENCY (e) && e->src->index >= 0
29978 && !(e->flags & EDGE_FALLTHRU))
29983 prev = prev_active_insn (ret);
29985 && ((JUMP_P (prev) && any_condjump_p (prev))
29988 /* Empty functions get a branch mispredict even when
29989 the jump destination is not visible to us. */
29990 if (!prev && !optimize_function_for_size_p (cfun))
29995 emit_jump_insn_before (gen_return_internal_long (), ret);
30001 /* Count the minimum number of instructions in BB. Return 4 if the
30002 number of instructions >= 4. */
30005 ix86_count_insn_bb (basic_block bb)
30008 int insn_count = 0;
30010 /* Count number of instructions in this block. Return 4 if the number
30011 of instructions >= 4. */
30012 FOR_BB_INSNS (bb, insn)
30014 /* Only happens in exit blocks. */
30016 && GET_CODE (PATTERN (insn)) == RETURN)
30019 if (NONDEBUG_INSN_P (insn)
30020 && GET_CODE (PATTERN (insn)) != USE
30021 && GET_CODE (PATTERN (insn)) != CLOBBER)
30024 if (insn_count >= 4)
30033 /* Count the minimum number of instructions in code path in BB.
30034 Return 4 if the number of instructions >= 4. */
30037 ix86_count_insn (basic_block bb)
30041 int min_prev_count;
30043 /* Only bother counting instructions along paths with no
30044 more than 2 basic blocks between entry and exit. Given
30045 that BB has an edge to exit, determine if a predecessor
30046 of BB has an edge from entry. If so, compute the number
30047 of instructions in the predecessor block. If there
30048 happen to be multiple such blocks, compute the minimum. */
30049 min_prev_count = 4;
30050 FOR_EACH_EDGE (e, ei, bb->preds)
30053 edge_iterator prev_ei;
30055 if (e->src == ENTRY_BLOCK_PTR)
30057 min_prev_count = 0;
30060 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
30062 if (prev_e->src == ENTRY_BLOCK_PTR)
30064 int count = ix86_count_insn_bb (e->src);
30065 if (count < min_prev_count)
30066 min_prev_count = count;
30072 if (min_prev_count < 4)
30073 min_prev_count += ix86_count_insn_bb (bb);
30075 return min_prev_count;
30078 /* Pad short functions to 4 instructions. */
30081 ix86_pad_short_function (void)
30086 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
30088 rtx ret = BB_END (e->src);
30089 if (JUMP_P (ret) && GET_CODE (PATTERN (ret)) == RETURN)
30091 int insn_count = ix86_count_insn (e->src);
30093 /* Pad short function. */
30094 if (insn_count < 4)
30098 /* Find epilogue. */
30101 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
30102 insn = PREV_INSN (insn);
30107 /* Two NOPs count as one instruction. */
30108 insn_count = 2 * (4 - insn_count);
30109 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
30115 /* Implement machine specific optimizations. We implement padding of returns
30116 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
30120 /* We are freeing block_for_insn in the toplev to keep compatibility
30121 with old MDEP_REORGS that are not CFG based. Recompute it now. */
30122 compute_bb_for_insn ();
30124 /* Run the vzeroupper optimization if needed. */
30125 if (TARGET_VZEROUPPER)
30126 move_or_delete_vzeroupper ();
30128 if (optimize && optimize_function_for_speed_p (cfun))
30130 if (TARGET_PAD_SHORT_FUNCTION)
30131 ix86_pad_short_function ();
30132 else if (TARGET_PAD_RETURNS)
30133 ix86_pad_returns ();
30134 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
30135 if (TARGET_FOUR_JUMP_LIMIT)
30136 ix86_avoid_jump_mispredicts ();
30141 /* Return nonzero when QImode register that must be represented via REX prefix
30144 x86_extended_QIreg_mentioned_p (rtx insn)
30147 extract_insn_cached (insn);
30148 for (i = 0; i < recog_data.n_operands; i++)
30149 if (REG_P (recog_data.operand[i])
30150 && REGNO (recog_data.operand[i]) > BX_REG)
30155 /* Return nonzero when P points to register encoded via REX prefix.
30156 Called via for_each_rtx. */
30158 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
30160 unsigned int regno;
30163 regno = REGNO (*p);
30164 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
30167 /* Return true when INSN mentions register that must be encoded using REX
30170 x86_extended_reg_mentioned_p (rtx insn)
30172 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
30173 extended_reg_mentioned_1, NULL);
30176 /* If profitable, negate (without causing overflow) integer constant
30177 of mode MODE at location LOC. Return true in this case. */
30179 x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
30183 if (!CONST_INT_P (*loc))
30189 /* DImode x86_64 constants must fit in 32 bits. */
30190 gcc_assert (x86_64_immediate_operand (*loc, mode));
30201 gcc_unreachable ();
30204 /* Avoid overflows. */
30205 if (mode_signbit_p (mode, *loc))
30208 val = INTVAL (*loc);
30210 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
30211 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
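/* Encoding detail behind the exception (illustrative): x86 sign-extends
   8-bit immediates, so "addl $-128, %eax" fits in an imm8 while
   "subl $128, %eax" would need a 32-bit immediate; -128 is therefore
   left as an add.  */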
30212 if ((val < 0 && val != -128)
30215 *loc = GEN_INT (-val);
30222 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
30223 optabs would emit if we didn't have TFmode patterns. */
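/* C equivalent of the emitted sequence (an illustrative sketch for a
   DImode input):

     double floatuns (uint64_t u)
     {
       if ((int64_t) u >= 0)
         return (double) (int64_t) u;    /* plain signed conversion */
       /* Halve, folding the shifted-out bit into bit 0 so the final
          rounding is still correct, convert, then double.  */
       uint64_t half = (u >> 1) | (u & 1);
       return 2.0 * (double) (int64_t) half;
     }
*/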
30226 x86_emit_floatuns (rtx operands[2])
30228 rtx neglab, donelab, i0, i1, f0, in, out;
30229 enum machine_mode mode, inmode;
30231 inmode = GET_MODE (operands[1]);
30232 gcc_assert (inmode == SImode || inmode == DImode);
30235 in = force_reg (inmode, operands[1]);
30236 mode = GET_MODE (out);
30237 neglab = gen_label_rtx ();
30238 donelab = gen_label_rtx ();
30239 f0 = gen_reg_rtx (mode);
30241 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
30243 expand_float (out, in, 0);
30245 emit_jump_insn (gen_jump (donelab));
30248 emit_label (neglab);
30250 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
30252 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
30254 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
30256 expand_float (f0, i0, 0);
30258 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
30260 emit_label (donelab);
30263 /* AVX does not support 32-byte integer vector operations,
30264 thus the longest vector we are faced with is V16QImode. */
30265 #define MAX_VECT_LEN 16
30267 struct expand_vec_perm_d
30269 rtx target, op0, op1;
30270 unsigned char perm[MAX_VECT_LEN];
30271 enum machine_mode vmode;
30272 unsigned char nelt;
30276 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
30277 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
30279 /* Get a vector mode of the same size as the original but with elements
30280 twice as wide. This is only guaranteed to apply to integral vectors. */
30282 static inline enum machine_mode
30283 get_mode_wider_vector (enum machine_mode o)
30285 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
30286 enum machine_mode n = GET_MODE_WIDER_MODE (o);
30287 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
30288 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
30292 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
30293 with all elements equal to VAR. Return true if successful. */
30296 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
30297 rtx target, rtx val)
30320 /* First attempt to recognize VAL as-is. */
30321 dup = gen_rtx_VEC_DUPLICATE (mode, val);
30322 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
30323 if (recog_memoized (insn) < 0)
30326 /* If that fails, force VAL into a register. */
30329 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
30330 seq = get_insns ();
30333 emit_insn_before (seq, insn);
30335 ok = recog_memoized (insn) >= 0;
30344 if (TARGET_SSE || TARGET_3DNOW_A)
30348 val = gen_lowpart (SImode, val);
30349 x = gen_rtx_TRUNCATE (HImode, val);
30350 x = gen_rtx_VEC_DUPLICATE (mode, x);
30351 emit_insn (gen_rtx_SET (VOIDmode, target, x));
30364 struct expand_vec_perm_d dperm;
30368 memset (&dperm, 0, sizeof (dperm));
30369 dperm.target = target;
30370 dperm.vmode = mode;
30371 dperm.nelt = GET_MODE_NUNITS (mode);
30372 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
30374 /* Extend to SImode using a paradoxical SUBREG. */
30375 tmp1 = gen_reg_rtx (SImode);
30376 emit_move_insn (tmp1, gen_lowpart (SImode, val));
30378 /* Insert the SImode value as low element of a V4SImode vector. */
30379 tmp2 = gen_lowpart (V4SImode, dperm.op0);
30380 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
30382 ok = (expand_vec_perm_1 (&dperm)
30383 || expand_vec_perm_broadcast_1 (&dperm));
30395 /* Replicate the value once into the next wider mode and recurse. */
30397 enum machine_mode smode, wsmode, wvmode;
30400 smode = GET_MODE_INNER (mode);
30401 wvmode = get_mode_wider_vector (mode);
30402 wsmode = GET_MODE_INNER (wvmode);
30404 val = convert_modes (wsmode, smode, val, true);
30405 x = expand_simple_binop (wsmode, ASHIFT, val,
30406 GEN_INT (GET_MODE_BITSIZE (smode)),
30407 NULL_RTX, 1, OPTAB_LIB_WIDEN);
30408 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
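/* Example (illustrative): replicating the HImode value 0x1234 this way
   yields the SImode value 0x12341234, which is then broadcast in the
   twice-as-wide vector mode and reinterpreted in MODE.  */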
30410 x = gen_lowpart (wvmode, target);
30411 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
30419 enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
30420 rtx x = gen_reg_rtx (hvmode);
30422 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
30425 x = gen_rtx_VEC_CONCAT (mode, x, x);
30426 emit_insn (gen_rtx_SET (VOIDmode, target, x));
30435 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
30436 whose ONE_VAR element is VAR, and other elements are zero. Return true
30440 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
30441 rtx target, rtx var, int one_var)
30443 enum machine_mode vsimode;
30446 bool use_vector_set = false;
30451 /* For SSE4.1, we normally use vector set. But if the second
30452 element is zero and inter-unit moves are OK, we use movq
30454 use_vector_set = (TARGET_64BIT
30456 && !(TARGET_INTER_UNIT_MOVES
30462 use_vector_set = TARGET_SSE4_1;
30465 use_vector_set = TARGET_SSE2;
30468 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
30475 use_vector_set = TARGET_AVX;
30478 /* Use ix86_expand_vector_set in 64bit mode only. */
30479 use_vector_set = TARGET_AVX && TARGET_64BIT;
30485 if (use_vector_set)
30487 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
30488 var = force_reg (GET_MODE_INNER (mode), var);
30489 ix86_expand_vector_set (mmx_ok, target, var, one_var);
30505 var = force_reg (GET_MODE_INNER (mode), var);
30506 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
30507 emit_insn (gen_rtx_SET (VOIDmode, target, x));
30512 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
30513 new_target = gen_reg_rtx (mode);
30515 new_target = target;
30516 var = force_reg (GET_MODE_INNER (mode), var);
30517 x = gen_rtx_VEC_DUPLICATE (mode, var);
30518 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
30519 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
30522 /* We need to shuffle the value to the correct position, so
30523 create a new pseudo to store the intermediate result. */
30525 /* With SSE2, we can use the integer shuffle insns. */
30526 if (mode != V4SFmode && TARGET_SSE2)
30528 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
30530 GEN_INT (one_var == 1 ? 0 : 1),
30531 GEN_INT (one_var == 2 ? 0 : 1),
30532 GEN_INT (one_var == 3 ? 0 : 1)));
30533 if (target != new_target)
30534 emit_move_insn (target, new_target);
30538 /* Otherwise convert the intermediate result to V4SFmode and
30539 use the SSE1 shuffle instructions. */
30540 if (mode != V4SFmode)
30542 tmp = gen_reg_rtx (V4SFmode);
30543 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
30548 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
30550 GEN_INT (one_var == 1 ? 0 : 1),
30551 GEN_INT (one_var == 2 ? 0+4 : 1+4),
30552 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
30554 if (mode != V4SFmode)
30555 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
30556 else if (tmp != target)
30557 emit_move_insn (target, tmp);
30559 else if (target != new_target)
30560 emit_move_insn (target, new_target);
30565 vsimode = V4SImode;
30571 vsimode = V2SImode;
30577 /* Zero extend the variable element to SImode and recurse. */
30578 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
30580 x = gen_reg_rtx (vsimode);
30581 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
30583 gcc_unreachable ();
30585 emit_move_insn (target, gen_lowpart (mode, x));
30593 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
30594 consisting of the values in VALS. It is known that all elements
30595 except ONE_VAR are constants. Return true if successful. */
30598 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
30599 rtx target, rtx vals, int one_var)
30601 rtx var = XVECEXP (vals, 0, one_var);
30602 enum machine_mode wmode;
30605 const_vec = copy_rtx (vals);
30606 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
30607 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
30615 /* For the two element vectors, it's just as easy to use
30616 the general case. */
30620 /* Use ix86_expand_vector_set in 64bit mode only. */
30643 /* There's no way to set one QImode entry easily. Combine
30644 the variable value with its adjacent constant value, and
30645 promote to an HImode set. */
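/* Example (illustrative): to set byte 5 of a V16QImode vector to VAR
   when its neighbor byte 4 is the constant 0x7f, build the HImode
   value (VAR << 8) | 0x7f and store it with a vec_set at element
   5 >> 1 == 2 of the V8HImode view.  */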
30646 x = XVECEXP (vals, 0, one_var ^ 1);
30649 var = convert_modes (HImode, QImode, var, true);
30650 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
30651 NULL_RTX, 1, OPTAB_LIB_WIDEN);
30652 x = GEN_INT (INTVAL (x) & 0xff);
30656 var = convert_modes (HImode, QImode, var, true);
30657 x = gen_int_mode (INTVAL (x) << 8, HImode);
30659 if (x != const0_rtx)
30660 var = expand_simple_binop (HImode, IOR, var, x, var,
30661 1, OPTAB_LIB_WIDEN);
30663 x = gen_reg_rtx (wmode);
30664 emit_move_insn (x, gen_lowpart (wmode, const_vec));
30665 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
30667 emit_move_insn (target, gen_lowpart (mode, x));
30674 emit_move_insn (target, const_vec);
30675 ix86_expand_vector_set (mmx_ok, target, var, one_var);
30679 /* A subroutine of ix86_expand_vector_init_general. Use vector
30680 concatenate to handle the most general case: all values variable,
30681 and none identical. */
30684 ix86_expand_vector_init_concat (enum machine_mode mode,
30685 rtx target, rtx *ops, int n)
30687 enum machine_mode cmode, hmode = VOIDmode;
30688 rtx first[8], second[4];
30728 gcc_unreachable ();
30731 if (!register_operand (ops[1], cmode))
30732 ops[1] = force_reg (cmode, ops[1]);
30733 if (!register_operand (ops[0], cmode))
30734 ops[0] = force_reg (cmode, ops[0]);
30735 emit_insn (gen_rtx_SET (VOIDmode, target,
30736 gen_rtx_VEC_CONCAT (mode, ops[0],
30756 gcc_unreachable ();
30772 gcc_unreachable ();
30777 /* FIXME: We process inputs backward to help RA. PR 36222. */
30780 for (; i > 0; i -= 2, j--)
30782 first[j] = gen_reg_rtx (cmode);
30783 v = gen_rtvec (2, ops[i - 1], ops[i]);
30784 ix86_expand_vector_init (false, first[j],
30785 gen_rtx_PARALLEL (cmode, v));
30791 gcc_assert (hmode != VOIDmode);
30792 for (i = j = 0; i < n; i += 2, j++)
30794 second[j] = gen_reg_rtx (hmode);
30795 ix86_expand_vector_init_concat (hmode, second [j],
30799 ix86_expand_vector_init_concat (mode, target, second, n);
30802 ix86_expand_vector_init_concat (mode, target, first, n);
30806 gcc_unreachable ();
30810 /* A subroutine of ix86_expand_vector_init_general. Use vector
30811 interleave to handle the most general case: all values variable,
30812 and none identical. */
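/* Background (illustrative): an interleave-low of [a0 a1 a2 a3] with
   [b0 b1 b2 b3] yields [a0 b0 a1 b1] (punpckl semantics).  Repeating
   the interleave at doubling element widths merges the individually
   loaded elements into their final lane positions.  */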
30815 ix86_expand_vector_init_interleave (enum machine_mode mode,
30816 rtx target, rtx *ops, int n)
30818 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
30821 rtx (*gen_load_even) (rtx, rtx, rtx);
30822 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
30823 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
30828 gen_load_even = gen_vec_setv8hi;
30829 gen_interleave_first_low = gen_vec_interleave_lowv4si;
30830 gen_interleave_second_low = gen_vec_interleave_lowv2di;
30831 inner_mode = HImode;
30832 first_imode = V4SImode;
30833 second_imode = V2DImode;
30834 third_imode = VOIDmode;
30837 gen_load_even = gen_vec_setv16qi;
30838 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
30839 gen_interleave_second_low = gen_vec_interleave_lowv4si;
30840 inner_mode = QImode;
30841 first_imode = V8HImode;
30842 second_imode = V4SImode;
30843 third_imode = V2DImode;
30846 gcc_unreachable ();
30849 for (i = 0; i < n; i++)
30851 /* Extend the odd element to SImode using a paradoxical SUBREG. */
30852 op0 = gen_reg_rtx (SImode);
30853 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
30855 /* Insert the SImode value as low element of V4SImode vector. */
30856 op1 = gen_reg_rtx (V4SImode);
30857 op0 = gen_rtx_VEC_MERGE (V4SImode,
30858 gen_rtx_VEC_DUPLICATE (V4SImode,
30860 CONST0_RTX (V4SImode),
30862 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
30864 /* Cast the V4SImode vector back to a vector in the original mode. */
30865 op0 = gen_reg_rtx (mode);
30866 emit_move_insn (op0, gen_lowpart (mode, op1));
30868 /* Load even elements into the second position. */
30869 emit_insn (gen_load_even (op0,
30870 force_reg (inner_mode,
30874 /* Cast vector to FIRST_IMODE vector. */
30875 ops[i] = gen_reg_rtx (first_imode);
30876 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
30879 /* Interleave low FIRST_IMODE vectors. */
30880 for (i = j = 0; i < n; i += 2, j++)
30882 op0 = gen_reg_rtx (first_imode);
30883 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
30885 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
30886 ops[j] = gen_reg_rtx (second_imode);
30887 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
30890 /* Interleave low SECOND_IMODE vectors. */
30891 switch (second_imode)
30894 for (i = j = 0; i < n / 2; i += 2, j++)
30896 op0 = gen_reg_rtx (second_imode);
30897 emit_insn (gen_interleave_second_low (op0, ops[i],
30900 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
30902 ops[j] = gen_reg_rtx (third_imode);
30903 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
30905 second_imode = V2DImode;
30906 gen_interleave_second_low = gen_vec_interleave_lowv2di;
30910 op0 = gen_reg_rtx (second_imode);
30911 emit_insn (gen_interleave_second_low (op0, ops[0],
30914 /* Cast the SECOND_IMODE vector back to a vector in the original
30916 emit_insn (gen_rtx_SET (VOIDmode, target,
30917 gen_lowpart (mode, op0)));
30921 gcc_unreachable ();
30925 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
30926 all values variable, and none identical. */
30929 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
30930 rtx target, rtx vals)
30932 rtx ops[32], op0, op1;
30933 enum machine_mode half_mode = VOIDmode;
30940 if (!mmx_ok && !TARGET_SSE)
30952 n = GET_MODE_NUNITS (mode);
30953 for (i = 0; i < n; i++)
30954 ops[i] = XVECEXP (vals, 0, i);
30955 ix86_expand_vector_init_concat (mode, target, ops, n);
30959 half_mode = V16QImode;
30963 half_mode = V8HImode;
30967 n = GET_MODE_NUNITS (mode);
30968 for (i = 0; i < n; i++)
30969 ops[i] = XVECEXP (vals, 0, i);
30970 op0 = gen_reg_rtx (half_mode);
30971 op1 = gen_reg_rtx (half_mode);
30972 ix86_expand_vector_init_interleave (half_mode, op0, ops,
30974 ix86_expand_vector_init_interleave (half_mode, op1,
30975 &ops [n >> 1], n >> 2);
30976 emit_insn (gen_rtx_SET (VOIDmode, target,
30977 gen_rtx_VEC_CONCAT (mode, op0, op1)));
30981 if (!TARGET_SSE4_1)
30989 /* Don't use ix86_expand_vector_init_interleave if we can't
30990 move from GPR to SSE register directly. */
30991 if (!TARGET_INTER_UNIT_MOVES)
30994 n = GET_MODE_NUNITS (mode);
30995 for (i = 0; i < n; i++)
30996 ops[i] = XVECEXP (vals, 0, i);
30997 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
31005 gcc_unreachable ();
31009 int i, j, n_elts, n_words, n_elt_per_word;
31010 enum machine_mode inner_mode;
31011 rtx words[4], shift;
31013 inner_mode = GET_MODE_INNER (mode);
31014 n_elts = GET_MODE_NUNITS (mode);
31015 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
31016 n_elt_per_word = n_elts / n_words;
31017 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
31019 for (i = 0; i < n_words; ++i)
31021 rtx word = NULL_RTX;
31023 for (j = 0; j < n_elt_per_word; ++j)
31025 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
31026 elt = convert_modes (word_mode, inner_mode, elt, true);
31032 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
31033 word, 1, OPTAB_LIB_WIDEN);
31034 word = expand_simple_binop (word_mode, IOR, word, elt,
31035 word, 1, OPTAB_LIB_WIDEN);
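/* Example (illustrative): packing the V4HImode elements {1, 2, 3, 4}
   on a 32-bit target walks each word's elements from high to low, so
   the first SImode word becomes (2 << 16) | 1 == 0x00020001 and the
   second (4 << 16) | 3 == 0x00040003.  */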
31043 emit_move_insn (target, gen_lowpart (mode, words[0]));
31044 else if (n_words == 2)
31046 rtx tmp = gen_reg_rtx (mode);
31047 emit_clobber (tmp);
31048 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
31049 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
31050 emit_move_insn (target, tmp);
31052 else if (n_words == 4)
31054 rtx tmp = gen_reg_rtx (V4SImode);
31055 gcc_assert (word_mode == SImode);
31056 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
31057 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
31058 emit_move_insn (target, gen_lowpart (mode, tmp));
31061 gcc_unreachable ();
31065 /* Initialize vector TARGET via VALS. Suppress the use of MMX
31066 instructions unless MMX_OK is true. */
31069 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
31071 enum machine_mode mode = GET_MODE (target);
31072 enum machine_mode inner_mode = GET_MODE_INNER (mode);
31073 int n_elts = GET_MODE_NUNITS (mode);
31074 int n_var = 0, one_var = -1;
31075 bool all_same = true, all_const_zero = true;
31079 for (i = 0; i < n_elts; ++i)
31081 x = XVECEXP (vals, 0, i);
31082 if (!(CONST_INT_P (x)
31083 || GET_CODE (x) == CONST_DOUBLE
31084 || GET_CODE (x) == CONST_FIXED))
31085 n_var++, one_var = i;
31086 else if (x != CONST0_RTX (inner_mode))
31087 all_const_zero = false;
31088 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
31092 /* Constants are best loaded from the constant pool. */
31095 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
31099 /* If all values are identical, broadcast the value. */
31101 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
31102 XVECEXP (vals, 0, 0)))
31105 /* Values where only one field is non-constant are best loaded from
31106 the pool and overwritten via move later. */
31110 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
31111 XVECEXP (vals, 0, one_var),
31115 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
31119 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
31123 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
31125 enum machine_mode mode = GET_MODE (target);
31126 enum machine_mode inner_mode = GET_MODE_INNER (mode);
31127 enum machine_mode half_mode;
31128 bool use_vec_merge = false;
31130 static rtx (*gen_extract[6][2]) (rtx, rtx)
31132 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
31133 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
31134 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
31135 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
31136 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
31137 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
31139 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
31141 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
31142 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
31143 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
31144 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
31145 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
31146 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
31156 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
31157 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
31159 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
31161 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
31162 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
31168 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
31172 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
31173 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
31175 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
31177 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
31178 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
31185 /* For the two element vectors, we implement a VEC_CONCAT with
31186 the extraction of the other element. */
31188 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
31189 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
31192 op0 = val, op1 = tmp;
31194 op0 = tmp, op1 = val;
31196 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
31197 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
31202 use_vec_merge = TARGET_SSE4_1;
31209 use_vec_merge = true;
31213 /* tmp = target = A B C D */
31214 tmp = copy_to_reg (target);
31215 /* target = A A B B */
31216 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
31217 /* target = X A B B */
31218 ix86_expand_vector_set (false, target, val, 0);
31219 /* target = A X C D */
31220 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
31221 const1_rtx, const0_rtx,
31222 GEN_INT (2+4), GEN_INT (3+4)));
31226 /* tmp = target = A B C D */
31227 tmp = copy_to_reg (target);
31228 /* tmp = X B C D */
31229 ix86_expand_vector_set (false, tmp, val, 0);
31230 /* target = A B X D */
31231 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
31232 const0_rtx, const1_rtx,
31233 GEN_INT (0+4), GEN_INT (3+4)));
31237 /* tmp = target = A B C D */
31238 tmp = copy_to_reg (target);
31239 /* tmp = X B C D */
31240 ix86_expand_vector_set (false, tmp, val, 0);
31241 /* target = A B X D */
31242 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
31243 const0_rtx, const1_rtx,
31244 GEN_INT (2+4), GEN_INT (0+4)));
31248 gcc_unreachable ();
31253 use_vec_merge = TARGET_SSE4_1;
31257 /* Element 0 handled by vec_merge below. */
31260 use_vec_merge = true;
31266 /* With SSE2, use integer shuffles to swap element 0 and ELT,
31267 store into element 0, then shuffle them back. */
31271 order[0] = GEN_INT (elt);
31272 order[1] = const1_rtx;
31273 order[2] = const2_rtx;
31274 order[3] = GEN_INT (3);
31275 order[elt] = const0_rtx;
31277 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
31278 order[1], order[2], order[3]));
31280 ix86_expand_vector_set (false, target, val, 0);
31282 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
31283 order[1], order[2], order[3]));
31287 /* For SSE1, we have to reuse the V4SF code. */
31288 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
31289 gen_lowpart (SFmode, val), elt);
31294 use_vec_merge = TARGET_SSE2;
31297 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
31301 use_vec_merge = TARGET_SSE4_1;
31308 half_mode = V16QImode;
31314 half_mode = V8HImode;
31320 half_mode = V4SImode;
31326 half_mode = V2DImode;
31332 half_mode = V4SFmode;
31338 half_mode = V2DFmode;
31344 /* Compute offset. */
31348 gcc_assert (i <= 1);
31350 /* Extract the half. */
31351 tmp = gen_reg_rtx (half_mode);
31352 emit_insn (gen_extract[j][i] (tmp, target));
31354 /* Put val in tmp at elt. */
31355 ix86_expand_vector_set (false, tmp, val, elt);
31358 emit_insn (gen_insert[j][i] (target, target, tmp));
31367 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
31368 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
31369 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
31373 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
31375 emit_move_insn (mem, target);
31377 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
31378 emit_move_insn (tmp, val);
31380 emit_move_insn (target, mem);
31385 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
31387 enum machine_mode mode = GET_MODE (vec);
31388 enum machine_mode inner_mode = GET_MODE_INNER (mode);
31389 bool use_vec_extr = false;
31402 use_vec_extr = true;
31406 use_vec_extr = TARGET_SSE4_1;
31418 tmp = gen_reg_rtx (mode);
31419 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
31420 GEN_INT (elt), GEN_INT (elt),
31421 GEN_INT (elt+4), GEN_INT (elt+4)));
31425 tmp = gen_reg_rtx (mode);
31426 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
31430 gcc_unreachable ();
31433 use_vec_extr = true;
31438 use_vec_extr = TARGET_SSE4_1;
31452 tmp = gen_reg_rtx (mode);
31453 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
31454 GEN_INT (elt), GEN_INT (elt),
31455 GEN_INT (elt), GEN_INT (elt)));
31459 tmp = gen_reg_rtx (mode);
31460 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
31464 gcc_unreachable ();
31467 use_vec_extr = true;
31472 /* For SSE1, we have to reuse the V4SF code. */
31473 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
31474 gen_lowpart (V4SFmode, vec), elt);
31480 use_vec_extr = TARGET_SSE2;
31483 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
31487 use_vec_extr = TARGET_SSE4_1;
31491 /* ??? Could extract the appropriate HImode element and shift. */
31498 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
31499 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
31501 /* Let the rtl optimizers know about the zero extension performed. */
31502 if (inner_mode == QImode || inner_mode == HImode)
31504 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
31505 target = gen_lowpart (SImode, target);
31508 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
31512 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
31514 emit_move_insn (mem, vec);
31516 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
31517 emit_move_insn (target, tmp);
31521 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
31522 pattern to reduce; DEST is the destination; IN is the input vector. */
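/* Trace (illustrative) for IN = [a b c d]:
     tmp1 = movhlps (in, in)            low half becomes [c d]
     tmp2 = fn (tmp1, in)               element 0: a FN c, element 1: b FN d
     tmp3 = shufps broadcast of tmp2's element 1
     dest = fn (tmp2, tmp3)             element 0: (a FN c) FN (b FN d)  */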
31525 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
31527 rtx tmp1, tmp2, tmp3;
31529 tmp1 = gen_reg_rtx (V4SFmode);
31530 tmp2 = gen_reg_rtx (V4SFmode);
31531 tmp3 = gen_reg_rtx (V4SFmode);
31533 emit_insn (gen_sse_movhlps (tmp1, in, in));
31534 emit_insn (fn (tmp2, tmp1, in));
31536 emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
31537 const1_rtx, const1_rtx,
31538 GEN_INT (1+4), GEN_INT (1+4)));
31539 emit_insn (fn (dest, tmp2, tmp3));
31542 /* Target hook for scalar_mode_supported_p. */
31544 ix86_scalar_mode_supported_p (enum machine_mode mode)
31546 if (DECIMAL_FLOAT_MODE_P (mode))
31547 return default_decimal_float_supported_p ();
31548 else if (mode == TFmode)
31551 return default_scalar_mode_supported_p (mode);
31554 /* Implements target hook vector_mode_supported_p. */
31556 ix86_vector_mode_supported_p (enum machine_mode mode)
31558 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
31560 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
31562 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
31564 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
31566 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
31571 /* Target hook for c_mode_for_suffix. */
31572 static enum machine_mode
31573 ix86_c_mode_for_suffix (char suffix)
31583 /* Worker function for TARGET_MD_ASM_CLOBBERS.
31585 We do this in the new i386 backend to maintain source compatibility
31586 with the old cc0-based compiler. */
31589 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
31590 tree inputs ATTRIBUTE_UNUSED,
31593 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
31595 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
31600 /* Implements target vector targetm.asm.encode_section_info. */
31602 static void ATTRIBUTE_UNUSED
31603 ix86_encode_section_info (tree decl, rtx rtl, int first)
31605 default_encode_section_info (decl, rtl, first);
31607 if (TREE_CODE (decl) == VAR_DECL
31608 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
31609 && ix86_in_large_data_p (decl))
31610 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
31613 /* Worker function for REVERSE_CONDITION. */
31616 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
31618 return (mode != CCFPmode && mode != CCFPUmode
31619 ? reverse_condition (code)
31620 : reverse_condition_maybe_unordered (code));
31623 /* Output code to perform an x87 FP register move, from OPERANDS[1]
31627 output_387_reg_move (rtx insn, rtx *operands)
31629 if (REG_P (operands[0]))
31631 if (REG_P (operands[1])
31632 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
31634 if (REGNO (operands[0]) == FIRST_STACK_REG)
31635 return output_387_ffreep (operands, 0);
31636 return "fstp\t%y0";
31638 if (STACK_TOP_P (operands[0]))
31639 return "fld%Z1\t%y1";
31642 else if (MEM_P (operands[0]))
31644 gcc_assert (REG_P (operands[1]));
31645 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
31646 return "fstp%Z0\t%y0";
31649 /* There is no non-popping store to memory for XFmode.
31650 So if we need one, follow the store with a load. */
31651 if (GET_MODE (operands[0]) == XFmode)
31652 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
31654 return "fst%Z0\t%y0";
31661 /* Output code to perform a conditional jump to LABEL, if C2 flag in
31662 FP status register is set. */
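/* Background (illustrative): fnstsw stores the x87 status word, in
   which C2 is bit 10.  After "fnstsw %ax" the SAHF path copies the
   condition bits into EFLAGS (C2 lands in PF) so an unordered test
   applies; the TEST path instead masks 0x04 in the high byte, which
   is exactly bit 10 of the status word.  */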
31665 ix86_emit_fp_unordered_jump (rtx label)
31667 rtx reg = gen_reg_rtx (HImode);
31670 emit_insn (gen_x86_fnstsw_1 (reg));
31672 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
31674 emit_insn (gen_x86_sahf_1 (reg));
31676 temp = gen_rtx_REG (CCmode, FLAGS_REG);
31677 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
31681 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
31683 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
31684 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
31687 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
31688 gen_rtx_LABEL_REF (VOIDmode, label),
31690 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
31692 emit_jump_insn (temp);
31693 predict_jump (REG_BR_PROB_BASE * 10 / 100);
31696 /* Output code to perform a log1p XFmode calculation. */
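/* Shape of the emitted code (an illustrative sketch):

     if (fabs (op1) >= 1 - sqrt (2) / 2)     // ~0.2928..., loaded below
       op0 = ln (1.0 + op1);                 // fldln2; fyl2x
     else
       op0 = log1p (op1);                    // fldln2; fyl2xp1

   fyl2xp1 is only specified for small arguments, hence the split.  */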
31698 void ix86_emit_i387_log1p (rtx op0, rtx op1)
31700 rtx label1 = gen_label_rtx ();
31701 rtx label2 = gen_label_rtx ();
31703 rtx tmp = gen_reg_rtx (XFmode);
31704 rtx tmp2 = gen_reg_rtx (XFmode);
31707 emit_insn (gen_absxf2 (tmp, op1));
31708 test = gen_rtx_GE (VOIDmode, tmp,
31709 CONST_DOUBLE_FROM_REAL_VALUE (
31710 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
31712 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
31714 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
31715 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
31716 emit_jump (label2);
31718 emit_label (label1);
31719 emit_move_insn (tmp, CONST1_RTX (XFmode));
31720 emit_insn (gen_addxf3 (tmp, op1, tmp));
31721 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
31722 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
31724 emit_label (label2);
31727 /* Output code to perform a Newton-Raphson approximation of a single precision
31728 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
31730 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
31732 rtx x0, x1, e0, e1;
31734 x0 = gen_reg_rtx (mode);
31735 e0 = gen_reg_rtx (mode);
31736 e1 = gen_reg_rtx (mode);
31737 x1 = gen_reg_rtx (mode);
31739 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
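/* Derivation (illustrative): one Newton-Raphson step for
   f(x) = 1/x - b is x1 = x0 * (2 - b * x0) = (x0 + x0) - b * x0 * x0,
   roughly doubling the correct bits of the ~12-bit rcpss estimate.  */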
31741 /* x0 = rcp(b) estimate */
31742 emit_insn (gen_rtx_SET (VOIDmode, x0,
31743 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
31746 emit_insn (gen_rtx_SET (VOIDmode, e0,
31747 gen_rtx_MULT (mode, x0, b)));
31750 emit_insn (gen_rtx_SET (VOIDmode, e0,
31751 gen_rtx_MULT (mode, x0, e0)));
31754 emit_insn (gen_rtx_SET (VOIDmode, e1,
31755 gen_rtx_PLUS (mode, x0, x0)));
31758 emit_insn (gen_rtx_SET (VOIDmode, x1,
31759 gen_rtx_MINUS (mode, e1, e0)));
31762 emit_insn (gen_rtx_SET (VOIDmode, res,
31763 gen_rtx_MULT (mode, a, x1)));
31766 /* Output code to perform a Newton-Raphson approximation of a
31767 single precision floating point [reciprocal] square root. */
31769 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
31772 rtx x0, e0, e1, e2, e3, mthree, mhalf;
31775 x0 = gen_reg_rtx (mode);
31776 e0 = gen_reg_rtx (mode);
31777 e1 = gen_reg_rtx (mode);
31778 e2 = gen_reg_rtx (mode);
31779 e3 = gen_reg_rtx (mode);
31781 real_from_integer (&r, VOIDmode, -3, -1, 0);
31782 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
31784 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
31785 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
31787 if (VECTOR_MODE_P (mode))
31789 mthree = ix86_build_const_vector (mode, true, mthree);
31790 mhalf = ix86_build_const_vector (mode, true, mhalf);
31793 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
31794 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
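/* Derivation (illustrative): the Newton-Raphson step for
   f(x) = 1/x**2 - a is x1 = x0 * (3 - a * x0 * x0) / 2
   = -0.5 * x0 * (a * x0 * x0 - 3.0); multiplying the refined estimate
   by a turns rsqrt(a) into sqrt(a).  */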
31796 /* x0 = rsqrt(a) estimate */
31797 emit_insn (gen_rtx_SET (VOIDmode, x0,
31798 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
31801 /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). */
31806 zero = gen_reg_rtx (mode);
31807 mask = gen_reg_rtx (mode);
31809 zero = force_reg (mode, CONST0_RTX(mode));
31810 emit_insn (gen_rtx_SET (VOIDmode, mask,
31811 gen_rtx_NE (mode, zero, a)));
31813 emit_insn (gen_rtx_SET (VOIDmode, x0,
31814 gen_rtx_AND (mode, x0, mask)));
31818 emit_insn (gen_rtx_SET (VOIDmode, e0,
31819 gen_rtx_MULT (mode, x0, a)));
31821 emit_insn (gen_rtx_SET (VOIDmode, e1,
31822 gen_rtx_MULT (mode, e0, x0)));
31825 mthree = force_reg (mode, mthree);
31826 emit_insn (gen_rtx_SET (VOIDmode, e2,
31827 gen_rtx_PLUS (mode, e1, mthree)));
31829 mhalf = force_reg (mode, mhalf);
31831 /* e3 = -.5 * x0 */
31832 emit_insn (gen_rtx_SET (VOIDmode, e3,
31833 gen_rtx_MULT (mode, x0, mhalf)));
31835 /* e3 = -.5 * e0 */
31836 emit_insn (gen_rtx_SET (VOIDmode, e3,
31837 gen_rtx_MULT (mode, e0, mhalf)));
31838 /* ret = e2 * e3 */
31839 emit_insn (gen_rtx_SET (VOIDmode, res,
31840 gen_rtx_MULT (mode, e2, e3)));
31843 #ifdef TARGET_SOLARIS
31844 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
31847 i386_solaris_elf_named_section (const char *name, unsigned int flags,
31850 /* With Binutils 2.15, the "@unwind" marker must be specified on
31851 every occurrence of the ".eh_frame" section, not just the first
31854 && strcmp (name, ".eh_frame") == 0)
31856 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
31857 flags & SECTION_WRITE ? "aw" : "a");
31862 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
31864 solaris_elf_asm_comdat_section (name, flags, decl);
31869 default_elf_asm_named_section (name, flags, decl);
31871 #endif /* TARGET_SOLARIS */
31873 /* Return the mangling of TYPE if it is an extended fundamental type. */
31875 static const char *
31876 ix86_mangle_type (const_tree type)
31878 type = TYPE_MAIN_VARIANT (type);
31880 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
31881 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
31884 switch (TYPE_MODE (type))
31887 /* __float128 is "g". */
31890 /* "long double" or __float80 is "e". */
31897 /* For 32-bit code we can save PIC register setup by using
31898 __stack_chk_fail_local hidden function instead of calling
31899 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
31900 register, so it is better to call __stack_chk_fail directly. */
31902 static tree ATTRIBUTE_UNUSED
31903 ix86_stack_protect_fail (void)
31905 return TARGET_64BIT
31906 ? default_external_stack_protect_fail ()
31907 : default_hidden_stack_protect_fail ();
31910 /* Select a format to encode pointers in exception handling data. CODE
31911 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
31912 true if the symbol may be affected by dynamic relocations.
31914 ??? All x86 object file formats are capable of representing this.
31915 After all, the relocation needed is the same as for the call insn.
31916 Whether or not a particular assembler allows us to enter such, I
31917 guess we'll have to see. */
31919 asm_preferred_eh_data_format (int code, int global)
31923 int type = DW_EH_PE_sdata8;
31925 || ix86_cmodel == CM_SMALL_PIC
31926 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
31927 type = DW_EH_PE_sdata4;
31928 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
31930 if (ix86_cmodel == CM_SMALL
31931 || (ix86_cmodel == CM_MEDIUM && code))
31932 return DW_EH_PE_udata4;
31933 return DW_EH_PE_absptr;
31936 /* Expand copysign from SIGN to the positive value ABS_VALUE
31937 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
31940 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
31942 enum machine_mode mode = GET_MODE (sign);
31943 rtx sgn = gen_reg_rtx (mode);
31944 if (mask == NULL_RTX)
31946 enum machine_mode vmode;
31948 if (mode == SFmode)
31950 else if (mode == DFmode)
31955 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
31956 if (!VECTOR_MODE_P (mode))
31958 /* We need to generate a scalar mode mask in this case. */
31959 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
31960 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
31961 mask = gen_reg_rtx (mode);
31962 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
31966 mask = gen_rtx_NOT (mode, mask);
31967 emit_insn (gen_rtx_SET (VOIDmode, sgn,
31968 gen_rtx_AND (mode, mask, sign)));
31969 emit_insn (gen_rtx_SET (VOIDmode, result,
31970 gen_rtx_IOR (mode, abs_value, sgn)));
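/* Net effect of the above (illustrative): RESULT = ABS_VALUE
   | (SIGN & SIGNBIT), i.e. the sign bit of SIGN is transplanted onto
   the known-nonnegative ABS_VALUE; a caller-supplied MASK is typically
   the inverted sign-bit mask produced by ix86_expand_sse_fabs.  */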
31973 /* Expand fabs (OP0) and return a new rtx that holds the result. The
31974 mask for masking out the sign-bit is stored in *SMASK, if that is
31977 ix86_expand_sse_fabs (rtx op0, rtx *smask)
31979 enum machine_mode vmode, mode = GET_MODE (op0);
31982 xa = gen_reg_rtx (mode);
31983 if (mode == SFmode)
31985 else if (mode == DFmode)
31989 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
31990 if (!VECTOR_MODE_P (mode))
31992 /* We need to generate a scalar mode mask in this case. */
31993 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
31994 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
31995 mask = gen_reg_rtx (mode);
31996 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
31998 emit_insn (gen_rtx_SET (VOIDmode, xa,
31999 gen_rtx_AND (mode, op0, mask)));
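/* Example (illustrative): for SFmode the mask is 0x7fffffff in each
   lane, so the AND above clears the IEEE sign bit and computes fabs
   without a branch.  */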
32007 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
32008 swapping the operands if SWAP_OPERANDS is true. The expanded
32009 code is a forward jump to a newly created label in case the
32010 comparison is true. The generated label rtx is returned. */
32012 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
32013 bool swap_operands)
32024 label = gen_label_rtx ();
32025 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
32026 emit_insn (gen_rtx_SET (VOIDmode, tmp,
32027 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
32028 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
32029 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
32030 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
32031 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
32032 JUMP_LABEL (tmp) = label;
32037 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
32038 using comparison code CODE. Operands are swapped for the comparison if
32039 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
32041 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
32042 bool swap_operands)
32044 rtx (*insn)(rtx, rtx, rtx, rtx);
32045 enum machine_mode mode = GET_MODE (op0);
32046 rtx mask = gen_reg_rtx (mode);
32055 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
32057 emit_insn (insn (mask, op0, op1,
32058 gen_rtx_fmt_ee (code, mode, op0, op1)));
32062 /* Generate and return a rtx of mode MODE for 2**n where n is the number
32063 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
32065 ix86_gen_TWO52 (enum machine_mode mode)
32067 REAL_VALUE_TYPE TWO52r;
32070 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
32071 TWO52 = const_double_from_real_value (TWO52r, mode);
32072 TWO52 = force_reg (mode, TWO52);
32077 /* Expand SSE sequence for computing lround from OP1 storing
32080 ix86_expand_lround (rtx op0, rtx op1)
32082 /* C code for the stuff we're doing below:
32083 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
32086 enum machine_mode mode = GET_MODE (op1);
32087 const struct real_format *fmt;
32088 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
32091 /* load nextafter (0.5, 0.0) */
32092 fmt = REAL_MODE_FORMAT (mode);
32093 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
32094 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
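/* Why nextafter (0.5, 0.0) rather than 0.5 (illustrative): pred_half
   is the largest representable value below 0.5, so op1 + pred_half
   cannot round an argument whose fraction is just under one half
   across the halfway point, which adding exactly 0.5 could do after
   rounding.  */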
32096 /* adj = copysign (0.5, op1) */
32097 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
32098 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
32100 /* adj = op1 + adj */
32101 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
32103 /* op0 = (imode)adj */
32104 expand_fix (op0, adj, 0);
32107 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
32110 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
32112 /* C code for the stuff we're doing below (for do_floor):
32114 xi -= (double)xi > op1 ? 1 : 0;
32117 enum machine_mode fmode = GET_MODE (op1);
32118 enum machine_mode imode = GET_MODE (op0);
32119 rtx ireg, freg, label, tmp;
32121 /* reg = (long)op1 */
32122 ireg = gen_reg_rtx (imode);
32123 expand_fix (ireg, op1, 0);
32125 /* freg = (double)reg */
32126 freg = gen_reg_rtx (fmode);
32127 expand_float (freg, ireg, 0);
32129 /* ireg = (freg > op1) ? ireg - 1 : ireg */
32130 label = ix86_expand_sse_compare_and_jump (UNLE,
32131 freg, op1, !do_floor);
32132 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
32133 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
32134 emit_move_insn (ireg, tmp);
32136 emit_label (label);
32137 LABEL_NUSES (label) = 1;
32139 emit_move_insn (op0, ireg);
32142 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
32143 result in OPERAND0. */
32145 ix86_expand_rint (rtx operand0, rtx operand1)
32147 /* C code for the stuff we're doing below:
32148 xa = fabs (operand1);
32149 if (!isless (xa, 2**52))
32151 xa = xa + 2**52 - 2**52;
32152 return copysign (xa, operand1);
32154 enum machine_mode mode = GET_MODE (operand0);
32155 rtx res, xa, label, TWO52, mask;
32157 res = gen_reg_rtx (mode);
32158 emit_move_insn (res, operand1);
32160 /* xa = abs (operand1) */
32161 xa = ix86_expand_sse_fabs (res, &mask);
32163 /* if (!isless (xa, TWO52)) goto label; */
32164 TWO52 = ix86_gen_TWO52 (mode);
32165 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32167 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
32168 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
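/* Why the +/- 2**52 trick rounds (illustrative): once xa < 2**52, the
   sum xa + 2**52 lies in [2**52, 2**53), where the spacing between
   adjacent DFmode values is exactly 1.0, so the addition rounds xa to
   a nearby integer and the subtraction recovers that integer exactly
   (the SFmode analogue uses 2**23).  */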
32170 ix86_sse_copysign_to_positive (res, xa, res, mask);
32172 emit_label (label);
32173 LABEL_NUSES (label) = 1;
32175 emit_move_insn (operand0, res);
32178 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
32181 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
32183 /* C code for the stuff we expand below.
32184 double xa = fabs (x), x2;
32185 if (!isless (xa, TWO52))
32187 xa = xa + TWO52 - TWO52;
32188 x2 = copysign (xa, x);
32197 enum machine_mode mode = GET_MODE (operand0);
32198 rtx xa, TWO52, tmp, label, one, res, mask;
32200 TWO52 = ix86_gen_TWO52 (mode);
32202 /* Temporary for holding the result, initialized to the input
32203 operand to ease control flow. */
32204 res = gen_reg_rtx (mode);
32205 emit_move_insn (res, operand1);
32207 /* xa = abs (operand1) */
32208 xa = ix86_expand_sse_fabs (res, &mask);
32210 /* if (!isless (xa, TWO52)) goto label; */
32211 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32213 /* xa = xa + TWO52 - TWO52; */
32214 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
32215 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
32217 /* xa = copysign (xa, operand1) */
32218 ix86_sse_copysign_to_positive (xa, xa, res, mask);
32220 /* generate 1.0 or -1.0 */
32221 one = force_reg (mode,
32222 const_double_from_real_value (do_floor
32223 ? dconst1 : dconstm1, mode));
32225 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
32226 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
32227 emit_insn (gen_rtx_SET (VOIDmode, tmp,
32228 gen_rtx_AND (mode, one, tmp)));
32229 /* We always need to subtract here to preserve signed zero. */
32230 tmp = expand_simple_binop (mode, MINUS,
32231 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
32232 emit_move_insn (res, tmp);
32234 emit_label (label);
32235 LABEL_NUSES (label) = 1;
32237 emit_move_insn (operand0, res);
32240 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
32243 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
32245 /* C code for the stuff we expand below.
32246 double xa = fabs (x), x2;
32247 if (!isless (xa, TWO52))
32249 x2 = (double)(long)x;
32256 if (HONOR_SIGNED_ZEROS (mode))
32257 return copysign (x2, x);
32260 enum machine_mode mode = GET_MODE (operand0);
32261 rtx xa, xi, TWO52, tmp, label, one, res, mask;
32263 TWO52 = ix86_gen_TWO52 (mode);
32265 /* Temporary for holding the result, initialized to the input
32266 operand to ease control flow. */
32267 res = gen_reg_rtx (mode);
32268 emit_move_insn (res, operand1);
32270 /* xa = abs (operand1) */
32271 xa = ix86_expand_sse_fabs (res, &mask);
32273 /* if (!isless (xa, TWO52)) goto label; */
32274 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32276 /* xa = (double)(long)x */
32277 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
32278 expand_fix (xi, res, 0);
32279 expand_float (xa, xi, 0);
32282 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
32284 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
32285 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
32286 emit_insn (gen_rtx_SET (VOIDmode, tmp,
32287 gen_rtx_AND (mode, one, tmp)));
32288 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
32289 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
32290 emit_move_insn (res, tmp);
32292 if (HONOR_SIGNED_ZEROS (mode))
32293 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
32295 emit_label (label);
32296 LABEL_NUSES (label) = 1;
32298 emit_move_insn (operand0, res);
32301 /* Expand SSE sequence for computing round from OPERAND1, storing the
32302 result in OPERAND0. This sequence works without relying on DImode
32303 truncation via cvttsd2siq, which is only available on 64-bit targets. */
32305 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
32307 /* C code for the stuff we expand below.
32308 double xa = fabs (x), xa2, x2;
32309 if (!isless (xa, TWO52))
32311 Using the absolute value and copying back sign makes
32312 -0.0 -> -0.0 correct.
32313 xa2 = xa + TWO52 - TWO52;
32318 else if (dxa > 0.5)
32320 x2 = copysign (xa2, x);
32323 enum machine_mode mode = GET_MODE (operand0);
32324 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
32326 TWO52 = ix86_gen_TWO52 (mode);
32328 /* Temporary for holding the result, initialized to the input
32329 operand to ease control flow. */
32330 res = gen_reg_rtx (mode);
32331 emit_move_insn (res, operand1);
32333 /* xa = abs (operand1) */
32334 xa = ix86_expand_sse_fabs (res, &mask);
32336 /* if (!isless (xa, TWO52)) goto label; */
32337 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32339 /* xa2 = xa + TWO52 - TWO52; */
32340 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
32341 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
32343 /* dxa = xa2 - xa; */
32344 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
32346 /* generate 0.5, 1.0 and -0.5 */
32347 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
32348 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
32349 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
32353 tmp = gen_reg_rtx (mode);
32354 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
32355 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
32356 emit_insn (gen_rtx_SET (VOIDmode, tmp,
32357 gen_rtx_AND (mode, one, tmp)));
32358 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
32359 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
32360 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
32361 emit_insn (gen_rtx_SET (VOIDmode, tmp,
32362 gen_rtx_AND (mode, one, tmp)));
32363 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
32365 /* res = copysign (xa2, operand1) */
32366 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
32368 emit_label (label);
32369 LABEL_NUSES (label) = 1;
32371 emit_move_insn (operand0, res);
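/* Model of the expansion above (hypothetical helper, never compiled):
   round to nearest-even via TWO52, then use dxa = xa2 - xa to detect
   halfway cases and force them away from zero.  */
#if 0
#include <math.h>

static double
model_round_df32 (double x)
{
  const double two52 = 4503599627370496.0;
  double xa = fabs (x), xa2, dxa;
  if (!(xa < two52))
    return x;
  xa2 = xa + two52 - two52;   /* round to nearest even */
  dxa = xa2 - xa;
  if (dxa > 0.5)              /* rounded up by more than a half */
    xa2 -= 1.0;
  else if (dxa <= -0.5)       /* rounded down by a half or more */
    xa2 += 1.0;
  return copysign (xa2, x);   /* e.g. 0.5 -> 1.0, 2.5 -> 3.0 */
}
#endif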
32374 /* Expand SSE sequence for computing trunc from OPERAND1, storing the result in OPERAND0. */
32377 ix86_expand_trunc (rtx operand0, rtx operand1)
32379 /* C code for SSE variant we expand below.
32380 double xa = fabs (x), x2;
32381 if (!isless (xa, TWO52))
32383 x2 = (double)(long)x;
32384 if (HONOR_SIGNED_ZEROS (mode))
32385 return copysign (x2, x);
32388 enum machine_mode mode = GET_MODE (operand0);
32389 rtx xa, xi, TWO52, label, res, mask;
32391 TWO52 = ix86_gen_TWO52 (mode);
32393 /* Temporary for holding the result, initialized to the input
32394 operand to ease control flow. */
32395 res = gen_reg_rtx (mode);
32396 emit_move_insn (res, operand1);
32398 /* xa = abs (operand1) */
32399 xa = ix86_expand_sse_fabs (res, &mask);
32401 /* if (!isless (xa, TWO52)) goto label; */
32402 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32404 /* x = (double)(long)x */
32405 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
32406 expand_fix (xi, res, 0);
32407 expand_float (res, xi, 0);
32409 if (HONOR_SIGNED_ZEROS (mode))
32410 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
32412 emit_label (label);
32413 LABEL_NUSES (label) = 1;
32415 emit_move_insn (operand0, res);
32418 /* Expand SSE sequence for computing trunc from OPERAND1, storing the result in OPERAND0. */
32421 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
32423 enum machine_mode mode = GET_MODE (operand0);
32424 rtx xa, mask, TWO52, label, one, res, smask, tmp;
32426 /* C code for SSE variant we expand below.
32427 double xa = fabs (x), x2;
32428 if (!isless (xa, TWO52))
32430 xa2 = xa + TWO52 - TWO52;
32434 x2 = copysign (xa2, x);
32438 TWO52 = ix86_gen_TWO52 (mode);
32440 /* Temporary for holding the result, initialized to the input
32441 operand to ease control flow. */
32442 res = gen_reg_rtx (mode);
32443 emit_move_insn (res, operand1);
32445 /* xa = abs (operand1) */
32446 xa = ix86_expand_sse_fabs (res, &smask);
32448 /* if (!isless (xa, TWO52)) goto label; */
32449 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32451 /* res = xa + TWO52 - TWO52; */
32452 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
32453 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
32454 emit_move_insn (res, tmp);
32457 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
32459 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
32460 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
32461 emit_insn (gen_rtx_SET (VOIDmode, mask,
32462 gen_rtx_AND (mode, mask, one)));
32463 tmp = expand_simple_binop (mode, MINUS,
32464 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
32465 emit_move_insn (res, tmp);
32467 /* res = copysign (res, operand1) */
32468 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
32470 emit_label (label);
32471 LABEL_NUSES (label) = 1;
32473 emit_move_insn (operand0, res);
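/* Model of the expansion above (hypothetical helper, never compiled):
   round the absolute value to nearest via TWO52, step back by one if
   that rounded up, and restore the sign, which truncates toward 0.  */
#if 0
#include <math.h>

static double
model_trunc_df32 (double x)
{
  const double two52 = 4503599627370496.0;
  double xa = fabs (x), r;
  if (!(xa < two52))
    return x;
  r = xa + two52 - two52;  /* round to nearest */
  if (r > xa)              /* rounded up: undo it */
    r -= 1.0;
  return copysign (r, x);
}
#endif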
32476 /* Expand SSE sequence for computing round from OPERAND1, storing the result in OPERAND0. */
32479 ix86_expand_round (rtx operand0, rtx operand1)
32481 /* C code for the stuff we're doing below:
32482 double xa = fabs (x);
32483 if (!isless (xa, TWO52))
32485 xa = (double)(long)(xa + nextafter (0.5, 0.0));
32486 return copysign (xa, x);
32488 enum machine_mode mode = GET_MODE (operand0);
32489 rtx res, TWO52, xa, label, xi, half, mask;
32490 const struct real_format *fmt;
32491 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
32493 /* Temporary for holding the result, initialized to the input
32494 operand to ease control flow. */
32495 res = gen_reg_rtx (mode);
32496 emit_move_insn (res, operand1);
32498 TWO52 = ix86_gen_TWO52 (mode);
32499 xa = ix86_expand_sse_fabs (res, &mask);
32500 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32502 /* load nextafter (0.5, 0.0) */
32503 fmt = REAL_MODE_FORMAT (mode);
32504 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
32505 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
32507 /* xa = xa + 0.5 */
32508 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
32509 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
32511 /* xa = (double)(int64_t)xa */
32512 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
32513 expand_fix (xi, xa, 0);
32514 expand_float (xa, xi, 0);
32516 /* res = copysign (xa, operand1) */
32517 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
32519 emit_label (label);
32520 LABEL_NUSES (label) = 1;
32522 emit_move_insn (operand0, res);
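/* Model of the expansion above (hypothetical helper, never compiled):
   adding nextafter (0.5, 0.0), the largest double strictly below 0.5,
   and then truncating rounds halfway cases away from zero, while
   values just under 0.5 are not dragged up to 1 by double rounding
   the way a plain "+ 0.5" would.  */
#if 0
#include <math.h>

static double
model_round (double x)
{
  double xa = fabs (x);
  if (!(xa < 4503599627370496.0))   /* 2**52: already integral */
    return x;
  xa = (double) (long long) (xa + nextafter (0.5, 0.0));
  return copysign (xa, x);
}
#endif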
32526 /* Table of valid machine attributes. */
32527 static const struct attribute_spec ix86_attribute_table[] =
32529 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
32530 affects_type_identity } */
32531 /* Stdcall attribute says callee is responsible for popping arguments
32532 if they are not variable. */
32533 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
32535 /* Fastcall attribute says callee is responsible for popping arguments
32536 if they are not variable. */
32537 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
32539 /* Thiscall attribute says callee is responsible for popping arguments
32540 if they are not variable. */
32541 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
32543 /* Cdecl attribute says the callee is a normal C declaration */
32544 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
32546 /* Regparm attribute specifies how many integer arguments are to be
32547 passed in registers. */
32548 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
32550 /* Sseregparm attribute says we are using x86_64 calling conventions
32551 for FP arguments. */
32552 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
32554 /* force_align_arg_pointer says this function realigns the stack at entry. */
32555 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
32556 false, true, true, ix86_handle_cconv_attribute, false },
32557 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
32558 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
32559 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
32560 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
32563 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
32565 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
32567 #ifdef SUBTARGET_ATTRIBUTE_TABLE
32568 SUBTARGET_ATTRIBUTE_TABLE,
32570 /* ms_abi and sysv_abi calling convention function attributes. */
32571 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
32572 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
32573 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
32575 { "callee_pop_aggregate_return", 1, 1, false, true, true,
32576 ix86_handle_callee_pop_aggregate_return, true },
32578 { NULL, 0, 0, false, false, false, NULL, false }
32581 /* Implement targetm.vectorize.builtin_vectorization_cost. */
32583 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
32584 tree vectype ATTRIBUTE_UNUSED,
32585 int misalign ATTRIBUTE_UNUSED)
32587 switch (type_of_cost)
32590 return ix86_cost->scalar_stmt_cost;
32593 return ix86_cost->scalar_load_cost;
32596 return ix86_cost->scalar_store_cost;
32599 return ix86_cost->vec_stmt_cost;
32602 return ix86_cost->vec_align_load_cost;
32605 return ix86_cost->vec_store_cost;
32607 case vec_to_scalar:
32608 return ix86_cost->vec_to_scalar_cost;
32610 case scalar_to_vec:
32611 return ix86_cost->scalar_to_vec_cost;
32613 case unaligned_load:
32614 case unaligned_store:
32615 return ix86_cost->vec_unalign_load_cost;
32617 case cond_branch_taken:
32618 return ix86_cost->cond_taken_branch_cost;
32620 case cond_branch_not_taken:
32621 return ix86_cost->cond_not_taken_branch_cost;
32627 gcc_unreachable ();
32632 /* Implement targetm.vectorize.builtin_vec_perm. */
32635 ix86_vectorize_builtin_vec_perm (tree vec_type, tree *mask_type)
32637 tree itype = TREE_TYPE (vec_type);
32638 bool u = TYPE_UNSIGNED (itype);
32639 enum machine_mode vmode = TYPE_MODE (vec_type);
32640 enum ix86_builtins fcode;
32641 bool ok = TARGET_SSE2;
32647 fcode = IX86_BUILTIN_VEC_PERM_V4DF;
32650 fcode = IX86_BUILTIN_VEC_PERM_V2DF;
32652 itype = ix86_get_builtin_type (IX86_BT_DI);
32657 fcode = IX86_BUILTIN_VEC_PERM_V8SF;
32661 fcode = IX86_BUILTIN_VEC_PERM_V4SF;
32663 itype = ix86_get_builtin_type (IX86_BT_SI);
32667 fcode = u ? IX86_BUILTIN_VEC_PERM_V2DI_U : IX86_BUILTIN_VEC_PERM_V2DI;
32670 fcode = u ? IX86_BUILTIN_VEC_PERM_V4SI_U : IX86_BUILTIN_VEC_PERM_V4SI;
32673 fcode = u ? IX86_BUILTIN_VEC_PERM_V8HI_U : IX86_BUILTIN_VEC_PERM_V8HI;
32676 fcode = u ? IX86_BUILTIN_VEC_PERM_V16QI_U : IX86_BUILTIN_VEC_PERM_V16QI;
32686 *mask_type = itype;
32687 return ix86_builtins[(int) fcode];
32690 /* Return a vector mode with twice as many elements as VMODE. */
32691 /* ??? Consider moving this to a table generated by genmodes.c. */
32693 static enum machine_mode
32694 doublesize_vector_mode (enum machine_mode vmode)
32698 case V2SFmode: return V4SFmode;
32699 case V1DImode: return V2DImode;
32700 case V2SImode: return V4SImode;
32701 case V4HImode: return V8HImode;
32702 case V8QImode: return V16QImode;
32704 case V2DFmode: return V4DFmode;
32705 case V4SFmode: return V8SFmode;
32706 case V2DImode: return V4DImode;
32707 case V4SImode: return V8SImode;
32708 case V8HImode: return V16HImode;
32709 case V16QImode: return V32QImode;
32711 case V4DFmode: return V8DFmode;
32712 case V8SFmode: return V16SFmode;
32713 case V4DImode: return V8DImode;
32714 case V8SImode: return V16SImode;
32715 case V16HImode: return V32HImode;
32716 case V32QImode: return V64QImode;
32719 gcc_unreachable ();
32723 /* Construct (set target (vec_select op0 (parallel perm))) and
32724 return true if that's a valid instruction in the active ISA. */
32727 expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
32729 rtx rperm[MAX_VECT_LEN], x;
32732 for (i = 0; i < nelt; ++i)
32733 rperm[i] = GEN_INT (perm[i]);
32735 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
32736 x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
32737 x = gen_rtx_SET (VOIDmode, target, x);
32740 if (recog_memoized (x) < 0)
32748 /* Similar, but generate a vec_concat from op0 and op1 as well. */
32751 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
32752 const unsigned char *perm, unsigned nelt)
32754 enum machine_mode v2mode;
32757 v2mode = doublesize_vector_mode (GET_MODE (op0));
32758 x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
32759 return expand_vselect (target, x, perm, nelt);
32762 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
32763 in terms of blendp[sd] / pblendw / pblendvb. */
32766 expand_vec_perm_blend (struct expand_vec_perm_d *d)
32768 enum machine_mode vmode = d->vmode;
32769 unsigned i, mask, nelt = d->nelt;
32770 rtx target, op0, op1, x;
32772 if (!TARGET_SSE4_1 || d->op0 == d->op1)
32774 if (!(GET_MODE_SIZE (vmode) == 16 || vmode == V4DFmode || vmode == V8SFmode))
32777 /* This is a blend, not a permute. Elements must stay in their
32778 respective lanes. */
32779 for (i = 0; i < nelt; ++i)
32781 unsigned e = d->perm[i];
32782 if (!(e == i || e == i + nelt))
32789 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
32790 decision should be extracted elsewhere, so that we only try that
32791 sequence once all budget==3 options have been tried. */
32793 /* For bytes, see if bytes move in pairs so we can use pblendw with
32794 an immediate argument, rather than pblendvb with a vector argument. */
32795 if (vmode == V16QImode)
32797 bool pblendw_ok = true;
32798 for (i = 0; i < 16 && pblendw_ok; i += 2)
32799 pblendw_ok = (d->perm[i] + 1 == d->perm[i + 1]);
32803 rtx rperm[16], vperm;
32805 for (i = 0; i < nelt; ++i)
32806 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
32808 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
32809 vperm = force_reg (V16QImode, vperm);
32811 emit_insn (gen_sse4_1_pblendvb (d->target, d->op0, d->op1, vperm));
32816 target = d->target;
32828 for (i = 0; i < nelt; ++i)
32829 mask |= (d->perm[i] >= nelt) << i;
32833 for (i = 0; i < 2; ++i)
32834 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
32838 for (i = 0; i < 4; ++i)
32839 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
32843 for (i = 0; i < 8; ++i)
32844 mask |= (d->perm[i * 2] >= 16) << i;
32848 target = gen_lowpart (vmode, target);
32849 op0 = gen_lowpart (vmode, op0);
32850 op1 = gen_lowpart (vmode, op1);
32854 gcc_unreachable ();
32857 /* This matches five different patterns, one for each of the modes handled above. */
32858 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
32859 x = gen_rtx_SET (VOIDmode, target, x);
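/* Worked example (hypothetical permutation): for V2DImode with
   perm = {0, 3}, element 1 comes from op1, so the loop above builds
   mask = 15 << 4 = 0xf0, and the insn is matched as a pblendw with
   that immediate on the V8HImode view of the operands.  */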
32865 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
32866 in terms of the variable form of vpermilps.
32868 Note that we will have already failed the immediate input vpermilps,
32869 which requires that the high and low part shuffle be identical; the
32870 variable form doesn't require that. */
32873 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
32875 rtx rperm[8], vperm;
32878 if (!TARGET_AVX || d->vmode != V8SFmode || d->op0 != d->op1)
32881 /* We can only permute within the 128-bit lane. */
32882 for (i = 0; i < 8; ++i)
32884 unsigned e = d->perm[i];
32885 if (i < 4 ? e >= 4 : e < 4)
32892 for (i = 0; i < 8; ++i)
32894 unsigned e = d->perm[i];
32896 /* Within each 128-bit lane, the elements of op0 are numbered
32897 from 0 and the elements of op1 are numbered from 4. */
32903 rperm[i] = GEN_INT (e);
32906 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
32907 vperm = force_reg (V8SImode, vperm);
32908 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
32913 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
32914 in terms of pshufb or vpperm. */
32917 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
32919 unsigned i, nelt, eltsz;
32920 rtx rperm[16], vperm, target, op0, op1;
32922 if (!(d->op0 == d->op1 ? TARGET_SSSE3 : TARGET_XOP))
32924 if (GET_MODE_SIZE (d->vmode) != 16)
32931 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
32933 for (i = 0; i < nelt; ++i)
32935 unsigned j, e = d->perm[i];
32936 for (j = 0; j < eltsz; ++j)
32937 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
32940 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
32941 vperm = force_reg (V16QImode, vperm);
32943 target = gen_lowpart (V16QImode, d->target);
32944 op0 = gen_lowpart (V16QImode, d->op0);
32945 if (d->op0 == d->op1)
32946 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
32949 op1 = gen_lowpart (V16QImode, d->op1);
32950 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
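/* Model of the selector construction above (hypothetical helper,
   never compiled): an element-level permutation PERM over NELT
   elements of ELTSZ bytes expands into the 16-entry byte selector
   consumed by pshufb/vpperm.  */
#if 0
static void
model_pshufb_mask (const unsigned char *perm, unsigned nelt,
                   unsigned eltsz, unsigned char bytes[16])
{
  unsigned i, j;
  for (i = 0; i < nelt; ++i)
    for (j = 0; j < eltsz; ++j)
      bytes[i * eltsz + j] = perm[i] * eltsz + j;
}
#endif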
32956 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
32957 in a single instruction. */
32960 expand_vec_perm_1 (struct expand_vec_perm_d *d)
32962 unsigned i, nelt = d->nelt;
32963 unsigned char perm2[MAX_VECT_LEN];
32965 /* Check plain VEC_SELECT first, because AVX has instructions that could
32966 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
32967 input where SEL+CONCAT may not. */
32968 if (d->op0 == d->op1)
32970 int mask = nelt - 1;
32972 for (i = 0; i < nelt; i++)
32973 perm2[i] = d->perm[i] & mask;
32975 if (expand_vselect (d->target, d->op0, perm2, nelt))
32978 /* There are plenty of patterns in sse.md that are written for
32979 SEL+CONCAT and are not replicated for a single op. Perhaps
32980 that should be changed, to avoid the nastiness here. */
32982 /* Recognize interleave style patterns, which means incrementing
32983 every other permutation operand. */
32984 for (i = 0; i < nelt; i += 2)
32986 perm2[i] = d->perm[i] & mask;
32987 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
32989 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
32992 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
32995 for (i = 0; i < nelt; i += 4)
32997 perm2[i + 0] = d->perm[i + 0] & mask;
32998 perm2[i + 1] = d->perm[i + 1] & mask;
32999 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
33000 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
33003 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
33008 /* Finally, try the fully general two operand permute. */
33009 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
33012 /* Recognize interleave style patterns with reversed operands. */
33013 if (d->op0 != d->op1)
33015 for (i = 0; i < nelt; ++i)
33017 unsigned e = d->perm[i];
33025 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
33029 /* Try the SSE4.1 blend variable merge instructions. */
33030 if (expand_vec_perm_blend (d))
33033 /* Try one of the AVX vpermil variable permutations. */
33034 if (expand_vec_perm_vpermil (d))
33037 /* Try the SSSE3 pshufb or XOP vpperm variable permutation. */
33038 if (expand_vec_perm_pshufb (d))
33044 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
33045 in terms of a pair of pshuflw + pshufhw instructions. */
33048 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
33050 unsigned char perm2[MAX_VECT_LEN];
33054 if (d->vmode != V8HImode || d->op0 != d->op1)
33057 /* The two permutations only operate in 64-bit lanes. */
33058 for (i = 0; i < 4; ++i)
33059 if (d->perm[i] >= 4)
33061 for (i = 4; i < 8; ++i)
33062 if (d->perm[i] < 4)
33068 /* Emit the pshuflw. */
33069 memcpy (perm2, d->perm, 4);
33070 for (i = 4; i < 8; ++i)
33072 ok = expand_vselect (d->target, d->op0, perm2, 8);
33075 /* Emit the pshufhw. */
33076 memcpy (perm2 + 4, d->perm + 4, 4);
33077 for (i = 0; i < 4; ++i)
33079 ok = expand_vselect (d->target, d->target, perm2, 8);
33085 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
33086 the permutation using the SSSE3 palignr instruction. This succeeds
33087 when all of the elements in PERM fit within one vector and we merely
33088 need to shift them down so that a single vector permutation has a
33089 chance to succeed. */
33092 expand_vec_perm_palignr (struct expand_vec_perm_d *d)
33094 unsigned i, nelt = d->nelt;
33099 /* Even with AVX, palignr only operates on 128-bit vectors. */
33100 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
33103 min = nelt, max = 0;
33104 for (i = 0; i < nelt; ++i)
33106 unsigned e = d->perm[i];
33112 if (min == 0 || max - min >= nelt)
33115 /* Given that we have SSSE3, we know we'll be able to implement the
33116 single operand permutation after the palignr with pshufb. */
33120 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
33121 emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode, d->target),
33122 gen_lowpart (TImode, d->op1),
33123 gen_lowpart (TImode, d->op0), shift));
33125 d->op0 = d->op1 = d->target;
33128 for (i = 0; i < nelt; ++i)
33130 unsigned e = d->perm[i] - min;
33136 /* Test for the degenerate case where the alignment by itself
33137 produces the desired permutation. */
33141 ok = expand_vec_perm_1 (d);
33147 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
33148 a two vector permutation into a single vector permutation by using
33149 an interleave operation to merge the vectors. */
33152 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
33154 struct expand_vec_perm_d dremap, dfinal;
33155 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
33156 unsigned contents, h1, h2, h3, h4;
33157 unsigned char remap[2 * MAX_VECT_LEN];
33161 if (d->op0 == d->op1)
33164 /* The 256-bit unpck[lh]p[sd] instructions only operate within the 128-bit
33165 lanes. We can use similar techniques with the vperm2f128 instruction,
33166 but it requires slightly different logic. */
33167 if (GET_MODE_SIZE (d->vmode) != 16)
33170 /* Examine from whence the elements come. */
33172 for (i = 0; i < nelt; ++i)
33173 contents |= 1u << d->perm[i];
33175 /* Split the two input vectors into 4 halves. */
33176 h1 = (1u << nelt2) - 1;
33181 memset (remap, 0xff, sizeof (remap));
33184 /* If the elements come from the low halves, use interleave low; similarly,
33185 use interleave high for the high halves. If the elements are from
33186 mis-matched halves, we can use shufps for V4SF/V4SI or do a DImode shuffle. */
33187 if ((contents & (h1 | h3)) == contents)
33189 for (i = 0; i < nelt2; ++i)
33192 remap[i + nelt] = i * 2 + 1;
33193 dremap.perm[i * 2] = i;
33194 dremap.perm[i * 2 + 1] = i + nelt;
33197 else if ((contents & (h2 | h4)) == contents)
33199 for (i = 0; i < nelt2; ++i)
33201 remap[i + nelt2] = i * 2;
33202 remap[i + nelt + nelt2] = i * 2 + 1;
33203 dremap.perm[i * 2] = i + nelt2;
33204 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
33207 else if ((contents & (h1 | h4)) == contents)
33209 for (i = 0; i < nelt2; ++i)
33212 remap[i + nelt + nelt2] = i + nelt2;
33213 dremap.perm[i] = i;
33214 dremap.perm[i + nelt2] = i + nelt + nelt2;
33218 dremap.vmode = V2DImode;
33220 dremap.perm[0] = 0;
33221 dremap.perm[1] = 3;
33224 else if ((contents & (h2 | h3)) == contents)
33226 for (i = 0; i < nelt2; ++i)
33228 remap[i + nelt2] = i;
33229 remap[i + nelt] = i + nelt2;
33230 dremap.perm[i] = i + nelt2;
33231 dremap.perm[i + nelt2] = i + nelt;
33235 dremap.vmode = V2DImode;
33237 dremap.perm[0] = 1;
33238 dremap.perm[1] = 2;
33244 /* Use the remapping array set up above to move the elements from their
33245 swizzled locations into their final destinations. */
33247 for (i = 0; i < nelt; ++i)
33249 unsigned e = remap[d->perm[i]];
33250 gcc_assert (e < nelt);
33251 dfinal.perm[i] = e;
33253 dfinal.op0 = gen_reg_rtx (dfinal.vmode);
33254 dfinal.op1 = dfinal.op0;
33255 dremap.target = dfinal.op0;
33257 /* Test if the final remap can be done with a single insn. For V4SFmode or
33258 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
33260 ok = expand_vec_perm_1 (&dfinal);
33261 seq = get_insns ();
33267 if (dremap.vmode != dfinal.vmode)
33269 dremap.target = gen_lowpart (dremap.vmode, dremap.target);
33270 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
33271 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
33274 ok = expand_vec_perm_1 (&dremap);
33281 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
33282 permutation with two pshufb insns and an ior. We should have already
33283 failed all two instruction sequences. */
33286 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
33288 rtx rperm[2][16], vperm, l, h, op, m128;
33289 unsigned int i, nelt, eltsz;
33291 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
33293 gcc_assert (d->op0 != d->op1);
33296 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
33298 /* Generate two permutation masks. If the required element is within
33299 the given vector it is shuffled into the proper lane. If the required
33300 element is in the other vector, force a zero into the lane by setting
33301 bit 7 in the permutation mask. */
33302 m128 = GEN_INT (-128);
33303 for (i = 0; i < nelt; ++i)
33305 unsigned j, e = d->perm[i];
33306 unsigned which = (e >= nelt);
33310 for (j = 0; j < eltsz; ++j)
33312 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
33313 rperm[1-which][i*eltsz + j] = m128;
33317 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
33318 vperm = force_reg (V16QImode, vperm);
33320 l = gen_reg_rtx (V16QImode);
33321 op = gen_lowpart (V16QImode, d->op0);
33322 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
33324 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
33325 vperm = force_reg (V16QImode, vperm);
33327 h = gen_reg_rtx (V16QImode);
33328 op = gen_lowpart (V16QImode, d->op1);
33329 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
33331 op = gen_lowpart (V16QImode, d->target);
33332 emit_insn (gen_iorv16qi3 (op, l, h));
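/* Model of the two selectors built above (hypothetical helper, never
   compiled; the normalization of E is an assumption matching the
   masks' intent): each result byte is taken from one operand and
   forced to zero in the other by setting bit 7 (0x80) of its selector
   entry, so an IOR of the two pshufb results yields the full
   two-operand permutation.  */
#if 0
static void
model_pshufb2_masks (const unsigned char *perm, unsigned nelt,
                     unsigned eltsz,
                     unsigned char sel0[16], unsigned char sel1[16])
{
  unsigned i, j;
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = perm[i];
      unsigned which = (e >= nelt);      /* 1: element lives in op1 */
      e -= which * nelt;
      for (j = 0; j < eltsz; ++j)
        {
          (which ? sel1 : sel0)[i * eltsz + j] = e * eltsz + j;
          (which ? sel0 : sel1)[i * eltsz + j] = 0x80;  /* zero lane */
        }
    }
}
#endif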
33337 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
33338 and extract-odd permutations. */
33341 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
33348 t1 = gen_reg_rtx (V4DFmode);
33349 t2 = gen_reg_rtx (V4DFmode);
33351 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
33352 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
33353 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
33355 /* Now an unpck[lh]pd will produce the result required. */
33357 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
33359 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
33365 int mask = odd ? 0xdd : 0x88;
33367 t1 = gen_reg_rtx (V8SFmode);
33368 t2 = gen_reg_rtx (V8SFmode);
33369 t3 = gen_reg_rtx (V8SFmode);
33371 /* Shuffle within the 128-bit lanes to produce:
33372 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
33373 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
33376 /* Shuffle the lanes around to produce:
33377 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
33378 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
33381 /* Shuffle within the 128-bit lanes to produce:
33382 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
33383 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
33385 /* Shuffle within the 128-bit lanes to produce:
33386 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
33387 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
33389 /* Shuffle the lanes around to produce:
33390 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
33391 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
33400 /* These are always directly implementable by expand_vec_perm_1. */
33401 gcc_unreachable ();
33405 return expand_vec_perm_pshufb2 (d);
33408 /* We need 2*log2(N)-1 operations to achieve odd/even
33409 with interleave. */
33410 t1 = gen_reg_rtx (V8HImode);
33411 t2 = gen_reg_rtx (V8HImode);
33412 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
33413 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
33414 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
33415 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
33417 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
33419 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
33426 return expand_vec_perm_pshufb2 (d);
33429 t1 = gen_reg_rtx (V16QImode);
33430 t2 = gen_reg_rtx (V16QImode);
33431 t3 = gen_reg_rtx (V16QImode);
33432 emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
33433 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
33434 emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
33435 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
33436 emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
33437 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
33439 t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
33441 t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
33447 gcc_unreachable ();
33453 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
33454 extract-even and extract-odd permutations. */
33457 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
33459 unsigned i, odd, nelt = d->nelt;
33462 if (odd != 0 && odd != 1)
33465 for (i = 1; i < nelt; ++i)
33466 if (d->perm[i] != 2 * i + odd)
33469 return expand_vec_perm_even_odd_1 (d, odd);
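/* Model of the pattern match above (hypothetical helper, never
   compiled): D is an extract-even (odd == 0) or extract-odd
   (odd == 1) permutation iff perm[i] == 2 * i + odd for every i.  */
#if 0
static int
model_is_even_odd (const unsigned char *perm, unsigned nelt)
{
  unsigned i, odd = perm[0];
  if (odd != 0 && odd != 1)
    return -1;
  for (i = 1; i < nelt; ++i)
    if (perm[i] != 2 * i + odd)
      return -1;
  return (int) odd;   /* -1: not an even/odd extraction */
}
#endif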
33472 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
33473 permutations. We assume that expand_vec_perm_1 has already failed. */
33476 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
33478 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
33479 enum machine_mode vmode = d->vmode;
33480 unsigned char perm2[4];
33488 /* These are special-cased in sse.md so that we can optionally
33489 use the vbroadcast instruction. They expand to two insns
33490 if the input happens to be in a register. */
33491 gcc_unreachable ();
33497 /* These are always implementable using standard shuffle patterns. */
33498 gcc_unreachable ();
33502 /* These can be implemented via interleave. We save one insn by
33503 stopping once we have promoted to V4SImode and then using pshufd. */
33506 optab otab = vec_interleave_low_optab;
33510 otab = vec_interleave_high_optab;
33515 op0 = expand_binop (vmode, otab, op0, op0, NULL, 0, OPTAB_DIRECT);
33516 vmode = get_mode_wider_vector (vmode);
33517 op0 = gen_lowpart (vmode, op0);
33519 while (vmode != V4SImode);
33521 memset (perm2, elt, 4);
33522 ok = expand_vselect (gen_lowpart (V4SImode, d->target), op0, perm2, 4);
33527 gcc_unreachable ();
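/* Worked example (hypothetical input): broadcasting byte element E of
   a V16QI register interleaves the vector with itself, doubling the
   run of E's at each step: E -> EE (V8HI view) -> EEEE (V4SI view),
   after which a single pshufd with a replicated index finishes the
   broadcast.  */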
33531 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
33532 broadcast permutations. */
33535 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
33537 unsigned i, elt, nelt = d->nelt;
33539 if (d->op0 != d->op1)
33543 for (i = 1; i < nelt; ++i)
33544 if (d->perm[i] != elt)
33547 return expand_vec_perm_broadcast_1 (d);
33550 /* The guts of ix86_expand_vec_perm_builtin, also used by the ok hook.
33551 With all of the interface bits taken care of, perform the expansion
33552 in D and return true on success. */
33555 ix86_expand_vec_perm_builtin_1 (struct expand_vec_perm_d *d)
33557 /* Try a single instruction expansion. */
33558 if (expand_vec_perm_1 (d))
33561 /* Try sequences of two instructions. */
33563 if (expand_vec_perm_pshuflw_pshufhw (d))
33566 if (expand_vec_perm_palignr (d))
33569 if (expand_vec_perm_interleave2 (d))
33572 if (expand_vec_perm_broadcast (d))
33575 /* Try sequences of three instructions. */
33577 if (expand_vec_perm_pshufb2 (d))
33580 /* ??? Look for narrow permutations whose element orderings would
33581 allow the promotion to a wider mode. */
33583 /* ??? Look for sequences of interleave or a wider permute that place
33584 the data into the correct lanes for a half-vector shuffle like
33585 pshuf[lh]w or vpermilps. */
33587 /* ??? Look for sequences of interleave that produce the desired results.
33588 The combinatorics of punpck[lh] get pretty ugly... */
33590 if (expand_vec_perm_even_odd (d))
33596 /* Extract the values from the vector CST into the permutation array in D.
33597 Return 0 on error, 1 if all values from the permutation come from the
33598 first vector, 2 if all values from the second vector, and 3 otherwise. */
33601 extract_vec_perm_cst (struct expand_vec_perm_d *d, tree cst)
33603 tree list = TREE_VECTOR_CST_ELTS (cst);
33604 unsigned i, nelt = d->nelt;
33607 for (i = 0; i < nelt; ++i, list = TREE_CHAIN (list))
33609 unsigned HOST_WIDE_INT e;
33611 if (!host_integerp (TREE_VALUE (list), 1))
33613 e = tree_low_cst (TREE_VALUE (list), 1);
33617 ret |= (e < nelt ? 1 : 2);
33620 gcc_assert (list == NULL);
33622 /* If all elements come from the second vector, fold them to the first. */
33624 for (i = 0; i < nelt; ++i)
33625 d->perm[i] -= nelt;
33631 ix86_expand_vec_perm_builtin (tree exp)
33633 struct expand_vec_perm_d d;
33634 tree arg0, arg1, arg2;
33636 arg0 = CALL_EXPR_ARG (exp, 0);
33637 arg1 = CALL_EXPR_ARG (exp, 1);
33638 arg2 = CALL_EXPR_ARG (exp, 2);
33640 d.vmode = TYPE_MODE (TREE_TYPE (arg0));
33641 d.nelt = GET_MODE_NUNITS (d.vmode);
33642 d.testing_p = false;
33643 gcc_assert (VECTOR_MODE_P (d.vmode));
33645 if (TREE_CODE (arg2) != VECTOR_CST)
33647 error_at (EXPR_LOCATION (exp),
33648 "vector permutation requires vector constant");
33652 switch (extract_vec_perm_cst (&d, arg2))
33658 error_at (EXPR_LOCATION (exp), "invalid vector permutation constant");
33662 if (!operand_equal_p (arg0, arg1, 0))
33664 d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
33665 d.op0 = force_reg (d.vmode, d.op0);
33666 d.op1 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
33667 d.op1 = force_reg (d.vmode, d.op1);
33671 /* The elements of PERM do not suggest that only the first operand
33672 is used, but both operands are identical. Allow easier matching
33673 of the permutation by folding the permutation into the single input vector. */
33676 unsigned i, nelt = d.nelt;
33677 for (i = 0; i < nelt; ++i)
33678 if (d.perm[i] >= nelt)
33684 d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
33685 d.op0 = force_reg (d.vmode, d.op0);
33690 d.op0 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
33691 d.op0 = force_reg (d.vmode, d.op0);
33696 d.target = gen_reg_rtx (d.vmode);
33697 if (ix86_expand_vec_perm_builtin_1 (&d))
33700 /* For compiler-generated permutations we should never get here, because
33701 the compiler should also be checking the ok hook. But since this is a
33702 builtin the user has access to, don't abort. */
33706 sorry ("vector permutation (%d %d)", d.perm[0], d.perm[1]);
33709 sorry ("vector permutation (%d %d %d %d)",
33710 d.perm[0], d.perm[1], d.perm[2], d.perm[3]);
33713 sorry ("vector permutation (%d %d %d %d %d %d %d %d)",
33714 d.perm[0], d.perm[1], d.perm[2], d.perm[3],
33715 d.perm[4], d.perm[5], d.perm[6], d.perm[7]);
33718 sorry ("vector permutation "
33719 "(%d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d)",
33720 d.perm[0], d.perm[1], d.perm[2], d.perm[3],
33721 d.perm[4], d.perm[5], d.perm[6], d.perm[7],
33722 d.perm[8], d.perm[9], d.perm[10], d.perm[11],
33723 d.perm[12], d.perm[13], d.perm[14], d.perm[15]);
33726 gcc_unreachable ();
33729 return CONST0_RTX (d.vmode);
33732 /* Implement targetm.vectorize.builtin_vec_perm_ok. */
33735 ix86_vectorize_builtin_vec_perm_ok (tree vec_type, tree mask)
33737 struct expand_vec_perm_d d;
33741 d.vmode = TYPE_MODE (vec_type);
33742 d.nelt = GET_MODE_NUNITS (d.vmode);
33743 d.testing_p = true;
33745 /* Given sufficient ISA support we can just return true here
33746 for selected vector modes. */
33747 if (GET_MODE_SIZE (d.vmode) == 16)
33749 /* All implementable with a single vpperm insn. */
33752 /* All implementable with 2 pshufb + 1 ior. */
33755 /* All implementable with shufpd or unpck[lh]pd. */
33760 vec_mask = extract_vec_perm_cst (&d, mask);
33762 /* This hook cannot be called in response to something that the
33763 user does (unlike the builtin expander), so we should never see
33764 an error generated from the extract. */
33765 gcc_assert (vec_mask > 0 && vec_mask <= 3);
33766 one_vec = (vec_mask != 3);
33768 /* Implementable with shufps or pshufd. */
33769 if (one_vec && (d.vmode == V4SFmode || d.vmode == V4SImode))
33772 /* Otherwise we have to go through the motions and see if we can
33773 figure out how to generate the requested permutation. */
33774 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
33775 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
33777 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
33780 ret = ix86_expand_vec_perm_builtin_1 (&d);
33787 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
33789 struct expand_vec_perm_d d;
33795 d.vmode = GET_MODE (targ);
33796 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
33797 d.testing_p = false;
33799 for (i = 0; i < nelt; ++i)
33800 d.perm[i] = i * 2 + odd;
33802 /* We'll either be able to implement the permutation directly... */
33803 if (expand_vec_perm_1 (&d))
33806 /* ... or we use the special-case patterns. */
33807 expand_vec_perm_even_odd_1 (&d, odd);
33810 /* Expand an insert into a vector register through a pinsr insn.
33811 Return true if successful. */
33814 ix86_expand_pinsr (rtx *operands)
33816 rtx dst = operands[0];
33817 rtx src = operands[3];
33819 unsigned int size = INTVAL (operands[1]);
33820 unsigned int pos = INTVAL (operands[2]);
33822 if (GET_CODE (dst) == SUBREG)
33824 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
33825 dst = SUBREG_REG (dst);
33828 if (GET_CODE (src) == SUBREG)
33829 src = SUBREG_REG (src);
33831 switch (GET_MODE (dst))
33838 enum machine_mode srcmode, dstmode;
33839 rtx (*pinsr)(rtx, rtx, rtx, rtx);
33841 srcmode = mode_for_size (size, MODE_INT, 0);
33846 if (!TARGET_SSE4_1)
33848 dstmode = V16QImode;
33849 pinsr = gen_sse4_1_pinsrb;
33855 dstmode = V8HImode;
33856 pinsr = gen_sse2_pinsrw;
33860 if (!TARGET_SSE4_1)
33862 dstmode = V4SImode;
33863 pinsr = gen_sse4_1_pinsrd;
33867 gcc_assert (TARGET_64BIT);
33868 if (!TARGET_SSE4_1)
33870 dstmode = V2DImode;
33871 pinsr = gen_sse4_1_pinsrq;
33878 dst = gen_lowpart (dstmode, dst);
33879 src = gen_lowpart (srcmode, src);
33883 emit_insn (pinsr (dst, dst, src, GEN_INT (1 << pos)));
33892 /* This function returns the calling-ABI-specific va_list type node,
33893 i.e. the va_list type specific to FNDECL. */
33896 ix86_fn_abi_va_list (tree fndecl)
33899 return va_list_type_node;
33900 gcc_assert (fndecl != NULL_TREE);
33902 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
33903 return ms_va_list_type_node;
33905 return sysv_va_list_type_node;
33908 /* Returns the canonical va_list type specified by TYPE. If there
33909 is no valid TYPE provided, it returns NULL_TREE. */
33912 ix86_canonical_va_list_type (tree type)
33916 /* Resolve references and pointers to va_list type. */
33917 if (TREE_CODE (type) == MEM_REF)
33918 type = TREE_TYPE (type);
33919 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
33920 type = TREE_TYPE (type);
33921 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
33922 type = TREE_TYPE (type);
33924 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
33926 wtype = va_list_type_node;
33927 gcc_assert (wtype != NULL_TREE);
33929 if (TREE_CODE (wtype) == ARRAY_TYPE)
33931 /* If va_list is an array type, the argument may have decayed
33932 to a pointer type, e.g. by being passed to another function.
33933 In that case, unwrap both types so that we can compare the
33934 underlying records. */
33935 if (TREE_CODE (htype) == ARRAY_TYPE
33936 || POINTER_TYPE_P (htype))
33938 wtype = TREE_TYPE (wtype);
33939 htype = TREE_TYPE (htype);
33942 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
33943 return va_list_type_node;
33944 wtype = sysv_va_list_type_node;
33945 gcc_assert (wtype != NULL_TREE);
33947 if (TREE_CODE (wtype) == ARRAY_TYPE)
33949 /* If va_list is an array type, the argument may have decayed
33950 to a pointer type, e.g. by being passed to another function.
33951 In that case, unwrap both types so that we can compare the
33952 underlying records. */
33953 if (TREE_CODE (htype) == ARRAY_TYPE
33954 || POINTER_TYPE_P (htype))
33956 wtype = TREE_TYPE (wtype);
33957 htype = TREE_TYPE (htype);
33960 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
33961 return sysv_va_list_type_node;
33962 wtype = ms_va_list_type_node;
33963 gcc_assert (wtype != NULL_TREE);
33965 if (TREE_CODE (wtype) == ARRAY_TYPE)
33967 /* If va_list is an array type, the argument may have decayed
33968 to a pointer type, e.g. by being passed to another function.
33969 In that case, unwrap both types so that we can compare the
33970 underlying records. */
33971 if (TREE_CODE (htype) == ARRAY_TYPE
33972 || POINTER_TYPE_P (htype))
33974 wtype = TREE_TYPE (wtype);
33975 htype = TREE_TYPE (htype);
33978 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
33979 return ms_va_list_type_node;
33982 return std_canonical_va_list_type (type);
33985 /* Iterate through the target-specific builtin types for va_list.
33986 IDX denotes the iterator, *PTREE is set to the result type of
33987 the va_list builtin, and *PNAME to its internal type.
33988 Returns zero if there is no element for this index, otherwise
33989 IDX should be increased upon the next call.
33990 Note, do not iterate a base builtin's name like __builtin_va_list.
33991 Used from c_common_nodes_and_builtins. */
33994 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
34004 *ptree = ms_va_list_type_node;
34005 *pname = "__builtin_ms_va_list";
34009 *ptree = sysv_va_list_type_node;
34010 *pname = "__builtin_sysv_va_list";
34018 #undef TARGET_SCHED_DISPATCH
34019 #define TARGET_SCHED_DISPATCH has_dispatch
34020 #undef TARGET_SCHED_DISPATCH_DO
34021 #define TARGET_SCHED_DISPATCH_DO do_dispatch
34023 /* The size of the dispatch window is the total number of bytes of
34024 object code allowed in a window. */
34025 #define DISPATCH_WINDOW_SIZE 16
34027 /* Number of dispatch windows considered for scheduling. */
34028 #define MAX_DISPATCH_WINDOWS 3
34030 /* Maximum number of instructions in a window. */
34033 /* Maximum number of immediate operands in a window. */
34036 /* Maximum number of immediate bits allowed in a window. */
34037 #define MAX_IMM_SIZE 128
34039 /* Maximum number of 32 bit immediates allowed in a window. */
34040 #define MAX_IMM_32 4
34042 /* Maximum number of 64 bit immediates allowed in a window. */
34043 #define MAX_IMM_64 2
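/* Note that the limits above are mutually consistent:
   MAX_IMM_32 * 32 == MAX_IMM_64 * 64 == MAX_IMM_SIZE == 128, so a
   window's immediate budget is four 32-bit immediates, two 64-bit
   immediates, or an equivalent mix (one 64-bit immediate counts as
   two 32-bit slots in the checks below).  */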
34045 /* Maximum total of loads or prefetches allowed in a window. */
34048 /* Maximum total of stores allowed in a window. */
34049 #define MAX_STORE 1
34055 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
34056 enum dispatch_group {
34071 /* Number of allowable groups in a dispatch window. It is an array
34072 indexed by the dispatch_group enum. 100 is used as a big number,
34073 because the number of these kinds of operations does not have any
34074 effect in a dispatch window, but we need them for other reasons in the table. */
34076 static unsigned int num_allowable_groups[disp_last] = {
34077 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
34080 char group_name[disp_last + 1][16] = {
34081 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
34082 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
34083 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
34086 /* Instruction path. */
34089 path_single, /* Single micro op. */
34090 path_double, /* Double micro op. */
34091 path_multi, /* Instructions with more than 2 micro ops. */
34095 /* sched_insn_info defines a window to the instructions scheduled in
34096 the basic block. It contains a pointer to the insn_info table and
34097 the instruction scheduled.
34099 Windows are allocated for each basic block and are linked together. */
34101 typedef struct sched_insn_info_s {
34103 enum dispatch_group group;
34104 enum insn_path path;
34109 /* Linked list of dispatch windows. This is a two-way list of the
34110 dispatch windows of a basic block. It contains information about
34111 the number of uops in the window and the total number of
34112 instructions and of bytes in the object code for this dispatch window. */
34114 typedef struct dispatch_windows_s {
34115 int num_insn; /* Number of insn in the window. */
34116 int num_uops; /* Number of uops in the window. */
34117 int window_size; /* Number of bytes in the window. */
34118 int window_num; /* Window number, either 0 or 1. */
34119 int num_imm; /* Number of immediates in an insn. */
34120 int num_imm_32; /* Number of 32 bit immediates in an insn. */
34121 int num_imm_64; /* Number of 64 bit immediates in an insn. */
34122 int imm_size; /* Total immediates in the window. */
34123 int num_loads; /* Total memory loads in the window. */
34124 int num_stores; /* Total memory stores in the window. */
34125 int violation; /* Violation exists in window. */
34126 sched_insn_info *window; /* Pointer to the window. */
34127 struct dispatch_windows_s *next;
34128 struct dispatch_windows_s *prev;
34129 } dispatch_windows;
34131 /* Immediate values used in an insn. */
34132 typedef struct imm_info_s
34139 static dispatch_windows *dispatch_window_list;
34140 static dispatch_windows *dispatch_window_list1;
34142 /* Get dispatch group of insn. */
34144 static enum dispatch_group
34145 get_mem_group (rtx insn)
34147 enum attr_memory memory;
34149 if (INSN_CODE (insn) < 0)
34150 return disp_no_group;
34151 memory = get_attr_memory (insn);
34152 if (memory == MEMORY_STORE)
34155 if (memory == MEMORY_LOAD)
34158 if (memory == MEMORY_BOTH)
34159 return disp_load_store;
34161 return disp_no_group;
34164 /* Return true if insn is a compare instruction. */
34169 enum attr_type type;
34171 type = get_attr_type (insn);
34172 return (type == TYPE_TEST
34173 || type == TYPE_ICMP
34174 || type == TYPE_FCMP
34175 || GET_CODE (PATTERN (insn)) == COMPARE);
34178 /* Return true if a dispatch violation was encountered. */
34181 dispatch_violation (void)
34183 if (dispatch_window_list->next)
34184 return dispatch_window_list->next->violation;
34185 return dispatch_window_list->violation;
34188 /* Return true if insn is a branch instruction. */
34191 is_branch (rtx insn)
34193 return (CALL_P (insn) || JUMP_P (insn));
34196 /* Return true if insn is a prefetch instruction. */
34199 is_prefetch (rtx insn)
34201 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
34204 /* This function initializes a dispatch window and the list container holding a
34205 pointer to the window. */
34208 init_window (int window_num)
34211 dispatch_windows *new_list;
34213 if (window_num == 0)
34214 new_list = dispatch_window_list;
34216 new_list = dispatch_window_list1;
34218 new_list->num_insn = 0;
34219 new_list->num_uops = 0;
34220 new_list->window_size = 0;
34221 new_list->next = NULL;
34222 new_list->prev = NULL;
34223 new_list->window_num = window_num;
34224 new_list->num_imm = 0;
34225 new_list->num_imm_32 = 0;
34226 new_list->num_imm_64 = 0;
34227 new_list->imm_size = 0;
34228 new_list->num_loads = 0;
34229 new_list->num_stores = 0;
34230 new_list->violation = false;
34232 for (i = 0; i < MAX_INSN; i++)
34234 new_list->window[i].insn = NULL;
34235 new_list->window[i].group = disp_no_group;
34236 new_list->window[i].path = no_path;
34237 new_list->window[i].byte_len = 0;
34238 new_list->window[i].imm_bytes = 0;
34243 /* This function allocates and initializes a dispatch window and the
34244 list container holding a pointer to the window. */
34246 static dispatch_windows *
34247 allocate_window (void)
34249 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
34250 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
34255 /* This routine initializes the dispatch scheduling information. It
34256 initiates building dispatch scheduler tables and constructs the
34257 first dispatch window. */
34260 init_dispatch_sched (void)
34262 /* Allocate a dispatch list and a window. */
34263 dispatch_window_list = allocate_window ();
34264 dispatch_window_list1 = allocate_window ();
34269 /* This function returns true if a branch is detected. End of a basic block
34270 does not have to be a branch, but here we assume only branches end a basic block. */
34274 is_end_basic_block (enum dispatch_group group)
34276 return group == disp_branch;
34279 /* This function is called when the end of window processing is reached. */
34282 process_end_window (void)
34284 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
34285 if (dispatch_window_list->next)
34287 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
34288 gcc_assert (dispatch_window_list->window_size
34289 + dispatch_window_list1->window_size <= 48);
34295 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
34296 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
34297 for 48 bytes of instructions. Note that these windows are not
34298 dispatch windows of size DISPATCH_WINDOW_SIZE. */
34300 static dispatch_windows *
34301 allocate_next_window (int window_num)
34303 if (window_num == 0)
34305 if (dispatch_window_list->next)
34308 return dispatch_window_list;
34311 dispatch_window_list->next = dispatch_window_list1;
34312 dispatch_window_list1->prev = dispatch_window_list;
34314 return dispatch_window_list1;
34317 /* Increment the number of immediate operands of an instruction. */
34320 find_constant_1 (rtx *in_rtx, imm_info *imm_values)
34325 switch ( GET_CODE (*in_rtx))
34330 (imm_values->imm)++;
34331 if (x86_64_immediate_operand (*in_rtx, SImode))
34332 (imm_values->imm32)++;
34334 (imm_values->imm64)++;
34338 (imm_values->imm)++;
34339 (imm_values->imm64)++;
34343 if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
34345 (imm_values->imm)++;
34346 (imm_values->imm32)++;
34357 /* Compute number of immediate operands of an instruction. */
34360 find_constant (rtx in_rtx, imm_info *imm_values)
34362 for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
34363 (rtx_function) find_constant_1, (void *) imm_values);
34366 /* Return total size of immediate operands of an instruction along with number
34367 of corresponding immediate-operands. It initializes its parameters to zero
34368 before calling FIND_CONSTANT.
34369 INSN is the input instruction. IMM is the total of immediates.
34370 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
34374 get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
34376 imm_info imm_values = {0, 0, 0};
34378 find_constant (insn, &imm_values);
34379 *imm = imm_values.imm;
34380 *imm32 = imm_values.imm32;
34381 *imm64 = imm_values.imm64;
34382 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
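/* For example (hypothetical insn): an instruction carrying one 32-bit
   and one 64-bit immediate yields *imm = 2, *imm32 = 1, *imm64 = 1,
   and the function returns 1*4 + 1*8 = 12 bytes of immediate data.  */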
34385 /* This function indicates whether an operand of an instruction is an immediate. */
34389 has_immediate (rtx insn)
34391 int num_imm_operand;
34392 int num_imm32_operand;
34393 int num_imm64_operand;
34396 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
34397 &num_imm64_operand);
34401 /* Return single or double path for instructions. */
34403 static enum insn_path
34404 get_insn_path (rtx insn)
34406 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
34408 if ((int)path == 0)
34409 return path_single;
34411 if ((int)path == 1)
34412 return path_double;
34417 /* Return insn dispatch group. */
34419 static enum dispatch_group
34420 get_insn_group (rtx insn)
34422 enum dispatch_group group = get_mem_group (insn);
34426 if (is_branch (insn))
34427 return disp_branch;
34432 if (has_immediate (insn))
34435 if (is_prefetch (insn))
34436 return disp_prefetch;
34438 return disp_no_group;
34441 /* Count number of GROUP restricted instructions in a dispatch
34442 window WINDOW_LIST. */
34445 count_num_restricted (rtx insn, dispatch_windows *window_list)
34447 enum dispatch_group group = get_insn_group (insn);
34449 int num_imm_operand;
34450 int num_imm32_operand;
34451 int num_imm64_operand;
34453 if (group == disp_no_group)
34456 if (group == disp_imm)
34458 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
34459 &num_imm64_operand);
34460 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
34461 || num_imm_operand + window_list->num_imm > MAX_IMM
34462 || (num_imm32_operand > 0
34463 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
34464 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
34465 || (num_imm64_operand > 0
34466 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
34467 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
34468 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
34469 && num_imm64_operand > 0
34470 && ((window_list->num_imm_64 > 0
34471 && window_list->num_insn >= 2)
34472 || window_list->num_insn >= 3)))
34478 if ((group == disp_load_store
34479 && (window_list->num_loads >= MAX_LOAD
34480 || window_list->num_stores >= MAX_STORE))
34481 || ((group == disp_load
34482 || group == disp_prefetch)
34483 && window_list->num_loads >= MAX_LOAD)
34484 || (group == disp_store
34485 && window_list->num_stores >= MAX_STORE))
34491 /* This function returns true if insn satisfies dispatch rules on the
34492 last window scheduled. */
34495 fits_dispatch_window (rtx insn)
34497 dispatch_windows *window_list = dispatch_window_list;
34498 dispatch_windows *window_list_next = dispatch_window_list->next;
34499 unsigned int num_restrict;
34500 enum dispatch_group group = get_insn_group (insn);
34501 enum insn_path path = get_insn_path (insn);
34504 /* Make disp_cmp and disp_jcc get scheduled as late as possible. These
34505 instructions should be given the lowest priority in the
34506 scheduling process in the Haifa scheduler to make sure they will be
34507 scheduled in the same dispatch window as the reference to them. */
34508 if (group == disp_jcc || group == disp_cmp)
34511 /* Check nonrestricted. */
34512 if (group == disp_no_group || group == disp_branch)
34515 /* Get last dispatch window. */
34516 if (window_list_next)
34517 window_list = window_list_next;
34519 if (window_list->window_num == 1)
34521 sum = window_list->prev->window_size + window_list->window_size;
34524 || (min_insn_size (insn) + sum) >= 48)
34525 /* Window 1 is full. Go for next window. */
34529 num_restrict = count_num_restricted (insn, window_list);
34531 if (num_restrict > num_allowable_groups[group])
34534 /* See if it fits in the first window. */
34535 if (window_list->window_num == 0)
34537 /* The first window should have only single- and double-path uops in it. */
34539 if (path == path_double
34540 && (window_list->num_uops + 2) > MAX_INSN)
34542 else if (path != path_single)
/* Add an instruction INSN with NUM_UOPS micro-operations to the
   dispatch window WINDOW_LIST.  */

static void
add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
{
  int byte_len = min_insn_size (insn);
  int num_insn = window_list->num_insn;
  int imm_size;
  sched_insn_info *window = window_list->window;
  enum dispatch_group group = get_insn_group (insn);
  enum insn_path path = get_insn_path (insn);
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (!window_list->violation && group != disp_cmp
      && !fits_dispatch_window (insn))
    window_list->violation = true;

  imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
				 &num_imm64_operand);

  /* Initialize window with new instruction.  */
  window[num_insn].insn = insn;
  window[num_insn].byte_len = byte_len;
  window[num_insn].group = group;
  window[num_insn].path = path;
  window[num_insn].imm_bytes = imm_size;

  window_list->window_size += byte_len;
  window_list->num_insn = num_insn + 1;
  window_list->num_uops = window_list->num_uops + num_uops;
  window_list->imm_size += imm_size;
  window_list->num_imm += num_imm_operand;
  window_list->num_imm_32 += num_imm32_operand;
  window_list->num_imm_64 += num_imm64_operand;

  if (group == disp_store)
    window_list->num_stores += 1;
  else if (group == disp_load
	   || group == disp_prefetch)
    window_list->num_loads += 1;
  /* A load-store insn counts against both limits.  */
  else if (group == disp_load_store)
    {
      window_list->num_stores += 1;
      window_list->num_loads += 1;
    }
}
/* Adds a scheduled instruction, INSN, to the current dispatch window.
   If the total bytes of instructions or the number of instructions in
   the window exceeds the allowable, it allocates a new window.  */

static void
add_to_dispatch_window (rtx insn)
{
  int byte_len;
  dispatch_windows *window_list;
  dispatch_windows *next_list;
  dispatch_windows *window0_list;
  enum insn_path path;
  enum dispatch_group insn_group;
  bool insn_fits;
  int num_insn;
  int num_uops;
  int window_num;
  int insn_num_uops;
  int sum;

  if (INSN_CODE (insn) < 0)
    return;

  byte_len = min_insn_size (insn);
  window_list = dispatch_window_list;
  next_list = window_list->next;
  path = get_insn_path (insn);
  insn_group = get_insn_group (insn);

  /* Get the last dispatch window.  */
  if (next_list)
    window_list = dispatch_window_list->next;

  if (path == path_single)
    insn_num_uops = 1;
  else if (path == path_double)
    insn_num_uops = 2;
  else
    insn_num_uops = (int) path;

  /* If the current window is full, get a new window.
     Window number zero is full, if MAX_INSN uops are scheduled in it.
     Window number one is full, if window zero's bytes plus window
     one's bytes is 32, or if the bytes of the new instruction added
     to the total makes it greater than 48, or it already has MAX_INSN
     instructions in it.  */
  num_insn = window_list->num_insn;
  num_uops = window_list->num_uops;
  window_num = window_list->window_num;
  insn_fits = fits_dispatch_window (insn);

  if (num_insn >= MAX_INSN
      || num_uops + insn_num_uops > MAX_INSN
      || !(insn_fits))
    {
      window_num = ~window_num & 1;
      window_list = allocate_next_window (window_num);
    }

  if (window_num == 0)
    {
      add_insn_window (insn, window_list, insn_num_uops);
      if (window_list->num_insn >= MAX_INSN
	  && insn_group == disp_branch)
	{
	  process_end_window ();
	  return;
	}
    }
  else if (window_num == 1)
    {
      window0_list = window_list->prev;
      sum = window0_list->window_size + window_list->window_size;
      if (sum == 32
	  || (byte_len + sum) >= 48)
	{
	  process_end_window ();
	  window_list = dispatch_window_list;
	}

      add_insn_window (insn, window_list, insn_num_uops);
    }
  else
    gcc_unreachable ();

  if (is_end_basic_block (insn_group))
    {
      /* End of basic block is reached; do end-of-basic-block processing.  */
      process_end_window ();
      return;
    }
}
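
/* Editor's sketch (not part of GCC): the window-fullness arithmetic for
   window one, with the 32- and 48-byte constants taken from the comment
   in add_to_dispatch_window above.  Guarded by "#if 0" so it is never
   compiled.  */
#if 0
#include <stdio.h>
#include <stdbool.h>

/* Return true if window one must be flushed before an instruction of
   BYTE_LEN bytes can be added, given the bytes already occupied by
   windows zero and one.  */
static bool
window1_full (int window0_bytes, int window1_bytes, int byte_len)
{
  int sum = window0_bytes + window1_bytes;
  return sum == 32 || byte_len + sum >= 48;
}

int
main (void)
{
  /* 14 + 12 = 26 bytes used; adding 4 gives 30 < 48, so it fits.  */
  printf ("%d\n", window1_full (14, 12, 4));	/* prints 0 */
  /* 16 + 16 = 32 bytes used: window one is full regardless.  */
  printf ("%d\n", window1_full (16, 16, 4));	/* prints 1 */
  return 0;
}
#endif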
/* Print the dispatch window, WINDOW_NUM, to FILE.  */

DEBUG_FUNCTION static void
debug_dispatch_window_file (FILE *file, int window_num)
{
  dispatch_windows *list;
  int i;

  if (window_num == 0)
    list = dispatch_window_list;
  else
    list = dispatch_window_list1;

  fprintf (file, "Window #%d:\n", list->window_num);
  fprintf (file, "  num_insn = %d, num_uops = %d, window_size = %d\n",
	   list->num_insn, list->num_uops, list->window_size);
  fprintf (file,
	   "  num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
	   list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);

  fprintf (file, "  num_loads = %d, num_stores = %d\n", list->num_loads,
	   list->num_stores);
  fprintf (file, " insn info:\n");

  for (i = 0; i < MAX_INSN; i++)
    {
      if (!list->window[i].insn)
	break;
      fprintf (file,
	       "    group[%d] = %s, insn[%d] = %p, path[%d] = %d, byte_len[%d] = %d, imm_bytes[%d] = %d\n",
	       i, group_name[list->window[i].group],
	       i, (void *) list->window[i].insn,
	       i, list->window[i].path,
	       i, list->window[i].byte_len,
	       i, list->window[i].imm_bytes);
    }
}
/* Print dispatch window WINDOW_NUM to stdout.  */

DEBUG_FUNCTION void
debug_dispatch_window (int window_num)
{
  debug_dispatch_window_file (stdout, window_num);
}
/* Print INSN dispatch information to FILE.  */

DEBUG_FUNCTION static void
debug_insn_dispatch_info_file (FILE *file, rtx insn)
{
  int byte_len;
  enum insn_path path;
  enum dispatch_group group;
  int imm_size;
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (INSN_CODE (insn) < 0)
    return;

  byte_len = min_insn_size (insn);
  path = get_insn_path (insn);
  group = get_insn_group (insn);
  imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
				 &num_imm64_operand);

  fprintf (file, " insn info:\n");
  fprintf (file, "  group = %s, path = %d, byte_len = %d\n",
	   group_name[group], path, byte_len);
  fprintf (file, "  num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
	   num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
}
/* Print to stdout the status of the ready list with respect to
   dispatch windows.  */

DEBUG_FUNCTION void
debug_ready_dispatch (void)
{
  int i;
  int no_ready = number_in_ready ();

  fprintf (stdout, "Number of ready: %d\n", no_ready);

  for (i = 0; i < no_ready; i++)
    debug_insn_dispatch_info_file (stdout, get_ready_element (i));
}
/* This routine is the driver of the dispatch scheduler.  */

static void
do_dispatch (rtx insn, int mode)
{
  if (mode == DISPATCH_INIT)
    init_dispatch_sched ();
  else if (mode == ADD_TO_DISPATCH_WINDOW)
    add_to_dispatch_window (insn);
}
/* Return TRUE if dispatch scheduling is supported for this target and
   ACTION holds for INSN.  */

static bool
has_dispatch (rtx insn, int action)
{
  if ((ix86_tune == PROCESSOR_BDVER1 || ix86_tune == PROCESSOR_BDVER2)
      && flag_dispatch_scheduler)
    switch (action)
      {
      default:
	return false;

      case IS_DISPATCH_ON:
	return true;

      case IS_CMP:
	return is_cmp (insn);

      case DISPATCH_VIOLATION:
	return dispatch_violation ();

      case FITS_DISPATCH_WINDOW:
	return fits_dispatch_window (insn);
      }

  return false;
}
/* ??? No autovectorization into MMX or 3DNOW until we can reliably
   place emms and femms instructions.  */

static enum machine_mode
ix86_preferred_simd_mode (enum machine_mode mode)
{
  if (!TARGET_SSE)
    return word_mode;

  switch (mode)
    {
    case QImode:
      return TARGET_AVX2 ? V32QImode : V16QImode;
    case HImode:
      return TARGET_AVX2 ? V16HImode : V8HImode;
    case SImode:
      return TARGET_AVX2 ? V8SImode : V4SImode;
    case DImode:
      return TARGET_AVX2 ? V4DImode : V2DImode;

    case SFmode:
      if (TARGET_AVX && !TARGET_PREFER_AVX128)
	return V8SFmode;
      else
	return V4SFmode;

    case DFmode:
      if (!TARGET_VECTORIZE_DOUBLE)
	return word_mode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
	return V4DFmode;
      else if (TARGET_SSE2)
	return V2DFmode;
      return word_mode;

    default:
      return word_mode;
    }
}

/* If AVX is enabled then try vectorizing with both 256-bit and 128-bit
   vectors.  */

static unsigned int
ix86_autovectorize_vector_sizes (void)
{
  return (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
}
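
/* Editor's sketch (not part of GCC): the value returned above is a bit
   mask of vector sizes in bytes for the vectorizer to try, so 32 | 16
   (== 48) offers both 256-bit and 128-bit vectors, while 0 means "use
   only the preferred SIMD mode".  The walk order below is illustrative,
   not a claim about the middle end.  Guarded by "#if 0" so it is never
   compiled.  */
#if 0
#include <stdio.h>

int
main (void)
{
  unsigned int sizes = 32 | 16;	/* as returned when 256-bit AVX is preferred */
  unsigned int s;

  for (s = 32; s != 0; s >>= 1)
    if (sizes & s)
      printf ("try %u-byte vectors\n", s);
  return 0;
}
#endif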
/* Initialize the GCC target structure.  */
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY ix86_return_in_memory

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#  undef TARGET_MERGE_DECL_ATTRIBUTES
#  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif
#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL ix86_builtin_decl
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  ix86_builtin_vectorized_function

#undef TARGET_VECTORIZE_BUILTIN_CONVERSION
#define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion

#undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue

#undef TARGET_ENCODE_SECTION_INFO
#ifndef SUBTARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
#endif
#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_BYTE_OP
#define TARGET_ASM_BYTE_OP ASM_BYTE

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif

#undef TARGET_PROFILE_BEFORE_PROLOGUE
#define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND ix86_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra

#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
#endif
#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START x86_file_start

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE ix86_option_override

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
#define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list

#undef TARGET_ENUM_VA_LIST_P
#define TARGET_ENUM_VA_LIST_P ix86_enum_va_list

#undef TARGET_FN_ABI_VA_LIST
#define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list

#undef TARGET_CANONICAL_VA_LIST_TYPE
#define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start

#undef TARGET_MD_ASM_CLOBBERS
#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers

#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG ix86_function_arg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
#undef TARGET_UPDATE_STACK_BOUNDARY
#define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
#undef TARGET_GET_DRAP_RTX
#define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_STATIC_CHAIN
#define TARGET_STATIC_CHAIN ix86_static_chain
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
#undef TARGET_RETURN_POPS_ARGS
#define TARGET_RETURN_POPS_ARGS ix86_return_pops_args

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
#endif

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ix86_mangle_type

#ifndef TARGET_MACHO
#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
#endif
#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD ix86_secondary_reload

#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
#undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
#define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  ix86_builtin_vectorization_cost
#undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
#define TARGET_VECTORIZE_BUILTIN_VEC_PERM \
  ix86_vectorize_builtin_vec_perm
#undef TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK
#define TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK \
  ix86_vectorize_builtin_vec_perm_ok
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  ix86_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  ix86_autovectorize_vector_sizes

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE ix86_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE ix86_function_specific_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT ix86_function_specific_print

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P ix86_can_inline_p

#undef TARGET_EXPAND_TO_RTL_HOOK
#define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ix86_can_eliminate

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry

#undef TARGET_ASM_CODE_END
#define TARGET_ASM_CODE_END ix86_code_end

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
#if TARGET_MACHO
#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS darwin_rename_builtins
#endif

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-i386.h"