/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "diagnostic-core.h"
#include "basic-block.h"
#include "target.h"
#include "target-def.h"
#include "langhooks.h"
#include "tm-constrs.h"
#include "dwarf2out.h"
#include "sched-int.h"
#include "sbitmap.h"
#include "fibheap.h"
#include "diagnostic.h"
/* State of the upper 128 bits of AVX registers at a program point.  */

enum upper_128bits_state
{
  unknown = 0,
  unused,
  used
};
typedef struct block_info_def
{
  /* State of the upper 128bits of AVX registers at exit.  */
  enum upper_128bits_state state;
  /* TRUE if state of the upper 128bits of AVX registers is unchanged
     in this block.  */
  bool unchanged;
  /* TRUE if block has been processed.  */
  bool processed;
  /* TRUE if block has been scanned.  */
  bool scanned;
  /* Previous state of the upper 128bits of AVX registers at entry.  */
  enum upper_128bits_state prev;
} *block_info;

#define BLOCK_INFO(B)   ((block_info) (B)->aux)
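
/* Usage sketch (ours, not part of the original pass): the aux field is
   sized via alloc_aux_for_blocks (sizeof (struct block_info_def)) at the
   start of the pass, after which per-block state reads like

       if (BLOCK_INFO (bb)->state == used)
         BLOCK_INFO (bb)->processed = true;

   and free_aux_for_blocks () releases the storage at the end.  */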
enum call_avx256_state
{
  /* Callee returns 256bit AVX register.  */
  callee_return_avx256 = -1,
  /* Callee returns and passes 256bit AVX register.  */
  callee_return_pass_avx256,
  /* Callee passes 256bit AVX register.  */
  callee_pass_avx256,
  /* Callee doesn't return nor pass 256bit AVX register, or no
     256bit AVX register in function return.  */
  call_no_avx256,
  /* vzeroupper intrinsic.  */
  vzeroupper_intrinsic
};
/* Check if a 256bit AVX register is referenced in stores.  */

static void
check_avx256_stores (rtx dest, const_rtx set, void *data)
{
  if ((REG_P (dest)
       && VALID_AVX256_REG_MODE (GET_MODE (dest)))
      || (GET_CODE (set) == SET
          && REG_P (SET_SRC (set))
          && VALID_AVX256_REG_MODE (GET_MODE (SET_SRC (set)))))
    {
      enum upper_128bits_state *state
        = (enum upper_128bits_state *) data;
      *state = used;
    }
}
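
/* Illustrative call (a sketch; the real call site is in the block scan
   below).  note_stores invokes the callback once per store
   subexpression of PAT, so STATE flips to `used' as soon as any store
   touches a 256bit AVX register:

       enum upper_128bits_state state = unused;
       note_stores (PATTERN (insn), check_avx256_stores, &state);  */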
/* Helper function for move_or_delete_vzeroupper_1.  Look for vzeroupper
   in basic block BB.  Delete it if upper 128bit AVX registers are
   unused.  If it isn't deleted, move it to just before a jump insn.

   STATE is state of the upper 128bits of AVX registers at entry.  */
static void
move_or_delete_vzeroupper_2 (basic_block bb,
                             enum upper_128bits_state state)
{
  rtx insn, bb_end;
  rtx vzeroupper_insn = NULL_RTX;
  rtx pat;
  int avx256;
  bool unchanged;

  if (BLOCK_INFO (bb)->unchanged)
    {
      if (dump_file)
        fprintf (dump_file, " [bb %i] unchanged: upper 128bits: %d\n",
                 bb->index, state);

      BLOCK_INFO (bb)->state = state;
      return;
    }

  if (BLOCK_INFO (bb)->scanned && BLOCK_INFO (bb)->prev == state)
    {
      if (dump_file)
        fprintf (dump_file, " [bb %i] scanned: upper 128bits: %d\n",
                 bb->index, BLOCK_INFO (bb)->state);
      return;
    }

  BLOCK_INFO (bb)->prev = state;

  if (dump_file)
    fprintf (dump_file, " [bb %i] entry: upper 128bits: %d\n",
             bb->index, state);

  unchanged = true;

  /* BB_END changes when it is deleted.  */
  bb_end = BB_END (bb);
  insn = BB_HEAD (bb);
  while (insn != bb_end)
    {
      insn = NEXT_INSN (insn);

      if (!NONDEBUG_INSN_P (insn))
        continue;

      /* Move vzeroupper before jump/call.  */
      if (JUMP_P (insn) || CALL_P (insn))
        {
          if (!vzeroupper_insn)
            continue;

          if (PREV_INSN (insn) != vzeroupper_insn)
            {
              if (dump_file)
                {
                  fprintf (dump_file, "Move vzeroupper after:\n");
                  print_rtl_single (dump_file, PREV_INSN (insn));
                  fprintf (dump_file, "before:\n");
                  print_rtl_single (dump_file, insn);
                }
              reorder_insns_nobb (vzeroupper_insn, vzeroupper_insn,
                                  PREV_INSN (insn));
            }
          vzeroupper_insn = NULL_RTX;
          continue;
        }

      pat = PATTERN (insn);

      /* Check insn for vzeroupper intrinsic.  */
      if (GET_CODE (pat) == UNSPEC_VOLATILE
          && XINT (pat, 1) == UNSPECV_VZEROUPPER)
        {
          if (dump_file)
            {
              /* Found vzeroupper intrinsic.  */
              fprintf (dump_file, "Found vzeroupper:\n");
              print_rtl_single (dump_file, insn);
            }
        }
      else
        {
          /* Check insn for vzeroall intrinsic.  */
          if (GET_CODE (pat) == PARALLEL
              && GET_CODE (XVECEXP (pat, 0, 0)) == UNSPEC_VOLATILE
              && XINT (XVECEXP (pat, 0, 0), 1) == UNSPECV_VZEROALL)
            {
              state = unused;
              unchanged = false;

              /* Delete pending vzeroupper insertion.  */
              if (vzeroupper_insn)
                {
                  delete_insn (vzeroupper_insn);
                  vzeroupper_insn = NULL_RTX;
                }
            }
          else if (state != used)
            {
              note_stores (pat, check_avx256_stores, &state);
              if (state == used)
                unchanged = false;
            }
          continue;
        }

      /* Process vzeroupper intrinsic.  */
      avx256 = INTVAL (XVECEXP (pat, 0, 0));

      if (state == unused)
        {
          /* Since the upper 128bits are cleared, callee must not pass
             256bit AVX register.  We only need to check if callee
             returns 256bit AVX register.  */
          if (avx256 == callee_return_avx256)
            {
              state = used;
              unchanged = false;
            }

          /* Remove unnecessary vzeroupper since upper 128bits are
             cleared.  */
          if (dump_file)
            {
              fprintf (dump_file, "Delete redundant vzeroupper:\n");
              print_rtl_single (dump_file, insn);
            }
          delete_insn (insn);
        }
      else
        {
          /* Set state to UNUSED if callee doesn't return 256bit AVX
             register.  */
          if (avx256 != callee_return_pass_avx256)
            state = unused;

          if (avx256 == callee_return_pass_avx256
              || avx256 == callee_pass_avx256)
            {
              /* Must remove vzeroupper since callee passes in 256bit
                 AVX register.  */
              if (dump_file)
                {
                  fprintf (dump_file, "Delete callee pass vzeroupper:\n");
                  print_rtl_single (dump_file, insn);
                }
              delete_insn (insn);
            }
          else
            {
              vzeroupper_insn = insn;
              unchanged = false;
            }
        }
    }

  BLOCK_INFO (bb)->state = state;
  BLOCK_INFO (bb)->unchanged = unchanged;
  BLOCK_INFO (bb)->scanned = true;

  if (dump_file)
    fprintf (dump_file, " [bb %i] exit: %s: upper 128bits: %d\n",
             bb->index, unchanged ? "unchanged" : "changed",
             state);
}
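
/* Summary of the transfer function above (our gloss): a vzeroall makes
   the state `unused' and drops any pending vzeroupper; a store to a
   256bit AVX register makes it `used'; a vzeroupper is deleted outright
   when the state is `unused' (modulo the callee_return_avx256 check),
   otherwise it becomes the pending insn that is later moved to just
   before the next jump or call.  */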
/* Helper function for move_or_delete_vzeroupper.  Process vzeroupper
   in BLOCK and check its predecessor blocks.  Treat UNKNOWN state
   as UNUSED if UNKNOWN_IS_UNUSED is true.  Return TRUE if the exit
   state of BLOCK is changed.  */
static bool
move_or_delete_vzeroupper_1 (basic_block block, bool unknown_is_unused)
{
  edge e;
  edge_iterator ei;
  enum upper_128bits_state state, old_state, new_state;
  bool seen_unknown;

  if (dump_file)
    fprintf (dump_file, " Process [bb %i]: status: %d\n",
             block->index, BLOCK_INFO (block)->processed);

  if (BLOCK_INFO (block)->processed)
    return false;

  state = unused;

  /* Check all predecessor edges of this block.  */
  seen_unknown = false;
  FOR_EACH_EDGE (e, ei, block->preds)
    {
      if (e->src == block)
        continue;
      switch (BLOCK_INFO (e->src)->state)
        {
        case unknown:
          if (!unknown_is_unused)
            seen_unknown = true;
          break;
        case used:
          state = used;
          break;
        case unused:
          break;
        }
    }

  if (seen_unknown)
    state = unknown;

  old_state = BLOCK_INFO (block)->state;
  move_or_delete_vzeroupper_2 (block, state);
  new_state = BLOCK_INFO (block)->state;

  if (state != unknown || new_state == used)
    BLOCK_INFO (block)->processed = true;

  /* Need to rescan if the upper 128bits of AVX registers are changed
     to USED at exit.  */
  if (new_state != old_state)
    {
      if (new_state == used)
        cfun->machine->rescan_vzeroupper_p = 1;
      return true;
    }
  else
    return false;
}
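
/* The predecessor walk above is a simple meet (our summary): an
   `unknown' predecessor (unless UNKNOWN_IS_UNUSED) makes the entry
   state `unknown'; otherwise any `used' predecessor makes it `used';
   only when every predecessor is `unused' does it stay `unused'.  */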
/* Go through the instruction stream looking for vzeroupper.  Delete
   it if upper 128bit AVX registers are unused.  If it isn't deleted,
   move it to just before a jump insn.  */
static void
move_or_delete_vzeroupper (void)
{
  edge e;
  edge_iterator ei;
  basic_block bb;
  fibheap_t worklist, pending, fibheap_swap;
  sbitmap visited, in_worklist, in_pending, sbitmap_swap;
  int *bb_order;
  int *rc_order;
  int i;

  /* Set up block info for each basic block.  */
  alloc_aux_for_blocks (sizeof (struct block_info_def));

  /* Process outgoing edges of entry point.  */
  if (dump_file)
    fprintf (dump_file, "Process outgoing edges of entry point\n");

  FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR->succs)
    {
      move_or_delete_vzeroupper_2 (e->dest,
                                   cfun->machine->caller_pass_avx256_p
                                   ? used : unused);
      BLOCK_INFO (e->dest)->processed = true;
    }

  /* Compute reverse completion order of depth first search of the CFG
     so that the data-flow runs faster.  */
  rc_order = XNEWVEC (int, n_basic_blocks - NUM_FIXED_BLOCKS);
  bb_order = XNEWVEC (int, last_basic_block);
  pre_and_rev_post_order_compute (NULL, rc_order, false);
  for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
    bb_order[rc_order[i]] = i;
  free (rc_order);

  worklist = fibheap_new ();
  pending = fibheap_new ();
  visited = sbitmap_alloc (last_basic_block);
  in_worklist = sbitmap_alloc (last_basic_block);
  in_pending = sbitmap_alloc (last_basic_block);
  sbitmap_zero (in_worklist);

  /* Don't check outgoing edges of entry point.  */
  sbitmap_ones (in_pending);
  FOR_EACH_BB (bb)
    if (BLOCK_INFO (bb)->processed)
      RESET_BIT (in_pending, bb->index);
    else
      {
        move_or_delete_vzeroupper_1 (bb, false);
        fibheap_insert (pending, bb_order[bb->index], bb);
      }

  if (dump_file)
    fprintf (dump_file, "Check remaining basic blocks\n");

  while (!fibheap_empty (pending))
    {
      fibheap_swap = pending;
      pending = worklist;
      worklist = fibheap_swap;
      sbitmap_swap = in_pending;
      in_pending = in_worklist;
      in_worklist = sbitmap_swap;

      sbitmap_zero (visited);

      cfun->machine->rescan_vzeroupper_p = 0;

      while (!fibheap_empty (worklist))
        {
          bb = (basic_block) fibheap_extract_min (worklist);
          RESET_BIT (in_worklist, bb->index);
          gcc_assert (!TEST_BIT (visited, bb->index));
          if (!TEST_BIT (visited, bb->index))
            {
              SET_BIT (visited, bb->index);

              if (move_or_delete_vzeroupper_1 (bb, false))
                FOR_EACH_EDGE (e, ei, bb->succs)
                  {
                    if (e->dest == EXIT_BLOCK_PTR
                        || BLOCK_INFO (e->dest)->processed)
                      continue;

                    if (TEST_BIT (visited, e->dest->index))
                      {
                        if (!TEST_BIT (in_pending, e->dest->index))
                          {
                            /* Send E->DEST to next round.  */
                            SET_BIT (in_pending, e->dest->index);
                            fibheap_insert (pending,
                                            bb_order[e->dest->index],
                                            e->dest);
                          }
                      }
                    else if (!TEST_BIT (in_worklist, e->dest->index))
                      {
                        /* Add E->DEST to current round.  */
                        SET_BIT (in_worklist, e->dest->index);
                        fibheap_insert (worklist, bb_order[e->dest->index],
                                        e->dest);
                      }
                  }
            }
        }

      if (!cfun->machine->rescan_vzeroupper_p)
        break;
    }

  free (bb_order);
  fibheap_delete (worklist);
  fibheap_delete (pending);
  sbitmap_free (visited);
  sbitmap_free (in_worklist);
  sbitmap_free (in_pending);

  if (dump_file)
    fprintf (dump_file, "Process remaining basic blocks\n");

  FOR_EACH_BB (bb)
    move_or_delete_vzeroupper_1 (bb, true);

  free_aux_for_blocks ();
}
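
/* The two-heap loop above is the classic iterative dataflow driver
   (our summary): WORKLIST holds the current round keyed by reverse
   completion order, so a block is usually visited after its
   predecessors; PENDING collects blocks whose input changed and must
   be redone in the next round; iteration stops once a round finishes
   without rescan_vzeroupper_p being set.  */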
static rtx legitimize_dllimport_symbol (rtx, bool);

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
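
/* For example, MODE_INDEX (SImode) == 2 selects the SImode entry of
   the multiply and divide cost tables below; anything else (e.g. TImode)
   falls into the trailing "other" slot at index 4.  */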
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)
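
/* Worked example: with COSTS_N_INSNS (N) == (N) * 4 and a 2-byte add,
   COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1), so the size table below
   charges instructions by encoding length on the same scale that the
   speed tables charge them in "insns".  */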
#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
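
/* Reading the stringop tables below (our gloss): each {max, alg} pair
   means "use ALG for blocks of at most MAX bytes", with -1 standing for
   any larger size; the two stringop_algs per table are the 32-bit and
   64-bit variants, and DUMMY_STRINGOP_ALGS fills a variant that a given
   tuning never uses.  */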
static const
struct processor_costs ix86_size_cost = {/* costs for tuning for size */
  COSTS_N_BYTES (2),    /* cost of an add instruction */
  COSTS_N_BYTES (3),    /* cost of a lea instruction */
  COSTS_N_BYTES (2),    /* variable shift costs */
  COSTS_N_BYTES (3),    /* constant shift costs */
  {COSTS_N_BYTES (3),   /* cost of starting multiply for QI */
   COSTS_N_BYTES (3),   /* HI */
   COSTS_N_BYTES (3),   /* SI */
   COSTS_N_BYTES (3),   /* DI */
   COSTS_N_BYTES (5)},  /* other */
  0,                    /* cost of multiply per each bit set */
  {COSTS_N_BYTES (3),   /* cost of a divide/mod for QI */
   COSTS_N_BYTES (3),   /* HI */
   COSTS_N_BYTES (3),   /* SI */
   COSTS_N_BYTES (3),   /* DI */
   COSTS_N_BYTES (5)},  /* other */
  COSTS_N_BYTES (3),    /* cost of movsx */
  COSTS_N_BYTES (3),    /* cost of movzx */
  0,                    /* "large" insn */
  2,                    /* MOVE_RATIO */
  2,                    /* cost for loading QImode using movzbl */
  {2, 2, 2},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {2, 2, 2},            /* cost of storing integer registers */
  2,                    /* cost of reg,reg fld/fst */
  {2, 2, 2},            /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {2, 2, 2},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  3,                    /* cost of moving MMX register */
  {3, 3},               /* cost of loading MMX registers
                           in SImode and DImode */
  {3, 3},               /* cost of storing MMX registers
                           in SImode and DImode */
  3,                    /* cost of moving SSE register */
  {3, 3, 3},            /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {3, 3, 3},            /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  3,                    /* MMX or SSE register to integer */
  0,                    /* size of l1 cache  */
  0,                    /* size of l2 cache  */
  0,                    /* size of prefetch block */
  0,                    /* number of parallel prefetches */
  1,                    /* Branch cost */
  COSTS_N_BYTES (2),    /* cost of FADD and FSUB insns.  */
  COSTS_N_BYTES (2),    /* cost of FMUL instruction.  */
  COSTS_N_BYTES (2),    /* cost of FDIV instruction.  */
  COSTS_N_BYTES (2),    /* cost of FABS instruction.  */
  COSTS_N_BYTES (2),    /* cost of FCHS instruction.  */
  COSTS_N_BYTES (2),    /* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  1,                    /* scalar_stmt_cost.  */
  1,                    /* scalar load_cost.  */
  1,                    /* scalar_store_cost.  */
  1,                    /* vec_stmt_cost.  */
  1,                    /* vec_to_scalar_cost.  */
  1,                    /* scalar_to_vec_cost.  */
  1,                    /* vec_align_load_cost.  */
  1,                    /* vec_unalign_load_cost.  */
  1,                    /* vec_store_cost.  */
  1,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};
/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {    /* 386 specific costs */
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (1),    /* cost of a lea instruction */
  COSTS_N_INSNS (3),    /* variable shift costs */
  COSTS_N_INSNS (2),    /* constant shift costs */
  {COSTS_N_INSNS (6),   /* cost of starting multiply for QI */
   COSTS_N_INSNS (6),   /* HI */
   COSTS_N_INSNS (6),   /* SI */
   COSTS_N_INSNS (6),   /* DI */
   COSTS_N_INSNS (6)},  /* other */
  COSTS_N_INSNS (1),    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (23),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),  /* HI */
   COSTS_N_INSNS (23),  /* SI */
   COSTS_N_INSNS (23),  /* DI */
   COSTS_N_INSNS (23)}, /* other */
  COSTS_N_INSNS (3),    /* cost of movsx */
  COSTS_N_INSNS (2),    /* cost of movzx */
  15,                   /* "large" insn */
  3,                    /* MOVE_RATIO */
  4,                    /* cost for loading QImode using movzbl */
  {2, 4, 2},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {2, 4, 2},            /* cost of storing integer registers */
  2,                    /* cost of reg,reg fld/fst */
  {8, 8, 8},            /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {8, 8, 8},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {4, 8},               /* cost of loading MMX registers
                           in SImode and DImode */
  {4, 8},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {4, 8, 16},           /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {4, 8, 16},           /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  3,                    /* MMX or SSE register to integer */
  0,                    /* size of l1 cache  */
  0,                    /* size of l2 cache  */
  0,                    /* size of prefetch block */
  0,                    /* number of parallel prefetches */
  1,                    /* Branch cost */
  COSTS_N_INSNS (23),   /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (27),   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (88),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (22),   /* cost of FABS instruction.  */
  COSTS_N_INSNS (24),   /* cost of FCHS instruction.  */
  COSTS_N_INSNS (122),  /* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                    /* scalar_stmt_cost.  */
  1,                    /* scalar load_cost.  */
  1,                    /* scalar_store_cost.  */
  1,                    /* vec_stmt_cost.  */
  1,                    /* vec_to_scalar_cost.  */
  1,                    /* scalar_to_vec_cost.  */
  1,                    /* vec_align_load_cost.  */
  2,                    /* vec_unalign_load_cost.  */
  1,                    /* vec_store_cost.  */
  3,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs i486_cost = {    /* 486 specific costs */
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (1),    /* cost of a lea instruction */
  COSTS_N_INSNS (3),    /* variable shift costs */
  COSTS_N_INSNS (2),    /* constant shift costs */
  {COSTS_N_INSNS (12),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (12),  /* HI */
   COSTS_N_INSNS (12),  /* SI */
   COSTS_N_INSNS (12),  /* DI */
   COSTS_N_INSNS (12)}, /* other */
  1,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (40),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (40),  /* HI */
   COSTS_N_INSNS (40),  /* SI */
   COSTS_N_INSNS (40),  /* DI */
   COSTS_N_INSNS (40)}, /* other */
  COSTS_N_INSNS (3),    /* cost of movsx */
  COSTS_N_INSNS (2),    /* cost of movzx */
  15,                   /* "large" insn */
  3,                    /* MOVE_RATIO */
  4,                    /* cost for loading QImode using movzbl */
  {2, 4, 2},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {2, 4, 2},            /* cost of storing integer registers */
  2,                    /* cost of reg,reg fld/fst */
  {8, 8, 8},            /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {8, 8, 8},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {4, 8},               /* cost of loading MMX registers
                           in SImode and DImode */
  {4, 8},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {4, 8, 16},           /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {4, 8, 16},           /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  3,                    /* MMX or SSE register to integer */
  4,                    /* size of l1 cache.  486 has 8kB cache
                           shared for code and data, so 4kB is
                           not really precise.  */
  4,                    /* size of l2 cache  */
  0,                    /* size of prefetch block */
  0,                    /* number of parallel prefetches */
  1,                    /* Branch cost */
  COSTS_N_INSNS (8),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (16),   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (73),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (3),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (83),   /* cost of FSQRT instruction.  */
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                    /* scalar_stmt_cost.  */
  1,                    /* scalar load_cost.  */
  1,                    /* scalar_store_cost.  */
  1,                    /* vec_stmt_cost.  */
  1,                    /* vec_to_scalar_cost.  */
  1,                    /* scalar_to_vec_cost.  */
  1,                    /* vec_align_load_cost.  */
  2,                    /* vec_unalign_load_cost.  */
  1,                    /* vec_store_cost.  */
  3,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (1),    /* cost of a lea instruction */
  COSTS_N_INSNS (4),    /* variable shift costs */
  COSTS_N_INSNS (1),    /* constant shift costs */
  {COSTS_N_INSNS (11),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (11),  /* HI */
   COSTS_N_INSNS (11),  /* SI */
   COSTS_N_INSNS (11),  /* DI */
   COSTS_N_INSNS (11)}, /* other */
  0,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (25),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (25),  /* HI */
   COSTS_N_INSNS (25),  /* SI */
   COSTS_N_INSNS (25),  /* DI */
   COSTS_N_INSNS (25)}, /* other */
  COSTS_N_INSNS (3),    /* cost of movsx */
  COSTS_N_INSNS (2),    /* cost of movzx */
  8,                    /* "large" insn */
  6,                    /* MOVE_RATIO */
  6,                    /* cost for loading QImode using movzbl */
  {2, 4, 2},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {2, 4, 2},            /* cost of storing integer registers */
  2,                    /* cost of reg,reg fld/fst */
  {2, 2, 6},            /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {4, 4, 6},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  8,                    /* cost of moving MMX register */
  {8, 8},               /* cost of loading MMX registers
                           in SImode and DImode */
  {8, 8},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {4, 8, 16},           /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {4, 8, 16},           /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  3,                    /* MMX or SSE register to integer */
  8,                    /* size of l1 cache.  */
  8,                    /* size of l2 cache  */
  0,                    /* size of prefetch block */
  0,                    /* number of parallel prefetches */
  2,                    /* Branch cost */
  COSTS_N_INSNS (3),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (3),    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (39),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (70),   /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                    /* scalar_stmt_cost.  */
  1,                    /* scalar load_cost.  */
  1,                    /* scalar_store_cost.  */
  1,                    /* vec_stmt_cost.  */
  1,                    /* vec_to_scalar_cost.  */
  1,                    /* scalar_to_vec_cost.  */
  1,                    /* vec_align_load_cost.  */
  2,                    /* vec_unalign_load_cost.  */
  1,                    /* vec_store_cost.  */
  3,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (1),    /* cost of a lea instruction */
  COSTS_N_INSNS (1),    /* variable shift costs */
  COSTS_N_INSNS (1),    /* constant shift costs */
  {COSTS_N_INSNS (4),   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),   /* HI */
   COSTS_N_INSNS (4),   /* SI */
   COSTS_N_INSNS (4),   /* DI */
   COSTS_N_INSNS (4)},  /* other */
  0,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (17),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (17),  /* HI */
   COSTS_N_INSNS (17),  /* SI */
   COSTS_N_INSNS (17),  /* DI */
   COSTS_N_INSNS (17)}, /* other */
  COSTS_N_INSNS (1),    /* cost of movsx */
  COSTS_N_INSNS (1),    /* cost of movzx */
  8,                    /* "large" insn */
  6,                    /* MOVE_RATIO */
  2,                    /* cost for loading QImode using movzbl */
  {4, 4, 4},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {2, 2, 2},            /* cost of storing integer registers */
  2,                    /* cost of reg,reg fld/fst */
  {2, 2, 6},            /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {4, 4, 6},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {2, 2},               /* cost of loading MMX registers
                           in SImode and DImode */
  {2, 2},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {2, 2, 8},            /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {2, 2, 8},            /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  3,                    /* MMX or SSE register to integer */
  8,                    /* size of l1 cache.  */
  256,                  /* size of l2 cache  */
  32,                   /* size of prefetch block */
  6,                    /* number of parallel prefetches */
  2,                    /* Branch cost */
  COSTS_N_INSNS (3),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),   /* cost of FSQRT instruction.  */
  /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
     (we ensure the alignment).  For small blocks inline loop is still a
     noticeable win, for bigger blocks either rep movsl or rep movsb is
     way to go.  Rep movsb has apparently more expensive startup time in CPU,
     but after 4K the difference is down in the noise.  */
  {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
                        {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{1024, unrolled_loop},
                        {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                    /* scalar_stmt_cost.  */
  1,                    /* scalar load_cost.  */
  1,                    /* scalar_store_cost.  */
  1,                    /* vec_stmt_cost.  */
  1,                    /* vec_to_scalar_cost.  */
  1,                    /* scalar_to_vec_cost.  */
  1,                    /* vec_align_load_cost.  */
  2,                    /* vec_unalign_load_cost.  */
  1,                    /* vec_store_cost.  */
  3,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs geode_cost = {
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (1),    /* cost of a lea instruction */
  COSTS_N_INSNS (2),    /* variable shift costs */
  COSTS_N_INSNS (1),    /* constant shift costs */
  {COSTS_N_INSNS (3),   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),   /* HI */
   COSTS_N_INSNS (7),   /* SI */
   COSTS_N_INSNS (7),   /* DI */
   COSTS_N_INSNS (7)},  /* other */
  0,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (15),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),  /* HI */
   COSTS_N_INSNS (39),  /* SI */
   COSTS_N_INSNS (39),  /* DI */
   COSTS_N_INSNS (39)}, /* other */
  COSTS_N_INSNS (1),    /* cost of movsx */
  COSTS_N_INSNS (1),    /* cost of movzx */
  8,                    /* "large" insn */
  4,                    /* MOVE_RATIO */
  1,                    /* cost for loading QImode using movzbl */
  {1, 1, 1},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {1, 1, 1},            /* cost of storing integer registers */
  1,                    /* cost of reg,reg fld/fst */
  {1, 1, 1},            /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {4, 6, 6},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */

  1,                    /* cost of moving MMX register */
  {1, 1},               /* cost of loading MMX registers
                           in SImode and DImode */
  {1, 1},               /* cost of storing MMX registers
                           in SImode and DImode */
  1,                    /* cost of moving SSE register */
  {1, 1, 1},            /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {1, 1, 1},            /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  1,                    /* MMX or SSE register to integer */
  64,                   /* size of l1 cache.  */
  128,                  /* size of l2 cache.  */
  32,                   /* size of prefetch block */
  1,                    /* number of parallel prefetches */
  1,                    /* Branch cost */
  COSTS_N_INSNS (6),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (11),   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (47),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (54),   /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                    /* scalar_stmt_cost.  */
  1,                    /* scalar load_cost.  */
  1,                    /* scalar_store_cost.  */
  1,                    /* vec_stmt_cost.  */
  1,                    /* vec_to_scalar_cost.  */
  1,                    /* scalar_to_vec_cost.  */
  1,                    /* vec_align_load_cost.  */
  2,                    /* vec_unalign_load_cost.  */
  1,                    /* vec_store_cost.  */
  3,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs k6_cost = {
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (2),    /* cost of a lea instruction */
  COSTS_N_INSNS (1),    /* variable shift costs */
  COSTS_N_INSNS (1),    /* constant shift costs */
  {COSTS_N_INSNS (3),   /* cost of starting multiply for QI */
   COSTS_N_INSNS (3),   /* HI */
   COSTS_N_INSNS (3),   /* SI */
   COSTS_N_INSNS (3),   /* DI */
   COSTS_N_INSNS (3)},  /* other */
  0,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (18),  /* HI */
   COSTS_N_INSNS (18),  /* SI */
   COSTS_N_INSNS (18),  /* DI */
   COSTS_N_INSNS (18)}, /* other */
  COSTS_N_INSNS (2),    /* cost of movsx */
  COSTS_N_INSNS (2),    /* cost of movzx */
  8,                    /* "large" insn */
  4,                    /* MOVE_RATIO */
  3,                    /* cost for loading QImode using movzbl */
  {4, 5, 4},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {2, 3, 2},            /* cost of storing integer registers */
  4,                    /* cost of reg,reg fld/fst */
  {6, 6, 6},            /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {4, 4, 4},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {2, 2},               /* cost of loading MMX registers
                           in SImode and DImode */
  {2, 2},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {2, 2, 8},            /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {2, 2, 8},            /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  6,                    /* MMX or SSE register to integer */
  32,                   /* size of l1 cache.  */
  32,                   /* size of l2 cache.  Some models
                           have integrated l2 cache, but
                           optimizing for k6 is not important
                           enough to worry about that.  */
  32,                   /* size of prefetch block */
  1,                    /* number of parallel prefetches */
  1,                    /* Branch cost */
  COSTS_N_INSNS (2),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (2),    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),   /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                    /* scalar_stmt_cost.  */
  1,                    /* scalar load_cost.  */
  1,                    /* scalar_store_cost.  */
  1,                    /* vec_stmt_cost.  */
  1,                    /* vec_to_scalar_cost.  */
  1,                    /* scalar_to_vec_cost.  */
  1,                    /* vec_align_load_cost.  */
  2,                    /* vec_unalign_load_cost.  */
  1,                    /* vec_store_cost.  */
  3,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (2),    /* cost of a lea instruction */
  COSTS_N_INSNS (1),    /* variable shift costs */
  COSTS_N_INSNS (1),    /* constant shift costs */
  {COSTS_N_INSNS (5),   /* cost of starting multiply for QI */
   COSTS_N_INSNS (5),   /* HI */
   COSTS_N_INSNS (5),   /* SI */
   COSTS_N_INSNS (5),   /* DI */
   COSTS_N_INSNS (5)},  /* other */
  0,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),  /* HI */
   COSTS_N_INSNS (42),  /* SI */
   COSTS_N_INSNS (74),  /* DI */
   COSTS_N_INSNS (74)}, /* other */
  COSTS_N_INSNS (1),    /* cost of movsx */
  COSTS_N_INSNS (1),    /* cost of movzx */
  8,                    /* "large" insn */
  9,                    /* MOVE_RATIO */
  4,                    /* cost for loading QImode using movzbl */
  {3, 4, 3},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {3, 4, 3},            /* cost of storing integer registers */
  4,                    /* cost of reg,reg fld/fst */
  {4, 4, 12},           /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {6, 6, 8},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {4, 4},               /* cost of loading MMX registers
                           in SImode and DImode */
  {4, 4},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {4, 4, 6},            /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {4, 4, 5},            /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  5,                    /* MMX or SSE register to integer */
  64,                   /* size of l1 cache.  */
  256,                  /* size of l2 cache.  */
  64,                   /* size of prefetch block */
  6,                    /* number of parallel prefetches */
  5,                    /* Branch cost */
  COSTS_N_INSNS (4),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (24),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),   /* cost of FSQRT instruction.  */
  /* For some reason, Athlon deals better with REP prefix (relative to loops)
     compared to K8.  Alignment becomes important after 8 bytes for memcpy and
     128 bytes for memset.  */
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                    /* scalar_stmt_cost.  */
  1,                    /* scalar load_cost.  */
  1,                    /* scalar_store_cost.  */
  1,                    /* vec_stmt_cost.  */
  1,                    /* vec_to_scalar_cost.  */
  1,                    /* scalar_to_vec_cost.  */
  1,                    /* vec_align_load_cost.  */
  2,                    /* vec_unalign_load_cost.  */
  1,                    /* vec_store_cost.  */
  3,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs k8_cost = {
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (2),    /* cost of a lea instruction */
  COSTS_N_INSNS (1),    /* variable shift costs */
  COSTS_N_INSNS (1),    /* constant shift costs */
  {COSTS_N_INSNS (3),   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),   /* HI */
   COSTS_N_INSNS (3),   /* SI */
   COSTS_N_INSNS (4),   /* DI */
   COSTS_N_INSNS (5)},  /* other */
  0,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),  /* HI */
   COSTS_N_INSNS (42),  /* SI */
   COSTS_N_INSNS (74),  /* DI */
   COSTS_N_INSNS (74)}, /* other */
  COSTS_N_INSNS (1),    /* cost of movsx */
  COSTS_N_INSNS (1),    /* cost of movzx */
  8,                    /* "large" insn */
  9,                    /* MOVE_RATIO */
  4,                    /* cost for loading QImode using movzbl */
  {3, 4, 3},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {3, 4, 3},            /* cost of storing integer registers */
  4,                    /* cost of reg,reg fld/fst */
  {4, 4, 12},           /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {6, 6, 8},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {3, 3},               /* cost of loading MMX registers
                           in SImode and DImode */
  {4, 4},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {4, 3, 6},            /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {4, 4, 5},            /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  5,                    /* MMX or SSE register to integer */
  64,                   /* size of l1 cache.  */
  512,                  /* size of l2 cache.  */
  64,                   /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                  /* number of parallel prefetches */
  3,                    /* Branch cost */
  COSTS_N_INSNS (4),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),   /* cost of FSQRT instruction.  */
  /* K8 has optimized REP instruction for medium sized blocks, but for very
     small blocks it is better to use loop.  For large blocks, libcall can
     do nontemporary accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                    /* scalar_stmt_cost.  */
  2,                    /* scalar load_cost.  */
  2,                    /* scalar_store_cost.  */
  5,                    /* vec_stmt_cost.  */
  0,                    /* vec_to_scalar_cost.  */
  2,                    /* scalar_to_vec_cost.  */
  2,                    /* vec_align_load_cost.  */
  3,                    /* vec_unalign_load_cost.  */
  3,                    /* vec_store_cost.  */
  3,                    /* cond_taken_branch_cost.  */
  2,                    /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs amdfam10_cost = {
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (2),    /* cost of a lea instruction */
  COSTS_N_INSNS (1),    /* variable shift costs */
  COSTS_N_INSNS (1),    /* constant shift costs */
  {COSTS_N_INSNS (3),   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),   /* HI */
   COSTS_N_INSNS (3),   /* SI */
   COSTS_N_INSNS (4),   /* DI */
   COSTS_N_INSNS (5)},  /* other */
  0,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),  /* HI */
   COSTS_N_INSNS (51),  /* SI */
   COSTS_N_INSNS (83),  /* DI */
   COSTS_N_INSNS (83)}, /* other */
  COSTS_N_INSNS (1),    /* cost of movsx */
  COSTS_N_INSNS (1),    /* cost of movzx */
  8,                    /* "large" insn */
  9,                    /* MOVE_RATIO */
  4,                    /* cost for loading QImode using movzbl */
  {3, 4, 3},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {3, 4, 3},            /* cost of storing integer registers */
  4,                    /* cost of reg,reg fld/fst */
  {4, 4, 12},           /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {6, 6, 8},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {3, 3},               /* cost of loading MMX registers
                           in SImode and DImode */
  {4, 4},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {4, 4, 3},            /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {4, 4, 5},            /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  3,                    /* MMX or SSE register to integer */
                        /* On K8:
                            MOVD reg64, xmmreg Double FSTORE 4
                            MOVD reg32, xmmreg Double FSTORE 4
                           On AMDFAM10:
                            MOVD reg64, xmmreg Double FADD 3
                                               1/1  1/1
                            MOVD reg32, xmmreg Double FADD 3
                                               1/1  1/1 */
  64,                   /* size of l1 cache.  */
  512,                  /* size of l2 cache.  */
  64,                   /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                  /* number of parallel prefetches */
  2,                    /* Branch cost */
  COSTS_N_INSNS (4),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),   /* cost of FSQRT instruction.  */

  /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall can
     do nontemporary accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                    /* scalar_stmt_cost.  */
  2,                    /* scalar load_cost.  */
  2,                    /* scalar_store_cost.  */
  6,                    /* vec_stmt_cost.  */
  0,                    /* vec_to_scalar_cost.  */
  2,                    /* scalar_to_vec_cost.  */
  2,                    /* vec_align_load_cost.  */
  2,                    /* vec_unalign_load_cost.  */
  2,                    /* vec_store_cost.  */
  2,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs bdver1_cost = {
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (1),    /* cost of a lea instruction */
  COSTS_N_INSNS (1),    /* variable shift costs */
  COSTS_N_INSNS (1),    /* constant shift costs */
  {COSTS_N_INSNS (4),   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),   /* HI */
   COSTS_N_INSNS (4),   /* SI */
   COSTS_N_INSNS (6),   /* DI */
   COSTS_N_INSNS (6)},  /* other */
  0,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),  /* HI */
   COSTS_N_INSNS (51),  /* SI */
   COSTS_N_INSNS (83),  /* DI */
   COSTS_N_INSNS (83)}, /* other */
  COSTS_N_INSNS (1),    /* cost of movsx */
  COSTS_N_INSNS (1),    /* cost of movzx */
  8,                    /* "large" insn */
  9,                    /* MOVE_RATIO */
  4,                    /* cost for loading QImode using movzbl */
  {5, 5, 4},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {4, 4, 4},            /* cost of storing integer registers */
  2,                    /* cost of reg,reg fld/fst */
  {5, 5, 12},           /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {4, 4, 8},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {4, 4},               /* cost of loading MMX registers
                           in SImode and DImode */
  {4, 4},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {4, 4, 4},            /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {4, 4, 4},            /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  2,                    /* MMX or SSE register to integer */
                        /* On K8:
                            MOVD reg64, xmmreg Double FSTORE 4
                            MOVD reg32, xmmreg Double FSTORE 4
                           On AMDFAM10:
                            MOVD reg64, xmmreg Double FADD 3
                                               1/1  1/1
                            MOVD reg32, xmmreg Double FADD 3
                                               1/1  1/1 */
  16,                   /* size of l1 cache.  */
  2048,                 /* size of l2 cache.  */
  64,                   /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                  /* number of parallel prefetches */
  2,                    /* Branch cost */
  COSTS_N_INSNS (6),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),   /* cost of FSQRT instruction.  */

  /* BDVER1 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall
     can do nontemporary accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  6,                    /* scalar_stmt_cost.  */
  4,                    /* scalar load_cost.  */
  4,                    /* scalar_store_cost.  */
  6,                    /* vec_stmt_cost.  */
  0,                    /* vec_to_scalar_cost.  */
  2,                    /* scalar_to_vec_cost.  */
  4,                    /* vec_align_load_cost.  */
  4,                    /* vec_unalign_load_cost.  */
  4,                    /* vec_store_cost.  */
  2,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs btver1_cost = {
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (2),    /* cost of a lea instruction */
  COSTS_N_INSNS (1),    /* variable shift costs */
  COSTS_N_INSNS (1),    /* constant shift costs */
  {COSTS_N_INSNS (3),   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),   /* HI */
   COSTS_N_INSNS (3),   /* SI */
   COSTS_N_INSNS (4),   /* DI */
   COSTS_N_INSNS (5)},  /* other */
  0,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),  /* HI */
   COSTS_N_INSNS (51),  /* SI */
   COSTS_N_INSNS (83),  /* DI */
   COSTS_N_INSNS (83)}, /* other */
  COSTS_N_INSNS (1),    /* cost of movsx */
  COSTS_N_INSNS (1),    /* cost of movzx */
  8,                    /* "large" insn */
  9,                    /* MOVE_RATIO */
  4,                    /* cost for loading QImode using movzbl */
  {3, 4, 3},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {3, 4, 3},            /* cost of storing integer registers */
  4,                    /* cost of reg,reg fld/fst */
  {4, 4, 12},           /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {6, 6, 8},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {3, 3},               /* cost of loading MMX registers
                           in SImode and DImode */
  {4, 4},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {4, 4, 3},            /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {4, 4, 5},            /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  3,                    /* MMX or SSE register to integer */
                        /* On K8:
                            MOVD reg64, xmmreg Double FSTORE 4
                            MOVD reg32, xmmreg Double FSTORE 4
                           On AMDFAM10:
                            MOVD reg64, xmmreg Double FADD 3
                                               1/1  1/1
                            MOVD reg32, xmmreg Double FADD 3
                                               1/1  1/1 */
  32,                   /* size of l1 cache.  */
  512,                  /* size of l2 cache.  */
  64,                   /* size of prefetch block */
  100,                  /* number of parallel prefetches */
  2,                    /* Branch cost */
  COSTS_N_INSNS (4),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),   /* cost of FSQRT instruction.  */

  /* BTVER1 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall can
     do nontemporary accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                    /* scalar_stmt_cost.  */
  2,                    /* scalar load_cost.  */
  2,                    /* scalar_store_cost.  */
  6,                    /* vec_stmt_cost.  */
  0,                    /* vec_to_scalar_cost.  */
  2,                    /* scalar_to_vec_cost.  */
  2,                    /* vec_align_load_cost.  */
  2,                    /* vec_unalign_load_cost.  */
  2,                    /* vec_store_cost.  */
  2,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs pentium4_cost = {
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (3),    /* cost of a lea instruction */
  COSTS_N_INSNS (4),    /* variable shift costs */
  COSTS_N_INSNS (4),    /* constant shift costs */
  {COSTS_N_INSNS (15),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (15),  /* HI */
   COSTS_N_INSNS (15),  /* SI */
   COSTS_N_INSNS (15),  /* DI */
   COSTS_N_INSNS (15)}, /* other */
  0,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (56),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (56),  /* HI */
   COSTS_N_INSNS (56),  /* SI */
   COSTS_N_INSNS (56),  /* DI */
   COSTS_N_INSNS (56)}, /* other */
  COSTS_N_INSNS (1),    /* cost of movsx */
  COSTS_N_INSNS (1),    /* cost of movzx */
  16,                   /* "large" insn */
  6,                    /* MOVE_RATIO */
  2,                    /* cost for loading QImode using movzbl */
  {4, 5, 4},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {2, 3, 2},            /* cost of storing integer registers */
  2,                    /* cost of reg,reg fld/fst */
  {2, 2, 6},            /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {4, 4, 6},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {2, 2},               /* cost of loading MMX registers
                           in SImode and DImode */
  {2, 2},               /* cost of storing MMX registers
                           in SImode and DImode */
  12,                   /* cost of moving SSE register */
  {12, 12, 12},         /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {2, 2, 8},            /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  10,                   /* MMX or SSE register to integer */
  8,                    /* size of l1 cache.  */
  256,                  /* size of l2 cache.  */
  64,                   /* size of prefetch block */
  6,                    /* number of parallel prefetches */
  2,                    /* Branch cost */
  COSTS_N_INSNS (5),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (7),    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (43),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (43),   /* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
              {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                    /* scalar_stmt_cost.  */
  1,                    /* scalar load_cost.  */
  1,                    /* scalar_store_cost.  */
  1,                    /* vec_stmt_cost.  */
  1,                    /* vec_to_scalar_cost.  */
  1,                    /* scalar_to_vec_cost.  */
  1,                    /* vec_align_load_cost.  */
  2,                    /* vec_unalign_load_cost.  */
  1,                    /* vec_store_cost.  */
  3,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs nocona_cost = {
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (1),    /* cost of a lea instruction */
  COSTS_N_INSNS (1),    /* variable shift costs */
  COSTS_N_INSNS (1),    /* constant shift costs */
  {COSTS_N_INSNS (10),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (10),  /* HI */
   COSTS_N_INSNS (10),  /* SI */
   COSTS_N_INSNS (10),  /* DI */
   COSTS_N_INSNS (10)}, /* other */
  0,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (66),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (66),  /* HI */
   COSTS_N_INSNS (66),  /* SI */
   COSTS_N_INSNS (66),  /* DI */
   COSTS_N_INSNS (66)}, /* other */
  COSTS_N_INSNS (1),    /* cost of movsx */
  COSTS_N_INSNS (1),    /* cost of movzx */
  16,                   /* "large" insn */
  17,                   /* MOVE_RATIO */
  4,                    /* cost for loading QImode using movzbl */
  {4, 4, 4},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {4, 4, 4},            /* cost of storing integer registers */
  3,                    /* cost of reg,reg fld/fst */
  {12, 12, 12},         /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {4, 4, 4},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  6,                    /* cost of moving MMX register */
  {12, 12},             /* cost of loading MMX registers
                           in SImode and DImode */
  {12, 12},             /* cost of storing MMX registers
                           in SImode and DImode */
  6,                    /* cost of moving SSE register */
  {12, 12, 12},         /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {12, 12, 12},         /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  8,                    /* MMX or SSE register to integer */
  8,                    /* size of l1 cache.  */
  1024,                 /* size of l2 cache.  */
  128,                  /* size of prefetch block */
  8,                    /* number of parallel prefetches */
  1,                    /* Branch cost */
  COSTS_N_INSNS (6),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (40),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (3),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (44),   /* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
              {100000, unrolled_loop}, {-1, libcall}}}},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
              {-1, libcall}}},
   {libcall, {{24, loop}, {64, unrolled_loop},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,                    /* scalar_stmt_cost.  */
  1,                    /* scalar load_cost.  */
  1,                    /* scalar_store_cost.  */
  1,                    /* vec_stmt_cost.  */
  1,                    /* vec_to_scalar_cost.  */
  1,                    /* scalar_to_vec_cost.  */
  1,                    /* vec_align_load_cost.  */
  2,                    /* vec_unalign_load_cost.  */
  1,                    /* vec_store_cost.  */
  3,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs atom_cost = {
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,        /* cost of a lea instruction */
  COSTS_N_INSNS (1),    /* variable shift costs */
  COSTS_N_INSNS (1),    /* constant shift costs */
  {COSTS_N_INSNS (3),   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),   /* HI */
   COSTS_N_INSNS (3),   /* SI */
   COSTS_N_INSNS (4),   /* DI */
   COSTS_N_INSNS (2)},  /* other */
  0,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),  /* HI */
   COSTS_N_INSNS (42),  /* SI */
   COSTS_N_INSNS (74),  /* DI */
   COSTS_N_INSNS (74)}, /* other */
  COSTS_N_INSNS (1),    /* cost of movsx */
  COSTS_N_INSNS (1),    /* cost of movzx */
  8,                    /* "large" insn */
  17,                   /* MOVE_RATIO */
  2,                    /* cost for loading QImode using movzbl */
  {4, 4, 4},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {4, 4, 4},            /* cost of storing integer registers */
  4,                    /* cost of reg,reg fld/fst */
  {12, 12, 12},         /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {6, 6, 8},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {8, 8},               /* cost of loading MMX registers
                           in SImode and DImode */
  {8, 8},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {8, 8, 8},            /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {8, 8, 8},            /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  5,                    /* MMX or SSE register to integer */
  32,                   /* size of l1 cache.  */
  256,                  /* size of l2 cache.  */
  64,                   /* size of prefetch block */
  6,                    /* number of parallel prefetches */
  3,                    /* Branch cost */
  COSTS_N_INSNS (8),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (8),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),   /* cost of FSQRT instruction.  */
  {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {64, rep_prefix_4_byte},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {15, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{24, loop}, {32, unrolled_loop},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,                    /* scalar_stmt_cost.  */
  1,                    /* scalar load_cost.  */
  1,                    /* scalar_store_cost.  */
  1,                    /* vec_stmt_cost.  */
  1,                    /* vec_to_scalar_cost.  */
  1,                    /* scalar_to_vec_cost.  */
  1,                    /* vec_align_load_cost.  */
  2,                    /* vec_unalign_load_cost.  */
  1,                    /* vec_store_cost.  */
  3,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};
1640 /* Generic64 should produce code tuned for Nocona and K8. */
1642 struct processor_costs generic64_cost = {
1643 COSTS_N_INSNS (1), /* cost of an add instruction */
1644 /* On all chips taken into consideration lea is 2 cycles and more. With
1645 this cost however our current implementation of synth_mult results in
1646 use of unnecessary temporary registers causing regression on several
1647 SPECfp benchmarks. */
1648 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1649 COSTS_N_INSNS (1), /* variable shift costs */
1650 COSTS_N_INSNS (1), /* constant shift costs */
1651 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1652 COSTS_N_INSNS (4), /* HI */
1653 COSTS_N_INSNS (3), /* SI */
1654 COSTS_N_INSNS (4), /* DI */
1655 COSTS_N_INSNS (2)}, /* other */
1656 0, /* cost of multiply per each bit set */
1657 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1658 COSTS_N_INSNS (26), /* HI */
1659 COSTS_N_INSNS (42), /* SI */
1660 COSTS_N_INSNS (74), /* DI */
1661 COSTS_N_INSNS (74)}, /* other */
1662 COSTS_N_INSNS (1), /* cost of movsx */
1663 COSTS_N_INSNS (1), /* cost of movzx */
1664 8, /* "large" insn */
1665 17, /* MOVE_RATIO */
1666 4, /* cost for loading QImode using movzbl */
1667 {4, 4, 4}, /* cost of loading integer registers
1668 in QImode, HImode and SImode.
1669 Relative to reg-reg move (2). */
1670 {4, 4, 4}, /* cost of storing integer registers */
1671 4, /* cost of reg,reg fld/fst */
1672 {12, 12, 12}, /* cost of loading fp registers
1673 in SFmode, DFmode and XFmode */
1674 {6, 6, 8}, /* cost of storing fp registers
1675 in SFmode, DFmode and XFmode */
1676 2, /* cost of moving MMX register */
1677 {8, 8}, /* cost of loading MMX registers
1678 in SImode and DImode */
1679 {8, 8}, /* cost of storing MMX registers
1680 in SImode and DImode */
1681 2, /* cost of moving SSE register */
1682 {8, 8, 8}, /* cost of loading SSE registers
1683 in SImode, DImode and TImode */
1684 {8, 8, 8}, /* cost of storing SSE registers
1685 in SImode, DImode and TImode */
1686 5, /* MMX or SSE register to integer */
1687 32, /* size of l1 cache. */
1688 512, /* size of l2 cache. */
1689 64, /* size of prefetch block */
1690 6, /* number of parallel prefetches */
1691 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1692 value is increased to the perhaps more appropriate value of 5. */
1693 3, /* Branch cost */
1694 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1695 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1696 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1697 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1698 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1699 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1700 {DUMMY_STRINGOP_ALGS,
1701 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1702 {DUMMY_STRINGOP_ALGS,
1703 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1704 1, /* scalar_stmt_cost. */
1705 1, /* scalar load_cost. */
1706 1, /* scalar_store_cost. */
1707 1, /* vec_stmt_cost. */
1708 1, /* vec_to_scalar_cost. */
1709 1, /* scalar_to_vec_cost. */
1710 1, /* vec_align_load_cost. */
1711 2, /* vec_unalign_load_cost. */
1712 1, /* vec_store_cost. */
1713 3, /* cond_taken_branch_cost. */
1714 1, /* cond_not_taken_branch_cost. */
1715 };
1717 /* Generic32 should produce code tuned for PPro, Pentium4, Nocona,
1718 Core 2 and K8. */
1720 struct processor_costs generic32_cost = {
1721 COSTS_N_INSNS (1), /* cost of an add instruction */
1722 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1723 COSTS_N_INSNS (1), /* variable shift costs */
1724 COSTS_N_INSNS (1), /* constant shift costs */
1725 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1726 COSTS_N_INSNS (4), /* HI */
1727 COSTS_N_INSNS (3), /* SI */
1728 COSTS_N_INSNS (4), /* DI */
1729 COSTS_N_INSNS (2)}, /* other */
1730 0, /* cost of multiply per each bit set */
1731 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1732 COSTS_N_INSNS (26), /* HI */
1733 COSTS_N_INSNS (42), /* SI */
1734 COSTS_N_INSNS (74), /* DI */
1735 COSTS_N_INSNS (74)}, /* other */
1736 COSTS_N_INSNS (1), /* cost of movsx */
1737 COSTS_N_INSNS (1), /* cost of movzx */
1738 8, /* "large" insn */
1739 17, /* MOVE_RATIO */
1740 4, /* cost for loading QImode using movzbl */
1741 {4, 4, 4}, /* cost of loading integer registers
1742 in QImode, HImode and SImode.
1743 Relative to reg-reg move (2). */
1744 {4, 4, 4}, /* cost of storing integer registers */
1745 4, /* cost of reg,reg fld/fst */
1746 {12, 12, 12}, /* cost of loading fp registers
1747 in SFmode, DFmode and XFmode */
1748 {6, 6, 8}, /* cost of storing fp registers
1749 in SFmode, DFmode and XFmode */
1750 2, /* cost of moving MMX register */
1751 {8, 8}, /* cost of loading MMX registers
1752 in SImode and DImode */
1753 {8, 8}, /* cost of storing MMX registers
1754 in SImode and DImode */
1755 2, /* cost of moving SSE register */
1756 {8, 8, 8}, /* cost of loading SSE registers
1757 in SImode, DImode and TImode */
1758 {8, 8, 8}, /* cost of storing SSE registers
1759 in SImode, DImode and TImode */
1760 5, /* MMX or SSE register to integer */
1761 32, /* size of l1 cache. */
1762 256, /* size of l2 cache. */
1763 64, /* size of prefetch block */
1764 6, /* number of parallel prefetches */
1765 3, /* Branch cost */
1766 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1767 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1768 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1769 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1770 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1771 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1772 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1773 DUMMY_STRINGOP_ALGS},
1774 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1775 DUMMY_STRINGOP_ALGS},
1776 1, /* scalar_stmt_cost. */
1777 1, /* scalar load_cost. */
1778 1, /* scalar_store_cost. */
1779 1, /* vec_stmt_cost. */
1780 1, /* vec_to_scalar_cost. */
1781 1, /* scalar_to_vec_cost. */
1782 1, /* vec_align_load_cost. */
1783 2, /* vec_unalign_load_cost. */
1784 1, /* vec_store_cost. */
1785 3, /* cond_taken_branch_cost. */
1786 1, /* cond_not_taken_branch_cost. */
1787 };
1789 const struct processor_costs *ix86_cost = &pentium_cost;
1791 /* Processor feature/optimization bitmasks. */
1792 #define m_386 (1<<PROCESSOR_I386)
1793 #define m_486 (1<<PROCESSOR_I486)
1794 #define m_PENT (1<<PROCESSOR_PENTIUM)
1795 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1796 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1797 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1798 #define m_CORE2_32 (1<<PROCESSOR_CORE2_32)
1799 #define m_CORE2_64 (1<<PROCESSOR_CORE2_64)
1800 #define m_COREI7_32 (1<<PROCESSOR_COREI7_32)
1801 #define m_COREI7_64 (1<<PROCESSOR_COREI7_64)
1802 #define m_COREI7 (m_COREI7_32 | m_COREI7_64)
1803 #define m_CORE2I7_32 (m_CORE2_32 | m_COREI7_32)
1804 #define m_CORE2I7_64 (m_CORE2_64 | m_COREI7_64)
1805 #define m_CORE2I7 (m_CORE2I7_32 | m_CORE2I7_64)
1806 #define m_ATOM (1<<PROCESSOR_ATOM)
1808 #define m_GEODE (1<<PROCESSOR_GEODE)
1809 #define m_K6 (1<<PROCESSOR_K6)
1810 #define m_K6_GEODE (m_K6 | m_GEODE)
1811 #define m_K8 (1<<PROCESSOR_K8)
1812 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1813 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1814 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1815 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
1816 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
1817 #define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10 | m_BDVER1 | m_BTVER1)
1819 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1820 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1822 /* Generic instruction choice should be a common subset of supported CPUs
1823 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1824 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1826 /* Feature tests against the various tunings. */
1827 unsigned char ix86_tune_features[X86_TUNE_LAST];
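/* A minimal sketch (assuming the derivation done at option-override
   time) of how the m_* masks above become this boolean array: each
   initial_ix86_tune_features[] entry is tested against the single bit
   selected by the active -mtune processor.  */
#if 0
ix86_tune_mask = 1u << ix86_tune;
for (i = 0; i < X86_TUNE_LAST; ++i)
  ix86_tune_features[i]
    = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
#endif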
1829 /* Feature tests against the various tunings used to create ix86_tune_features
1830 based on the processor mask. */
1831 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
1832 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1833 negatively, so enabling it for Generic64 seems like a good code size
1834 tradeoff. We can't enable it for 32bit generic because it does not
1835 work well with PPro based chips. */
1836 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2I7_64 | m_GENERIC64,
1838 /* X86_TUNE_PUSH_MEMORY */
1839 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1840 | m_NOCONA | m_CORE2I7 | m_GENERIC,
1842 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1845 /* X86_TUNE_UNROLL_STRLEN */
1846 m_486 | m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_K6
1847 | m_CORE2I7 | m_GENERIC,
1849 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1850 m_ATOM | m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1851 | m_CORE2I7 | m_GENERIC,
1853 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1854 on simulation results. But after P4 was made, no performance benefit
1855 was observed with branch hints. They also increase code size.
1856 As a result, icc never generates branch hints. */
1859 /* X86_TUNE_DOUBLE_WITH_ADD */
1862 /* X86_TUNE_USE_SAHF */
1863 m_ATOM | m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER1 | m_BTVER1
1864 | m_PENT4 | m_NOCONA | m_CORE2I7 | m_GENERIC,
1866 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1867 partial dependencies. */
1868 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA
1869 | m_CORE2I7 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1871 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1872 register stalls on the Generic32 compilation setting as well. However,
1873 in the current implementation partial register stalls are not eliminated
1874 very well - they can be introduced via subregs synthesized by combine
1875 and can happen in caller/callee saving sequences. Because this option
1876 pays back little on PPro based chips and conflicts with the partial-reg
1877 dependencies used by Athlon/P4 based chips, it is better to leave it off
1878 for generic32 for now. */
1881 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1882 m_CORE2I7 | m_GENERIC,
1884 /* X86_TUNE_USE_HIMODE_FIOP */
1885 m_386 | m_486 | m_K6_GEODE,
1887 /* X86_TUNE_USE_SIMODE_FIOP */
1888 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_ATOM | m_CORE2I7 | m_GENERIC),
1890 /* X86_TUNE_USE_MOV0 */
1893 /* X86_TUNE_USE_CLTD */
1894 ~(m_PENT | m_ATOM | m_K6 | m_CORE2I7 | m_GENERIC),
1896 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1899 /* X86_TUNE_SPLIT_LONG_MOVES */
1902 /* X86_TUNE_READ_MODIFY_WRITE */
1905 /* X86_TUNE_READ_MODIFY */
1908 /* X86_TUNE_PROMOTE_QIMODE */
1909 m_K6_GEODE | m_PENT | m_ATOM | m_386 | m_486 | m_AMD_MULTIPLE
1910 | m_CORE2I7 | m_GENERIC /* | m_PENT4 ? */,
1912 /* X86_TUNE_FAST_PREFIX */
1913 ~(m_PENT | m_486 | m_386),
1915 /* X86_TUNE_SINGLE_STRINGOP */
1916 m_386 | m_PENT4 | m_NOCONA,
1918 /* X86_TUNE_QIMODE_MATH */
1921 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1922 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1923 might be considered for Generic32 if our scheme for avoiding partial
1924 stalls were more effective. */
1927 /* X86_TUNE_PROMOTE_QI_REGS */
1930 /* X86_TUNE_PROMOTE_HI_REGS */
1933 /* X86_TUNE_SINGLE_POP: Enable if single pop insn is preferred
1934 over esp addition. */
1935 m_386 | m_486 | m_PENT | m_PPRO,
1937 /* X86_TUNE_DOUBLE_POP: Enable if double pop insn is preferred
1938 over esp addition. */
1941 /* X86_TUNE_SINGLE_PUSH: Enable if single push insn is preferred
1942 over esp subtraction. */
1943 m_386 | m_486 | m_PENT | m_K6_GEODE,
1945 /* X86_TUNE_DOUBLE_PUSH. Enable if double push insn is preferred
1946 over esp subtraction. */
1947 m_PENT | m_K6_GEODE,
1949 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1950 for DFmode copies */
1951 ~(m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2I7
1952 | m_GENERIC | m_GEODE),
1954 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1955 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2I7 | m_GENERIC,
1957 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1958 conflict here between PPro/Pentium4 based chips that treat 128bit
1959 SSE registers as single units and K8 based chips that divide SSE
1960 registers into two 64bit halves. This knob promotes all store destinations
1961 to be 128bit to allow register renaming on 128bit SSE units, but usually
1962 results in one extra microop on 64bit SSE units. Experimental results
1963 show that disabling this option on P4 brings over 20% SPECfp regression,
1964 while enabling it on K8 brings roughly 2.4% regression that can be partly
1965 masked by careful scheduling of moves. */
1966 m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2I7 | m_GENERIC
1967 | m_AMDFAM10 | m_BDVER1,
1969 /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
1970 m_AMDFAM10 | m_BDVER1 | m_BTVER1 | m_COREI7,
1972 /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
1973 m_BDVER1 | m_COREI7,
1975 /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
1978 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1979 are resolved on SSE register parts instead of whole registers, so we may
1980 maintain just the lower part of scalar values in the proper format, leaving
1981 the upper part undefined. */
1984 /* X86_TUNE_SSE_TYPELESS_STORES */
1987 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1988 m_PPRO | m_PENT4 | m_NOCONA,
1990 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1991 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2I7 | m_GENERIC,
1993 /* X86_TUNE_PROLOGUE_USING_MOVE */
1994 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2I7 | m_GENERIC,
1996 /* X86_TUNE_EPILOGUE_USING_MOVE */
1997 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2I7 | m_GENERIC,
1999 /* X86_TUNE_SHIFT1 */
2002 /* X86_TUNE_USE_FFREEP */
2005 /* X86_TUNE_INTER_UNIT_MOVES */
2006 ~(m_AMD_MULTIPLE | m_GENERIC),
2008 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
2009 ~(m_AMDFAM10 | m_BDVER1),
2011 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
2012 than 4 branch instructions in the 16 byte window. */
2013 m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2I7
2016 /* X86_TUNE_SCHEDULE */
2017 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_ATOM | m_CORE2I7
2020 /* X86_TUNE_USE_BT */
2021 m_AMD_MULTIPLE | m_ATOM | m_CORE2I7 | m_GENERIC,
2023 /* X86_TUNE_USE_INCDEC */
2024 ~(m_PENT4 | m_NOCONA | m_CORE2I7 | m_GENERIC | m_ATOM),
2026 /* X86_TUNE_PAD_RETURNS */
2027 m_AMD_MULTIPLE | m_CORE2I7 | m_GENERIC,
2029 /* X86_TUNE_PAD_SHORT_FUNCTION: Pad short functions. */
2032 /* X86_TUNE_EXT_80387_CONSTANTS */
2033 m_K6_GEODE | m_ATHLON_K8 | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO
2034 | m_CORE2I7 | m_GENERIC,
2036 /* X86_TUNE_SHORTEN_X87_SSE */
2039 /* X86_TUNE_AVOID_VECTOR_DECODE */
2040 m_K8 | m_CORE2I7_64 | m_GENERIC64,
2042 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for
2043 HImode and SImode multiply, but the 386 and 486 do HImode multiply faster. */
2046 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
2047 vector path on AMD machines. */
2048 m_K8 | m_CORE2I7_64 | m_GENERIC64 | m_AMDFAM10 | m_BDVER1 | m_BTVER1,
2050 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
2051 machines. */
2052 m_K8 | m_CORE2I7_64 | m_GENERIC64 | m_AMDFAM10 | m_BDVER1 | m_BTVER1,
2054 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
2055 than a MOV. */
2058 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
2059 but one byte longer. */
2062 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with a
2063 memory operand that cannot be represented using a modRM byte. The XOR
2064 replacement is long decoded, so this split helps here as well. */
2067 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
2068 from FP to FP. */
2069 m_AMDFAM10 | m_CORE2I7 | m_GENERIC,
2071 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
2072 from integer to FP. */
2075 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
2076 with a subsequent conditional jump instruction into a single
2077 compare-and-branch uop. */
2080 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
2081 will impact LEA instruction selection. */
2084 /* X86_TUNE_VECTORIZE_DOUBLE: Enable double precision vector
2085 instructions. */
2088 /* X86_TUNE_SOFTWARE_PREFETCHING_BENEFICIAL: Enable software prefetching
2089 at -O3. For the moment, the prefetching seems badly tuned for Intel
2090 chips. */
2091 m_K6_GEODE | m_AMD_MULTIPLE
2094 /* Feature tests against the various architecture variations. */
2095 unsigned char ix86_arch_features[X86_ARCH_LAST];
2097 /* Feature tests against the various architecture variations, used to create
2098 ix86_arch_features based on the processor mask. */
2099 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2100 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
2101 ~(m_386 | m_486 | m_PENT | m_K6),
2103 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2106 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2109 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2112 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2116 static const unsigned int x86_accumulate_outgoing_args
2117 = m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2I7
2120 static const unsigned int x86_arch_always_fancy_math_387
2121 = m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4
2122 | m_NOCONA | m_CORE2I7 | m_GENERIC;
2124 /* In case the average insn count for single function invocation is
2125 lower than this constant, emit fast (but longer) prologue and
2126 epilogue code. */
2127 #define FAST_PROLOGUE_INSN_COUNT 20
2129 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
2130 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2131 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2132 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2134 /* Array of the smallest class containing reg number REGNO, indexed by
2135 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2137 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2139 /* ax, dx, cx, bx */
2140 AREG, DREG, CREG, BREG,
2141 /* si, di, bp, sp */
2142 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2144 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2145 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2148 /* flags, fpsr, fpcr, frame */
2149 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2151 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2154 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2157 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2158 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2159 /* SSE REX registers */
2160 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2164 /* The "default" register map used in 32bit mode. */
2166 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2168 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2169 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2170 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2171 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2172 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2173 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2174 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2177 /* The "default" register map used in 64bit mode. */
2179 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2181 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2182 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2183 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2184 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2185 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2186 8,9,10,11,12,13,14,15, /* extended integer registers */
2187 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2190 /* Define the register numbers to be used in Dwarf debugging information.
2191 The SVR4 reference port C compiler uses the following register numbers
2192 in its Dwarf output code:
2193 0 for %eax (gcc regno = 0)
2194 1 for %ecx (gcc regno = 2)
2195 2 for %edx (gcc regno = 1)
2196 3 for %ebx (gcc regno = 3)
2197 4 for %esp (gcc regno = 7)
2198 5 for %ebp (gcc regno = 6)
2199 6 for %esi (gcc regno = 4)
2200 7 for %edi (gcc regno = 5)
2201 The following three DWARF register numbers are never generated by
2202 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2203 believes these numbers have these meanings.
2204 8 for %eip (no gcc equivalent)
2205 9 for %eflags (gcc regno = 17)
2206 10 for %trapno (no gcc equivalent)
2207 It is not at all clear how we should number the FP stack registers
2208 for the x86 architecture. If the version of SDB on x86/svr4 were
2209 a bit less brain dead with respect to floating-point then we would
2210 have a precedent to follow with respect to DWARF register numbers
2211 for x86 FP registers, but the SDB on x86/svr4 is so completely
2212 broken with respect to FP registers that it is hardly worth thinking
2213 of it as something to strive for compatibility with.
2214 The version of x86/svr4 SDB I have at the moment does (partially)
2215 seem to believe that DWARF register number 11 is associated with
2216 the x86 register %st(0), but that's about all. Higher DWARF
2217 register numbers don't seem to be associated with anything in
2218 particular, and even for DWARF regno 11, SDB only seems to under-
2219 stand that it should say that a variable lives in %st(0) (when
2220 asked via an `=' command) if we said it was in DWARF regno 11,
2221 but SDB still prints garbage when asked for the value of the
2222 variable in question (via a `/' command).
2223 (Also note that the labels SDB prints for various FP stack regs
2224 when doing an `x' command are all wrong.)
2225 Note that these problems generally don't affect the native SVR4
2226 C compiler because it doesn't allow the use of -O with -g and
2227 because when it is *not* optimizing, it allocates a memory
2228 location for each floating-point variable, and the memory
2229 location is what gets described in the DWARF AT_location
2230 attribute for the variable in question.
2231 Regardless of the severe mental illness of the x86/svr4 SDB, we
2232 do something sensible here and we use the following DWARF
2233 register numbers. Note that these are all stack-top-relative
2234 numbers:
2235 11 for %st(0) (gcc regno = 8)
2236 12 for %st(1) (gcc regno = 9)
2237 13 for %st(2) (gcc regno = 10)
2238 14 for %st(3) (gcc regno = 11)
2239 15 for %st(4) (gcc regno = 12)
2240 16 for %st(5) (gcc regno = 13)
2241 17 for %st(6) (gcc regno = 14)
2242 18 for %st(7) (gcc regno = 15)
2244 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2246 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2247 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2248 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2249 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2250 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2251 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2252 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2253 };
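/* Worked example of the map above: %esi is gcc regno 4 and entry 4 of
   svr4_dbx_register_map is 6, matching "6 for %esi" in the table in
   the comment; %st(2) is gcc regno 10 and entry 10 is 13, the
   stack-top-relative DWARF number given above.  */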
2255 /* Define parameter passing and return registers. */
2257 static int const x86_64_int_parameter_registers[6] =
2259 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2262 static int const x86_64_ms_abi_int_parameter_registers[4] =
2264 CX_REG, DX_REG, R8_REG, R9_REG
2267 static int const x86_64_int_return_registers[4] =
2269 AX_REG, DX_REG, DI_REG, SI_REG
2270 };
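/* Worked example (standard SysV x86-64 psABI behavior): for
     extern long f (long a, long b, long c);
   the arguments are passed in %rdi, %rsi and %rdx, i.e. the first
   three entries of x86_64_int_parameter_registers, while the same
   call under the MS ABI uses %rcx, %rdx and %r8 from the table
   above.  */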
2272 /* Define the structure for the machine field in struct function. */
2274 struct GTY(()) stack_local_entry {
2275 unsigned short mode;
2278 struct stack_local_entry *next;
2281 /* Structure describing stack frame layout.
2282 Stack grows downward:
2283
2284 [arguments]
2285 <- ARG_POINTER
2286 saved pc
2287
2288 saved static chain if ix86_static_chain_on_stack
2289
2290 saved frame pointer if frame_pointer_needed
2291 <- HARD_FRAME_POINTER
2292 [saved regs]
2293 <- reg_save_offset
2294 [padding0]
2295
2296 [saved SSE regs]
2297 <- sse_regs_save_offset
2298 [padding1] |
2299 | <- FRAME_POINTER
2300 [va_arg registers] |
2301 |
2302 [frame] |
2303 |
2304 [padding2] | = to_allocate
2305 <- STACK_POINTER
2306 */
2307
2308 struct ix86_frame
2309 {
2313 int outgoing_arguments_size;
2314 HOST_WIDE_INT frame;
2316 /* The offsets relative to ARG_POINTER. */
2317 HOST_WIDE_INT frame_pointer_offset;
2318 HOST_WIDE_INT hard_frame_pointer_offset;
2319 HOST_WIDE_INT stack_pointer_offset;
2320 HOST_WIDE_INT hfp_save_offset;
2321 HOST_WIDE_INT reg_save_offset;
2322 HOST_WIDE_INT sse_reg_save_offset;
2324 /* When save_regs_using_mov is set, emit prologue using
2325 move instead of push instructions. */
2326 bool save_regs_using_mov;
2329 /* Which cpu are we scheduling for. */
2330 enum attr_cpu ix86_schedule;
2332 /* Which cpu are we optimizing for. */
2333 enum processor_type ix86_tune;
2335 /* Which instruction set architecture to use. */
2336 enum processor_type ix86_arch;
2338 /* true if sse prefetch instruction is not NOOP. */
2339 int x86_prefetch_sse;
2341 /* -mstackrealign option */
2342 static const char ix86_force_align_arg_pointer_string[]
2343 = "force_align_arg_pointer";
2345 static rtx (*ix86_gen_leave) (void);
2346 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2347 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2348 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2349 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2350 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2351 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2352 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2353 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2354 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2356 /* Preferred alignment for stack boundary in bits. */
2357 unsigned int ix86_preferred_stack_boundary;
2359 /* Alignment for incoming stack boundary in bits specified at
2360 command line. */
2361 static unsigned int ix86_user_incoming_stack_boundary;
2363 /* Default alignment for incoming stack boundary in bits. */
2364 static unsigned int ix86_default_incoming_stack_boundary;
2366 /* Alignment for incoming stack boundary in bits. */
2367 unsigned int ix86_incoming_stack_boundary;
2369 /* Calling abi specific va_list type nodes. */
2370 static GTY(()) tree sysv_va_list_type_node;
2371 static GTY(()) tree ms_va_list_type_node;
2373 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2374 char internal_label_prefix[16];
2375 int internal_label_prefix_len;
2377 /* Fence to use after loop using movnt. */
2380 /* Register class used for passing a given 64bit part of the argument.
2381 These represent classes as documented by the psABI, with the exception
2382 of the SSESF and SSEDF classes, which are basically the SSE class,
2383 except that gcc will use SF or DFmode moves instead of DImode to
2384 avoid reformatting penalties.
2385 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2386 whenever possible (the upper half then contains only padding). */
2387 enum x86_64_reg_class
2390 X86_64_INTEGER_CLASS,
2391 X86_64_INTEGERSI_CLASS,
2398 X86_64_COMPLEX_X87_CLASS,
2402 #define MAX_CLASSES 4
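/* Illustrative classification (psABI rules, not code from this file):
   a plain "double" argument classifies as SSEDF, so it moves in DFmode
   and lands in an XMM register, while "__int128" occupies two
   eightbytes, both X86_64_INTEGER_CLASS, and is passed in a pair of
   general registers.  MAX_CLASSES is 4 presumably because the largest
   classified object (32 bytes) decomposes into four eightbytes.  */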
2404 /* Table of constants used by fldpi, fldln2, etc.... */
2405 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2406 static bool ext_80387_constants_init = 0;
2409 static struct machine_function * ix86_init_machine_status (void);
2410 static rtx ix86_function_value (const_tree, const_tree, bool);
2411 static bool ix86_function_value_regno_p (const unsigned int);
2412 static unsigned int ix86_function_arg_boundary (enum machine_mode,
2414 static rtx ix86_static_chain (const_tree, bool);
2415 static int ix86_function_regparm (const_tree, const_tree);
2416 static void ix86_compute_frame_layout (struct ix86_frame *);
2417 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
2419 static void ix86_add_new_builtins (int);
2420 static rtx ix86_expand_vec_perm_builtin (tree);
2421 static tree ix86_canonical_va_list_type (tree);
2422 static void predict_jump (int);
2423 static unsigned int split_stack_prologue_scratch_regno (void);
2424 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2426 enum ix86_function_specific_strings
2428 IX86_FUNCTION_SPECIFIC_ARCH,
2429 IX86_FUNCTION_SPECIFIC_TUNE,
2430 IX86_FUNCTION_SPECIFIC_MAX
2433 static char *ix86_target_string (int, int, const char *, const char *,
2434 enum fpmath_unit, bool);
2435 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
2436 static void ix86_function_specific_save (struct cl_target_option *);
2437 static void ix86_function_specific_restore (struct cl_target_option *);
2438 static void ix86_function_specific_print (FILE *, int,
2439 struct cl_target_option *);
2440 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2441 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2442 struct gcc_options *);
2443 static bool ix86_can_inline_p (tree, tree);
2444 static void ix86_set_current_function (tree);
2445 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2447 static enum calling_abi ix86_function_abi (const_tree);
2450 #ifndef SUBTARGET32_DEFAULT_CPU
2451 #define SUBTARGET32_DEFAULT_CPU "i386"
2454 /* The svr4 ABI for the i386 says that records and unions are returned
2455 in memory. */
2456 #ifndef DEFAULT_PCC_STRUCT_RETURN
2457 #define DEFAULT_PCC_STRUCT_RETURN 1
2460 /* Whether -mtune= or -march= were specified */
2461 static int ix86_tune_defaulted;
2462 static int ix86_arch_specified;
2464 /* Define a set of ISAs which are available when a given ISA is
2465 enabled. MMX and SSE ISAs are handled separately. */
2467 #define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
2468 #define OPTION_MASK_ISA_3DNOW_SET \
2469 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
2471 #define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
2472 #define OPTION_MASK_ISA_SSE2_SET \
2473 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
2474 #define OPTION_MASK_ISA_SSE3_SET \
2475 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
2476 #define OPTION_MASK_ISA_SSSE3_SET \
2477 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
2478 #define OPTION_MASK_ISA_SSE4_1_SET \
2479 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
2480 #define OPTION_MASK_ISA_SSE4_2_SET \
2481 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
2482 #define OPTION_MASK_ISA_AVX_SET \
2483 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_SSE4_2_SET)
2484 #define OPTION_MASK_ISA_FMA_SET \
2485 (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_AVX_SET)
2487 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
2488 as -msse4.2. */
2489 #define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
2491 #define OPTION_MASK_ISA_SSE4A_SET \
2492 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
2493 #define OPTION_MASK_ISA_FMA4_SET \
2494 (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_SSE4A_SET \
2495 | OPTION_MASK_ISA_AVX_SET)
2496 #define OPTION_MASK_ISA_XOP_SET \
2497 (OPTION_MASK_ISA_XOP | OPTION_MASK_ISA_FMA4_SET)
2498 #define OPTION_MASK_ISA_LWP_SET \
2501 /* AES and PCLMUL need SSE2 because they use xmm registers */
2502 #define OPTION_MASK_ISA_AES_SET \
2503 (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET)
2504 #define OPTION_MASK_ISA_PCLMUL_SET \
2505 (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET)
2507 #define OPTION_MASK_ISA_ABM_SET \
2508 (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
2510 #define OPTION_MASK_ISA_BMI_SET OPTION_MASK_ISA_BMI
2511 #define OPTION_MASK_ISA_TBM_SET OPTION_MASK_ISA_TBM
2512 #define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
2513 #define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
2514 #define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF
2515 #define OPTION_MASK_ISA_MOVBE_SET OPTION_MASK_ISA_MOVBE
2516 #define OPTION_MASK_ISA_CRC32_SET OPTION_MASK_ISA_CRC32
2518 #define OPTION_MASK_ISA_FSGSBASE_SET OPTION_MASK_ISA_FSGSBASE
2519 #define OPTION_MASK_ISA_RDRND_SET OPTION_MASK_ISA_RDRND
2520 #define OPTION_MASK_ISA_F16C_SET \
2521 (OPTION_MASK_ISA_F16C | OPTION_MASK_ISA_AVX_SET)
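/* Worked expansion of the SET closure above: enabling -mavx ORs in
   OPTION_MASK_ISA_AVX_SET, which through the chained definitions also
   turns on SSE4.2, SSE4.1, SSSE3, SSE3, SSE2 and SSE, so one option
   flips every ISA it depends on.  */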
2523 /* Define a set of ISAs which aren't available when a given ISA is
2524 disabled. MMX and SSE ISAs are handled separately. */
2526 #define OPTION_MASK_ISA_MMX_UNSET \
2527 (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
2528 #define OPTION_MASK_ISA_3DNOW_UNSET \
2529 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
2530 #define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
2532 #define OPTION_MASK_ISA_SSE_UNSET \
2533 (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
2534 #define OPTION_MASK_ISA_SSE2_UNSET \
2535 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
2536 #define OPTION_MASK_ISA_SSE3_UNSET \
2537 (OPTION_MASK_ISA_SSE3 \
2538 | OPTION_MASK_ISA_SSSE3_UNSET \
2539 | OPTION_MASK_ISA_SSE4A_UNSET )
2540 #define OPTION_MASK_ISA_SSSE3_UNSET \
2541 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
2542 #define OPTION_MASK_ISA_SSE4_1_UNSET \
2543 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
2544 #define OPTION_MASK_ISA_SSE4_2_UNSET \
2545 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET )
2546 #define OPTION_MASK_ISA_AVX_UNSET \
2547 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET \
2548 | OPTION_MASK_ISA_FMA4_UNSET | OPTION_MASK_ISA_F16C_UNSET)
2549 #define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA
2551 /* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should be the same
2552 as -mno-sse4.1. */
2553 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
2555 #define OPTION_MASK_ISA_SSE4A_UNSET \
2556 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_FMA4_UNSET)
2558 #define OPTION_MASK_ISA_FMA4_UNSET \
2559 (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_XOP_UNSET)
2560 #define OPTION_MASK_ISA_XOP_UNSET OPTION_MASK_ISA_XOP
2561 #define OPTION_MASK_ISA_LWP_UNSET OPTION_MASK_ISA_LWP
2563 #define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
2564 #define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
2565 #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
2566 #define OPTION_MASK_ISA_BMI_UNSET OPTION_MASK_ISA_BMI
2567 #define OPTION_MASK_ISA_TBM_UNSET OPTION_MASK_ISA_TBM
2568 #define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
2569 #define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
2570 #define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
2571 #define OPTION_MASK_ISA_MOVBE_UNSET OPTION_MASK_ISA_MOVBE
2572 #define OPTION_MASK_ISA_CRC32_UNSET OPTION_MASK_ISA_CRC32
2574 #define OPTION_MASK_ISA_FSGSBASE_UNSET OPTION_MASK_ISA_FSGSBASE
2575 #define OPTION_MASK_ISA_RDRND_UNSET OPTION_MASK_ISA_RDRND
2576 #define OPTION_MASK_ISA_F16C_UNSET OPTION_MASK_ISA_F16C
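/* Worked expansion of the UNSET closure: -mno-sse2 clears
   OPTION_MASK_ISA_SSE2_UNSET, which transitively covers SSE3, SSSE3,
   SSE4.1, SSE4.2, SSE4A, AVX, FMA, FMA4, XOP and F16C - everything
   that depends on SSE2 - while leaving unrelated bits such as LWP
   untouched.  */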
2578 /* Vectorization library interface and handlers. */
2579 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2581 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2582 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2584 /* Processor target table, indexed by processor number. */
2585 struct ptt
2586 {
2587 const struct processor_costs *cost; /* Processor costs */
2588 const int align_loop; /* Default alignments. */
2589 const int align_loop_max_skip;
2590 const int align_jump;
2591 const int align_jump_max_skip;
2592 const int align_func;
2593 };
2595 static const struct ptt processor_target_table[PROCESSOR_max] =
2596 {
2597 {&i386_cost, 4, 3, 4, 3, 4},
2598 {&i486_cost, 16, 15, 16, 15, 16},
2599 {&pentium_cost, 16, 7, 16, 7, 16},
2600 {&pentiumpro_cost, 16, 15, 16, 10, 16},
2601 {&geode_cost, 0, 0, 0, 0, 0},
2602 {&k6_cost, 32, 7, 32, 7, 32},
2603 {&athlon_cost, 16, 7, 16, 7, 16},
2604 {&pentium4_cost, 0, 0, 0, 0, 0},
2605 {&k8_cost, 16, 7, 16, 7, 16},
2606 {&nocona_cost, 0, 0, 0, 0, 0},
2607 /* Core 2 32-bit. */
2608 {&generic32_cost, 16, 10, 16, 10, 16},
2609 /* Core 2 64-bit. */
2610 {&generic64_cost, 16, 10, 16, 10, 16},
2611 /* Core i7 32-bit. */
2612 {&generic32_cost, 16, 10, 16, 10, 16},
2613 /* Core i7 64-bit. */
2614 {&generic64_cost, 16, 10, 16, 10, 16},
2615 {&generic32_cost, 16, 7, 16, 7, 16},
2616 {&generic64_cost, 16, 10, 16, 10, 16},
2617 {&amdfam10_cost, 32, 24, 32, 7, 32},
2618 {&bdver1_cost, 32, 24, 32, 7, 32},
2619 {&btver1_cost, 32, 24, 32, 7, 32},
2620 {&atom_cost, 16, 7, 16, 7, 16}
2621 };
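/* Sketch (assuming the option-override code applies the table roughly
   like this) of how the alignment columns are consumed when the user
   did not pass an explicit -falign-* option:  */
#if 0
if (align_loops == 0)
  {
    align_loops = processor_target_table[ix86_tune].align_loop;
    align_loops_max_skip
      = processor_target_table[ix86_tune].align_loop_max_skip;
  }
#endif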
2623 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
2652 /* Return true if a red-zone is in use. */
2654 static bool
2655 ix86_using_red_zone (void)
2656 {
2657 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2658 }
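/* Example of what the red zone buys (SysV x86-64 only - the MS ABI
   defines no red zone, hence the check above): a leaf function may
   freely use the 128 bytes below %rsp, so a small local array needs
   no stack-pointer adjustment at all.  */
#if 0
int
leaf_example (int x)
{
  int tmp[8];	/* 32 bytes; may live at -32(%rsp) with no sub/add
		   of %rsp emitted in the prologue/epilogue.  */
  tmp[x & 7] = x;
  return tmp[x & 7];
}
#endif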
2660 /* Implement TARGET_HANDLE_OPTION. */
2662 static bool
2663 ix86_handle_option (struct gcc_options *opts,
2664 struct gcc_options *opts_set ATTRIBUTE_UNUSED,
2665 const struct cl_decoded_option *decoded,
2666 location_t loc)
2667 {
2668 size_t code = decoded->opt_index;
2669 int value = decoded->value;
2676 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
2677 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
2681 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
2682 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
2689 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
2690 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
2694 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
2695 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
2705 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
2706 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
2710 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
2711 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
2718 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
2719 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
2723 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
2724 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
2731 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
2732 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
2736 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
2737 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
2744 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
2745 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
2749 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
2750 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
2757 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
2758 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
2762 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
2763 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
2770 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
2771 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
2775 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
2776 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
2783 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX_SET;
2784 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_SET;
2788 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_AVX_UNSET;
2789 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_UNSET;
2796 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA_SET;
2797 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_SET;
2801 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_FMA_UNSET;
2802 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_UNSET;
2807 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
2808 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
2812 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
2813 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
2819 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
2820 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
2824 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
2825 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
2832 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4_SET;
2833 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_SET;
2837 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_FMA4_UNSET;
2838 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_UNSET;
2845 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP_SET;
2846 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_SET;
2850 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_XOP_UNSET;
2851 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_UNSET;
2858 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP_SET;
2859 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_SET;
2863 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_LWP_UNSET;
2864 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_UNSET;
2871 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM_SET;
2872 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_SET;
2876 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_ABM_UNSET;
2877 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_UNSET;
2884 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI_SET;
2885 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_BMI_SET;
2889 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_BMI_UNSET;
2890 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_BMI_UNSET;
2897 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM_SET;
2898 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_TBM_SET;
2902 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_TBM_UNSET;
2903 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_TBM_UNSET;
2910 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT_SET;
2911 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_SET;
2915 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_POPCNT_UNSET;
2916 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_UNSET;
2923 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF_SET;
2924 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_SET;
2928 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_SAHF_UNSET;
2929 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_UNSET;
2936 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16_SET;
2937 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_SET;
2941 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_CX16_UNSET;
2942 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_UNSET;
2949 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVBE_SET;
2950 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_SET;
2954 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_MOVBE_UNSET;
2955 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_UNSET;
2962 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CRC32_SET;
2963 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_SET;
2967 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_CRC32_UNSET;
2968 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_UNSET;
2975 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AES_SET;
2976 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_SET;
2980 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_AES_UNSET;
2981 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_UNSET;
2988 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL_SET;
2989 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_SET;
2993 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_PCLMUL_UNSET;
2994 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_UNSET;
3001 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE_SET;
3002 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_FSGSBASE_SET;
3006 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_FSGSBASE_UNSET;
3007 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_FSGSBASE_UNSET;
3014 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND_SET;
3015 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_RDRND_SET;
3019 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_RDRND_UNSET;
3020 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_RDRND_UNSET;
3027 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C_SET;
3028 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_F16C_SET;
3032 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_F16C_UNSET;
3033 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_F16C_UNSET;
3034 }
3035 return true;
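/* Each ISA option above follows the same shape; a minimal sketch of
   one complete switch arm (using the MMX masks as the example),
   matching the SET/UNSET pairs defined earlier:  */
#if 0
case OPT_mmmx:
  if (value)
    {
      opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
      opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
    }
  else
    {
      opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
      opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
    }
  return true;
#endif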
3037 /* Comes from final.c -- no real reason to change it. */
3038 #define MAX_CODE_ALIGN 16
3040 case OPT_malign_loops_:
3041 warning_at (loc, 0, "-malign-loops is obsolete, use -falign-loops");
3042 if (value > MAX_CODE_ALIGN)
3043 error_at (loc, "-malign-loops=%d is not between 0 and %d",
3044 value, MAX_CODE_ALIGN);
3045 else
3046 opts->x_align_loops = 1 << value;
3047 return true;
3049 case OPT_malign_jumps_:
3050 warning_at (loc, 0, "-malign-jumps is obsolete, use -falign-jumps");
3051 if (value > MAX_CODE_ALIGN)
3052 error_at (loc, "-malign-jumps=%d is not between 0 and %d",
3053 value, MAX_CODE_ALIGN);
3054 else
3055 opts->x_align_jumps = 1 << value;
3056 return true;
3058 case OPT_malign_functions_:
3060 "-malign-functions is obsolete, use -falign-functions");
3061 if (value > MAX_CODE_ALIGN)
3062 error_at (loc, "-malign-functions=%d is not between 0 and %d",
3063 value, MAX_CODE_ALIGN);
3064 else
3065 opts->x_align_functions = 1 << value;
3066 return true;
3068 case OPT_mbranch_cost_:
3069 if (value > 5)
3070 {
3071 error_at (loc, "-mbranch-cost=%d is not between 0 and 5", value);
3072 opts->x_ix86_branch_cost = 5;
3073 }
3074 return true;
3081 /* Return a string that documents the current -m options. The caller is
3082 responsible for freeing the string. */
3085 ix86_target_string (int isa, int flags, const char *arch, const char *tune,
3086 enum fpmath_unit fpmath, bool add_nl_p)
3088 struct ix86_target_opts
3090 const char *option; /* option string */
3091 int mask; /* isa mask options */
3094 /* This table is ordered so that options like -msse4.2, which imply
3095 preceding options, match first. */
3096 static struct ix86_target_opts isa_opts[] =
3098 { "-m64", OPTION_MASK_ISA_64BIT },
3099 { "-mfma4", OPTION_MASK_ISA_FMA4 },
3100 { "-mfma", OPTION_MASK_ISA_FMA },
3101 { "-mxop", OPTION_MASK_ISA_XOP },
3102 { "-mlwp", OPTION_MASK_ISA_LWP },
3103 { "-msse4a", OPTION_MASK_ISA_SSE4A },
3104 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
3105 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
3106 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
3107 { "-msse3", OPTION_MASK_ISA_SSE3 },
3108 { "-msse2", OPTION_MASK_ISA_SSE2 },
3109 { "-msse", OPTION_MASK_ISA_SSE },
3110 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
3111 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
3112 { "-mmmx", OPTION_MASK_ISA_MMX },
3113 { "-mabm", OPTION_MASK_ISA_ABM },
3114 { "-mbmi", OPTION_MASK_ISA_BMI },
3115 { "-mtbm", OPTION_MASK_ISA_TBM },
3116 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
3117 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
3118 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
3119 { "-maes", OPTION_MASK_ISA_AES },
3120 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
3121 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
3122 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
3123 { "-mf16c", OPTION_MASK_ISA_F16C },
3127 static struct ix86_target_opts flag_opts[] =
3129 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
3130 { "-m80387", MASK_80387 },
3131 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
3132 { "-malign-double", MASK_ALIGN_DOUBLE },
3133 { "-mcld", MASK_CLD },
3134 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
3135 { "-mieee-fp", MASK_IEEE_FP },
3136 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
3137 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
3138 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
3139 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
3140 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
3141 { "-mno-push-args", MASK_NO_PUSH_ARGS },
3142 { "-mno-red-zone", MASK_NO_RED_ZONE },
3143 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
3144 { "-mrecip", MASK_RECIP },
3145 { "-mrtd", MASK_RTD },
3146 { "-msseregparm", MASK_SSEREGPARM },
3147 { "-mstack-arg-probe", MASK_STACK_PROBE },
3148 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
3149 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
3150 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
3151 { "-mvzeroupper", MASK_VZEROUPPER },
3152 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
3153 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
3156 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
3158 char isa_other[40];
3159 char target_other[40];
3160 unsigned num = 0;
3161 unsigned i, j;
3162 char *ret;
3163 char *ptr;
3164 size_t len;
3165 size_t line_len;
3166 size_t sep_len;
3168 memset (opts, '\0', sizeof (opts));
3170 /* Add -march= option. */
3173 opts[num][0] = "-march=";
3174 opts[num++][1] = arch;
3177 /* Add -mtune= option. */
3180 opts[num][0] = "-mtune=";
3181 opts[num++][1] = tune;
3184 /* Pick out the ISA options. */
3185 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
3187 if ((isa & isa_opts[i].mask) != 0)
3189 opts[num++][0] = isa_opts[i].option;
3190 isa &= ~ isa_opts[i].mask;
3194 if (isa && add_nl_p)
3196 opts[num++][0] = isa_other;
3197 sprintf (isa_other, "(other isa: %#x)", isa);
3200 /* Add flag options. */
3201 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
3203 if ((flags & flag_opts[i].mask) != 0)
3205 opts[num++][0] = flag_opts[i].option;
3206 flags &= ~ flag_opts[i].mask;
3210 if (flags && add_nl_p)
3212 opts[num++][0] = target_other;
3213 sprintf (target_other, "(other flags: %#x)", flags);
3216 /* Add -fpmath= option. */
3219 opts[num][0] = "-mfpmath=";
3220 switch ((int) fpmath)
3223 opts[num++][1] = "387";
3227 opts[num++][1] = "sse";
3230 case FPMATH_387 | FPMATH_SSE:
3231 opts[num++][1] = "sse+387";
3243 gcc_assert (num < ARRAY_SIZE (opts));
3245 /* Size the string. */
3246 len = 0;
3247 sep_len = (add_nl_p) ? 3 : 1;
3248 for (i = 0; i < num; i++)
3251 for (j = 0; j < 2; j++)
3253 len += strlen (opts[i][j]);
3256 /* Build the string. */
3257 ret = ptr = (char *) xmalloc (len);
3260 for (i = 0; i < num; i++)
3264 for (j = 0; j < 2; j++)
3265 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
3272 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
3280 for (j = 0; j < 2; j++)
3283 memcpy (ptr, opts[i][j], len2[j]);
3285 line_len += len2[j];
3290 gcc_assert (ret + len >= ptr);
3291
3292 return ret;
3293 }
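/* Example use (illustrative values, not a recorded output): a call
   like
     ix86_target_string (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE,
			 MASK_80387, "i686", "generic", FPMATH_387,
			 false);
   would yield a malloc'ed string along the lines of
     "-march=i686 -mtune=generic -msse2 -msse -m80387 -mfpmath=387"
   which the caller must free.  */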
3295 /* Return true if profiling code should be emitted before the
3296 prologue, false otherwise.
3297 Note: For x86, some "hotfix" (-mfentry) configurations are
3298 rejected with sorry (). */
3299 static bool
3300 ix86_profile_before_prologue (void)
3301 {
3302 return flag_fentry != 0;
3303 }
3304 /* Function that is callable from the debugger to print the current
3305 options. */
3306 static void
3307 ix86_debug_options (void)
3308 {
3309 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
3310 ix86_arch_string, ix86_tune_string,
3311 ix86_fpmath, true);
3312
3313 if (opts)
3314 {
3315 fprintf (stderr, "%s\n\n", opts);
3316 free (opts);
3317 }
3318 else
3319 fputs ("<no options>\n\n", stderr);
3320 }
3324 /* Override various settings based on options. If MAIN_ARGS_P, the
3325 options are from the command line, otherwise they are from
3326 attributes. */
3328 static void
3329 ix86_option_override_internal (bool main_args_p)
3330 {
3331 int i;
3332 unsigned int ix86_arch_mask, ix86_tune_mask;
3333 const bool ix86_tune_specified = (ix86_tune_string != NULL);
3344 PTA_PREFETCH_SSE = 1 << 4,
3346 PTA_3DNOW_A = 1 << 6,
3350 PTA_POPCNT = 1 << 10,
3352 PTA_SSE4A = 1 << 12,
3353 PTA_NO_SAHF = 1 << 13,
3354 PTA_SSE4_1 = 1 << 14,
3355 PTA_SSE4_2 = 1 << 15,
3357 PTA_PCLMUL = 1 << 17,
3360 PTA_MOVBE = 1 << 20,
3364 PTA_FSGSBASE = 1 << 24,
3365 PTA_RDRND = 1 << 25,
3369 /* if this reaches 32, need to widen struct pta flags below */
3374 const char *const name; /* processor name or nickname. */
3375 const enum processor_type processor;
3376 const enum attr_cpu schedule;
3377 const unsigned /*enum pta_flags*/ flags;
3379 const processor_alias_table[] =
3381 {"i386", PROCESSOR_I386, CPU_NONE, 0},
3382 {"i486", PROCESSOR_I486, CPU_NONE, 0},
3383 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3384 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3385 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
3386 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
3387 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
3388 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
3389 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
3390 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3391 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3392 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
3393 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3395 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3397 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3398 PTA_MMX | PTA_SSE | PTA_SSE2},
3399 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
3400 PTA_MMX |PTA_SSE | PTA_SSE2},
3401 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
3402 PTA_MMX | PTA_SSE | PTA_SSE2},
3403 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
3404 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
3405 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
3406 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3407 | PTA_CX16 | PTA_NO_SAHF},
3408 {"core2", PROCESSOR_CORE2_64, CPU_CORE2,
3409 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3410 | PTA_SSSE3 | PTA_CX16},
3411 {"corei7", PROCESSOR_COREI7_64, CPU_COREI7,
3412 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3413 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_CX16},
3414 {"corei7-avx", PROCESSOR_COREI7_64, CPU_COREI7,
3415 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3416 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX
3417 | PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL},
3418 {"atom", PROCESSOR_ATOM, CPU_ATOM,
3419 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3420 | PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
3421 {"geode", PROCESSOR_GEODE, CPU_GEODE,
3422 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A |PTA_PREFETCH_SSE},
3423 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3424 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
3425 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
3426 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3427 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3428 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3429 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3430 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3431 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
3432 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3433 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
3434 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3435 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
3436 {"x86-64", PROCESSOR_K8, CPU_K8,
3437 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
3438 {"k8", PROCESSOR_K8, CPU_K8,
3439 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3440 | PTA_SSE2 | PTA_NO_SAHF},
3441 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3442 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3443 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3444 {"opteron", PROCESSOR_K8, CPU_K8,
3445 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3446 | PTA_SSE2 | PTA_NO_SAHF},
3447 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3448 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3449 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3450 {"athlon64", PROCESSOR_K8, CPU_K8,
3451 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3452 | PTA_SSE2 | PTA_NO_SAHF},
3453 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3454 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3455 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3456 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3457 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3458 | PTA_SSE2 | PTA_NO_SAHF},
3459 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3460 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3461 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
3462 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3463 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3464 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
3465 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3466 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3467 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3468 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3469 | PTA_XOP | PTA_LWP},
3470 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC64,
3471 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3472 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16},
3473 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
3474 0 /* flags are only used for -march switch. */ },
3475 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
3476 PTA_64BIT /* flags are only used for -march switch. */ },
3477 };
3479 int const pta_size = ARRAY_SIZE (processor_alias_table);
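/* Worked example of the table: -march=k8-sse3 selects PROCESSOR_K8 /
   CPU_K8, and the PTA_* loop further below turns on -mmmx, -m3dnow,
   -m3dnowa, -msse, -msse2 and -msse3 unless the user set those flags
   explicitly; PTA_NO_SAHF records that SAHF/LAHF cannot be assumed
   in 64-bit mode.  */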
3481 /* Set up prefix/suffix so the error messages refer to either the command
3482 line argument, or the attribute(target). */
3491 prefix = "option(\"";
3496 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3497 SUBTARGET_OVERRIDE_OPTIONS;
3500 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3501 SUBSUBTARGET_OVERRIDE_OPTIONS;
3504 /* -fPIC is the default for x86_64. */
3505 if (TARGET_MACHO && TARGET_64BIT)
3508 /* Need to check -mtune=generic first. */
3509 if (ix86_tune_string)
3511 if (!strcmp (ix86_tune_string, "generic")
3512 || !strcmp (ix86_tune_string, "i686")
3513 /* As special support for cross compilers we read -mtune=native
3514 as -mtune=generic. With native compilers we won't see the
3515 -mtune=native, as it was changed by the driver. */
3516 || !strcmp (ix86_tune_string, "native"))
3519 ix86_tune_string = "generic64";
3521 ix86_tune_string = "generic32";
3523 /* If this call is for setting the option attribute, allow the
3524 generic32/generic64 that was previously set. */
3525 else if (!main_args_p
3526 && (!strcmp (ix86_tune_string, "generic32")
3527 || !strcmp (ix86_tune_string, "generic64")))
3529 else if (!strncmp (ix86_tune_string, "generic", 7))
3530 error ("bad value (%s) for %stune=%s %s",
3531 ix86_tune_string, prefix, suffix, sw);
3532 else if (!strcmp (ix86_tune_string, "x86-64"))
3533 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3534 "%stune=k8%s or %stune=generic%s instead as appropriate",
3535 prefix, suffix, prefix, suffix, prefix, suffix);
3539 if (ix86_arch_string)
3540 ix86_tune_string = ix86_arch_string;
3541 if (!ix86_tune_string)
3543 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
3544 ix86_tune_defaulted = 1;
3547 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
3548 need to use a sensible tune option. */
3549 if (!strcmp (ix86_tune_string, "generic")
3550 || !strcmp (ix86_tune_string, "x86-64")
3551 || !strcmp (ix86_tune_string, "i686"))
3554 ix86_tune_string = "generic64";
3556 ix86_tune_string = "generic32";
3560 if (ix86_stringop_alg == rep_prefix_8_byte && !TARGET_64BIT)
3562 /* rep; movq isn't available in 32-bit code. */
3563 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3564 ix86_stringop_alg = no_stringop;
3567 if (!ix86_arch_string)
3568 ix86_arch_string = TARGET_64BIT ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3570 ix86_arch_specified = 1;
3572 if (!global_options_set.x_ix86_abi)
3573 ix86_abi = DEFAULT_ABI;
3575 if (global_options_set.x_ix86_cmodel)
3577 switch (ix86_cmodel)
3582 ix86_cmodel = CM_SMALL_PIC;
3584 error ("code model %qs not supported in the %s bit mode",
3591 ix86_cmodel = CM_MEDIUM_PIC;
3593 error ("code model %qs not supported in the %s bit mode",
3600 ix86_cmodel = CM_LARGE_PIC;
3602 error ("code model %qs not supported in the %s bit mode",
3608 error ("code model %s does not support PIC mode", "32");
3610 error ("code model %qs not supported in the %s bit mode",
3617 error ("code model %s does not support PIC mode", "kernel");
3618 ix86_cmodel = CM_32;
3621 error ("code model %qs not supported in the %s bit mode",
3631 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3632 use of rip-relative addressing. This eliminates fixups that
3633 would otherwise be needed if this object is to be placed in a
3634 DLL, and is essentially just as efficient as direct addressing. */
3635 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
3636 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
3637 else if (TARGET_64BIT)
3638 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
3640 ix86_cmodel = CM_32;
3642 if (TARGET_MACHO && ix86_asm_dialect == ASM_INTEL)
3644 error ("-masm=intel not supported in this configuration");
3645 ix86_asm_dialect = ASM_ATT;
3647 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3648 sorry ("%i-bit mode not compiled in",
3649 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3651 for (i = 0; i < pta_size; i++)
3652 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
3654 ix86_schedule = processor_alias_table[i].schedule;
3655 ix86_arch = processor_alias_table[i].processor;
3656 /* Default cpu tuning to the architecture. */
3657 ix86_tune = ix86_arch;
3659 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3660 error ("CPU you selected does not support x86-64 "
3663 if (processor_alias_table[i].flags & PTA_MMX
3664 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3665 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3666 if (processor_alias_table[i].flags & PTA_3DNOW
3667 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3668 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3669 if (processor_alias_table[i].flags & PTA_3DNOW_A
3670 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3671 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3672 if (processor_alias_table[i].flags & PTA_SSE
3673 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3674 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3675 if (processor_alias_table[i].flags & PTA_SSE2
3676 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3677 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3678 if (processor_alias_table[i].flags & PTA_SSE3
3679 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3680 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3681 if (processor_alias_table[i].flags & PTA_SSSE3
3682 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3683 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3684 if (processor_alias_table[i].flags & PTA_SSE4_1
3685 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3686 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3687 if (processor_alias_table[i].flags & PTA_SSE4_2
3688 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3689 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3690 if (processor_alias_table[i].flags & PTA_AVX
3691 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3692 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3693 if (processor_alias_table[i].flags & PTA_FMA
3694 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3695 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3696 if (processor_alias_table[i].flags & PTA_SSE4A
3697 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3698 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3699 if (processor_alias_table[i].flags & PTA_FMA4
3700 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3701 ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3702 if (processor_alias_table[i].flags & PTA_XOP
3703 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3704 ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3705 if (processor_alias_table[i].flags & PTA_LWP
3706 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3707 ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3708 if (processor_alias_table[i].flags & PTA_ABM
3709 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3710 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3711 if (processor_alias_table[i].flags & PTA_BMI
3712 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3713 ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3714 if (processor_alias_table[i].flags & PTA_TBM
3715 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3716 ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3717 if (processor_alias_table[i].flags & PTA_CX16
3718 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3719 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3720 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3721 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3722 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3723 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
3724 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3725 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3726 if (processor_alias_table[i].flags & PTA_MOVBE
3727 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3728 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3729 if (processor_alias_table[i].flags & PTA_AES
3730 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3731 ix86_isa_flags |= OPTION_MASK_ISA_AES;
3732 if (processor_alias_table[i].flags & PTA_PCLMUL
3733 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3734 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3735 if (processor_alias_table[i].flags & PTA_FSGSBASE
3736 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3737 ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3738 if (processor_alias_table[i].flags & PTA_RDRND
3739 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3740 ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3741 if (processor_alias_table[i].flags & PTA_F16C
3742 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3743 ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3744 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3745 x86_prefetch_sse = true;
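	/* Illustration (not from the original source): with -march=bdver1
	   the PTA flags in the alias table above cascade into
	   ix86_isa_flags, enabling SSE through AVX plus FMA4, XOP and
	   LWP.  A flag the user pinned explicitly is skipped; e.g.
	   "-march=bdver1 -mno-avx" leaves OPTION_MASK_ISA_AVX clear
	   because ix86_isa_flags_explicit records the -mno-avx choice.  */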
3750 if (!strcmp (ix86_arch_string, "generic"))
3751 error ("generic CPU can be used only for %stune=%s %s",
3752 prefix, suffix, sw);
3753 else if (!strncmp (ix86_arch_string, "generic", 7) || i == pta_size)
3754 error ("bad value (%s) for %sarch=%s %s",
3755 ix86_arch_string, prefix, suffix, sw);
3757 ix86_arch_mask = 1u << ix86_arch;
3758 for (i = 0; i < X86_ARCH_LAST; ++i)
3759 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3761 for (i = 0; i < pta_size; i++)
3762 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
3764 ix86_schedule = processor_alias_table[i].schedule;
3765 ix86_tune = processor_alias_table[i].processor;
3768 if (!(processor_alias_table[i].flags & PTA_64BIT))
3770 if (ix86_tune_defaulted)
3772 ix86_tune_string = "x86-64";
3773 for (i = 0; i < pta_size; i++)
3774 if (! strcmp (ix86_tune_string,
3775 processor_alias_table[i].name))
3777 ix86_schedule = processor_alias_table[i].schedule;
3778 ix86_tune = processor_alias_table[i].processor;
3781 error ("CPU you selected does not support x86-64 "
3787 /* Adjust tuning when compiling for 32-bit ABI. */
3790 case PROCESSOR_GENERIC64:
3791 ix86_tune = PROCESSOR_GENERIC32;
3792 ix86_schedule = CPU_PENTIUMPRO;
3795 case PROCESSOR_CORE2_64:
3796 ix86_tune = PROCESSOR_CORE2_32;
3799 case PROCESSOR_COREI7_64:
3800 ix86_tune = PROCESSOR_COREI7_32;
3807 /* Intel CPUs have always interpreted SSE prefetch instructions as
3808 NOPs; so, we can enable SSE prefetch instructions even when
3809 -mtune (rather than -march) points us to a processor that has them.
3810 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3811 higher processors. */
3813 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3814 x86_prefetch_sse = true;
3818 if (ix86_tune_specified && i == pta_size)
3819 error ("bad value (%s) for %stune=%s %s",
3820 ix86_tune_string, prefix, suffix, sw);
3822 ix86_tune_mask = 1u << ix86_tune;
3823 for (i = 0; i < X86_TUNE_LAST; ++i)
3824 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3826 #ifndef USE_IX86_FRAME_POINTER
3827 #define USE_IX86_FRAME_POINTER 0
3830 #ifndef USE_X86_64_FRAME_POINTER
3831 #define USE_X86_64_FRAME_POINTER 0
3834 /* Set the default values for switches whose default depends on TARGET_64BIT
3835     in case they weren't overridden by command-line options.  */
3838 if (optimize > 1 && !global_options_set.x_flag_zee)
3840 if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
3841 flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3842 if (flag_asynchronous_unwind_tables == 2)
3843 flag_unwind_tables = flag_asynchronous_unwind_tables = 1;
3844 if (flag_pcc_struct_return == 2)
3845 flag_pcc_struct_return = 0;
3849 if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
3850 flag_omit_frame_pointer = !(USE_IX86_FRAME_POINTER || optimize_size);
3851 if (flag_asynchronous_unwind_tables == 2)
3852 flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3853 if (flag_pcc_struct_return == 2)
3854 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3858 ix86_cost = &ix86_size_cost;
3860 ix86_cost = processor_target_table[ix86_tune].cost;
3862 /* Arrange to set up i386_stack_locals for all functions. */
3863 init_machine_status = ix86_init_machine_status;
3865 /* Validate -mregparm= value. */
3866 if (global_options_set.x_ix86_regparm)
3869 warning (0, "-mregparm is ignored in 64-bit mode");
3870 if (ix86_regparm > REGPARM_MAX)
3872 error ("-mregparm=%d is not between 0 and %d",
3873 ix86_regparm, REGPARM_MAX);
3878 ix86_regparm = REGPARM_MAX;
3880 /* Default align_* from the processor table. */
3881 if (align_loops == 0)
3883 align_loops = processor_target_table[ix86_tune].align_loop;
3884 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3886 if (align_jumps == 0)
3888 align_jumps = processor_target_table[ix86_tune].align_jump;
3889 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3891 if (align_functions == 0)
3893 align_functions = processor_target_table[ix86_tune].align_func;
3896 /* Provide default for -mbranch-cost= value. */
3897 if (!global_options_set.x_ix86_branch_cost)
3898 ix86_branch_cost = ix86_cost->branch_cost;
3902 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3904 /* Enable by default the SSE and MMX builtins. Do allow the user to
3905 explicitly disable any of these. In particular, disabling SSE and
3906 MMX for kernel code is extremely useful. */
3907 if (!ix86_arch_specified)
3909 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3910 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3913 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3917 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3919 if (!ix86_arch_specified)
3921 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3923      /* The i386 ABI does not specify a red zone.  It still makes sense to use
3924	 one when the programmer takes care to keep the stack from being clobbered.  */
3925 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3926 target_flags |= MASK_NO_RED_ZONE;
3929 /* Keep nonleaf frame pointers. */
3930 if (flag_omit_frame_pointer)
3931 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3932 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3933 flag_omit_frame_pointer = 1;
3935 /* If we're doing fast math, we don't care about comparison order
3936 wrt NaNs. This lets us use a shorter comparison sequence. */
3937 if (flag_finite_math_only)
3938 target_flags &= ~MASK_IEEE_FP;
3940 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3941 since the insns won't need emulation. */
3942 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3943 target_flags &= ~MASK_NO_FANCY_MATH_387;
3945 /* Likewise, if the target doesn't have a 387, or we've specified
3946 software floating point, don't use 387 inline intrinsics. */
3948 target_flags |= MASK_NO_FANCY_MATH_387;
3950 /* On 32bit targets, avoid moving DFmode values in
3951 integer registers when optimizing for size. */
3953 target_flags |= TARGET_INTEGER_DFMODE_MOVES;
3954 else if (optimize_size)
3955 target_flags &= ~TARGET_INTEGER_DFMODE_MOVES;
3957 /* Turn on MMX builtins for -msse. */
3960 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3961 x86_prefetch_sse = true;
3964 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3965 if (TARGET_SSE4_2 || TARGET_ABM)
3966 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3968 /* Validate -mpreferred-stack-boundary= value or default it to
3969 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3970 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3971 if (global_options_set.x_ix86_preferred_stack_boundary_arg)
3973 int min = (TARGET_64BIT ? 4 : 2);
3974 int max = (TARGET_SEH ? 4 : 12);
3976 if (ix86_preferred_stack_boundary_arg < min
3977 || ix86_preferred_stack_boundary_arg > max)
3980 error ("-mpreferred-stack-boundary is not supported "
3983 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
3984 ix86_preferred_stack_boundary_arg, min, max);
3987 ix86_preferred_stack_boundary
3988 = (1 << ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
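      /* Worked example (illustrative): the argument is a power-of-two
	 exponent in bytes, so -mpreferred-stack-boundary=4 yields
	 (1 << 4) * BITS_PER_UNIT = 128 bits, i.e. a 16-byte boundary.
	 Per the min/max check above, 32-bit code accepts exponents
	 2..12 and 64-bit code 4..12, with SEH targets capped at 4.  */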
3991 /* Set the default value for -mstackrealign. */
3992 if (ix86_force_align_arg_pointer == -1)
3993 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3995 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3997 /* Validate -mincoming-stack-boundary= value or default it to
3998 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3999 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
4000 if (global_options_set.x_ix86_incoming_stack_boundary_arg)
4002 if (ix86_incoming_stack_boundary_arg < (TARGET_64BIT ? 4 : 2)
4003 || ix86_incoming_stack_boundary_arg > 12)
4004 error ("-mincoming-stack-boundary=%d is not between %d and 12",
4005 ix86_incoming_stack_boundary_arg, TARGET_64BIT ? 4 : 2);
4008 ix86_user_incoming_stack_boundary
4009 = (1 << ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
4010 ix86_incoming_stack_boundary
4011 = ix86_user_incoming_stack_boundary;
4015 /* Accept -msseregparm only if at least SSE support is enabled. */
4016 if (TARGET_SSEREGPARM
4018 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
4020 if (global_options_set.x_ix86_fpmath)
4022 if (ix86_fpmath & FPMATH_SSE)
4026 warning (0, "SSE instruction set disabled, using 387 arithmetics");
4027 ix86_fpmath = FPMATH_387;
4029 else if ((ix86_fpmath & FPMATH_387) && !TARGET_80387)
4031 warning (0, "387 instruction set disabled, using SSE arithmetics");
4032 ix86_fpmath = FPMATH_SSE;
4037 ix86_fpmath = TARGET_FPMATH_DEFAULT;
4039 /* If the i387 is disabled, then do not return values in it. */
4041 target_flags &= ~MASK_FLOAT_RETURNS;
4043   /* Use an external vectorized library when vectorizing intrinsics.  */
4044 if (global_options_set.x_ix86_veclibabi_type)
4045 switch (ix86_veclibabi_type)
4047 case ix86_veclibabi_type_svml:
4048 ix86_veclib_handler = ix86_veclibabi_svml;
4051 case ix86_veclibabi_type_acml:
4052 ix86_veclib_handler = ix86_veclibabi_acml;
4059 if ((!USE_IX86_FRAME_POINTER
4060 || (x86_accumulate_outgoing_args & ix86_tune_mask))
4061 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
4063 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4065 /* ??? Unwind info is not correct around the CFG unless either a frame
4066 pointer is present or M_A_O_A is set. Fixing this requires rewriting
4067      unwind info generation to be aware of the CFG and propagating states
     around edges.  */
4069 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
4070 || flag_exceptions || flag_non_call_exceptions)
4071 && flag_omit_frame_pointer
4072 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4074 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
4075 warning (0, "unwind tables currently require either a frame pointer "
4076 "or %saccumulate-outgoing-args%s for correctness",
4078 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4081 /* If stack probes are required, the space used for large function
4082 arguments on the stack must also be probed, so enable
4083 -maccumulate-outgoing-args so this happens in the prologue. */
4084 if (TARGET_STACK_PROBE
4085 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4087 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
4088 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
4089 "for correctness", prefix, suffix);
4090 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4093   /* For sane SSE instruction set generation we need the fcomi instruction.
4094      It is safe to enable all CMOV instructions.  Also, the RDRAND intrinsic
4095      expands to a sequence that includes a conditional move.  */
4096 if (TARGET_SSE || TARGET_RDRND)
4099 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
4102 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
4103 p = strchr (internal_label_prefix, 'X');
4104 internal_label_prefix_len = p - internal_label_prefix;
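  /* Example (assuming a typical ELF-style definition of
     ASM_GENERATE_INTERNAL_LABEL, an assumption rather than a fact about
     every target): the generated label is "*.LX0"; the 'X' is found at
     offset 3, so internal_label_prefix holds "*.L" and
     internal_label_prefix_len is 3.  */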
4108   /* When no scheduling description is available, disable the scheduler pass
4109 so it won't slow down the compilation and make x87 code slower. */
4110 if (!TARGET_SCHEDULE)
4111 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
4113 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4114 ix86_cost->simultaneous_prefetches,
4115 global_options.x_param_values,
4116 global_options_set.x_param_values);
4117 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, ix86_cost->prefetch_block,
4118 global_options.x_param_values,
4119 global_options_set.x_param_values);
4120 maybe_set_param_value (PARAM_L1_CACHE_SIZE, ix86_cost->l1_cache_size,
4121 global_options.x_param_values,
4122 global_options_set.x_param_values);
4123 maybe_set_param_value (PARAM_L2_CACHE_SIZE, ix86_cost->l2_cache_size,
4124 global_options.x_param_values,
4125 global_options_set.x_param_values);
4127   /* Enable software prefetching at -O3 for CPUs where prefetching is helpful.  */
4128 if (flag_prefetch_loop_arrays < 0
4131 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
4132 flag_prefetch_loop_arrays = 1;
4134 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
4135 can be optimized to ap = __builtin_next_arg (0). */
4136 if (!TARGET_64BIT && !flag_split_stack)
4137 targetm.expand_builtin_va_start = NULL;
4141 ix86_gen_leave = gen_leave_rex64;
4142 ix86_gen_add3 = gen_adddi3;
4143 ix86_gen_sub3 = gen_subdi3;
4144 ix86_gen_sub3_carry = gen_subdi3_carry;
4145 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
4146 ix86_gen_monitor = gen_sse3_monitor64;
4147 ix86_gen_andsp = gen_anddi3;
4148 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
4149 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
4150 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
4154 ix86_gen_leave = gen_leave;
4155 ix86_gen_add3 = gen_addsi3;
4156 ix86_gen_sub3 = gen_subsi3;
4157 ix86_gen_sub3_carry = gen_subsi3_carry;
4158 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
4159 ix86_gen_monitor = gen_sse3_monitor;
4160 ix86_gen_andsp = gen_andsi3;
4161 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
4162 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
4163 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
4167 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
4169 target_flags |= MASK_CLD & ~target_flags_explicit;
4172 if (!TARGET_64BIT && flag_pic)
4174 if (flag_fentry > 0)
4175 sorry ("-mfentry isn%'t supported for 32-bit in combination "
4179 else if (TARGET_SEH)
4181 if (flag_fentry == 0)
4182 sorry ("-mno-fentry isn%'t compatible with SEH");
4185 else if (flag_fentry < 0)
4187 #if defined(PROFILE_BEFORE_PROLOGUE)
4196   /* When not optimizing for size, enable the vzeroupper optimization for
4197      TARGET_AVX with -fexpensive-optimizations and split 32-byte
4198      AVX unaligned loads and stores.  */
4201 if (flag_expensive_optimizations
4202 && !(target_flags_explicit & MASK_VZEROUPPER))
4203 target_flags |= MASK_VZEROUPPER;
4204 if (!(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
4205 target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
4206 if (!(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_STORE))
4207 target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
4212 /* Disable vzeroupper pass if TARGET_AVX is disabled. */
4213 target_flags &= ~MASK_VZEROUPPER;
4216   /* Save the initial options in case the user does function-specific options.  */
4219 target_option_default_node = target_option_current_node
4220 = build_target_option_node ();
4223 /* Return TRUE if VAL is passed in register with 256bit AVX modes. */
4226 function_pass_avx256_p (const_rtx val)
4231 if (REG_P (val) && VALID_AVX256_REG_MODE (GET_MODE (val)))
4234 if (GET_CODE (val) == PARALLEL)
4239 for (i = XVECLEN (val, 0) - 1; i >= 0; i--)
4241 r = XVECEXP (val, 0, i);
4242 if (GET_CODE (r) == EXPR_LIST
4244 && REG_P (XEXP (r, 0))
4245 && (GET_MODE (XEXP (r, 0)) == OImode
4246 || VALID_AVX256_REG_MODE (GET_MODE (XEXP (r, 0)))))
4254 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4257 ix86_option_override (void)
4259 ix86_option_override_internal (true);
4262 /* Update register usage after having seen the compiler flags. */
4265 ix86_conditional_register_usage (void)
4270 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4272 if (fixed_regs[i] > 1)
4273 fixed_regs[i] = (fixed_regs[i] == (TARGET_64BIT ? 3 : 2));
4274 if (call_used_regs[i] > 1)
4275 call_used_regs[i] = (call_used_regs[i] == (TARGET_64BIT ? 3 : 2));
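      /* Interpretation (inferred from the comparisons above): table
	 entries greater than 1 mark conditionally fixed or call-used
	 registers; 2 means "only in 32-bit mode" and 3 means "only in
	 64-bit mode", collapsed here to a plain 0/1 for the current
	 target.  */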
4278 /* The PIC register, if it exists, is fixed. */
4279 j = PIC_OFFSET_TABLE_REGNUM;
4280 if (j != INVALID_REGNUM)
4281 fixed_regs[j] = call_used_regs[j] = 1;
4283 /* The 64-bit MS_ABI changes the set of call-used registers. */
4284 if (TARGET_64BIT_MS_ABI)
4286 call_used_regs[SI_REG] = 0;
4287 call_used_regs[DI_REG] = 0;
4288 call_used_regs[XMM6_REG] = 0;
4289 call_used_regs[XMM7_REG] = 0;
4290 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4291 call_used_regs[i] = 0;
4294 /* The default setting of CLOBBERED_REGS is for 32-bit; add in the
4295 other call-clobbered regs for 64-bit. */
4298 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
4300 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4301 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
4302 && call_used_regs[i])
4303 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
4306 /* If MMX is disabled, squash the registers. */
4308 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4309 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
4310 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4312 /* If SSE is disabled, squash the registers. */
4314 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4315 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
4316 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4318 /* If the FPU is disabled, squash the registers. */
4319 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
4320 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4321 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
4322 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4324 /* If 32-bit, squash the 64-bit registers. */
4327 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
4329 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4335 /* Save the current options */
4338 ix86_function_specific_save (struct cl_target_option *ptr)
4340 ptr->arch = ix86_arch;
4341 ptr->schedule = ix86_schedule;
4342 ptr->tune = ix86_tune;
4343 ptr->branch_cost = ix86_branch_cost;
4344 ptr->tune_defaulted = ix86_tune_defaulted;
4345 ptr->arch_specified = ix86_arch_specified;
4346 ptr->x_ix86_isa_flags_explicit = ix86_isa_flags_explicit;
4347 ptr->ix86_target_flags_explicit = target_flags_explicit;
4349 /* The fields are char but the variables are not; make sure the
4350 values fit in the fields. */
4351 gcc_assert (ptr->arch == ix86_arch);
4352 gcc_assert (ptr->schedule == ix86_schedule);
4353 gcc_assert (ptr->tune == ix86_tune);
4354 gcc_assert (ptr->branch_cost == ix86_branch_cost);
4357 /* Restore the current options */
4360 ix86_function_specific_restore (struct cl_target_option *ptr)
4362 enum processor_type old_tune = ix86_tune;
4363 enum processor_type old_arch = ix86_arch;
4364 unsigned int ix86_arch_mask, ix86_tune_mask;
4367 ix86_arch = (enum processor_type) ptr->arch;
4368 ix86_schedule = (enum attr_cpu) ptr->schedule;
4369 ix86_tune = (enum processor_type) ptr->tune;
4370 ix86_branch_cost = ptr->branch_cost;
4371 ix86_tune_defaulted = ptr->tune_defaulted;
4372 ix86_arch_specified = ptr->arch_specified;
4373 ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
4374 target_flags_explicit = ptr->ix86_target_flags_explicit;
4376 /* Recreate the arch feature tests if the arch changed */
4377 if (old_arch != ix86_arch)
4379 ix86_arch_mask = 1u << ix86_arch;
4380 for (i = 0; i < X86_ARCH_LAST; ++i)
4381 ix86_arch_features[i]
4382 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4385 /* Recreate the tune optimization tests */
4386 if (old_tune != ix86_tune)
4388 ix86_tune_mask = 1u << ix86_tune;
4389 for (i = 0; i < X86_TUNE_LAST; ++i)
4390 ix86_tune_features[i]
4391 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
4395 /* Print the current options */
4398 ix86_function_specific_print (FILE *file, int indent,
4399 struct cl_target_option *ptr)
4402 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
4403 NULL, NULL, ptr->x_ix86_fpmath, false);
4405 fprintf (file, "%*sarch = %d (%s)\n",
4408 ((ptr->arch < TARGET_CPU_DEFAULT_max)
4409 ? cpu_names[ptr->arch]
4412 fprintf (file, "%*stune = %d (%s)\n",
4415 ((ptr->tune < TARGET_CPU_DEFAULT_max)
4416 ? cpu_names[ptr->tune]
4419 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4423 fprintf (file, "%*s%s\n", indent, "", target_string);
4424 free (target_string);
4429 /* Inner function to process the attribute((target(...))), take an argument and
4430    set the current options from the argument.  If we have a list, recursively
    go over the list.  */
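/* Usage example (user code, not part of GCC itself):

       __attribute__((target ("sse4.1,no-3dnow,arch=core2")))
       int foo (void);

   Each comma-separated item is matched against the attrs[] table
   below; a "no-" prefix negates a flag, while "arch=", "tune=" and
   "fpmath=" carry a string or enum argument.  */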
4434 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
4435 struct gcc_options *enum_opts_set)
4440 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4441 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4442 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4443 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4444 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
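/* For instance, IX86_ATTR_ISA ("avx", OPT_mavx) expands to
   { "avx", 3, ix86_opt_isa, OPT_mavx, 0 },
   where 3 is sizeof ("avx") - 1, the option-name length used for the
   prefix match further below.  */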
4460 enum ix86_opt_type type;
4465 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4466 IX86_ATTR_ISA ("abm", OPT_mabm),
4467 IX86_ATTR_ISA ("bmi", OPT_mbmi),
4468 IX86_ATTR_ISA ("tbm", OPT_mtbm),
4469 IX86_ATTR_ISA ("aes", OPT_maes),
4470 IX86_ATTR_ISA ("avx", OPT_mavx),
4471 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4472 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4473 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4474 IX86_ATTR_ISA ("sse", OPT_msse),
4475 IX86_ATTR_ISA ("sse2", OPT_msse2),
4476 IX86_ATTR_ISA ("sse3", OPT_msse3),
4477 IX86_ATTR_ISA ("sse4", OPT_msse4),
4478 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4479 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4480 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4481 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4482 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4483 IX86_ATTR_ISA ("xop", OPT_mxop),
4484 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4485 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4486 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4487 IX86_ATTR_ISA ("f16c", OPT_mf16c),
4490 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
4492 /* string options */
4493 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4494 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
4497 IX86_ATTR_YES ("cld",
4501 IX86_ATTR_NO ("fancy-math-387",
4502 OPT_mfancy_math_387,
4503 MASK_NO_FANCY_MATH_387),
4505 IX86_ATTR_YES ("ieee-fp",
4509 IX86_ATTR_YES ("inline-all-stringops",
4510 OPT_minline_all_stringops,
4511 MASK_INLINE_ALL_STRINGOPS),
4513 IX86_ATTR_YES ("inline-stringops-dynamically",
4514 OPT_minline_stringops_dynamically,
4515 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4517 IX86_ATTR_NO ("align-stringops",
4518 OPT_mno_align_stringops,
4519 MASK_NO_ALIGN_STRINGOPS),
4521 IX86_ATTR_YES ("recip",
4527 /* If this is a list, recurse to get the options. */
4528 if (TREE_CODE (args) == TREE_LIST)
4532 for (; args; args = TREE_CHAIN (args))
4533 if (TREE_VALUE (args)
4534 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
4535 p_strings, enum_opts_set))
4541 else if (TREE_CODE (args) != STRING_CST)
4544 /* Handle multiple arguments separated by commas. */
4545 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4547 while (next_optstr && *next_optstr != '\0')
4549 char *p = next_optstr;
4551 char *comma = strchr (next_optstr, ',');
4552 const char *opt_string;
4553 size_t len, opt_len;
4558 enum ix86_opt_type type = ix86_opt_unknown;
4564 len = comma - next_optstr;
4565 next_optstr = comma + 1;
4573 /* Recognize no-xxx. */
4574 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4583 /* Find the option. */
4586 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4588 type = attrs[i].type;
4589 opt_len = attrs[i].len;
4590 if (ch == attrs[i].string[0]
4591 && ((type != ix86_opt_str && type != ix86_opt_enum)
4594 && memcmp (p, attrs[i].string, opt_len) == 0)
4597 mask = attrs[i].mask;
4598 opt_string = attrs[i].string;
4603 /* Process the option. */
4606 error ("attribute(target(\"%s\")) is unknown", orig_p);
4610 else if (type == ix86_opt_isa)
4612 struct cl_decoded_option decoded;
4614 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
4615 ix86_handle_option (&global_options, &global_options_set,
4616 &decoded, input_location);
4619 else if (type == ix86_opt_yes || type == ix86_opt_no)
4621 if (type == ix86_opt_no)
4622 opt_set_p = !opt_set_p;
4625 target_flags |= mask;
4627 target_flags &= ~mask;
4630 else if (type == ix86_opt_str)
4634 error ("option(\"%s\") was already specified", opt_string);
4638 p_strings[opt] = xstrdup (p + opt_len);
4641 else if (type == ix86_opt_enum)
4646 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
4648 set_option (&global_options, enum_opts_set, opt, value,
4649 p + opt_len, DK_UNSPECIFIED, input_location,
4653 error ("attribute(target(\"%s\")) is unknown", orig_p);
4665 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4668 ix86_valid_target_attribute_tree (tree args)
4670 const char *orig_arch_string = ix86_arch_string;
4671 const char *orig_tune_string = ix86_tune_string;
4672 enum fpmath_unit orig_fpmath_set = global_options_set.x_ix86_fpmath;
4673 int orig_tune_defaulted = ix86_tune_defaulted;
4674 int orig_arch_specified = ix86_arch_specified;
4675 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
4678 struct cl_target_option *def
4679 = TREE_TARGET_OPTION (target_option_default_node);
4680 struct gcc_options enum_opts_set;
4682 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
4684 /* Process each of the options on the chain. */
4685 if (! ix86_valid_target_attribute_inner_p (args, option_strings,
4689 /* If the changed options are different from the default, rerun
4690 ix86_option_override_internal, and then save the options away.
4691      The string options are attribute options, and will be undone
4692 when we copy the save structure. */
4693 if (ix86_isa_flags != def->x_ix86_isa_flags
4694 || target_flags != def->x_target_flags
4695 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4696 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4697 || enum_opts_set.x_ix86_fpmath)
4699 /* If we are using the default tune= or arch=, undo the string assigned,
4700 and use the default. */
4701 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4702 ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4703 else if (!orig_arch_specified)
4704 ix86_arch_string = NULL;
4706 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4707 ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4708 else if (orig_tune_defaulted)
4709 ix86_tune_string = NULL;
4711 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4712 if (enum_opts_set.x_ix86_fpmath)
4713 global_options_set.x_ix86_fpmath = (enum fpmath_unit) 1;
4714 else if (!TARGET_64BIT && TARGET_SSE)
4716 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
4717 global_options_set.x_ix86_fpmath = (enum fpmath_unit) 1;
4720 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4721 ix86_option_override_internal (false);
4723 /* Add any builtin functions with the new isa if any. */
4724 ix86_add_new_builtins (ix86_isa_flags);
4726   /* Save the current options unless we are validating options for #pragma.  */
4728 t = build_target_option_node ();
4730 ix86_arch_string = orig_arch_string;
4731 ix86_tune_string = orig_tune_string;
4732 global_options_set.x_ix86_fpmath = orig_fpmath_set;
4734 /* Free up memory allocated to hold the strings */
4735 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4736 free (option_strings[i]);
4742 /* Hook to validate attribute((target("string"))). */
4745 ix86_valid_target_attribute_p (tree fndecl,
4746 tree ARG_UNUSED (name),
4748 int ARG_UNUSED (flags))
4750 struct cl_target_option cur_target;
4752 tree old_optimize = build_optimization_node ();
4753 tree new_target, new_optimize;
4754 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4756 /* If the function changed the optimization levels as well as setting target
4757 options, start with the optimizations specified. */
4758 if (func_optimize && func_optimize != old_optimize)
4759 cl_optimization_restore (&global_options,
4760 TREE_OPTIMIZATION (func_optimize));
4762 /* The target attributes may also change some optimization flags, so update
4763 the optimization options if necessary. */
4764 cl_target_option_save (&cur_target, &global_options);
4765 new_target = ix86_valid_target_attribute_tree (args);
4766 new_optimize = build_optimization_node ();
4773 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4775 if (old_optimize != new_optimize)
4776 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4779 cl_target_option_restore (&global_options, &cur_target);
4781 if (old_optimize != new_optimize)
4782 cl_optimization_restore (&global_options,
4783 TREE_OPTIMIZATION (old_optimize));
4789 /* Hook to determine if one function can safely inline another. */
4792 ix86_can_inline_p (tree caller, tree callee)
4795 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
4796 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
4798 /* If callee has no option attributes, then it is ok to inline. */
4802   /* If the caller has no option attributes but the callee does, then it is
     not ok to inline.  */
4804 else if (!caller_tree)
4809 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
4810 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
4812       /* Callee's isa options should be a subset of the caller's, i.e. an SSE4
4813	  function can inline an SSE2 function, but an SSE2 function can't inline
	  an SSE4 function.  */
4815 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
4816 != callee_opts->x_ix86_isa_flags)
4819 /* See if we have the same non-isa options. */
4820 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
4823 /* See if arch, tune, etc. are the same. */
4824 else if (caller_opts->arch != callee_opts->arch)
4827 else if (caller_opts->tune != callee_opts->tune)
4830 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
4833 else if (caller_opts->branch_cost != callee_opts->branch_cost)
4844 /* Remember the last target of ix86_set_current_function. */
4845 static GTY(()) tree ix86_previous_fndecl;
4847 /* Establish appropriate back-end context for processing the function
4848 FNDECL. The argument might be NULL to indicate processing at top
4849 level, outside of any function scope. */
4851 ix86_set_current_function (tree fndecl)
4853 /* Only change the context if the function changes. This hook is called
4854 several times in the course of compiling a function, and we don't want to
4855 slow things down too much or call target_reinit when it isn't safe. */
4856 if (fndecl && fndecl != ix86_previous_fndecl)
4858 tree old_tree = (ix86_previous_fndecl
4859 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
4862 tree new_tree = (fndecl
4863 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
4866 ix86_previous_fndecl = fndecl;
4867 if (old_tree == new_tree)
4872 cl_target_option_restore (&global_options,
4873 TREE_TARGET_OPTION (new_tree));
4879 struct cl_target_option *def
4880 = TREE_TARGET_OPTION (target_option_current_node);
4882 cl_target_option_restore (&global_options, def);
4889 /* Return true if this goes in large data/bss. */
4892 ix86_in_large_data_p (tree exp)
4894 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
4897 /* Functions are never large data. */
4898 if (TREE_CODE (exp) == FUNCTION_DECL)
4901 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
4903 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
4904 if (strcmp (section, ".ldata") == 0
4905 || strcmp (section, ".lbss") == 0)
4911 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
4913 /* If this is an incomplete type with size 0, then we can't put it
4914 in data because it might be too big when completed. */
4915 if (!size || size > ix86_section_threshold)
4922 /* Switch to the appropriate section for output of DECL.
4923 DECL is either a `VAR_DECL' node or a constant of some sort.
4924 RELOC indicates whether forming the initial value of DECL requires
4925 link-time relocations. */
4927 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
				ATTRIBUTE_UNUSED;
4931 x86_64_elf_select_section (tree decl, int reloc,
4932 unsigned HOST_WIDE_INT align)
4934 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4935 && ix86_in_large_data_p (decl))
4937 const char *sname = NULL;
4938 unsigned int flags = SECTION_WRITE;
4939 switch (categorize_decl_for_section (decl, reloc))
4944 case SECCAT_DATA_REL:
4945 sname = ".ldata.rel";
4947 case SECCAT_DATA_REL_LOCAL:
4948 sname = ".ldata.rel.local";
4950 case SECCAT_DATA_REL_RO:
4951 sname = ".ldata.rel.ro";
4953 case SECCAT_DATA_REL_RO_LOCAL:
4954 sname = ".ldata.rel.ro.local";
4958 flags |= SECTION_BSS;
4961 case SECCAT_RODATA_MERGE_STR:
4962 case SECCAT_RODATA_MERGE_STR_INIT:
4963 case SECCAT_RODATA_MERGE_CONST:
4967 case SECCAT_SRODATA:
4974	  /* We don't split these for the medium model.  Place them into
4975	     default sections and hope for the best.  */
4980 /* We might get called with string constants, but get_named_section
4981 doesn't like them as they are not DECLs. Also, we need to set
4982 flags in that case. */
4984 return get_section (sname, flags, NULL);
4985 return get_named_section (decl, sname, reloc);
4988 return default_elf_select_section (decl, reloc, align);
4991 /* Build up a unique section name, expressed as a
4992 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
4993 RELOC indicates whether the initial value of EXP requires
4994 link-time relocations. */
4996 static void ATTRIBUTE_UNUSED
4997 x86_64_elf_unique_section (tree decl, int reloc)
4999 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5000 && ix86_in_large_data_p (decl))
5002 const char *prefix = NULL;
5003 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
5004 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
5006 switch (categorize_decl_for_section (decl, reloc))
5009 case SECCAT_DATA_REL:
5010 case SECCAT_DATA_REL_LOCAL:
5011 case SECCAT_DATA_REL_RO:
5012 case SECCAT_DATA_REL_RO_LOCAL:
5013 prefix = one_only ? ".ld" : ".ldata";
5016 prefix = one_only ? ".lb" : ".lbss";
5019 case SECCAT_RODATA_MERGE_STR:
5020 case SECCAT_RODATA_MERGE_STR_INIT:
5021 case SECCAT_RODATA_MERGE_CONST:
5022 prefix = one_only ? ".lr" : ".lrodata";
5024 case SECCAT_SRODATA:
5031	  /* We don't split these for the medium model.  Place them into
5032	     default sections and hope for the best.  */
5037 const char *name, *linkonce;
5040 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
5041 name = targetm.strip_name_encoding (name);
5043 /* If we're using one_only, then there needs to be a .gnu.linkonce
5044 prefix to the section name. */
5045 linkonce = one_only ? ".gnu.linkonce" : "";
5047 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
5049 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
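      /* Resulting names (illustrative): a one_only variable "foo"
	 categorized as data lands in section ".gnu.linkonce.ld.foo";
	 without one_only the prefix is ".ldata" and the section
	 becomes ".ldata.foo".  */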
5053 default_unique_section (decl, reloc);
5056 #ifdef COMMON_ASM_OP
5057 /* This says how to output assembler code to declare an
5058 uninitialized external linkage data object.
5060    For medium model x86-64 we need to use the .largecomm opcode for
    large objects.  */
5063 x86_elf_aligned_common (FILE *file,
5064 const char *name, unsigned HOST_WIDE_INT size,
5067 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5068 && size > (unsigned int)ix86_section_threshold)
5069 fputs (".largecomm\t", file);
5071 fputs (COMMON_ASM_OP, file);
5072 assemble_name (file, name);
5073 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
5074 size, align / BITS_PER_UNIT);
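  /* Sample output (illustrative, assuming the medium code model and a
     size above ix86_section_threshold): for name "buf", size 1048576
     and a 256-bit alignment this emits

	 .largecomm	buf,1048576,32

     since the alignment argument is converted from bits to bytes.  */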
5078 /* Utility function for targets to use in implementing
5079 ASM_OUTPUT_ALIGNED_BSS. */
5082 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
5083 const char *name, unsigned HOST_WIDE_INT size,
5086 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5087 && size > (unsigned int)ix86_section_threshold)
5088 switch_to_section (get_named_section (decl, ".lbss", 0));
5090 switch_to_section (bss_section);
5091 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
5092 #ifdef ASM_DECLARE_OBJECT_NAME
5093 last_assemble_variable_decl = decl;
5094 ASM_DECLARE_OBJECT_NAME (file, name, decl);
5096   /* The standard thing is just to output a label for the object.  */
5097 ASM_OUTPUT_LABEL (file, name);
5098 #endif /* ASM_DECLARE_OBJECT_NAME */
5099 ASM_OUTPUT_SKIP (file, size ? size : 1);
5102 static const struct default_options ix86_option_optimization_table[] =
5104 /* Turn off -fschedule-insns by default. It tends to make the
5105      shortage of registers even worse.  */
5106 #ifdef INSN_SCHEDULING
5107 { OPT_LEVELS_ALL, OPT_fschedule_insns, NULL, 0 },
5110 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
5111 SUBTARGET_OPTIMIZATION_OPTIONS,
5113 { OPT_LEVELS_NONE, 0, NULL, 0 }
5116 /* Implement TARGET_OPTION_INIT_STRUCT. */
5119 ix86_option_init_struct (struct gcc_options *opts)
5122 /* The Darwin libraries never set errno, so we might as well
5123 avoid calling them when that's the only reason we would. */
5124 opts->x_flag_errno_math = 0;
5126 opts->x_flag_pcc_struct_return = 2;
5127 opts->x_flag_asynchronous_unwind_tables = 2;
5128 opts->x_flag_vect_cost_model = 1;
5131 /* Decide whether we must probe the stack before any space allocation
5132 on this target. It's essentially TARGET_STACK_PROBE except when
5133 -fstack-check causes the stack to be already probed differently. */
5136 ix86_target_stack_probe (void)
5138 /* Do not probe the stack twice if static stack checking is enabled. */
5139 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5142 return TARGET_STACK_PROBE;
5145 /* Decide whether we can make a sibling call to a function. DECL is the
5146 declaration of the function being targeted by the call and EXP is the
5147 CALL_EXPR representing the call. */
5150 ix86_function_ok_for_sibcall (tree decl, tree exp)
5152 tree type, decl_or_type;
5155 /* If we are generating position-independent code, we cannot sibcall
5156 optimize any indirect call, or a direct call to a global function,
5157 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
5161 && (!decl || !targetm.binds_local_p (decl)))
5164 /* If we need to align the outgoing stack, then sibcalling would
5165 unalign the stack, which may break the called function. */
5166 if (ix86_minimum_incoming_stack_boundary (true)
5167 < PREFERRED_STACK_BOUNDARY)
5172 decl_or_type = decl;
5173 type = TREE_TYPE (decl);
5177 /* We're looking at the CALL_EXPR, we need the type of the function. */
5178 type = CALL_EXPR_FN (exp); /* pointer expression */
5179 type = TREE_TYPE (type); /* pointer type */
5180 type = TREE_TYPE (type); /* function type */
5181 decl_or_type = type;
5184   /* Check that the return value locations are the same.  For example,
5185 if we are returning floats on the 80387 register stack, we cannot
5186 make a sibcall from a function that doesn't return a float to a
5187 function that does or, conversely, from a function that does return
5188 a float to a function that doesn't; the necessary stack adjustment
5189 would not be executed. This is also the place we notice
5190 differences in the return value ABI. Note that it is ok for one
5191 of the functions to have void return type as long as the return
5192 value of the other is passed in a register. */
5193 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
5194 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5196 if (STACK_REG_P (a) || STACK_REG_P (b))
5198 if (!rtx_equal_p (a, b))
5201 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5203       /* Disable sibcall if we need to generate vzeroupper after
	  avx256 calls.  */
5205 if (TARGET_VZEROUPPER
5206 && cfun->machine->callee_return_avx256_p
5207 && !cfun->machine->caller_return_avx256_p)
5210 else if (!rtx_equal_p (a, b))
5215 /* The SYSV ABI has more call-clobbered registers;
5216 disallow sibcalls from MS to SYSV. */
5217 if (cfun->machine->call_abi == MS_ABI
5218 && ix86_function_type_abi (type) == SYSV_ABI)
5223 /* If this call is indirect, we'll need to be able to use a
5224 call-clobbered register for the address of the target function.
5225 Make sure that all such registers are not used for passing
5226 parameters. Note that DLLIMPORT functions are indirect. */
5228 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
5230 if (ix86_function_regparm (type, NULL) >= 3)
5232 /* ??? Need to count the actual number of registers to be used,
5233 not the possible number of registers. Fix later. */
5239 /* Otherwise okay. That also includes certain types of indirect calls. */
5243 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
5244 and "sseregparm" calling convention attributes;
5245 arguments as in struct attribute_spec.handler. */
5248 ix86_handle_cconv_attribute (tree *node, tree name,
5250 int flags ATTRIBUTE_UNUSED,
5253 if (TREE_CODE (*node) != FUNCTION_TYPE
5254 && TREE_CODE (*node) != METHOD_TYPE
5255 && TREE_CODE (*node) != FIELD_DECL
5256 && TREE_CODE (*node) != TYPE_DECL)
5258 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5260 *no_add_attrs = true;
5264   /* Can combine regparm with all attributes but fastcall and thiscall.  */
5265 if (is_attribute_p ("regparm", name))
5269 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5271 error ("fastcall and regparm attributes are not compatible");
5274 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5276 error ("regparam and thiscall attributes are not compatible");
5279 cst = TREE_VALUE (args);
5280 if (TREE_CODE (cst) != INTEGER_CST)
5282 warning (OPT_Wattributes,
5283 "%qE attribute requires an integer constant argument",
5285 *no_add_attrs = true;
5287 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
5289 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
5291 *no_add_attrs = true;
5299 /* Do not warn when emulating the MS ABI. */
5300 if ((TREE_CODE (*node) != FUNCTION_TYPE
5301 && TREE_CODE (*node) != METHOD_TYPE)
5302 || ix86_function_type_abi (*node) != MS_ABI)
5303 warning (OPT_Wattributes, "%qE attribute ignored",
5305 *no_add_attrs = true;
5309 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
5310 if (is_attribute_p ("fastcall", name))
5312 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5314 error ("fastcall and cdecl attributes are not compatible");
5316 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5318 error ("fastcall and stdcall attributes are not compatible");
5320 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
5322 error ("fastcall and regparm attributes are not compatible");
5324 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5326 error ("fastcall and thiscall attributes are not compatible");
5330   /* Can combine stdcall with fastcall (redundant), regparm and sseregparm.  */
5332 else if (is_attribute_p ("stdcall", name))
5334 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5336 error ("stdcall and cdecl attributes are not compatible");
5338 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5340 error ("stdcall and fastcall attributes are not compatible");
5342 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5344 error ("stdcall and thiscall attributes are not compatible");
5348 /* Can combine cdecl with regparm and sseregparm. */
5349 else if (is_attribute_p ("cdecl", name))
5351 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5353 error ("stdcall and cdecl attributes are not compatible");
5355 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5357 error ("fastcall and cdecl attributes are not compatible");
5359 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5361 error ("cdecl and thiscall attributes are not compatible");
5364 else if (is_attribute_p ("thiscall", name))
5366 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
5367	warning (OPT_Wattributes, "%qE attribute is used for non-class method",
5369 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5371 error ("stdcall and thiscall attributes are not compatible");
5373 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5375 error ("fastcall and thiscall attributes are not compatible");
5377 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5379 error ("cdecl and thiscall attributes are not compatible");
5383 /* Can combine sseregparm with all attributes. */
5388 /* This function determines the calling convention from TYPE.  */
5391 ix86_get_callcvt (const_tree type)
5393 unsigned int ret = 0;
5398 return IX86_CALLCVT_CDECL;
5400 attrs = TYPE_ATTRIBUTES (type);
5401 if (attrs != NULL_TREE)
5403 if (lookup_attribute ("cdecl", attrs))
5404 ret |= IX86_CALLCVT_CDECL;
5405 else if (lookup_attribute ("stdcall", attrs))
5406 ret |= IX86_CALLCVT_STDCALL;
5407 else if (lookup_attribute ("fastcall", attrs))
5408 ret |= IX86_CALLCVT_FASTCALL;
5409 else if (lookup_attribute ("thiscall", attrs))
5410 ret |= IX86_CALLCVT_THISCALL;
5412       /* Regparm isn't allowed for thiscall and fastcall.  */
5413 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
5415 if (lookup_attribute ("regparm", attrs))
5416 ret |= IX86_CALLCVT_REGPARM;
5417 if (lookup_attribute ("sseregparm", attrs))
5418 ret |= IX86_CALLCVT_SSEREGPARM;
5421   if (IX86_BASE_CALLCVT (ret) != 0)
5425 is_stdarg = stdarg_p (type);
5426 if (TARGET_RTD && !is_stdarg)
5427 return IX86_CALLCVT_STDCALL | ret;
5431 || TREE_CODE (type) != METHOD_TYPE
5432 || ix86_function_type_abi (type) != MS_ABI)
5433 return IX86_CALLCVT_CDECL | ret;
5435 return IX86_CALLCVT_THISCALL;
5438 /* Return 0 if the attributes for two types are incompatible, 1 if they
5439 are compatible, and 2 if they are nearly compatible (which causes a
5440 warning to be generated). */
5443 ix86_comp_type_attributes (const_tree type1, const_tree type2)
5445 unsigned int ccvt1, ccvt2;
5447 if (TREE_CODE (type1) != FUNCTION_TYPE
5448 && TREE_CODE (type1) != METHOD_TYPE)
5451 ccvt1 = ix86_get_callcvt (type1);
5452 ccvt2 = ix86_get_callcvt (type2);
5455 if (ix86_function_regparm (type1, NULL)
5456 != ix86_function_regparm (type2, NULL))
5462 /* Return the regparm value for a function with the indicated TYPE and DECL.
5463 DECL may be NULL when calling function indirectly
5464 or considering a libcall. */
5467 ix86_function_regparm (const_tree type, const_tree decl)
5474 return (ix86_function_type_abi (type) == SYSV_ABI
5475 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5476 ccvt = ix86_get_callcvt (type);
5477 regparm = ix86_regparm;
5479 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
5481 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5484 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5488 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5490 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5493 /* Use register calling convention for local functions when possible. */
5495 && TREE_CODE (decl) == FUNCTION_DECL
5497 && !(profile_flag && !flag_fentry))
5499 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5500 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
5501 if (i && i->local && i->can_change_signature)
5503 int local_regparm, globals = 0, regno;
5505 /* Make sure no regparm register is taken by a
5506 fixed register variable. */
5507 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
5508 if (fixed_regs[local_regparm])
5511 /* We don't want to use regparm(3) for nested functions as
5512 these use a static chain pointer in the third argument. */
5513 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
5516 /* In 32-bit mode save a register for the split stack. */
5517 if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
5520	  /* Each fixed register usage increases register pressure,
5521	     so fewer registers should be used for argument passing.
5522	     This functionality can be overridden by an explicit
	     regparm value.  */
5524 for (regno = 0; regno <= DI_REG; regno++)
5525 if (fixed_regs[regno])
5529 = globals < local_regparm ? local_regparm - globals : 0;
5531 if (local_regparm > regparm)
5532 regparm = local_regparm;
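	  /* Example (illustrative): in 32-bit code a global register
	     variable such as "register int g asm ("%ebx");" marks
	     %ebx fixed, so globals becomes 1 and a local function
	     that could otherwise use regparm(3) is limited to
	     regparm(2).  */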
5539 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5540 DFmode (2) arguments in SSE registers for a function with the
5541 indicated TYPE and DECL. DECL may be NULL when calling function
5542 indirectly or considering a libcall. Otherwise return 0. */
5545 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
5547 gcc_assert (!TARGET_64BIT);
5549 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5550 by the sseregparm attribute. */
5551 if (TARGET_SSEREGPARM
5552 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
5559 error ("calling %qD with attribute sseregparm without "
5560 "SSE/SSE2 enabled", decl);
5562 error ("calling %qT with attribute sseregparm without "
5563 "SSE/SSE2 enabled", type);
5571 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5572 (and DFmode for SSE2) arguments in SSE registers. */
5573 if (decl && TARGET_SSE_MATH && optimize
5574 && !(profile_flag && !flag_fentry))
5576 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5577       struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
5578 if (i && i->local && i->can_change_signature)
5579 return TARGET_SSE2 ? 2 : 1;
5585 /* Return true if EAX is live at the start of the function. Used by
5586 ix86_expand_prologue to determine if we need special help before
5587 calling allocate_stack_worker. */
5590 ix86_eax_live_at_start_p (void)
5592 /* Cheat. Don't bother working forward from ix86_function_regparm
5593 to the function type to whether an actual argument is located in
5594 eax. Instead just look at cfg info, which is still close enough
5595 to correct at this point. This gives false positives for broken
5596 functions that might use uninitialized data that happens to be
5597 allocated in eax, but who cares? */
5598 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
5602 ix86_keep_aggregate_return_pointer (tree fntype)
5608 attr = lookup_attribute ("callee_pop_aggregate_return",
5609 TYPE_ATTRIBUTES (fntype));
5611 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
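  /* Usage example (user code, not part of GCC): declaring a function
     type with __attribute__((callee_pop_aggregate_return (0))) makes
     this return true, i.e. the hidden aggregate-return pointer is
     kept and not popped by the callee; an argument of 1 requests the
     callee-pop behavior.  */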
5613   /* For 32-bit MS-ABI the default is to keep the aggregate
     return pointer.  */
5615 if (ix86_function_type_abi (fntype) == MS_ABI)
5618 return KEEP_AGGREGATE_RETURN_POINTER != 0;
5621 /* Value is the number of bytes of arguments automatically
5622 popped when returning from a subroutine call.
5623 FUNDECL is the declaration node of the function (as a tree),
5624 FUNTYPE is the data type of the function (as a tree),
5625 or for a library call it is an identifier node for the subroutine name.
5626 SIZE is the number of bytes of arguments passed on the stack.
5628 On the 80386, the RTD insn may be used to pop them if the number
5629 of args is fixed, but if the number is variable then the caller
5630 must pop them all. RTD can't be used for library calls now
5631 because the library is compiled with the Unix compiler.
5632 Use of RTD is a selectable option, since it is incompatible with
5633 standard Unix calling sequences. If the option is not selected,
5634 the caller must always pop the args.
5636 The attribute stdcall is equivalent to RTD on a per module basis. */
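/* Illustration: for a 32-bit function declared
   "int __attribute__((stdcall)) f (int a, int b);" this hook reports
   8 bytes, so the callee pops its two word-sized arguments with
   "ret $8", while a stdarg function reports 0 and the caller pops.  */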
ix86_return_pops_args (tree fundecl, tree funtype, int size)
  /* None of the 64-bit ABIs pop arguments.  */

  ccvt = ix86_get_callcvt (funtype);

  if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
               | IX86_CALLCVT_THISCALL)) != 0
      && ! stdarg_p (funtype))

  /* Lose any fake structure return argument if it is passed on the stack.  */
  if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
      && !ix86_keep_aggregate_return_pointer (funtype))
      int nregs = ix86_function_regparm (funtype, fundecl);
        return GET_MODE_SIZE (Pmode);

/* Argument support functions.  */

/* Return true when register may be used to pass function parameters.  */

ix86_function_arg_regno_p (int regno)
  const int *parm_regs;

    return (regno < REGPARM_MAX
            || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));

    return (regno < REGPARM_MAX
            || (TARGET_MMX && MMX_REGNO_P (regno)
                && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
            || (TARGET_SSE && SSE_REGNO_P (regno)
                && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
  if (SSE_REGNO_P (regno) && TARGET_SSE)

  if (TARGET_SSE && SSE_REGNO_P (regno)
      && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))

  /* TODO: The function should depend on the current function ABI, but
     builtins.c would need updating then.  Therefore we use the
     default ABI.  */

  /* RAX is used as hidden argument to va_arg functions.  */
  if (ix86_abi == SYSV_ABI && regno == AX_REG)

  if (ix86_abi == MS_ABI)
    parm_regs = x86_64_ms_abi_int_parameter_registers;
    parm_regs = x86_64_int_parameter_registers;
  for (i = 0; i < (ix86_abi == MS_ABI
                   ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
    if (regno == parm_regs[i])

/* Return true if we do not know how to pass TYPE solely in registers.  */

ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
  if (must_pass_in_stack_var_size_or_pad (mode, type))

  /* For 32-bit, we want TImode aggregates to go on the stack.  But watch out!
     The layout_type routine is crafty and tries to trick us into passing
     currently unsupported vector types on the stack by using TImode.  */
  return (!TARGET_64BIT && mode == TImode
          && type && TREE_CODE (type) != VECTOR_TYPE);
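/* An illustrative consequence: a 16-byte aggregate that layout_type
   gives TImode is forced onto the stack by the check above, while a
   genuine 16-byte VECTOR_TYPE may still travel in an SSE register.  */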
/* Return the size, in bytes, of the area reserved for arguments passed
   in registers for the function represented by FNDECL, depending on the
   ABI used.  */

ix86_reg_parm_stack_space (const_tree fndecl)
  enum calling_abi call_abi = SYSV_ABI;
  if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
    call_abi = ix86_function_abi (fndecl);
    call_abi = ix86_function_type_abi (fndecl);
  if (TARGET_64BIT && call_abi == MS_ABI)

/* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
   call ABI used.  */

ix86_function_type_abi (const_tree fntype)
  if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
      enum calling_abi abi = ix86_abi;
      if (abi == SYSV_ABI)
          if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
      else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))

ix86_function_ms_hook_prologue (const_tree fn)
  if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
      if (decl_function_context (fn) != NULL_TREE)
        error_at (DECL_SOURCE_LOCATION (fn),
                  "ms_hook_prologue is not compatible with nested function");

static enum calling_abi
ix86_function_abi (const_tree fndecl)
  return ix86_function_type_abi (TREE_TYPE (fndecl));

/* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
   call ABI used.  */

ix86_cfun_abi (void)
  return cfun->machine->call_abi;

/* Write the extra assembler code needed to declare a function properly.  */

ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
  bool is_ms_hook = ix86_function_ms_hook_prologue (decl);

      int i, filler_count = (TARGET_64BIT ? 32 : 16);
      unsigned int filler_cc = 0xcccccccc;

      for (i = 0; i < filler_count; i += 4)
        fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);

#ifdef SUBTARGET_ASM_UNWIND_INIT
  SUBTARGET_ASM_UNWIND_INIT (asm_out_file);

  ASM_OUTPUT_LABEL (asm_out_file, fname);

  /* Output magic byte marker, if hot-patch attribute is set.  */
          /* leaq [%rsp + 0], %rsp  */
          asm_fprintf (asm_out_file, ASM_BYTE
                       "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
          /* movl.s %edi, %edi
             movl.s %esp, %ebp  */
          asm_fprintf (asm_out_file, ASM_BYTE
                       "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
extern void init_regs (void);

/* Implementation of the call-ABI-switching target hook.  The call
   register sets specific to FNDECL are set up here.  See also
   ix86_conditional_register_usage for more details.  */

ix86_call_abi_override (const_tree fndecl)
  if (fndecl == NULL_TREE)
    cfun->machine->call_abi = ix86_abi;
    cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));

/* The 64-bit MS and SYSV ABIs have different sets of call-used registers.
   Avoid expensive re-initialization of init_regs each time we switch
   function context, since this is needed only during RTL expansion.  */

ix86_maybe_switch_abi (void)
      call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is 0.  */

init_cumulative_args (CUMULATIVE_ARGS *cum,  /* Argument info to initialize */
                      tree fntype,   /* tree ptr for function decl */
                      rtx libname,   /* SYMBOL_REF of library name or 0 */
  struct cgraph_local_info *i;

  memset (cum, 0, sizeof (*cum));

  /* Initialize for the current callee.  */
      cfun->machine->callee_pass_avx256_p = false;
      cfun->machine->callee_return_avx256_p = false;

      i = cgraph_local_info (fndecl);
      cum->call_abi = ix86_function_abi (fndecl);
      fnret_type = TREE_TYPE (TREE_TYPE (fndecl));
      cum->call_abi = ix86_function_type_abi (fntype);
        fnret_type = TREE_TYPE (fntype);

  if (TARGET_VZEROUPPER && fnret_type)
      rtx fnret_value = ix86_function_value (fnret_type, fntype,
      if (function_pass_avx256_p (fnret_value))
          /* The return value of this function uses 256bit AVX modes.  */
            cfun->machine->callee_return_avx256_p = true;
            cfun->machine->caller_return_avx256_p = true;

  cum->caller = caller;

  /* Set up the number of registers to use for passing arguments.  */

  if (TARGET_64BIT && cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
    sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
           "or subtarget optimization implying it");
  cum->nregs = ix86_regparm;
      cum->nregs = (cum->call_abi == SYSV_ABI
                    ? X86_64_REGPARM_MAX
                    : X86_64_MS_REGPARM_MAX);
  cum->sse_nregs = SSE_REGPARM_MAX;
      cum->sse_nregs = (cum->call_abi == SYSV_ABI
                        ? X86_64_SSE_REGPARM_MAX
                        : X86_64_MS_SSE_REGPARM_MAX);
  cum->mmx_nregs = MMX_REGPARM_MAX;
  cum->warn_avx = true;
  cum->warn_sse = true;
  cum->warn_mmx = true;

  /* Because types might mismatch between caller and callee, we need to
     use the actual type of the function for local calls.
     FIXME: cgraph_analyze can be told to actually record if a function
     uses va_start, so for local functions maybe_vaarg can be made more
     aggressive.
     FIXME: once the type system is fixed, we won't need this code
     anymore.  */
  if (i && i->local && i->can_change_signature)
    fntype = TREE_TYPE (fndecl);
  cum->maybe_vaarg = (fntype
                      ? (!prototype_p (fntype) || stdarg_p (fntype))

      /* If there are variable arguments, then we won't pass anything
         in registers in 32-bit mode.  */
      if (stdarg_p (fntype))

      /* Use ecx and edx registers if function has fastcall attribute,
         else look for regparm information.  */
          unsigned int ccvt = ix86_get_callcvt (fntype);
          if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
              cum->fastcall = 1;  /* Same first register as in fastcall.  */
          else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
            cum->nregs = ix86_function_regparm (fntype, fndecl);

      /* Set up the number of SSE registers used for passing SFmode
         and DFmode arguments.  Warn for mismatching ABI.  */
      cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
/* Return the "natural" mode for TYPE.  In most cases, this is just TYPE_MODE.
   But in the case of vector types, it is some vector mode.

   When we have only some of our vector isa extensions enabled, then there
   are some modes for which vector_mode_supported_p is false.  For these
   modes, the generic vector support in gcc will choose some non-vector mode
   in order to implement the type.  By computing the natural mode, we'll
   select the proper ABI location for the operand and not depend on whatever
   the middle-end decides to do with these vector types.

   The middle-end can't deal with vector types larger than 16 bytes.  In
   this case, we return the original mode and warn about the ABI change if
   CUM isn't NULL.  */

static enum machine_mode
type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum)
  enum machine_mode mode = TYPE_MODE (type);

  if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if ((size == 8 || size == 16 || size == 32)
          /* ??? Generic code allows us to create width 1 vectors.  Ignore.  */
          && TYPE_VECTOR_SUBPARTS (type) > 1)
          enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));

          if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
            mode = MIN_MODE_VECTOR_FLOAT;
            mode = MIN_MODE_VECTOR_INT;

          /* Get the mode which has this inner mode and number of units.  */
          for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
            if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
                && GET_MODE_INNER (mode) == innermode)
                if (size == 32 && !TARGET_AVX)
                    static bool warnedavx;
                        warning (0, "AVX vector argument without AVX "
                                 "enabled changes the ABI");
                    return TYPE_MODE (type);
/* We want to pass a value in REGNO whose "natural" mode is MODE.  However,
   this may not agree with the mode that the type system has chosen for the
   register, which is ORIG_MODE.  If ORIG_MODE is not BLKmode, then we can
   go ahead and use it.  Otherwise we have to build a PARALLEL instead.  */

gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
  if (orig_mode != BLKmode)
    tmp = gen_rtx_REG (orig_mode, regno);
      tmp = gen_rtx_REG (mode, regno);
      tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
      tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));

/* x86-64 register passing implementation.  See x86-64 ABI for details.  Goal
   of this code is to classify each 8bytes of incoming argument by the register
   class and assign registers accordingly.  */

/* Return the union class of CLASS1 and CLASS2.
   See the x86-64 PS ABI for details.  */
static enum x86_64_reg_class
merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
  /* Rule #1: If both classes are equal, this is the resulting class.  */
  if (class1 == class2)

  /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
     the other class.  */
  if (class1 == X86_64_NO_CLASS)
  if (class2 == X86_64_NO_CLASS)

  /* Rule #3: If one of the classes is MEMORY, the result is MEMORY.  */
  if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
    return X86_64_MEMORY_CLASS;

  /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.  */
  if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
      || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
    return X86_64_INTEGERSI_CLASS;
  if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
      || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
    return X86_64_INTEGER_CLASS;

  /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
     MEMORY is used.  */
  if (class1 == X86_64_X87_CLASS
      || class1 == X86_64_X87UP_CLASS
      || class1 == X86_64_COMPLEX_X87_CLASS
      || class2 == X86_64_X87_CLASS
      || class2 == X86_64_X87UP_CLASS
      || class2 == X86_64_COMPLEX_X87_CLASS)
    return X86_64_MEMORY_CLASS;

  /* Rule #6: Otherwise class SSE is used.  */
  return X86_64_SSE_CLASS;
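/* Worked example (for illustration): in

     struct s { int i; float f; };

   the single eightbyte contains an INTEGERSI field and an SSESF field;
   rule #4 merges them to INTEGERSI, so the whole struct is passed in
   one general-purpose register.  */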
/* Classify the argument of type TYPE and mode MODE.
   CLASSES will be filled by the register class used to pass each word
   of the operand.  The number of words is returned.  In case the parameter
   should be passed in memory, 0 is returned.  As a special case for zero
   sized containers, classes[0] will be NO_CLASS and 1 is returned.

   BIT_OFFSET is used internally for handling records and specifies the
   offset in bits modulo 256 to avoid overflow cases.

   See the x86-64 PS ABI for details.  */
classify_argument (enum machine_mode mode, const_tree type,
                   enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
  HOST_WIDE_INT bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words
    = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  /* Variable sized entities are always passed/returned in memory.  */

  if (mode != VOIDmode
      && targetm.calls.must_pass_in_stack (mode, type))

  if (type && AGGREGATE_TYPE_P (type))
      enum x86_64_reg_class subclasses[MAX_CLASSES];

      /* On x86-64 we pass structures larger than 32 bytes on the stack.  */

      for (i = 0; i < words; i++)
        classes[i] = X86_64_NO_CLASS;

      /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
         signal the memory class, so handle it as a special case.  */
          classes[0] = X86_64_NO_CLASS;

      /* Classify each field of record and merge classes.  */
      switch (TREE_CODE (type))
          /* And now merge the fields of structure.  */
          for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
              if (TREE_CODE (field) == FIELD_DECL)
                  if (TREE_TYPE (field) == error_mark_node)

                  /* Bitfields are always classified as integer.  Handle them
                     early, since later code would consider them to be
                     misaligned integers.  */
                  if (DECL_BIT_FIELD (field))
                      for (i = (int_bit_position (field)
                                + (bit_offset % 64)) / 8 / 8;
                           i < ((int_bit_position (field) + (bit_offset % 64))
                                + tree_low_cst (DECL_SIZE (field), 0)
                          merge_classes (X86_64_INTEGER_CLASS,

                      type = TREE_TYPE (field);

                      /* Flexible array member is ignored.  */
                      if (TYPE_MODE (type) == BLKmode
                          && TREE_CODE (type) == ARRAY_TYPE
                          && TYPE_SIZE (type) == NULL_TREE
                          && TYPE_DOMAIN (type) != NULL_TREE
                          && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
                              if (!warned && warn_psabi)
                                  inform (input_location,
                                          "the ABI of passing struct with"
                                          " a flexible array member has"
                                          " changed in GCC 4.4");
                      num = classify_argument (TYPE_MODE (type), type,
                                               (int_bit_position (field)
                                                + bit_offset) % 256);
                      pos = (int_bit_position (field)
                             + (bit_offset % 64)) / 8 / 8;
                      for (i = 0; i < num && (i + pos) < words; i++)
                          merge_classes (subclasses[i], classes[i + pos]);

          /* Arrays are handled as small records.  */
          num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
                                   TREE_TYPE (type), subclasses, bit_offset);

          /* The partial classes are now full classes.  */
          if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
            subclasses[0] = X86_64_SSE_CLASS;
          if (subclasses[0] == X86_64_INTEGERSI_CLASS
              && !((bit_offset % 64) == 0 && bytes == 4))
            subclasses[0] = X86_64_INTEGER_CLASS;

          for (i = 0; i < words; i++)
            classes[i] = subclasses[i % num];
        case QUAL_UNION_TYPE:
          /* Unions are similar to RECORD_TYPE but offset is always 0.  */
          for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
              if (TREE_CODE (field) == FIELD_DECL)
                  if (TREE_TYPE (field) == error_mark_node)

                  num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
                                           TREE_TYPE (field), subclasses,
                  for (i = 0; i < num; i++)
                    classes[i] = merge_classes (subclasses[i], classes[i]);

      /* When size > 16 bytes, if the first one isn't
         X86_64_SSE_CLASS or any other ones aren't
         X86_64_SSEUP_CLASS, everything should be passed in
         memory.  */
      if (classes[0] != X86_64_SSE_CLASS)

      for (i = 1; i < words; i++)
        if (classes[i] != X86_64_SSEUP_CLASS)

      /* Final merger cleanup.  */
      for (i = 0; i < words; i++)
          /* If one class is MEMORY, everything should be passed in
             memory.  */
          if (classes[i] == X86_64_MEMORY_CLASS)

          /* The X86_64_SSEUP_CLASS should be always preceded by
             X86_64_SSE_CLASS or X86_64_SSEUP_CLASS.  */
          if (classes[i] == X86_64_SSEUP_CLASS
              && classes[i - 1] != X86_64_SSE_CLASS
              && classes[i - 1] != X86_64_SSEUP_CLASS)
              /* The first one should never be X86_64_SSEUP_CLASS.  */
              gcc_assert (i != 0);
              classes[i] = X86_64_SSE_CLASS;

          /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
             everything should be passed in memory.  */
          if (classes[i] == X86_64_X87UP_CLASS
              && (classes[i - 1] != X86_64_X87_CLASS))
              /* The first one should never be X86_64_X87UP_CLASS.  */
              gcc_assert (i != 0);
              if (!warned && warn_psabi)
                  inform (input_location,
                          "the ABI of passing union with long double"
                          " has changed in GCC 4.4");
  /* Compute alignment needed.  We align all types to natural boundaries
     with the exception of XFmode, which is aligned to 64 bits.  */
  if (mode != VOIDmode && mode != BLKmode)
      int mode_alignment = GET_MODE_BITSIZE (mode);

        mode_alignment = 128;
      else if (mode == XCmode)
        mode_alignment = 256;
      if (COMPLEX_MODE_P (mode))
        mode_alignment /= 2;
      /* Misaligned fields are always returned in memory.  */
      if (bit_offset % mode_alignment)

  /* For V1xx modes, just use the base mode.  */
  if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
      && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
    mode = GET_MODE_INNER (mode);

  /* Classification of atomic types.  */
      classes[0] = X86_64_SSE_CLASS;
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
        int size = (bit_offset % 64) + (int) GET_MODE_BITSIZE (mode);

            classes[0] = X86_64_INTEGERSI_CLASS;
        else if (size <= 64)
            classes[0] = X86_64_INTEGER_CLASS;
        else if (size <= 64+32)
            classes[0] = X86_64_INTEGER_CLASS;
            classes[1] = X86_64_INTEGERSI_CLASS;
        else if (size <= 64+64)
            classes[0] = classes[1] = X86_64_INTEGER_CLASS;

      classes[0] = classes[1] = X86_64_INTEGER_CLASS;

      /* OImode shouldn't be used directly.  */

      if (!(bit_offset % 64))
        classes[0] = X86_64_SSESF_CLASS;
        classes[0] = X86_64_SSE_CLASS;
      classes[0] = X86_64_SSEDF_CLASS;
      classes[0] = X86_64_X87_CLASS;
      classes[1] = X86_64_X87UP_CLASS;
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      classes[0] = X86_64_SSE_CLASS;
      if (!(bit_offset % 64))
          if (!warned && warn_psabi)
              inform (input_location,
                      "the ABI of passing structure with complex float"
                      " member has changed in GCC 4.4");
          classes[1] = X86_64_SSESF_CLASS;
      classes[0] = X86_64_SSEDF_CLASS;
      classes[1] = X86_64_SSEDF_CLASS;
      classes[0] = X86_64_COMPLEX_X87_CLASS;
      /* This mode is larger than 16 bytes.  */

      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      classes[2] = X86_64_SSEUP_CLASS;
      classes[3] = X86_64_SSEUP_CLASS;
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      classes[0] = X86_64_SSE_CLASS;
      gcc_assert (VECTOR_MODE_P (mode));
      gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);

      if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
        classes[0] = X86_64_INTEGERSI_CLASS;
        classes[0] = X86_64_INTEGER_CLASS;
      classes[1] = X86_64_INTEGER_CLASS;
      return 1 + (bytes > 8);
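/* Illustrative classifications under the rules above (SysV 64-bit):

     struct { double a, b; }       -> { SSEDF, SSEDF }: two SSE registers.
     struct { long l; double d; }  -> { INTEGER, SSEDF }: one GP register
                                      and one SSE register.
     struct { char c[24]; }        -> larger than 16 bytes and not
                                      all-SSE, so passed in memory.  */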
/* Examine the argument and return the number of registers required in
   each class.  Return 0 iff the parameter should be passed in memory.  */

examine_argument (enum machine_mode mode, const_tree type, int in_return,
                  int *int_nregs, int *sse_nregs)
  enum x86_64_reg_class regclass[MAX_CLASSES];
  int n = classify_argument (mode, type, regclass, 0);

  for (n--; n >= 0; n--)
    switch (regclass[n])
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
      case X86_64_SSE_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
      case X86_64_NO_CLASS:
      case X86_64_SSEUP_CLASS:
      case X86_64_X87_CLASS:
      case X86_64_X87UP_CLASS:
      case X86_64_COMPLEX_X87_CLASS:
        return in_return ? 2 : 0;
      case X86_64_MEMORY_CLASS:
/* Construct container for the argument used by GCC interface.  See
   FUNCTION_ARG for the detailed description.  */

construct_container (enum machine_mode mode, enum machine_mode orig_mode,
                     const_tree type, int in_return, int nintregs, int nsseregs,
                     const int *intreg, int sse_regno)
  /* The following variables hold the static issued_error state.  */
  static bool issued_sse_arg_error;
  static bool issued_sse_ret_error;
  static bool issued_x87_ret_error;

  enum machine_mode tmpmode;
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  enum x86_64_reg_class regclass[MAX_CLASSES];
  int needed_sseregs, needed_intregs;
  rtx exp[MAX_CLASSES];

  n = classify_argument (mode, type, regclass, 0);
  if (!examine_argument (mode, type, in_return, &needed_intregs,
  if (needed_intregs > nintregs || needed_sseregs > nsseregs)

  /* We allowed the user to turn off SSE for kernel mode.  Don't crash if
     some less clueful developer tries to use floating-point anyway.  */
  if (needed_sseregs && !TARGET_SSE)
          if (!issued_sse_ret_error)
              error ("SSE register return with SSE disabled");
              issued_sse_ret_error = true;
      else if (!issued_sse_arg_error)
          error ("SSE register argument with SSE disabled");
          issued_sse_arg_error = true;

  /* Likewise, error if the ABI requires us to return values in the
     x87 registers and the user specified -mno-80387.  */
  if (!TARGET_80387 && in_return)
    for (i = 0; i < n; i++)
      if (regclass[i] == X86_64_X87_CLASS
          || regclass[i] == X86_64_X87UP_CLASS
          || regclass[i] == X86_64_COMPLEX_X87_CLASS)
          if (!issued_x87_ret_error)
              error ("x87 register return with x87 disabled");
              issued_x87_ret_error = true;

  /* First construct simple cases.  Avoid SCmode, since we want to use
     a single register to pass this type.  */
  if (n == 1 && mode != SCmode)
    switch (regclass[0])
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
        return gen_rtx_REG (mode, intreg[0]);
      case X86_64_SSE_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
        if (mode != BLKmode)
          return gen_reg_or_parallel (mode, orig_mode,
                                      SSE_REGNO (sse_regno));
      case X86_64_X87_CLASS:
      case X86_64_COMPLEX_X87_CLASS:
        return gen_rtx_REG (mode, FIRST_STACK_REG);
      case X86_64_NO_CLASS:
        /* Zero sized array, struct or class.  */
  if (n == 2 && regclass[0] == X86_64_SSE_CLASS
      && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
    return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
      && regclass[0] == X86_64_SSE_CLASS
      && regclass[1] == X86_64_SSEUP_CLASS
      && regclass[2] == X86_64_SSEUP_CLASS
      && regclass[3] == X86_64_SSEUP_CLASS
    return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
      && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
    return gen_rtx_REG (XFmode, FIRST_STACK_REG);
  if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
      && regclass[1] == X86_64_INTEGER_CLASS
      && (mode == CDImode || mode == TImode || mode == TFmode)
      && intreg[0] + 1 == intreg[1])
    return gen_rtx_REG (mode, intreg[0]);

  /* Otherwise figure out the entries of the PARALLEL.  */
  for (i = 0; i < n; i++)
      switch (regclass[i])
          case X86_64_NO_CLASS:
          case X86_64_INTEGER_CLASS:
          case X86_64_INTEGERSI_CLASS:
            /* Merge TImodes on aligned occasions here too.  */
            if (i * 8 + 8 > bytes)
              tmpmode
                = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
            else if (regclass[i] == X86_64_INTEGERSI_CLASS)

            /* We've requested 24 bytes we don't have a mode for.
               Use DImode.  */
            if (tmpmode == BLKmode)
            exp[nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
                                              gen_rtx_REG (tmpmode, *intreg),
          case X86_64_SSESF_CLASS:
            exp[nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
                                              gen_rtx_REG (SFmode,
                                                           SSE_REGNO (sse_regno)),
          case X86_64_SSEDF_CLASS:
            exp[nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
                                              gen_rtx_REG (DFmode,
                                                           SSE_REGNO (sse_regno)),
          case X86_64_SSE_CLASS:
            if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
                        && regclass[1] == X86_64_SSEUP_CLASS
                        && regclass[2] == X86_64_SSEUP_CLASS
                        && regclass[3] == X86_64_SSEUP_CLASS);
            exp[nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
                                              gen_rtx_REG (tmpmode,
                                                           SSE_REGNO (sse_regno)),

  /* Empty aligned struct, union or class.  */

  ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
  for (i = 0; i < nexps; i++)
    XVECEXP (ret, 0, i) = exp[i];
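/* Worked example (illustrative): for struct { int i; double d; } the
   classification is { INTEGERSI, SSEDF }, so the loop above builds a
   two-entry PARALLEL: an SImode entry in the next integer register at
   byte offset 0 and a DFmode entry in the next SSE register at byte
   offset 8.  */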
/* Update the data in CUM to advance over an argument of mode MODE
   and data type TYPE.  (TYPE is null for libcalls where that information
   may not be available.)  */

function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
                         const_tree type, HOST_WIDE_INT bytes,
                         HOST_WIDE_INT words)
      cum->words += words;
      cum->nregs -= words;
      cum->regno += words;

      if (cum->nregs <= 0)

      /* OImode shouldn't be used directly.  */

      if (cum->float_in_sse < 2)
      if (cum->float_in_sse < 1)

      if (!type || !AGGREGATE_TYPE_P (type))
          cum->sse_words += words;
          cum->sse_nregs -= 1;
          cum->sse_regno += 1;
          if (cum->sse_nregs <= 0)

      if (!type || !AGGREGATE_TYPE_P (type))
          cum->mmx_words += words;
          cum->mmx_nregs -= 1;
          cum->mmx_regno += 1;
          if (cum->mmx_nregs <= 0)

function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
                         const_tree type, HOST_WIDE_INT words, bool named)
  int int_nregs, sse_nregs;

  /* Unnamed 256bit vector mode parameters are passed on stack.  */
  if (!named && VALID_AVX256_REG_MODE (mode))

  if (examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
      && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
      cum->nregs -= int_nregs;
      cum->sse_nregs -= sse_nregs;
      cum->regno += int_nregs;
      cum->sse_regno += sse_nregs;
      int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
      cum->words = (cum->words + align - 1) & ~(align - 1);
      cum->words += words;
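/* For instance (illustrative): a 32-byte AVX argument has a 256-bit
   boundary, so ALIGN is 4 words; with cum->words == 3 the rounding
   above yields 4, and the argument then occupies words 4-7.  */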
function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
                            HOST_WIDE_INT words)
  /* Otherwise, this should be passed indirectly.  */
  gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);

  cum->words += words;

/* Update the data in CUM to advance over an argument of mode MODE and
   data type TYPE.  (TYPE is null for libcalls where that information
   may not be available.)  */

ix86_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
                           const_tree type, bool named)
  HOST_WIDE_INT bytes, words;

  if (mode == BLKmode)
    bytes = int_size_in_bytes (type);
    bytes = GET_MODE_SIZE (mode);
  words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

    mode = type_natural_mode (type, NULL);

  if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
    function_arg_advance_ms_64 (cum, bytes, words);
  else if (TARGET_64BIT)
    function_arg_advance_64 (cum, mode, type, words, named);
    function_arg_advance_32 (cum, mode, type, bytes, words);
/* Define where to put the arguments to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
   This is null for libcalls where that information may not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
   the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
   (otherwise it is an extra parameter matching an ellipsis).  */

function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
                 enum machine_mode orig_mode, const_tree type,
                 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
  static bool warnedsse, warnedmmx;

  /* Avoid the AL settings for the Unix64 ABI.  */
  if (mode == VOIDmode)

      if (words <= cum->nregs)
          int regno = cum->regno;

          /* Fastcall allocates the first two DWORD (SImode) or
             smaller arguments to ECX and EDX if it isn't an
             aggregate type.  */
              || (type && AGGREGATE_TYPE_P (type)))
              /* ECX, not EAX, is the first allocated register.  */
              if (regno == AX_REG)
          return gen_rtx_REG (mode, regno);

      if (cum->float_in_sse < 2)
      if (cum->float_in_sse < 1)

      /* In 32bit, we pass TImode in xmm registers.  */
      if (!type || !AGGREGATE_TYPE_P (type))
          if (!TARGET_SSE && !warnedsse && cum->warn_sse)
              warning (0, "SSE vector argument without SSE enabled "
          return gen_reg_or_parallel (mode, orig_mode,
                                      cum->sse_regno + FIRST_SSE_REG);

      /* OImode shouldn't be used directly.  */

      if (!type || !AGGREGATE_TYPE_P (type))
          return gen_reg_or_parallel (mode, orig_mode,
                                      cum->sse_regno + FIRST_SSE_REG);

      if (!type || !AGGREGATE_TYPE_P (type))
          if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
              warning (0, "MMX vector argument without MMX enabled "
          return gen_reg_or_parallel (mode, orig_mode,
                                      cum->mmx_regno + FIRST_MMX_REG);
function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
                 enum machine_mode orig_mode, const_tree type, bool named)
  /* Handle a hidden AL argument containing the number of SSE registers
     used by varargs x86-64 functions.  */
  if (mode == VOIDmode)
    return GEN_INT (cum->maybe_vaarg
                    ? (cum->sse_nregs < 0
                       ? X86_64_SSE_REGPARM_MAX

  /* Unnamed 256bit vector mode parameters are passed on stack.  */

  return construct_container (mode, orig_mode, type, 0, cum->nregs,
                              &x86_64_int_parameter_registers[cum->regno],

function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
                    enum machine_mode orig_mode, bool named,
                    HOST_WIDE_INT bytes)
  /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
     We use the value of -2 to specify that the current function call
     is MSABI.  */
  if (mode == VOIDmode)
    return GEN_INT (-2);

  /* If we've run out of registers, it goes on the stack.  */
  if (cum->nregs == 0)

  regno = x86_64_ms_abi_int_parameter_registers[cum->regno];

  /* Only floating point modes are passed in anything but integer regs.  */
  if (TARGET_SSE && (mode == SFmode || mode == DFmode))
      regno = cum->regno + FIRST_SSE_REG;
          /* Unnamed floating parameters are passed in both the
             SSE and integer registers.  */
          t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
          t2 = gen_rtx_REG (mode, regno);
          t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
          t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
          return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));

  /* Handle aggregate types passed in registers.  */
  if (orig_mode == BLKmode)
      if (bytes > 0 && bytes <= 8)
        mode = (bytes > 4 ? DImode : SImode);
      if (mode == BLKmode)

  return gen_reg_or_parallel (mode, orig_mode, regno);
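/* Illustrative mapping for the first four MS-ABI argument slots:
   integer and pointer arguments use rcx, rdx, r8, r9 while floating
   arguments use xmm0-xmm3, each argument consuming its positional slot
   in both register files.  */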
/* Return where to put the arguments to a function.
   Return zero to push the argument on the stack, or a hard register
   in which to store the argument.

   MODE is the argument's machine mode.  TYPE is the data type of the
   argument.  It is null for libcalls where that information may not be
   available.  CUM gives information about the preceding args and about
   the function being called.  NAMED is nonzero if this argument is a
   named parameter (otherwise it is an extra parameter matching an
   ellipsis).  */

ix86_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
                   const_tree type, bool named)
  enum machine_mode mode = omode;
  HOST_WIDE_INT bytes, words;

  if (mode == BLKmode)
    bytes = int_size_in_bytes (type);
    bytes = GET_MODE_SIZE (mode);
  words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  /* To simplify the code below, represent vector types with a vector mode
     even if MMX/SSE are not active.  */
  if (type && TREE_CODE (type) == VECTOR_TYPE)
    mode = type_natural_mode (type, cum);

  if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
    arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
  else if (TARGET_64BIT)
    arg = function_arg_64 (cum, mode, omode, type, named);
    arg = function_arg_32 (cum, mode, omode, type, bytes, words);

  if (TARGET_VZEROUPPER && function_pass_avx256_p (arg))
      /* This argument uses 256bit AVX modes.  */
        cfun->machine->callee_pass_avx256_p = true;
        cfun->machine->caller_pass_avx256_p = true;

/* A C expression that indicates when an argument must be passed by
   reference.  If nonzero for an argument, a copy of that argument is
   made in memory and a pointer to the argument is passed instead of
   the argument itself.  The pointer is passed in whatever way is
   appropriate for passing a pointer to that type.  */

ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
                        enum machine_mode mode ATTRIBUTE_UNUSED,
                        const_tree type, bool named ATTRIBUTE_UNUSED)
  /* See Windows x64 Software Convention.  */
  if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
      int msize = (int) GET_MODE_SIZE (mode);

      /* Arrays are passed by reference.  */
      if (TREE_CODE (type) == ARRAY_TYPE)

      if (AGGREGATE_TYPE_P (type))
          /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
             are passed by reference.  */
          msize = int_size_in_bytes (type);

      /* __m128 is passed by reference.  */
        case 1: case 2: case 4: case 8:

  else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
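/* Under the MS convention checked above (illustrative): a 12-byte
   struct or a 16-byte __m128 argument is passed by reference via a
   pointer in its argument slot, while 1-, 2-, 4- and 8-byte aggregates
   travel by value.  */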
/* Return true when TYPE should be 128bit aligned for 32bit argument
   passing ABI.  XXX: This function is obsolete and is only used for
   checking psABI compatibility with previous versions of GCC.  */

ix86_compat_aligned_value_p (const_tree type)
  enum machine_mode mode = TYPE_MODE (type);
  if (((TARGET_SSE && SSE_REG_MODE_P (mode))
      && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
  if (TYPE_ALIGN (type) < 128)

  if (AGGREGATE_TYPE_P (type))
      /* Walk the aggregates recursively.  */
      switch (TREE_CODE (type))
        case QUAL_UNION_TYPE:
            /* Walk all the structure fields.  */
            for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
                if (TREE_CODE (field) == FIELD_DECL
                    && ix86_compat_aligned_value_p (TREE_TYPE (field)))

          /* Just for use if some languages pass arrays by value.  */
          if (ix86_compat_aligned_value_p (TREE_TYPE (type)))

/* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
   XXX: This function is obsolete and is only used for checking psABI
   compatibility with previous versions of GCC.  */

ix86_compat_function_arg_boundary (enum machine_mode mode,
                                   const_tree type, unsigned int align)
  /* In 32bit, only _Decimal128 and __float128 are aligned to their
     natural boundaries.  */
  if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
      /* i386 ABI defines all arguments to be 4 byte aligned.  We have to
         make an exception for SSE modes since these require 128bit
         alignment.

         The handling here differs from field_alignment.  ICC aligns MMX
         arguments to 4 byte boundaries, while structure fields are aligned
         to 8 byte boundaries.  */
          if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
            align = PARM_BOUNDARY;
          if (!ix86_compat_aligned_value_p (type))
            align = PARM_BOUNDARY;
  if (align > BIGGEST_ALIGNMENT)
    align = BIGGEST_ALIGNMENT;

/* Return true when TYPE should be 128bit aligned for 32bit argument
   passing ABI.  */

ix86_contains_aligned_value_p (const_tree type)
  enum machine_mode mode = TYPE_MODE (type);

  if (mode == XFmode || mode == XCmode)

  if (TYPE_ALIGN (type) < 128)

  if (AGGREGATE_TYPE_P (type))
      /* Walk the aggregates recursively.  */
      switch (TREE_CODE (type))
        case QUAL_UNION_TYPE:
            /* Walk all the structure fields.  */
            for (field = TYPE_FIELDS (type);
                 field;
                 field = DECL_CHAIN (field))
                if (TREE_CODE (field) == FIELD_DECL
                    && ix86_contains_aligned_value_p (TREE_TYPE (field)))

          /* Just for use if some languages pass arrays by value.  */
          if (ix86_contains_aligned_value_p (TREE_TYPE (type)))

  return TYPE_ALIGN (type) >= 128;
/* Gives the alignment boundary, in bits, of an argument with the
   specified mode and type.  */

ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
      /* Since the main variant type is used for call, we convert it to
         the main variant type.  */
      type = TYPE_MAIN_VARIANT (type);
      align = TYPE_ALIGN (type);
      align = GET_MODE_ALIGNMENT (mode);
  if (align < PARM_BOUNDARY)
    align = PARM_BOUNDARY;
      unsigned int saved_align = align;

          /* i386 ABI defines XFmode arguments to be 4 byte aligned.  */
              if (mode == XFmode || mode == XCmode)
                align = PARM_BOUNDARY;
          else if (!ix86_contains_aligned_value_p (type))
            align = PARM_BOUNDARY;
            align = PARM_BOUNDARY;
      && align != ix86_compat_function_arg_boundary (mode, type,
          inform (input_location,
                  "The ABI for passing parameters with %d-byte"
                  " alignment has changed in GCC 4.6",
                  align / BITS_PER_UNIT);
/* Return true if N is a possible register number of function value.  */

ix86_function_value_regno_p (const unsigned int regno)
    case FIRST_FLOAT_REG:
      /* TODO: The function should depend on the current function ABI,
         but builtins.c would need updating then.  Therefore we use the
         default ABI.  */
      if (TARGET_64BIT && ix86_abi == MS_ABI)
      return TARGET_FLOAT_RETURNS_IN_80387;

      if (TARGET_MACHO || TARGET_64BIT)
/* Define how to find the value returned by a function.
   VALTYPE is the data type of the value (as a tree).
   If the precise function being called is known, FUNC is its FUNCTION_DECL;
   otherwise, FUNC is 0.  */

function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
                   const_tree fntype, const_tree fn)
  /* 8-byte vector modes in %mm0.  See ix86_return_in_memory for where
     we normally prevent this case when mmx is not available.  However
     some ABIs may require the result to be returned like DImode.  */
  if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
    regno = TARGET_MMX ? FIRST_MMX_REG : 0;

  /* 16-byte vector modes in %xmm0.  See ix86_return_in_memory for where
     we prevent this case when sse is not available.  However some ABIs
     may require the result to be returned like integer TImode.  */
  else if (mode == TImode
           || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
    regno = TARGET_SSE ? FIRST_SSE_REG : 0;

  /* 32-byte vector modes in %ymm0.  */
  else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
    regno = TARGET_AVX ? FIRST_SSE_REG : 0;

  /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387).  */
  else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
    regno = FIRST_FLOAT_REG;

  /* Most things go in %eax.  */

  /* Override FP return register with %xmm0 for local functions when
     SSE math is enabled or for functions with sseregparm attribute.  */
  if ((fn || fntype) && (mode == SFmode || mode == DFmode))
      int sse_level = ix86_function_sseregparm (fntype, fn, false);
      if ((sse_level >= 1 && mode == SFmode)
          || (sse_level == 2 && mode == DFmode))
        regno = FIRST_SSE_REG;

  /* OImode shouldn't be used directly.  */
  gcc_assert (mode != OImode);

  return gen_rtx_REG (orig_mode, regno);
function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
  /* Handle libcalls, which don't provide a type node.  */
  if (valtype == NULL)
        return gen_rtx_REG (mode, FIRST_SSE_REG);
        return gen_rtx_REG (mode, FIRST_FLOAT_REG);
        return gen_rtx_REG (mode, AX_REG);

  ret = construct_container (mode, orig_mode, valtype, 1,
                             X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
                             x86_64_int_return_registers, 0);

  /* For zero sized structures, construct_container returns NULL, but we
     need to keep the rest of the compiler happy by returning a
     meaningful value.  */
    ret = gen_rtx_REG (orig_mode, AX_REG);

function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
  unsigned int regno = AX_REG;

      switch (GET_MODE_SIZE (mode))
          if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
              && !COMPLEX_MODE_P (mode))
            regno = FIRST_SSE_REG;
          if (mode == SFmode || mode == DFmode)
            regno = FIRST_SSE_REG;

  return gen_rtx_REG (orig_mode, regno);
ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
                       enum machine_mode orig_mode, enum machine_mode mode)
  const_tree fn, fntype;

  if (fntype_or_decl && DECL_P (fntype_or_decl))
    fn = fntype_or_decl;
  fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;

  if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
    return function_value_ms_64 (orig_mode, mode);
  else if (TARGET_64BIT)
    return function_value_64 (orig_mode, mode, valtype);
    return function_value_32 (orig_mode, mode, fntype, fn);

ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
                     bool outgoing ATTRIBUTE_UNUSED)
  enum machine_mode mode, orig_mode;

  orig_mode = TYPE_MODE (valtype);
  mode = type_natural_mode (valtype, NULL);
  return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);

ix86_libcall_value (enum machine_mode mode)
  return ix86_function_value_1 (NULL, NULL, mode, mode);

/* Return true iff type is returned in memory.  */

static bool ATTRIBUTE_UNUSED
return_in_memory_32 (const_tree type, enum machine_mode mode)
  if (mode == BLKmode)

  size = int_size_in_bytes (type);

  if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)

  if (VECTOR_MODE_P (mode) || mode == TImode)
      /* User-created vectors small enough to fit in EAX.  */

      /* MMX/3dNow values are returned in MM0,
         except when it doesn't exist or the ABI prescribes otherwise.  */
        return !TARGET_MMX || TARGET_VECT8_RETURNS;

      /* SSE values are returned in XMM0, except when it doesn't exist.  */

      /* AVX values are returned in YMM0, except when it doesn't exist.  */

  /* OImode shouldn't be used directly.  */
  gcc_assert (mode != OImode);

static bool ATTRIBUTE_UNUSED
return_in_memory_64 (const_tree type, enum machine_mode mode)
  int needed_intregs, needed_sseregs;
  return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);

static bool ATTRIBUTE_UNUSED
return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
  HOST_WIDE_INT size = int_size_in_bytes (type);

  /* __m128 is returned in xmm0.  */
  if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
      && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))

  /* Otherwise, the size must be exactly 1, 2, 4, or 8 bytes.  */
  return size != 1 && size != 2 && size != 4 && size != 8;
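/* For example (illustrative): under this MS-ABI rule a 16-byte __m128
   comes back in %xmm0, an 8-byte struct in %rax, and a 12-byte struct
   through a hidden return-slot pointer supplied by the caller.  */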
ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
#ifdef SUBTARGET_RETURN_IN_MEMORY
  return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
  const enum machine_mode mode = type_natural_mode (type, NULL);

      if (ix86_function_type_abi (fntype) == MS_ABI)
        return return_in_memory_ms_64 (type, mode);
        return return_in_memory_64 (type, mode);
  return return_in_memory_32 (type, mode);

/* When returning SSE vector types, we have a choice of either
     (1) being abi incompatible with a -march switch, or
     (2) generating an error.
   Given no good solution, I think the safest thing is one warning.
   The user won't be able to use -Werror, but....

   Choose the STRUCT_VALUE_RTX hook because that's (at present) only
   called in response to actually generating a caller or callee that
   uses such a type.  As opposed to TARGET_RETURN_IN_MEMORY, which is called
   via aggregate_value_p for general type probing from tree-ssa.  */

ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
  static bool warnedsse, warnedmmx;

  if (!TARGET_64BIT && type)
      /* Look at the return type of the function, not the function type.  */
      enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));

      if (!TARGET_SSE && !warnedsse)
              || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
              warning (0, "SSE vector return without SSE enabled "

      if (!TARGET_MMX && !warnedmmx)
          if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
              warning (0, "MMX vector return without MMX enabled "

/* Create the va_list data type.  */
/* Returns the calling-convention-specific va_list data type.
   The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI.  */

ix86_build_builtin_va_list_abi (enum calling_abi abi)
  tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;

  /* For i386 we use plain pointer to argument area.  */
  if (!TARGET_64BIT || abi == MS_ABI)
    return build_pointer_type (char_type_node);

  record = lang_hooks.types.make_type (RECORD_TYPE);
  type_decl = build_decl (BUILTINS_LOCATION,
                          TYPE_DECL, get_identifier ("__va_list_tag"), record);

  f_gpr = build_decl (BUILTINS_LOCATION,
                      FIELD_DECL, get_identifier ("gp_offset"),
                      unsigned_type_node);
  f_fpr = build_decl (BUILTINS_LOCATION,
                      FIELD_DECL, get_identifier ("fp_offset"),
                      unsigned_type_node);
  f_ovf = build_decl (BUILTINS_LOCATION,
                      FIELD_DECL, get_identifier ("overflow_arg_area"),
  f_sav = build_decl (BUILTINS_LOCATION,
                      FIELD_DECL, get_identifier ("reg_save_area"),

  va_list_gpr_counter_field = f_gpr;
  va_list_fpr_counter_field = f_fpr;

  DECL_FIELD_CONTEXT (f_gpr) = record;
  DECL_FIELD_CONTEXT (f_fpr) = record;
  DECL_FIELD_CONTEXT (f_ovf) = record;
  DECL_FIELD_CONTEXT (f_sav) = record;

  TYPE_STUB_DECL (record) = type_decl;
  TYPE_NAME (record) = type_decl;
  TYPE_FIELDS (record) = f_gpr;
  DECL_CHAIN (f_gpr) = f_fpr;
  DECL_CHAIN (f_fpr) = f_ovf;
  DECL_CHAIN (f_ovf) = f_sav;

  layout_type (record);

  /* The correct type is an array type of one element.  */
  return build_array_type (record, build_index_type (size_zero_node));
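/* The record built above matches the familiar SysV x86-64 layout; an
   illustrative C equivalent is

     typedef struct {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __va_list_tag[1];
*/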
/* Set up the builtin va_list data type and, for 64-bit, the additional
   calling-convention-specific va_list data types.  */

ix86_build_builtin_va_list (void)
  tree ret = ix86_build_builtin_va_list_abi (ix86_abi);

  /* Initialize abi specific va_list builtin types.  */
      if (ix86_abi == MS_ABI)
          t = ix86_build_builtin_va_list_abi (SYSV_ABI);
          if (TREE_CODE (t) != RECORD_TYPE)
            t = build_variant_type_copy (t);
          sysv_va_list_type_node = t;
          if (TREE_CODE (t) != RECORD_TYPE)
            t = build_variant_type_copy (t);
          sysv_va_list_type_node = t;
      if (ix86_abi != MS_ABI)
          t = ix86_build_builtin_va_list_abi (MS_ABI);
          if (TREE_CODE (t) != RECORD_TYPE)
            t = build_variant_type_copy (t);
          ms_va_list_type_node = t;
          if (TREE_CODE (t) != RECORD_TYPE)
            t = build_variant_type_copy (t);
          ms_va_list_type_node = t;
/* Worker function for TARGET_SETUP_INCOMING_VARARGS.  */

setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
  /* GPR size of varargs save area.  */
  if (cfun->va_list_gpr_size)
    ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
    ix86_varargs_gpr_size = 0;

  /* FPR size of varargs save area.  We don't need it if we don't pass
     anything in SSE registers.  */
  if (TARGET_SSE && cfun->va_list_fpr_size)
    ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
    ix86_varargs_fpr_size = 0;

  if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)

  save_area = frame_pointer_rtx;
  set = get_varargs_alias_set ();

  max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
  if (max > X86_64_REGPARM_MAX)
    max = X86_64_REGPARM_MAX;

  for (i = cum->regno; i < max; i++)
      mem = gen_rtx_MEM (Pmode,
                         plus_constant (save_area, i * UNITS_PER_WORD));
      MEM_NOTRAP_P (mem) = 1;
      set_mem_alias_set (mem, set);
      emit_move_insn (mem, gen_rtx_REG (Pmode,
                                        x86_64_int_parameter_registers[i]));

  if (ix86_varargs_fpr_size)
      enum machine_mode smode;

      /* Now emit code to save SSE registers.  The AX parameter contains the
         number of SSE parameter registers used to call this function, though
         all we actually check here is the zero/non-zero status.  */
      label = gen_label_rtx ();
      test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
      emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),

      /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
         we used movdqa (i.e. TImode) instead?  Perhaps even better would
         be if we could determine the real mode of the data, via a hook
         into pass_stdarg.  Ignore all that for now.  */
      if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
        crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);

      max = cum->sse_regno + cfun->va_list_fpr_size / 16;
      if (max > X86_64_SSE_REGPARM_MAX)
        max = X86_64_SSE_REGPARM_MAX;

      for (i = cum->sse_regno; i < max; ++i)
          mem = plus_constant (save_area, i * 16 + ix86_varargs_gpr_size);
          mem = gen_rtx_MEM (smode, mem);
          MEM_NOTRAP_P (mem) = 1;
          set_mem_alias_set (mem, set);
          set_mem_align (mem, GET_MODE_ALIGNMENT (smode));

          emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
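/* The resulting register save area follows the psABI layout
   (illustrative): X86_64_REGPARM_MAX (6) eight-byte GP slots first,
   then X86_64_SSE_REGPARM_MAX (8) sixteen-byte SSE slots, which is why
   fp_offset values start at 6 * 8 = 48 bytes.  */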
setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
  alias_set_type set = get_varargs_alias_set ();

  for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
      mem = gen_rtx_MEM (Pmode,
                         plus_constant (virtual_incoming_args_rtx,
                                        i * UNITS_PER_WORD));
      MEM_NOTRAP_P (mem) = 1;
      set_mem_alias_set (mem, set);

      reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
      emit_move_insn (mem, reg);

ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
                             tree type, int *pretend_size ATTRIBUTE_UNUSED,
  CUMULATIVE_ARGS next_cum;

  /* This argument doesn't appear to be used anymore.  Which is good,
     because the old code here didn't suppress rtl generation.  */
  gcc_assert (!no_rtl);

  fntype = TREE_TYPE (current_function_decl);

  /* For varargs, we do not want to skip the dummy va_dcl argument.
     For stdargs, we do want to skip the last named argument.  */
  if (stdarg_p (fntype))
    ix86_function_arg_advance (&next_cum, mode, type, true);

  if (cum->call_abi == MS_ABI)
    setup_incoming_varargs_ms_64 (&next_cum);
    setup_incoming_varargs_64 (&next_cum);

/* Checks if TYPE is of kind va_list char *.  */

is_va_list_char_pointer (tree type)
  /* For 32-bit it is always true.  */

  canonic = ix86_canonical_va_list_type (type);
  return (canonic == ms_va_list_type_node
          || (ix86_abi == MS_ABI && canonic == va_list_type_node));
/* Implement va_start.  */

ix86_va_start (tree valist, rtx nextarg)
  HOST_WIDE_INT words, n_gpr, n_fpr;
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;

  if (flag_split_stack
      && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
      unsigned int scratch_regno;

      /* When we are splitting the stack, we can't refer to the stack
         arguments using internal_arg_pointer, because they may be on
         the old stack.  The split stack prologue will arrange to
         leave a pointer to the old stack arguments in a scratch
         register, which we here copy to a pseudo-register.  The split
         stack prologue can't set the pseudo-register directly because
         it (the prologue) runs before any registers have been saved.  */

      scratch_regno = split_stack_prologue_scratch_regno ();
      if (scratch_regno != INVALID_REGNUM)
          reg = gen_reg_rtx (Pmode);
          cfun->machine->split_stack_varargs_pointer = reg;

          emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));

          push_topmost_sequence ();
          emit_insn_after (seq, entry_of_function ());
          pop_topmost_sequence ();

  /* Only the 64-bit target needs something special.  */
  if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
      if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
        std_expand_builtin_va_start (valist, nextarg);

          va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
          next = expand_binop (ptr_mode, add_optab,
                               cfun->machine->split_stack_varargs_pointer,
                               crtl->args.arg_offset_rtx,
                               NULL_RTX, 0, OPTAB_LIB_WIDEN);
          convert_move (va_r, next, 0);

  f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
  f_fpr = DECL_CHAIN (f_gpr);
  f_ovf = DECL_CHAIN (f_fpr);
  f_sav = DECL_CHAIN (f_ovf);

  valist = build_simple_mem_ref (valist);
  TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
  /* The following should be folded into the MEM_REF offset.  */
  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),

  /* Count number of gp and fp argument registers used.  */
  words = crtl->args.info.words;
  n_gpr = crtl->args.info.regno;
  n_fpr = crtl->args.info.sse_regno;

  if (cfun->va_list_gpr_size)
      type = TREE_TYPE (gpr);
      t = build2 (MODIFY_EXPR, type,
                  gpr, build_int_cst (type, n_gpr * 8));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  if (TARGET_SSE && cfun->va_list_fpr_size)
      type = TREE_TYPE (fpr);
      t = build2 (MODIFY_EXPR, type, fpr,
                  build_int_cst (type, n_fpr * 16 + 8 * X86_64_REGPARM_MAX));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Find the overflow area.  */
  type = TREE_TYPE (ovf);
  if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
    ovf_rtx = crtl->args.internal_arg_pointer;
    ovf_rtx = cfun->machine->split_stack_varargs_pointer;
  t = make_tree (type, ovf_rtx);
  t = build2 (POINTER_PLUS_EXPR, type, t,
              size_int (words * UNITS_PER_WORD));
  t = build2 (MODIFY_EXPR, type, ovf, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
      /* Find the register save area.
         The function prologue saves it right above the stack frame.  */
      type = TREE_TYPE (sav);
      t = make_tree (type, frame_pointer_rtx);
      if (!ix86_varargs_gpr_size)
        t = build2 (POINTER_PLUS_EXPR, type, t,
                    size_int (-8 * X86_64_REGPARM_MAX));
      t = build2 (MODIFY_EXPR, type, sav, t);
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
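/* Example state after va_start (illustrative) in
   f (int a, double b, ...): one GP and one SSE register are consumed
   by the named arguments, so gp_offset = 1 * 8 = 8 and
   fp_offset = 48 + 1 * 16 = 64, while overflow_arg_area points just
   past any named words on the stack.  */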
/* Implement va_arg.  */

ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
  static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;
  tree lab_false, lab_over = NULL_TREE;
  enum machine_mode nat_mode;
  unsigned int arg_boundary;

  /* Only the 64-bit target needs something special.  */
  if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
    return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);

  f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
  f_fpr = DECL_CHAIN (f_gpr);
  f_ovf = DECL_CHAIN (f_fpr);
  f_sav = DECL_CHAIN (f_ovf);

  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
                build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
  valist = build_va_arg_indirect_ref (valist);
  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);

  indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
    type = build_pointer_type (type);
  size = int_size_in_bytes (type);
  rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  nat_mode = type_natural_mode (type, NULL);
      /* Unnamed 256bit vector mode parameters are passed on stack.  */
      if (!TARGET_64BIT_MS_ABI)

  container = construct_container (nat_mode, TYPE_MODE (type),
                                   type, 0, X86_64_REGPARM_MAX,
                                   X86_64_SSE_REGPARM_MAX, intreg,

  /* Pull the value out of the saved registers.  */

  addr = create_tmp_var (ptr_type_node, "addr");

      int needed_intregs, needed_sseregs;
      tree int_addr, sse_addr;

      lab_false = create_artificial_label (UNKNOWN_LOCATION);
      lab_over = create_artificial_label (UNKNOWN_LOCATION);

      examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);

      need_temp = (!REG_P (container)
                   && ((needed_intregs && TYPE_ALIGN (type) > 64)
                       || TYPE_ALIGN (type) > 128));
8210 /* If we are passing a structure, verify that it forms a consecutive block
8211 in the register save area. If not, we need to do moves. */
8212 if (!need_temp && !REG_P (container))
8214 /* Verify that all registers are strictly consecutive. */
8215 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
8219 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8221 rtx slot = XVECEXP (container, 0, i);
8222 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
8223 || INTVAL (XEXP (slot, 1)) != i * 16)
8231 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8233 rtx slot = XVECEXP (container, 0, i);
8234 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
8235 || INTVAL (XEXP (slot, 1)) != i * 8)
8247 int_addr = create_tmp_var (ptr_type_node, "int_addr");
8248 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
8251 /* First ensure that we fit completely in registers. */
8254 t = build_int_cst (TREE_TYPE (gpr),
8255 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
8256 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
8257 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8258 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8259 gimplify_and_add (t, pre_p);
8263 t = build_int_cst (TREE_TYPE (fpr),
8264 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
8265 + X86_64_REGPARM_MAX * 8);
8266 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
8267 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8268 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8269 gimplify_and_add (t, pre_p);
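/* A worked instance of the two range checks above, under the usual
   limits X86_64_REGPARM_MAX == 6 and X86_64_SSE_REGPARM_MAX == 8:
   for an argument needing one GP register (needed_intregs == 1), we
   branch to lab_false once gpr >= (6 - 1 + 1) * 8 == 48, i.e. once all
   six GP slots are consumed; for one SSE register we branch once
   fpr >= (8 - 1 + 1) * 16 + 6 * 8 == 176, the end of the save area.  */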
8272 /* Compute index to start of area used for integer regs. */
8275 /* int_addr = gpr + sav; */
8276 t = fold_convert (sizetype, gpr);
8277 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
8278 gimplify_assign (int_addr, t, pre_p);
8282 /* sse_addr = fpr + sav; */
8283 t = fold_convert (sizetype, fpr);
8284 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
8285 gimplify_assign (sse_addr, t, pre_p);
8289 int i, prev_size = 0;
8290 tree temp = create_tmp_var (type, "va_arg_tmp");
8293 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
8294 gimplify_assign (addr, t, pre_p);
8296 for (i = 0; i < XVECLEN (container, 0); i++)
8298 rtx slot = XVECEXP (container, 0, i);
8299 rtx reg = XEXP (slot, 0);
8300 enum machine_mode mode = GET_MODE (reg);
8306 tree dest_addr, dest;
8307 int cur_size = GET_MODE_SIZE (mode);
8309 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
8310 prev_size = INTVAL (XEXP (slot, 1));
8311 if (prev_size + cur_size > size)
8313 cur_size = size - prev_size;
8314 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
8315 if (mode == BLKmode)
8318 piece_type = lang_hooks.types.type_for_mode (mode, 1);
8319 if (mode == GET_MODE (reg))
8320 addr_type = build_pointer_type (piece_type);
8322 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8324 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8327 if (SSE_REGNO_P (REGNO (reg)))
8329 src_addr = sse_addr;
8330 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
8334 src_addr = int_addr;
8335 src_offset = REGNO (reg) * 8;
8337 src_addr = fold_convert (addr_type, src_addr);
8338 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
8339 size_int (src_offset));
8341 dest_addr = fold_convert (daddr_type, addr);
8342 dest_addr = fold_build2 (POINTER_PLUS_EXPR, daddr_type, dest_addr,
8343 size_int (prev_size));
8344 if (cur_size == GET_MODE_SIZE (mode))
8346 src = build_va_arg_indirect_ref (src_addr);
8347 dest = build_va_arg_indirect_ref (dest_addr);
8349 gimplify_assign (dest, src, pre_p);
8354 = build_call_expr (implicit_built_in_decls[BUILT_IN_MEMCPY],
8355 3, dest_addr, src_addr,
8356 size_int (cur_size));
8357 gimplify_and_add (copy, pre_p);
8359 prev_size += cur_size;
8365 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
8366 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
8367 gimplify_assign (gpr, t, pre_p);
8372 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
8373 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
8374 gimplify_assign (fpr, t, pre_p);
8377 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
8379 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
8382 /* ... otherwise out of the overflow area. */
8384 /* When we align a parameter on the stack for the caller, if the parameter
8385 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
8386 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We will match the callee
8387 here with the caller. */
8388 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
8389 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
8390 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
8392 /* Care for on-stack alignment if needed. */
8393 if (arg_boundary <= 64 || size == 0)
8397 HOST_WIDE_INT align = arg_boundary / 8;
8398 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
8399 size_int (align - 1));
8400 t = fold_convert (sizetype, t);
8401 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
8403 t = fold_convert (TREE_TYPE (ovf), t);
8406 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
8407 gimplify_assign (addr, t, pre_p);
8409 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
8410 size_int (rsize * UNITS_PER_WORD));
8411 gimplify_assign (unshare_expr (ovf), t, pre_p);
8414 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
8416 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
8417 addr = fold_convert (ptrtype, addr);
8420 addr = build_va_arg_indirect_ref (addr);
8421 return build_va_arg_indirect_ref (addr);
8424 /* Return true if OPNUM's MEM should be matched
8425 in movabs* patterns. */
8428 ix86_check_movabs (rtx insn, int opnum)
8432 set = PATTERN (insn);
8433 if (GET_CODE (set) == PARALLEL)
8434 set = XVECEXP (set, 0, 0);
8435 gcc_assert (GET_CODE (set) == SET);
8436 mem = XEXP (set, opnum);
8437 while (GET_CODE (mem) == SUBREG)
8438 mem = SUBREG_REG (mem);
8439 gcc_assert (MEM_P (mem));
8440 return volatile_ok || !MEM_VOLATILE_P (mem);
8443 /* Initialize the table of extra 80387 mathematical constants. */
8446 init_ext_80387_constants (void)
8448 static const char * cst[5] =
8450 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
8451 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
8452 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
8453 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
8454 "3.1415926535897932385128089594061862044", /* 4: fldpi */
8458 for (i = 0; i < 5; i++)
8460 real_from_string (&ext_80387_constants_table[i], cst[i]);
8461 /* Ensure each constant is rounded to XFmode precision. */
8462 real_convert (&ext_80387_constants_table[i],
8463 XFmode, &ext_80387_constants_table[i]);
8466 ext_80387_constants_init = 1;
8469 /* Return non-zero if the constant is something that
8470 can be loaded with a special instruction. */
8473 standard_80387_constant_p (rtx x)
8475 enum machine_mode mode = GET_MODE (x);
8479 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
8482 if (x == CONST0_RTX (mode))
8484 if (x == CONST1_RTX (mode))
8487 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8489 /* For XFmode constants, try to find a special 80387 instruction when
8490 optimizing for size or on those CPUs that benefit from them. */
8492 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
8496 if (! ext_80387_constants_init)
8497 init_ext_80387_constants ();
8499 for (i = 0; i < 5; i++)
8500 if (real_identical (&r, &ext_80387_constants_table[i]))
8504 /* A load of the constant -0.0 or -1.0 will be split into an
8505 fldz;fchs or fld1;fchs sequence. */
8506 if (real_isnegzero (&r))
8508 if (real_identical (&r, &dconstm1))
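/* A summary of the return-value convention used here, inferred from
   the checks above and from standard_80387_constant_opcode below: 1
   selects fldz for 0.0, 2 selects fld1 for 1.0, 3 through 7 select
   fldlg2, fldln2, fldl2e, fldl2t and fldpi for the five table entries,
   and 8 and 9 mark -0.0 and -1.0, which are split into a plain load
   followed by fchs.  */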
8514 /* Return the opcode of the special instruction to be used to load the constant X. */
8518 standard_80387_constant_opcode (rtx x)
8520 switch (standard_80387_constant_p (x))
8544 /* Return the CONST_DOUBLE representing the 80387 constant that is
8545 loaded by the specified special instruction. The argument IDX
8546 matches the return value from standard_80387_constant_p. */
8549 standard_80387_constant_rtx (int idx)
8553 if (! ext_80387_constants_init)
8554 init_ext_80387_constants ();
8570 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
8574 /* Return 1 if X is all 0s and 2 if X is all 1s
8575 in a supported SSE vector mode. */
8578 standard_sse_constant_p (rtx x)
8580 enum machine_mode mode = GET_MODE (x);
8582 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
8584 if (vector_all_ones_operand (x, mode))
8600 /* Return the opcode of the special instruction to be used to load the SSE vector constant X. */
8604 standard_sse_constant_opcode (rtx insn, rtx x)
8606 switch (standard_sse_constant_p (x))
8609 switch (get_attr_mode (insn))
8612 return "%vxorps\t%0, %d0";
8614 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8615 return "%vxorps\t%0, %d0";
8617 return "%vxorpd\t%0, %d0";
8619 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8620 return "%vxorps\t%0, %d0";
8622 return "%vpxor\t%0, %d0";
8624 return "vxorps\t%x0, %x0, %x0";
8626 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8627 return "vxorps\t%x0, %x0, %x0";
8629 return "vxorpd\t%x0, %x0, %x0";
8631 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8632 return "vxorps\t%x0, %x0, %x0";
8634 return "vpxor\t%x0, %x0, %x0";
8639 return "%vpcmpeqd\t%0, %d0";
8646 /* Returns true if OP contains a symbol reference. */
8649 symbolic_reference_mentioned_p (rtx op)
8654 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
8657 fmt = GET_RTX_FORMAT (GET_CODE (op));
8658 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
8664 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
8665 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
8669 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
8676 /* Return true if it is appropriate to emit `ret' instructions in the
8677 body of a function. Do this only if the epilogue is simple, needing a
8678 couple of insns. Prior to reloading, we can't tell how many registers
8679 must be saved, so return false then. Return false if there is no frame
8680 marker to de-allocate. */
8683 ix86_can_use_return_insn_p (void)
8685 struct ix86_frame frame;
8687 if (! reload_completed || frame_pointer_needed)
8690 /* Don't allow more than 32k pop, since that's all we can do
8691 with one instruction. */
8692 if (crtl->args.pops_args && crtl->args.size >= 32768)
8695 ix86_compute_frame_layout (&frame);
8696 return (frame.stack_pointer_offset == UNITS_PER_WORD
8697 && (frame.nregs + frame.nsseregs) == 0);
8700 /* Value should be nonzero if functions must have frame pointers.
8701 Zero means the frame pointer need not be set up (and parms may
8702 be accessed via the stack pointer) in functions that seem suitable. */
8705 ix86_frame_pointer_required (void)
8707 /* If we accessed previous frames, then the generated code expects
8708 to be able to access the saved ebp value in our frame. */
8709 if (cfun->machine->accesses_prev_frame)
8712 /* Several x86 OSes need a frame pointer for other reasons,
8713 usually pertaining to setjmp. */
8714 if (SUBTARGET_FRAME_POINTER_REQUIRED)
8717 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
8718 turns off the frame pointer by default. Turn it back on now if
8719 we've not got a leaf function. */
8720 if (TARGET_OMIT_LEAF_FRAME_POINTER
8721 && (!current_function_is_leaf
8722 || ix86_current_function_calls_tls_descriptor))
8725 if (crtl->profile && !flag_fentry)
8731 /* Record that the current function accesses previous call frames. */
8734 ix86_setup_frame_addresses (void)
8736 cfun->machine->accesses_prev_frame = 1;
8739 #ifndef USE_HIDDEN_LINKONCE
8740 # if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
8741 # define USE_HIDDEN_LINKONCE 1
8743 # define USE_HIDDEN_LINKONCE 0
8747 static int pic_labels_used;
8749 /* Fills in the label name that should be used for a pc thunk for
8750 the given register. */
8753 get_pc_thunk_name (char name[32], unsigned int regno)
8755 gcc_assert (!TARGET_64BIT);
8757 if (USE_HIDDEN_LINKONCE)
8758 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
8760 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
8764 /* This function generates code for -fpic that loads %ebx with
8765 the return address of the caller and then returns. */
8768 ix86_code_end (void)
8773 #ifdef TARGET_SOLARIS
8774 solaris_code_end ();
8777 for (regno = AX_REG; regno <= SP_REG; regno++)
8782 if (!(pic_labels_used & (1 << regno)))
8785 get_pc_thunk_name (name, regno);
8787 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
8788 get_identifier (name),
8789 build_function_type_list (void_type_node, NULL_TREE));
8790 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
8791 NULL_TREE, void_type_node);
8792 TREE_PUBLIC (decl) = 1;
8793 TREE_STATIC (decl) = 1;
8798 switch_to_section (darwin_sections[text_coal_section]);
8799 fputs ("\t.weak_definition\t", asm_out_file);
8800 assemble_name (asm_out_file, name);
8801 fputs ("\n\t.private_extern\t", asm_out_file);
8802 assemble_name (asm_out_file, name);
8803 putc ('\n', asm_out_file);
8804 ASM_OUTPUT_LABEL (asm_out_file, name);
8805 DECL_WEAK (decl) = 1;
8809 if (USE_HIDDEN_LINKONCE)
8811 DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
8813 targetm.asm_out.unique_section (decl, 0);
8814 switch_to_section (get_named_section (decl, NULL, 0));
8816 targetm.asm_out.globalize_label (asm_out_file, name);
8817 fputs ("\t.hidden\t", asm_out_file);
8818 assemble_name (asm_out_file, name);
8819 putc ('\n', asm_out_file);
8820 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
8824 switch_to_section (text_section);
8825 ASM_OUTPUT_LABEL (asm_out_file, name);
8828 DECL_INITIAL (decl) = make_node (BLOCK);
8829 current_function_decl = decl;
8830 init_function_start (decl);
8831 first_function_block_is_cold = false;
8832 /* Make sure unwind info is emitted for the thunk if needed. */
8833 final_start_function (emit_barrier (), asm_out_file, 1);
8835 /* Pad stack IP move with 4 instructions (two NOPs count
8836 as one instruction). */
8837 if (TARGET_PAD_SHORT_FUNCTION)
8842 fputs ("\tnop\n", asm_out_file);
8845 xops[0] = gen_rtx_REG (Pmode, regno);
8846 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
8847 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
8848 fputs ("\tret\n", asm_out_file);
8849 final_end_function ();
8850 init_insn_lengths ();
8851 free_after_compilation (cfun);
8853 current_function_decl = NULL;
8856 if (flag_split_stack)
8857 file_end_indicate_split_stack ();
8860 /* Emit code for the SET_GOT patterns. */
8863 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
8869 if (TARGET_VXWORKS_RTP && flag_pic)
8871 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
8872 xops[2] = gen_rtx_MEM (Pmode,
8873 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
8874 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
8876 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
8877 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
8878 an unadorned address. */
8879 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
8880 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
8881 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
8885 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
8887 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
8889 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
8892 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
8895 output_asm_insn ("call\t%a2", xops);
8896 #ifdef DWARF2_UNWIND_INFO
8897 /* The call to the next label acts as a push. */
8898 if (dwarf2out_do_frame ())
8902 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8903 gen_rtx_PLUS (Pmode,
8906 RTX_FRAME_RELATED_P (insn) = 1;
8907 dwarf2out_frame_debug (insn, true);
8914 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8915 is what will be referenced by the Mach-O PIC subsystem. */
8917 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8920 targetm.asm_out.internal_label (asm_out_file, "L",
8921 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
8925 output_asm_insn ("pop%z0\t%0", xops);
8926 #ifdef DWARF2_UNWIND_INFO
8927 /* The pop is a pop and clobbers dest, but doesn't restore it
8928 for unwind info purposes. */
8929 if (dwarf2out_do_frame ())
8933 insn = emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
8934 dwarf2out_frame_debug (insn, true);
8935 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8936 gen_rtx_PLUS (Pmode,
8939 RTX_FRAME_RELATED_P (insn) = 1;
8940 dwarf2out_frame_debug (insn, true);
8949 get_pc_thunk_name (name, REGNO (dest));
8950 pic_labels_used |= 1 << REGNO (dest);
8952 #ifdef DWARF2_UNWIND_INFO
8953 /* Ensure all queued register saves are flushed before the call. */
8955 if (dwarf2out_do_frame ())
8956 dwarf2out_flush_queued_reg_saves ();
8958 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
8959 xops[2] = gen_rtx_MEM (QImode, xops[2]);
8960 output_asm_insn ("call\t%X2", xops);
8961 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8962 is what will be referenced by the Mach-O PIC subsystem. */
8965 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8967 targetm.asm_out.internal_label (asm_out_file, "L",
8968 CODE_LABEL_NUMBER (label));
8975 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
8976 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
8978 output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
8983 /* Generate a "push" pattern for input ARG. */
8988 struct machine_function *m = cfun->machine;
8990 if (m->fs.cfa_reg == stack_pointer_rtx)
8991 m->fs.cfa_offset += UNITS_PER_WORD;
8992 m->fs.sp_offset += UNITS_PER_WORD;
8994 return gen_rtx_SET (VOIDmode,
8996 gen_rtx_PRE_DEC (Pmode,
8997 stack_pointer_rtx)),
9001 /* Generate a "pop" pattern for input ARG. */
9006 return gen_rtx_SET (VOIDmode,
9009 gen_rtx_POST_INC (Pmode,
9010 stack_pointer_rtx)));
9013 /* Return >= 0 if there is an unused call-clobbered register available
9014 for the entire function. */
9017 ix86_select_alt_pic_regnum (void)
9019 if (current_function_is_leaf
9021 && !ix86_current_function_calls_tls_descriptor)
9024 /* Can't use the same register for both PIC and DRAP. */
9026 drap = REGNO (crtl->drap_reg);
9029 for (i = 2; i >= 0; --i)
9030 if (i != drap && !df_regs_ever_live_p (i))
9034 return INVALID_REGNUM;
9037 /* Return TRUE if we need to save REGNO. */
9040 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
9042 if (pic_offset_table_rtx
9043 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
9044 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
9046 || crtl->calls_eh_return
9047 || crtl->uses_const_pool))
9048 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
9050 if (crtl->calls_eh_return && maybe_eh_return)
9055 unsigned test = EH_RETURN_DATA_REGNO (i);
9056 if (test == INVALID_REGNUM)
9063 if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
9066 return (df_regs_ever_live_p (regno)
9067 && !call_used_regs[regno]
9068 && !fixed_regs[regno]
9069 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
9072 /* Return the number of saved general purpose registers. */
9075 ix86_nsaved_regs (void)
9080 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9081 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9086 /* Return the number of saved SSE registers. */
9089 ix86_nsaved_sseregs (void)
9094 if (!TARGET_64BIT_MS_ABI)
9096 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9097 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9102 /* Given FROM and TO register numbers, say whether this elimination is
9103 allowed. If stack alignment is needed, we can only replace argument
9104 pointer with hard frame pointer, or replace frame pointer with stack
9105 pointer. Otherwise, frame pointer elimination is automatically
9106 handled and all other eliminations are valid. */
9109 ix86_can_eliminate (const int from, const int to)
9111 if (stack_realign_fp)
9112 return ((from == ARG_POINTER_REGNUM
9113 && to == HARD_FRAME_POINTER_REGNUM)
9114 || (from == FRAME_POINTER_REGNUM
9115 && to == STACK_POINTER_REGNUM));
9117 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
9120 /* Return the offset between two registers, one to be eliminated, and the other
9121 its replacement, at the start of a routine. */
9124 ix86_initial_elimination_offset (int from, int to)
9126 struct ix86_frame frame;
9127 ix86_compute_frame_layout (&frame);
9129 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9130 return frame.hard_frame_pointer_offset;
9131 else if (from == FRAME_POINTER_REGNUM
9132 && to == HARD_FRAME_POINTER_REGNUM)
9133 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
9136 gcc_assert (to == STACK_POINTER_REGNUM);
9138 if (from == ARG_POINTER_REGNUM)
9139 return frame.stack_pointer_offset;
9141 gcc_assert (from == FRAME_POINTER_REGNUM);
9142 return frame.stack_pointer_offset - frame.frame_pointer_offset;
9146 /* In a dynamically-aligned function, we can't know the offset from
9147 stack pointer to frame pointer, so we must ensure that setjmp
9148 eliminates fp against the hard fp (%ebp) rather than trying to
9149 index from %esp up to the top of the frame across a gap that is
9150 of unknown (at compile-time) size. */
9152 ix86_builtin_setjmp_frame_value (void)
9154 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
9157 /* On the x86, -fsplit-stack and -fstack-protector both use the same
9158 field in the TCB, so they cannot be used together. */
9161 ix86_supports_split_stack (bool report ATTRIBUTE_UNUSED,
9162 struct gcc_options *opts ATTRIBUTE_UNUSED)
9166 #ifndef TARGET_THREAD_SPLIT_STACK_OFFSET
9168 error ("%<-fsplit-stack%> currently only supported on GNU/Linux");
9171 if (!HAVE_GAS_CFI_PERSONALITY_DIRECTIVE)
9174 error ("%<-fsplit-stack%> requires "
9175 "assembler support for CFI directives");
9183 /* When using -fsplit-stack, the allocation routines set a field in
9184 the TCB to the bottom of the stack plus this much space, measured in bytes. */
9187 #define SPLIT_STACK_AVAILABLE 256
9189 /* Fill the ix86_frame structure describing the frame of the currently computed function. */
9192 ix86_compute_frame_layout (struct ix86_frame *frame)
9194 unsigned int stack_alignment_needed;
9195 HOST_WIDE_INT offset;
9196 unsigned int preferred_alignment;
9197 HOST_WIDE_INT size = get_frame_size ();
9198 HOST_WIDE_INT to_allocate;
9200 frame->nregs = ix86_nsaved_regs ();
9201 frame->nsseregs = ix86_nsaved_sseregs ();
9203 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
9204 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
9206 /* The 64-bit MS ABI seems to require stack alignment to always be 16, except
9207 for function prologues and leaf functions. */
9208 if ((TARGET_64BIT_MS_ABI && preferred_alignment < 16)
9209 && (!current_function_is_leaf || cfun->calls_alloca != 0
9210 || ix86_current_function_calls_tls_descriptor))
9212 preferred_alignment = 16;
9213 stack_alignment_needed = 16;
9214 crtl->preferred_stack_boundary = 128;
9215 crtl->stack_alignment_needed = 128;
9218 gcc_assert (!size || stack_alignment_needed);
9219 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
9220 gcc_assert (preferred_alignment <= stack_alignment_needed);
9222 /* For SEH we have to limit the amount of code movement into the prologue.
9223 At present we do this via a BLOCKAGE, at which point there's very little
9224 scheduling that can be done, which means that there's very little point
9225 in doing anything except PUSHs. */
9227 cfun->machine->use_fast_prologue_epilogue = false;
9229 /* The number of saved registers can change during reload iteration.
9230 Recompute the value as needed. Do not recompute when the number of
9231 registers didn't change, as reload does multiple calls to this function
9232 and does not expect the decision to change within a single iteration. */
9233 else if (!optimize_function_for_size_p (cfun)
9234 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
9236 int count = frame->nregs;
9237 struct cgraph_node *node = cgraph_get_node (current_function_decl);
9239 cfun->machine->use_fast_prologue_epilogue_nregs = count;
9241 /* The fast prologue uses move instead of push to save registers. This
9242 is significantly longer, but also executes faster as modern hardware
9243 can execute the moves in parallel, but can't do that for push/pop.
9245 Be careful about choosing which prologue to emit: when the function
9246 takes many instructions to execute, we may use the slow version, as
9247 well as when the function is known to be outside a hot spot (this is
9248 known with feedback only). Weight the size of the function by the
9249 number of registers to save, as it is cheap to use one or two push
9250 instructions but very slow to use many of them. */
9252 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
9253 if (node->frequency < NODE_FREQUENCY_NORMAL
9254 || (flag_branch_probabilities
9255 && node->frequency < NODE_FREQUENCY_HOT))
9256 cfun->machine->use_fast_prologue_epilogue = false;
9258 cfun->machine->use_fast_prologue_epilogue
9259 = !expensive_function_p (count);
9261 if (TARGET_PROLOGUE_USING_MOVE
9262 && cfun->machine->use_fast_prologue_epilogue)
9263 frame->save_regs_using_mov = true;
9265 frame->save_regs_using_mov = false;
9267 /* If static stack checking is enabled and done with probes, the registers
9268 need to be saved before allocating the frame. */
9269 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
9270 frame->save_regs_using_mov = false;
9272 /* Skip return address. */
9273 offset = UNITS_PER_WORD;
9275 /* Skip pushed static chain. */
9276 if (ix86_static_chain_on_stack)
9277 offset += UNITS_PER_WORD;
9279 /* Skip saved base pointer. */
9280 if (frame_pointer_needed)
9281 offset += UNITS_PER_WORD;
9282 frame->hfp_save_offset = offset;
9284 /* The traditional frame pointer location is at the top of the frame. */
9285 frame->hard_frame_pointer_offset = offset;
9287 /* Register save area */
9288 offset += frame->nregs * UNITS_PER_WORD;
9289 frame->reg_save_offset = offset;
9291 /* Align and set SSE register save area. */
9292 if (frame->nsseregs)
9294 /* The only ABI that has saved SSE registers (Win64) also has a
9295 16-byte aligned default stack, and thus we don't need to be
9296 within the re-aligned local stack frame to save them. */
9297 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
9298 offset = (offset + 16 - 1) & -16;
9299 offset += frame->nsseregs * 16;
9301 frame->sse_reg_save_offset = offset;
9303 /* The re-aligned stack starts here. Values before this point are not
9304 directly comparable with values below this point. In order to make
9305 sure that no value happens to be the same before and after, force
9306 the alignment computation below to add a non-zero value. */
9307 if (stack_realign_fp)
9308 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
9311 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
9312 offset += frame->va_arg_size;
9314 /* Align start of frame for local function. */
9315 if (stack_realign_fp
9316 || offset != frame->sse_reg_save_offset
9318 || !current_function_is_leaf
9319 || cfun->calls_alloca
9320 || ix86_current_function_calls_tls_descriptor)
9321 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
9323 /* Frame pointer points here. */
9324 frame->frame_pointer_offset = offset;
9328 /* Add the outgoing arguments area. It can be skipped if we eliminated
9329 all the function calls as dead code.
9330 Skipping is however impossible when the function calls alloca, as the
9331 alloca expander assumes that the last crtl->outgoing_args_size
9332 bytes of the stack frame are unused. */
9333 if (ACCUMULATE_OUTGOING_ARGS
9334 && (!current_function_is_leaf || cfun->calls_alloca
9335 || ix86_current_function_calls_tls_descriptor))
9337 offset += crtl->outgoing_args_size;
9338 frame->outgoing_arguments_size = crtl->outgoing_args_size;
9341 frame->outgoing_arguments_size = 0;
9343 /* Align the stack boundary. Only needed if we're calling another function or using alloca. */
9345 if (!current_function_is_leaf || cfun->calls_alloca
9346 || ix86_current_function_calls_tls_descriptor)
9347 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
9349 /* We've reached end of stack frame. */
9350 frame->stack_pointer_offset = offset;
9352 /* The size the prologue needs to allocate. */
9353 to_allocate = offset - frame->sse_reg_save_offset;
9355 if ((!to_allocate && frame->nregs <= 1)
9356 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
9357 frame->save_regs_using_mov = false;
9359 if (ix86_using_red_zone ()
9360 && current_function_sp_is_unchanging
9361 && current_function_is_leaf
9362 && !ix86_current_function_calls_tls_descriptor)
9364 frame->red_zone_size = to_allocate;
9365 if (frame->save_regs_using_mov)
9366 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
9367 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
9368 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
9371 frame->red_zone_size = 0;
9372 frame->stack_pointer_offset -= frame->red_zone_size;
9374 /* The SEH frame pointer location is near the bottom of the frame.
9375 This is enforced by the fact that the difference between the
9376 stack pointer and the frame pointer is limited to 240 bytes in
9377 the unwind data structure. */
9382 /* If we can leave the frame pointer where it is, do so. */
9383 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
9384 if (diff > 240 || (diff & 15) != 0)
9386 /* Ideally we'd determine what portion of the local stack frame
9387 (within the constraint of the lowest 240) is most heavily used.
9388 But without that complication, simply bias the frame pointer
9389 by 128 bytes so as to maximize the amount of the local stack
9390 frame that is addressable with 8-bit offsets. */
9391 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
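/* A rough picture of the layout just computed, reading from the CFA
   downward (the stack grows down; bracketed areas may be absent).
   This is an illustration derived from the code above, not a
   normative diagram:

       return address
       [pushed static chain]
       [saved frame pointer]            <- hard_frame_pointer_offset
       GP register save area            <- ends at reg_save_offset
       [SSE save area, 16-byte aligned] <- ends at sse_reg_save_offset
       [va_arg register save area]
       local variables                  <- start at frame_pointer_offset
       [outgoing arguments area]
       end of frame                     <- stack_pointer_offset
                                           (minus any red zone)  */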
9396 /* This is semi-inlined memory_address_length, but simplified
9397 since we know that we're always dealing with reg+offset, and
9398 to avoid having to create and discard all that rtl. */
9401 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
9407 /* EBP and R13 cannot be encoded without an offset. */
9408 len = (regno == BP_REG || regno == R13_REG);
9410 else if (IN_RANGE (offset, -128, 127))
9413 /* ESP and R12 must be encoded with a SIB byte. */
9414 if (regno == SP_REG || regno == R12_REG)
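/* Some example encodings (a sketch; it assumes LEN defaults to 4, the
   size of a disp32, for offsets outside the disp8 range): (%eax, 0)
   needs no displacement byte -> 0; (%ebp, 0) still needs a disp8 -> 1;
   (%eax, 64) fits in a disp8 -> 1; (%eax, 512) needs a disp32 -> 4;
   (%esp, 512) additionally needs a SIB byte -> 5.  */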
9420 /* Return an RTX that points to CFA_OFFSET within the stack frame.
9421 The valid base registers are taken from CFUN->MACHINE->FS. */
9424 choose_baseaddr (HOST_WIDE_INT cfa_offset)
9426 const struct machine_function *m = cfun->machine;
9427 rtx base_reg = NULL;
9428 HOST_WIDE_INT base_offset = 0;
9430 if (m->use_fast_prologue_epilogue)
9432 /* Choose the base register most likely to allow the most scheduling
9433 opportunities. Generally FP is valid throughout the function,
9434 while DRAP must be reloaded within the epilogue. But choose either
9435 over the SP due to increased encoding size. */
9439 base_reg = hard_frame_pointer_rtx;
9440 base_offset = m->fs.fp_offset - cfa_offset;
9442 else if (m->fs.drap_valid)
9444 base_reg = crtl->drap_reg;
9445 base_offset = 0 - cfa_offset;
9447 else if (m->fs.sp_valid)
9449 base_reg = stack_pointer_rtx;
9450 base_offset = m->fs.sp_offset - cfa_offset;
9455 HOST_WIDE_INT toffset;
9458 /* Choose the base register with the smallest address encoding.
9459 With a tie, choose FP > DRAP > SP. */
9462 base_reg = stack_pointer_rtx;
9463 base_offset = m->fs.sp_offset - cfa_offset;
9464 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
9466 if (m->fs.drap_valid)
9468 toffset = 0 - cfa_offset;
9469 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
9472 base_reg = crtl->drap_reg;
9473 base_offset = toffset;
9479 toffset = m->fs.fp_offset - cfa_offset;
9480 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
9483 base_reg = hard_frame_pointer_rtx;
9484 base_offset = toffset;
9489 gcc_assert (base_reg != NULL);
9491 return plus_constant (base_reg, base_offset);
9494 /* Emit code to save registers in the prologue. */
9497 ix86_emit_save_regs (void)
9502 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
9503 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9505 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
9506 RTX_FRAME_RELATED_P (insn) = 1;
9510 /* Emit a single register save at CFA - CFA_OFFSET. */
9513 ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
9514 HOST_WIDE_INT cfa_offset)
9516 struct machine_function *m = cfun->machine;
9517 rtx reg = gen_rtx_REG (mode, regno);
9518 rtx mem, addr, base, insn;
9520 addr = choose_baseaddr (cfa_offset);
9521 mem = gen_frame_mem (mode, addr);
9523 /* For SSE saves, we need to indicate the 128-bit alignment. */
9524 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
9526 insn = emit_move_insn (mem, reg);
9527 RTX_FRAME_RELATED_P (insn) = 1;
9530 if (GET_CODE (base) == PLUS)
9531 base = XEXP (base, 0);
9532 gcc_checking_assert (REG_P (base));
9534 /* When saving registers into a re-aligned local stack frame, avoid
9535 any tricky guessing by dwarf2out. */
9536 if (m->fs.realigned)
9538 gcc_checking_assert (stack_realign_drap);
9540 if (regno == REGNO (crtl->drap_reg))
9542 /* A bit of a hack. We force the DRAP register to be saved in
9543 the re-aligned stack frame, which provides us with a copy
9544 of the CFA that will last past the prologue. Install it. */
9545 gcc_checking_assert (cfun->machine->fs.fp_valid);
9546 addr = plus_constant (hard_frame_pointer_rtx,
9547 cfun->machine->fs.fp_offset - cfa_offset);
9548 mem = gen_rtx_MEM (mode, addr);
9549 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
9553 /* The frame pointer is a stable reference within the
9554 aligned frame. Use it. */
9555 gcc_checking_assert (cfun->machine->fs.fp_valid);
9556 addr = plus_constant (hard_frame_pointer_rtx,
9557 cfun->machine->fs.fp_offset - cfa_offset);
9558 mem = gen_rtx_MEM (mode, addr);
9559 add_reg_note (insn, REG_CFA_EXPRESSION,
9560 gen_rtx_SET (VOIDmode, mem, reg));
9564 /* The memory may not be relative to the current CFA register,
9565 which means that we may need to generate a new pattern for
9566 use by the unwind info. */
9567 else if (base != m->fs.cfa_reg)
9569 addr = plus_constant (m->fs.cfa_reg, m->fs.cfa_offset - cfa_offset);
9570 mem = gen_rtx_MEM (mode, addr);
9571 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
9575 /* Emit code to save registers using MOV insns.
9576 First register is stored at CFA - CFA_OFFSET. */
9578 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
9582 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9583 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9585 ix86_emit_save_reg_using_mov (Pmode, regno, cfa_offset);
9586 cfa_offset -= UNITS_PER_WORD;
9590 /* Emit code to save SSE registers using MOV insns.
9591 First register is stored at CFA - CFA_OFFSET. */
9593 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
9597 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9598 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9600 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
9605 static GTY(()) rtx queued_cfa_restores;
9607 /* Add a REG_CFA_RESTORE REG note to INSN, or queue it until the next stack
9608 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
9609 Don't add the note if the previously saved value will be left untouched
9610 within the stack red zone until return, as unwinders can find the same
9611 value in the register and on the stack. */
9614 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
9616 if (cfa_offset <= cfun->machine->fs.red_zone_offset)
9621 add_reg_note (insn, REG_CFA_RESTORE, reg);
9622 RTX_FRAME_RELATED_P (insn) = 1;
9626 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
9629 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
9632 ix86_add_queued_cfa_restore_notes (rtx insn)
9635 if (!queued_cfa_restores)
9637 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
9639 XEXP (last, 1) = REG_NOTES (insn);
9640 REG_NOTES (insn) = queued_cfa_restores;
9641 queued_cfa_restores = NULL_RTX;
9642 RTX_FRAME_RELATED_P (insn) = 1;
9645 /* Expand prologue or epilogue stack adjustment.
9646 The pattern exists to put a dependency on all ebp-based memory accesses.
9647 STYLE should be negative if instructions should be marked as frame related,
9648 zero if the %r11 register is live and cannot be freely used, and positive otherwise. */
9652 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
9653 int style, bool set_cfa)
9655 struct machine_function *m = cfun->machine;
9657 bool add_frame_related_expr = false;
9660 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
9661 else if (x86_64_immediate_operand (offset, DImode))
9662 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
9666 /* r11 is used by indirect sibcall return as well, set before the
9667 epilogue and used after the epilogue. */
9669 tmp = gen_rtx_REG (DImode, R11_REG);
9672 gcc_assert (src != hard_frame_pointer_rtx
9673 && dest != hard_frame_pointer_rtx);
9674 tmp = hard_frame_pointer_rtx;
9676 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
9678 add_frame_related_expr = true;
9680 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
9683 insn = emit_insn (insn);
9685 ix86_add_queued_cfa_restore_notes (insn);
9691 gcc_assert (m->fs.cfa_reg == src);
9692 m->fs.cfa_offset += INTVAL (offset);
9693 m->fs.cfa_reg = dest;
9695 r = gen_rtx_PLUS (Pmode, src, offset);
9696 r = gen_rtx_SET (VOIDmode, dest, r);
9697 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
9698 RTX_FRAME_RELATED_P (insn) = 1;
9702 RTX_FRAME_RELATED_P (insn) = 1;
9703 if (add_frame_related_expr)
9705 rtx r = gen_rtx_PLUS (Pmode, src, offset);
9706 r = gen_rtx_SET (VOIDmode, dest, r);
9707 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
9711 if (dest == stack_pointer_rtx)
9713 HOST_WIDE_INT ooffset = m->fs.sp_offset;
9714 bool valid = m->fs.sp_valid;
9716 if (src == hard_frame_pointer_rtx)
9718 valid = m->fs.fp_valid;
9719 ooffset = m->fs.fp_offset;
9721 else if (src == crtl->drap_reg)
9723 valid = m->fs.drap_valid;
9728 /* Else there are two possibilities: SP itself, which we set
9729 up as the default above, or EH_RETURN_STACKADJ_RTX, which is
9730 taken care of by hand along the eh_return path. */
9731 gcc_checking_assert (src == stack_pointer_rtx
9732 || offset == const0_rtx);
9735 m->fs.sp_offset = ooffset - INTVAL (offset);
9736 m->fs.sp_valid = valid;
9740 /* Find an available register to be used as a dynamic realign argument
9741 pointer register. Such a register will be written in the prologue and
9742 used at the beginning of the body, so it must not be
9743 1. parameter passing register.
9745 We reuse the static-chain register if it is available. Otherwise, we
9746 use DI for i386 and R13 for x86-64. We chose R13 since it has longer encoding.
9749 Return: the regno of the chosen register. */
9752 find_drap_reg (void)
9754 tree decl = cfun->decl;
9758 /* Use R13 for a nested function or a function that needs a static chain.
9759 Since a function with a tail call may use any caller-saved
9760 registers in the epilogue, the DRAP must not use a caller-saved
9761 register in such a case. */
9762 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
9769 /* Use DI for a nested function or a function that needs a static chain.
9770 Since a function with a tail call may use any caller-saved
9771 registers in the epilogue, the DRAP must not use a caller-saved
9772 register in such a case. */
9773 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
9776 /* Reuse the static chain register if it isn't used for parameter passing. */
9778 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
9780 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
9781 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
9788 /* Return minimum incoming stack alignment. */
9791 ix86_minimum_incoming_stack_boundary (bool sibcall)
9793 unsigned int incoming_stack_boundary;
9795 /* Prefer the one specified at command line. */
9796 if (ix86_user_incoming_stack_boundary)
9797 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
9798 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
9799 if -mstackrealign is used, this isn't a sibcall check, and the
9800 estimated stack alignment is 128 bits. */
9803 && ix86_force_align_arg_pointer
9804 && crtl->stack_alignment_estimated == 128)
9805 incoming_stack_boundary = MIN_STACK_BOUNDARY;
9807 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
9809 /* Incoming stack alignment can be changed on individual functions
9810 via force_align_arg_pointer attribute. We use the smallest
9811 incoming stack boundary. */
9812 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
9813 && lookup_attribute (ix86_force_align_arg_pointer_string,
9814 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
9815 incoming_stack_boundary = MIN_STACK_BOUNDARY;
9817 /* The incoming stack frame has to be aligned at least at
9818 parm_stack_boundary. */
9819 if (incoming_stack_boundary < crtl->parm_stack_boundary)
9820 incoming_stack_boundary = crtl->parm_stack_boundary;
9822 /* The stack at the entrance of main is aligned by the runtime. We use
9823 the smallest incoming stack boundary. */
9824 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
9825 && DECL_NAME (current_function_decl)
9826 && MAIN_NAME_P (DECL_NAME (current_function_decl))
9827 && DECL_FILE_SCOPE_P (current_function_decl))
9828 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
9830 return incoming_stack_boundary;
9833 /* Update incoming stack boundary and estimated stack alignment. */
9836 ix86_update_stack_boundary (void)
9838 ix86_incoming_stack_boundary
9839 = ix86_minimum_incoming_stack_boundary (false);
9841 /* x86_64 varargs need 16-byte stack alignment for the register save area. */
9845 && crtl->stack_alignment_estimated < 128)
9846 crtl->stack_alignment_estimated = 128;
9849 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
9850 needed or an rtx for DRAP otherwise. */
9853 ix86_get_drap_rtx (void)
9855 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
9856 crtl->need_drap = true;
9858 if (stack_realign_drap)
9860 /* Assign DRAP to vDRAP and return vDRAP. */
9861 unsigned int regno = find_drap_reg ();
9866 arg_ptr = gen_rtx_REG (Pmode, regno);
9867 crtl->drap_reg = arg_ptr;
9870 drap_vreg = copy_to_reg (arg_ptr);
9874 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
9877 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
9878 RTX_FRAME_RELATED_P (insn) = 1;
9886 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
9889 ix86_internal_arg_pointer (void)
9891 return virtual_incoming_args_rtx;
9894 struct scratch_reg {
9899 /* Return a short-lived scratch register for use on function entry.
9900 In 32-bit mode, it is valid only after the registers are saved
9901 in the prologue. This register must be released by means of
9902 release_scratch_register_on_entry once it is dead. */
9905 get_scratch_register_on_entry (struct scratch_reg *sr)
9913 /* We always use R11 in 64-bit mode. */
9918 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
9920 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
9921 bool static_chain_p = DECL_STATIC_CHAIN (decl);
9922 int regparm = ix86_function_regparm (fntype, decl);
9924 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
9926 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
9927 for the static chain register. */
9928 if ((regparm < 1 || (fastcall_p && !static_chain_p))
9929 && drap_regno != AX_REG)
9931 else if (regparm < 2 && drap_regno != DX_REG)
9933 /* ecx is the static chain register. */
9934 else if (regparm < 3 && !fastcall_p && !static_chain_p
9935 && drap_regno != CX_REG)
9937 else if (ix86_save_reg (BX_REG, true))
9939 /* esi is the static chain register. */
9940 else if (!(regparm == 3 && static_chain_p)
9941 && ix86_save_reg (SI_REG, true))
9943 else if (ix86_save_reg (DI_REG, true))
9947 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
9952 sr->reg = gen_rtx_REG (Pmode, regno);
9955 rtx insn = emit_insn (gen_push (sr->reg));
9956 RTX_FRAME_RELATED_P (insn) = 1;
9960 /* Release a scratch register obtained from the preceding function. */
9963 release_scratch_register_on_entry (struct scratch_reg *sr)
9967 rtx x, insn = emit_insn (gen_pop (sr->reg));
9969 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
9970 RTX_FRAME_RELATED_P (insn) = 1;
9971 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
9972 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
9973 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
9977 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
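/* With the default STACK_CHECK_PROBE_INTERVAL_EXP of 12, this works
   out to 1 << 12 == 4096 bytes, i.e. one probe per page on typical
   x86 configurations.  */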
9979 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
9982 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
9984 /* We skip the probe for the first interval + a small dope of 4 words and
9985 probe that many bytes past the specified size to maintain a protection
9986 area at the bottom of the stack. */
9987 const int dope = 4 * UNITS_PER_WORD;
9988 rtx size_rtx = GEN_INT (size), last;
9990 /* See if we have a constant small number of probes to generate. If so,
9991 that's the easy case. The run-time loop is made up of 11 insns in the
9992 generic case while the compile-time loop is made up of 3+2*(n-1) insns
9993 for n # of intervals. */
9994 if (size <= 5 * PROBE_INTERVAL)
9996 HOST_WIDE_INT i, adjust;
9997 bool first_probe = true;
9999 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
10000 values of N from 1 until it exceeds SIZE. If only one probe is
10001 needed, this will not generate any code. Then adjust and probe
10002 to PROBE_INTERVAL + SIZE. */
10003 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10007 adjust = 2 * PROBE_INTERVAL + dope;
10008 first_probe = false;
10011 adjust = PROBE_INTERVAL;
10013 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10014 plus_constant (stack_pointer_rtx, -adjust)));
10015 emit_stack_probe (stack_pointer_rtx);
10019 adjust = size + PROBE_INTERVAL + dope;
10021 adjust = size + PROBE_INTERVAL - i;
10023 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10024 plus_constant (stack_pointer_rtx, -adjust)));
10025 emit_stack_probe (stack_pointer_rtx);
10027 /* Adjust back to account for the additional first interval. */
10028 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10029 plus_constant (stack_pointer_rtx,
10030 PROBE_INTERVAL + dope)));
10033 /* Otherwise, do the same as above, but in a loop. Note that we must be
10034 extra careful with variables wrapping around because we might be at
10035 the very top (or the very bottom) of the address space and we have
10036 to be able to handle this case properly; in particular, we use an
10037 equality test for the loop condition. */
10040 HOST_WIDE_INT rounded_size;
10041 struct scratch_reg sr;
10043 get_scratch_register_on_entry (&sr);
10046 /* Step 1: round SIZE to the previous multiple of the interval. */
10048 rounded_size = size & -PROBE_INTERVAL;
10051 /* Step 2: compute initial and final value of the loop counter. */
10053 /* SP = SP_0 + PROBE_INTERVAL. */
10054 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10055 plus_constant (stack_pointer_rtx,
10056 - (PROBE_INTERVAL + dope))));
10058 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
10059 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
10060 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
10061 gen_rtx_PLUS (Pmode, sr.reg,
10062 stack_pointer_rtx)));
10065 /* Step 3: the loop
10067 while (SP != LAST_ADDR)
10069 SP = SP + PROBE_INTERVAL
10073 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
10074 values of N from 1 until it is equal to ROUNDED_SIZE. */
10076 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
10079 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
10080 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
10082 if (size != rounded_size)
10084 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10085 plus_constant (stack_pointer_rtx,
10086 rounded_size - size)));
10087 emit_stack_probe (stack_pointer_rtx);
10090 /* Adjust back to account for the additional first interval. */
10091 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10092 plus_constant (stack_pointer_rtx,
10093 PROBE_INTERVAL + dope)));
10095 release_scratch_register_on_entry (&sr);
10098 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
10100 /* Even if the stack pointer isn't the CFA register, we need to correctly
10101 describe the adjustments made to it, in particular differentiate the
10102 frame-related ones from the frame-unrelated ones. */
10105 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
10106 XVECEXP (expr, 0, 0)
10107 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10108 plus_constant (stack_pointer_rtx, -size));
10109 XVECEXP (expr, 0, 1)
10110 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10111 plus_constant (stack_pointer_rtx,
10112 PROBE_INTERVAL + dope + size));
10113 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
10114 RTX_FRAME_RELATED_P (last) = 1;
10116 cfun->machine->fs.sp_offset += size;
10119 /* Make sure nothing is scheduled before we are done. */
10120 emit_insn (gen_blockage ());
10123 /* Adjust the stack pointer up to REG while probing it. */
10126 output_adjust_stack_and_probe (rtx reg)
10128 static int labelno = 0;
10129 char loop_lab[32], end_lab[32];
10132 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10133 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10135 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10137 /* Jump to END_LAB if SP == LAST_ADDR. */
10138 xops[0] = stack_pointer_rtx;
10140 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10141 fputs ("\tje\t", asm_out_file);
10142 assemble_name_raw (asm_out_file, end_lab);
10143 fputc ('\n', asm_out_file);
10145 /* SP = SP + PROBE_INTERVAL. */
10146 xops[1] = GEN_INT (PROBE_INTERVAL);
10147 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10150 xops[1] = const0_rtx;
10151 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
10153 fprintf (asm_out_file, "\tjmp\t");
10154 assemble_name_raw (asm_out_file, loop_lab);
10155 fputc ('\n', asm_out_file);
10157 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
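/* Roughly, the loop emitted above looks like this on a 64-bit target,
   assuming R11 as the scratch register holding LAST_ADDR and a
   4096-byte PROBE_INTERVAL (a sketch of the AT&T output, not a
   literal dump):

       .LPSRL0:
               cmpq    %r11, %rsp
               je      .LPSRE0
               subq    $4096, %rsp
               orq     $0, (%rsp)
               jmp     .LPSRL0
       .LPSRE0:
*/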
10162 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
10163 inclusive. These are offsets from the current stack pointer. */
10166 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
10168 /* See if we have a constant small number of probes to generate. If so,
10169 that's the easy case. The run-time loop is made up of 7 insns in the
10170 generic case while the compile-time loop is made up of n insns for n # of intervals. */
10172 if (size <= 7 * PROBE_INTERVAL)
10176 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
10177 it exceeds SIZE. If only one probe is needed, this will not
10178 generate any code. Then probe at FIRST + SIZE. */
10179 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10180 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + i)));
10182 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + size)));
10185 /* Otherwise, do the same as above, but in a loop. Note that we must be
10186 extra careful with variables wrapping around because we might be at
10187 the very top (or the very bottom) of the address space and we have
10188 to be able to handle this case properly; in particular, we use an
10189 equality test for the loop condition. */
10192 HOST_WIDE_INT rounded_size, last;
10193 struct scratch_reg sr;
10195 get_scratch_register_on_entry (&sr);
10198 /* Step 1: round SIZE to the previous multiple of the interval. */
10200 rounded_size = size & -PROBE_INTERVAL;
10203 /* Step 2: compute initial and final value of the loop counter. */
10205 /* TEST_OFFSET = FIRST. */
10206 emit_move_insn (sr.reg, GEN_INT (-first));
10208 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
10209 last = first + rounded_size;
10212 /* Step 3: the loop
10214 while (TEST_ADDR != LAST_ADDR)
10216 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
10220 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
10221 until it is equal to ROUNDED_SIZE. */
10223 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
10226 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
10227 that SIZE is equal to ROUNDED_SIZE. */
10229 if (size != rounded_size)
10230 emit_stack_probe (plus_constant (gen_rtx_PLUS (Pmode,
10233 rounded_size - size));
10235 release_scratch_register_on_entry (&sr);
10238 /* Make sure nothing is scheduled before we are done. */
10239 emit_insn (gen_blockage ());
10242 /* Probe a range of stack addresses from REG to END, inclusive. These are
10243 offsets from the current stack pointer. */
10246 output_probe_stack_range (rtx reg, rtx end)
10248 static int labelno = 0;
10249 char loop_lab[32], end_lab[32];
10252 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10253 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10255 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10257 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
10260 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10261 fputs ("\tje\t", asm_out_file);
10262 assemble_name_raw (asm_out_file, end_lab);
10263 fputc ('\n', asm_out_file);
10265 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
10266 xops[1] = GEN_INT (PROBE_INTERVAL);
10267 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10269 /* Probe at TEST_ADDR. */
10270 xops[0] = stack_pointer_rtx;
10272 xops[2] = const0_rtx;
10273 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
10275 fprintf (asm_out_file, "\tjmp\t");
10276 assemble_name_raw (asm_out_file, loop_lab);
10277 fputc ('\n', asm_out_file);
10279 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10284 /* Finalize the stack_realign_needed flag, which guides the prologue/epilogue
10285 to be generated in the correct form. */
10287 ix86_finalize_stack_realign_flags (void)
10289 /* Check whether stack realignment is really needed after reload, and
10290 store the result in cfun. */
10291 unsigned int incoming_stack_boundary
10292 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
10293 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
10294 unsigned int stack_realign = (incoming_stack_boundary
10295 < (current_function_is_leaf
10296 ? crtl->max_used_stack_slot_alignment
10297 : crtl->stack_alignment_needed));
10299 if (crtl->stack_realign_finalized)
10301 /* After stack_realign_needed is finalized, we can no longer update it. */
10303 gcc_assert (crtl->stack_realign_needed == stack_realign);
10307 crtl->stack_realign_needed = stack_realign;
10308 crtl->stack_realign_finalized = true;
10312 /* Expand the prologue into a bunch of separate insns. */
10315 ix86_expand_prologue (void)
10317 struct machine_function *m = cfun->machine;
10320 struct ix86_frame frame;
10321 HOST_WIDE_INT allocate;
10322 bool int_registers_saved;
10324 ix86_finalize_stack_realign_flags ();
10326 /* DRAP should not coexist with stack_realign_fp. */
10327 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
10329 memset (&m->fs, 0, sizeof (m->fs));
10331 /* Initialize CFA state for before the prologue. */
10332 m->fs.cfa_reg = stack_pointer_rtx;
10333 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
10335 /* Track the SP offset to the CFA. We continue tracking this after we've
10336 swapped the CFA register away from SP. In the case of re-alignment
10337 this is fudged; we're interested in offsets within the local frame. */
10338 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10339 m->fs.sp_valid = true;
10341 ix86_compute_frame_layout (&frame);
10343 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
10345 /* We should have already generated an error for any use of
10346 ms_hook on a nested function. */
10347 gcc_checking_assert (!ix86_static_chain_on_stack);
10349 /* Check whether profiling is active and whether we shall use the
10350 profiling-before-prologue variant. If so, sorry. */
10351 if (crtl->profile && flag_fentry != 0)
10352 sorry ("ms_hook_prologue attribute isn%'t compatible "
10353 "with -mfentry for 32-bit");
10355 /* In ix86_asm_output_function_label we emitted:
10356 8b ff movl.s %edi,%edi
10358 8b ec movl.s %esp,%ebp
10360 This matches the hookable function prologue in Win32 API
10361 functions in Microsoft Windows XP Service Pack 2 and newer.
10362 Wine uses this to enable Windows apps to hook the Win32 API
10363 functions provided by Wine.
10365 What that means is that we've already set up the frame pointer. */
10367 if (frame_pointer_needed
10368 && !(crtl->drap_reg && crtl->stack_realign_needed))
10372 /* We've decided to use the frame pointer already set up.
10373 Describe this to the unwinder by pretending that both
10374 push and mov insns happen right here.
10376 Putting the unwind info here at the end of the ms_hook
10377 is done so that we can make absolutely certain we get
10378 the required byte sequence at the start of the function,
10379 rather than relying on an assembler that can produce
10380 the exact encoding required.
10382 However it does mean (in the unpatched case) that we have
10383 a 1 insn window where the asynchronous unwind info is
10384 incorrect. However, if we placed the unwind info at
10385 its correct location we would have incorrect unwind info
10386 in the patched case. Which is probably all moot since
	     I don't expect Wine to generate dwarf2 unwind info for the
10388 system libraries that use this feature. */
10390 insn = emit_insn (gen_blockage ());
10392 push = gen_push (hard_frame_pointer_rtx);
10393 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
10394 stack_pointer_rtx);
10395 RTX_FRAME_RELATED_P (push) = 1;
10396 RTX_FRAME_RELATED_P (mov) = 1;
10398 RTX_FRAME_RELATED_P (insn) = 1;
10399 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10400 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
10402 /* Note that gen_push incremented m->fs.cfa_offset, even
10403 though we didn't emit the push insn here. */
10404 m->fs.cfa_reg = hard_frame_pointer_rtx;
10405 m->fs.fp_offset = m->fs.cfa_offset;
10406 m->fs.fp_valid = true;
10410 /* The frame pointer is not needed so pop %ebp again.
10411 This leaves us with a pristine state. */
10412 emit_insn (gen_pop (hard_frame_pointer_rtx));
10416 /* The first insn of a function that accepts its static chain on the
10417 stack is to push the register that would be filled in by a direct
10418 call. This insn will be skipped by the trampoline. */
10419 else if (ix86_static_chain_on_stack)
10421 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
10422 emit_insn (gen_blockage ());
10424 /* We don't want to interpret this push insn as a register save,
10425 only as a stack adjustment. The real copy of the register as
10426 a save will be done later, if needed. */
10427 t = plus_constant (stack_pointer_rtx, -UNITS_PER_WORD);
10428 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
10429 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
10430 RTX_FRAME_RELATED_P (insn) = 1;
  /* Emit prologue code to adjust stack alignment and set up DRAP, in
     case DRAP is needed and stack realignment is really needed after
     reload.  */
10435 if (stack_realign_drap)
10437 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10439 /* Only need to push parameter pointer reg if it is caller saved. */
10440 if (!call_used_regs[REGNO (crtl->drap_reg)])
10442 /* Push arg pointer reg */
10443 insn = emit_insn (gen_push (crtl->drap_reg));
10444 RTX_FRAME_RELATED_P (insn) = 1;
10447 /* Grab the argument pointer. */
10448 t = plus_constant (stack_pointer_rtx, m->fs.sp_offset);
10449 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10450 RTX_FRAME_RELATED_P (insn) = 1;
10451 m->fs.cfa_reg = crtl->drap_reg;
10452 m->fs.cfa_offset = 0;
10454 /* Align the stack. */
10455 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10457 GEN_INT (-align_bytes)));
10458 RTX_FRAME_RELATED_P (insn) = 1;
      /* Replicate the return address on the stack so that the return
	 address can be reached via the (argp - 1) slot.  This is needed
	 to implement the RETURN_ADDR_RTX macro and the intrinsic
	 function expand_builtin_return_addr, etc.  */
10464 t = plus_constant (crtl->drap_reg, -UNITS_PER_WORD);
10465 t = gen_frame_mem (Pmode, t);
10466 insn = emit_insn (gen_push (t));
10467 RTX_FRAME_RELATED_P (insn) = 1;
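      /* A sketch of the resulting frame (stack grows downwards; the
	 amount of alignment padding depends on the runtime stack
	 pointer value):

	     incoming arguments		  <- crtl->drap_reg
	     return address
	     static chain, if on the stack
	     saved drap_reg, if not call-used
	     alignment padding
	     copy of the return address	  <- sp; the new fake frame  */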
10469 /* For the purposes of frame and register save area addressing,
10470 we've started over with a new frame. */
10471 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10472 m->fs.realigned = true;
10475 if (frame_pointer_needed && !m->fs.fp_valid)
10477 /* Note: AT&T enter does NOT have reversed args. Enter is probably
10478 slower on all targets. Also sdb doesn't like it. */
10479 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
10480 RTX_FRAME_RELATED_P (insn) = 1;
10482 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
10484 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
10485 RTX_FRAME_RELATED_P (insn) = 1;
10487 if (m->fs.cfa_reg == stack_pointer_rtx)
10488 m->fs.cfa_reg = hard_frame_pointer_rtx;
10489 m->fs.fp_offset = m->fs.sp_offset;
10490 m->fs.fp_valid = true;
10494 int_registers_saved = (frame.nregs == 0);
10496 if (!int_registers_saved)
10498 /* If saving registers via PUSH, do so now. */
10499 if (!frame.save_regs_using_mov)
10501 ix86_emit_save_regs ();
10502 int_registers_saved = true;
10503 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
      /* When using the red zone we may start register saving before
	 allocating the stack frame, saving one cycle of the prologue.
	 However, avoid
10508 doing this if we have to probe the stack; at least on x86_64 the
10509 stack probe can turn into a call that clobbers a red zone location. */
10510 else if (ix86_using_red_zone ()
10511 && (! TARGET_STACK_PROBE
10512 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
10514 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10515 int_registers_saved = true;
10519 if (stack_realign_fp)
10521 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10522 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
10524 /* The computation of the size of the re-aligned stack frame means
10525 that we must allocate the size of the register save area before
10526 performing the actual alignment. Otherwise we cannot guarantee
10527 that there's enough storage above the realignment point. */
10528 if (m->fs.sp_offset != frame.sse_reg_save_offset)
10529 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10530 GEN_INT (m->fs.sp_offset
10531 - frame.sse_reg_save_offset),
10534 /* Align the stack. */
10535 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10537 GEN_INT (-align_bytes)));
10539 /* For the purposes of register save area addressing, the stack
10540 pointer is no longer valid. As for the value of sp_offset,
10541 see ix86_compute_frame_layout, which we need to match in order
10542 to pass verification of stack_pointer_offset at the end. */
10543 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
10544 m->fs.sp_valid = false;
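      /* Worked example: with sp_offset == 12 and align_bytes == 16 the
	 line above computes (12 + 16) & -16 == 16; see
	 ix86_compute_frame_layout for the matching computation.  */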
10547 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
10549 if (flag_stack_usage_info)
      /* We start counting from ARG_POINTER.  */
10552 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
10554 /* If it was realigned, take into account the fake frame. */
10555 if (stack_realign_drap)
10557 if (ix86_static_chain_on_stack)
10558 stack_size += UNITS_PER_WORD;
10560 if (!call_used_regs[REGNO (crtl->drap_reg)])
10561 stack_size += UNITS_PER_WORD;
	  /* This over-estimates by one minimal stack alignment unit, but
	     mitigates that by counting in the new return address slot.  */
10565 current_function_dynamic_stack_size
10566 += crtl->stack_alignment_needed / BITS_PER_UNIT;
10569 current_function_static_stack_size = stack_size;
  /* The stack has already been decremented by the instruction calling us,
     so probe if the size is non-negative to preserve the protection area.  */
10574 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
10576 /* We expect the registers to be saved when probes are used. */
10577 gcc_assert (int_registers_saved);
10579 if (STACK_CHECK_MOVING_SP)
10581 ix86_adjust_stack_and_probe (allocate);
10586 HOST_WIDE_INT size = allocate;
10588 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
10589 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
10591 if (TARGET_STACK_PROBE)
10592 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
10594 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
10600 else if (!ix86_target_stack_probe ()
10601 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
10603 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10604 GEN_INT (-allocate), -1,
10605 m->fs.cfa_reg == stack_pointer_rtx);
10609 rtx eax = gen_rtx_REG (Pmode, AX_REG);
10611 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
10613 bool eax_live = false;
10614 bool r10_live = false;
10617 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
10618 if (!TARGET_64BIT_MS_ABI)
10619 eax_live = ix86_eax_live_at_start_p ();
10623 emit_insn (gen_push (eax));
10624 allocate -= UNITS_PER_WORD;
10628 r10 = gen_rtx_REG (Pmode, R10_REG);
10629 emit_insn (gen_push (r10));
10630 allocate -= UNITS_PER_WORD;
10633 emit_move_insn (eax, GEN_INT (allocate));
10634 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
10636 /* Use the fact that AX still contains ALLOCATE. */
10637 adjust_stack_insn = (TARGET_64BIT
10638 ? gen_pro_epilogue_adjust_stack_di_sub
10639 : gen_pro_epilogue_adjust_stack_si_sub);
10641 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
10642 stack_pointer_rtx, eax));
10644 /* Note that SEH directives need to continue tracking the stack
10645 pointer even after the frame pointer has been set up. */
10646 if (m->fs.cfa_reg == stack_pointer_rtx || TARGET_SEH)
10648 if (m->fs.cfa_reg == stack_pointer_rtx)
10649 m->fs.cfa_offset += allocate;
10651 RTX_FRAME_RELATED_P (insn) = 1;
10652 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10653 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10654 plus_constant (stack_pointer_rtx,
10657 m->fs.sp_offset += allocate;
10659 if (r10_live && eax_live)
10661 t = choose_baseaddr (m->fs.sp_offset - allocate);
10662 emit_move_insn (r10, gen_frame_mem (Pmode, t));
10663 t = choose_baseaddr (m->fs.sp_offset - allocate - UNITS_PER_WORD);
10664 emit_move_insn (eax, gen_frame_mem (Pmode, t));
10666 else if (eax_live || r10_live)
10668 t = choose_baseaddr (m->fs.sp_offset - allocate);
10669 emit_move_insn ((eax_live ? eax : r10), gen_frame_mem (Pmode, t));
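	  /* An illustrative 32-bit shape of the sequence above when %eax
	     is live (the worker symbol is target-specific; a sketch, not
	     the exact output):

		pushl	%eax			# preserve live %eax
		movl	$ALLOCATE, %eax
		call	<allocate-stack worker>	# probes the pages
		subl	%eax, %esp		# the tracked adjustment
		movl	N(%esp), %eax		# reload the saved %eax  */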
10672 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
  /* If we haven't already set up the frame pointer, do so now.  */
10675 if (frame_pointer_needed && !m->fs.fp_valid)
10677 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
10678 GEN_INT (frame.stack_pointer_offset
10679 - frame.hard_frame_pointer_offset));
10680 insn = emit_insn (insn);
10681 RTX_FRAME_RELATED_P (insn) = 1;
10682 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
10684 if (m->fs.cfa_reg == stack_pointer_rtx)
10685 m->fs.cfa_reg = hard_frame_pointer_rtx;
10686 m->fs.fp_offset = frame.hard_frame_pointer_offset;
10687 m->fs.fp_valid = true;
10690 if (!int_registers_saved)
10691 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10692 if (frame.nsseregs)
10693 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
10695 pic_reg_used = false;
10696 if (pic_offset_table_rtx
10697 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
10700 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
10702 if (alt_pic_reg_used != INVALID_REGNUM)
10703 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
10705 pic_reg_used = true;
10712 if (ix86_cmodel == CM_LARGE_PIC)
10714 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
10715 rtx label = gen_label_rtx ();
10716 emit_label (label);
10717 LABEL_PRESERVE_P (label) = 1;
10718 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
10719 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
10720 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
10721 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
10722 pic_offset_table_rtx, tmp_reg));
10725 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
10728 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
10731 /* In the pic_reg_used case, make sure that the got load isn't deleted
10732 when mcount needs it. Blockage to avoid call movement across mcount
10733 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
10735 if (crtl->profile && !flag_fentry && pic_reg_used)
10736 emit_insn (gen_prologue_use (pic_offset_table_rtx));
10738 if (crtl->drap_reg && !crtl->stack_realign_needed)
      /* vDRAP is set up, but after reload it turns out stack realignment
	 isn't necessary; here we emit the prologue to set up DRAP
	 without the stack realignment adjustment.  */
10743 t = choose_baseaddr (0);
10744 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
  /* Prevent instructions from being scheduled into the register save
     push sequence when access to the red zone area is done through the
     frame pointer.  The offset between the frame pointer and the stack
     pointer is calculated relative to the value of the stack pointer at
     the end of the function prologue, and moving instructions that
     access the red zone area via the frame pointer inside the push
     sequence violates this assumption.  */
10753 if (frame_pointer_needed && frame.red_zone_size)
10754 emit_insn (gen_memory_blockage ());
10756 /* Emit cld instruction if stringops are used in the function. */
10757 if (TARGET_CLD && ix86_current_function_needs_cld)
10758 emit_insn (gen_cld ());
10760 /* SEH requires that the prologue end within 256 bytes of the start of
10761 the function. Prevent instruction schedules that would extend that. */
10763 emit_insn (gen_blockage ());
10766 /* Emit code to restore REG using a POP insn. */
10769 ix86_emit_restore_reg_using_pop (rtx reg)
10771 struct machine_function *m = cfun->machine;
10772 rtx insn = emit_insn (gen_pop (reg));
10774 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
10775 m->fs.sp_offset -= UNITS_PER_WORD;
10777 if (m->fs.cfa_reg == crtl->drap_reg
10778 && REGNO (reg) == REGNO (crtl->drap_reg))
10780 /* Previously we'd represented the CFA as an expression
10781 like *(%ebp - 8). We've just popped that value from
10782 the stack, which means we need to reset the CFA to
10783 the drap register. This will remain until we restore
10784 the stack pointer. */
10785 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
10786 RTX_FRAME_RELATED_P (insn) = 1;
10788 /* This means that the DRAP register is valid for addressing too. */
10789 m->fs.drap_valid = true;
10793 if (m->fs.cfa_reg == stack_pointer_rtx)
10795 rtx x = plus_constant (stack_pointer_rtx, UNITS_PER_WORD);
10796 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10797 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
10798 RTX_FRAME_RELATED_P (insn) = 1;
10800 m->fs.cfa_offset -= UNITS_PER_WORD;
10803 /* When the frame pointer is the CFA, and we pop it, we are
10804 swapping back to the stack pointer as the CFA. This happens
10805 for stack frames that don't allocate other data, so we assume
10806 the stack pointer is now pointing at the return address, i.e.
     the function entry state, which makes the offset one word.  */
10808 if (reg == hard_frame_pointer_rtx)
10810 m->fs.fp_valid = false;
10811 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
10813 m->fs.cfa_reg = stack_pointer_rtx;
10814 m->fs.cfa_offset -= UNITS_PER_WORD;
10816 add_reg_note (insn, REG_CFA_DEF_CFA,
10817 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10818 GEN_INT (m->fs.cfa_offset)));
10819 RTX_FRAME_RELATED_P (insn) = 1;
10824 /* Emit code to restore saved registers using POP insns. */
10827 ix86_emit_restore_regs_using_pop (void)
10829 unsigned int regno;
10831 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10832 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
10833 ix86_emit_restore_reg_using_pop (gen_rtx_REG (Pmode, regno));
10836 /* Emit code and notes for the LEAVE instruction. */
10839 ix86_emit_leave (void)
10841 struct machine_function *m = cfun->machine;
10842 rtx insn = emit_insn (ix86_gen_leave ());
10844 ix86_add_queued_cfa_restore_notes (insn);
10846 gcc_assert (m->fs.fp_valid);
10847 m->fs.sp_valid = true;
10848 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
10849 m->fs.fp_valid = false;
10851 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
10853 m->fs.cfa_reg = stack_pointer_rtx;
10854 m->fs.cfa_offset = m->fs.sp_offset;
10856 add_reg_note (insn, REG_CFA_DEF_CFA,
10857 plus_constant (stack_pointer_rtx, m->fs.sp_offset));
10858 RTX_FRAME_RELATED_P (insn) = 1;
10859 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
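/* For reference: "leave" is architecturally equivalent to
   "movl %ebp, %esp; popl %ebp" (with %rbp/%rsp in 64-bit mode), which
   is why sp becomes valid again at fp_offset - UNITS_PER_WORD above.  */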
10864 /* Emit code to restore saved registers using MOV insns.
10865 First register is restored from CFA - CFA_OFFSET. */
10867 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
10868 bool maybe_eh_return)
10870 struct machine_function *m = cfun->machine;
10871 unsigned int regno;
10873 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10874 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
10876 rtx reg = gen_rtx_REG (Pmode, regno);
10879 mem = choose_baseaddr (cfa_offset);
10880 mem = gen_frame_mem (Pmode, mem);
10881 insn = emit_move_insn (reg, mem);
10883 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
	  /* Previously we'd represented the CFA as an expression
	     like *(%ebp - 8).  We've just reloaded that value from
	     the stack, which means we need to reset the CFA to
	     the drap register.  This will remain until we restore
	     the stack pointer.  */
10890 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
10891 RTX_FRAME_RELATED_P (insn) = 1;
10893 /* This means that the DRAP register is valid for addressing. */
10894 m->fs.drap_valid = true;
10897 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
10899 cfa_offset -= UNITS_PER_WORD;
10903 /* Emit code to restore saved registers using MOV insns.
10904 First register is restored from CFA - CFA_OFFSET. */
10906 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
10907 bool maybe_eh_return)
10909 unsigned int regno;
10911 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10912 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
10914 rtx reg = gen_rtx_REG (V4SFmode, regno);
10917 mem = choose_baseaddr (cfa_offset);
10918 mem = gen_rtx_MEM (V4SFmode, mem);
10919 set_mem_align (mem, 128);
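      /* The 128-bit mem alignment set above lets the move expand to an
	 aligned movaps load; the save area was laid out aligned for
	 this purpose.  */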
10920 emit_move_insn (reg, mem);
10922 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
10928 /* Restore function stack, frame, and registers. */
10931 ix86_expand_epilogue (int style)
10933 struct machine_function *m = cfun->machine;
10934 struct machine_frame_state frame_state_save = m->fs;
10935 struct ix86_frame frame;
10936 bool restore_regs_via_mov;
10939 ix86_finalize_stack_realign_flags ();
10940 ix86_compute_frame_layout (&frame);
10942 m->fs.sp_valid = (!frame_pointer_needed
10943 || (current_function_sp_is_unchanging
10944 && !stack_realign_fp));
10945 gcc_assert (!m->fs.sp_valid
10946 || m->fs.sp_offset == frame.stack_pointer_offset);
10948 /* The FP must be valid if the frame pointer is present. */
10949 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
10950 gcc_assert (!m->fs.fp_valid
10951 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
10953 /* We must have *some* valid pointer to the stack frame. */
10954 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
10956 /* The DRAP is never valid at this point. */
10957 gcc_assert (!m->fs.drap_valid);
10959 /* See the comment about red zone and frame
10960 pointer usage in ix86_expand_prologue. */
10961 if (frame_pointer_needed && frame.red_zone_size)
10962 emit_insn (gen_memory_blockage ());
10964 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
10965 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
10967 /* Determine the CFA offset of the end of the red-zone. */
10968 m->fs.red_zone_offset = 0;
10969 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
10971 /* The red-zone begins below the return address. */
10972 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
10974 /* When the register save area is in the aligned portion of
10975 the stack, determine the maximum runtime displacement that
10976 matches up with the aligned frame. */
10977 if (stack_realign_drap)
10978 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
10982 /* Special care must be taken for the normal return case of a function
10983 using eh_return: the eax and edx registers are marked as saved, but
10984 not restored along this path. Adjust the save location to match. */
10985 if (crtl->calls_eh_return && style != 2)
10986 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
10988 /* EH_RETURN requires the use of moves to function properly. */
10989 if (crtl->calls_eh_return)
10990 restore_regs_via_mov = true;
10991 /* SEH requires the use of pops to identify the epilogue. */
10992 else if (TARGET_SEH)
10993 restore_regs_via_mov = false;
  /* If we're only restoring one register and sp is not valid, then
     use a move instruction to restore the register, since it's less
     work than reloading sp and popping the register.  */
10997 else if (!m->fs.sp_valid && frame.nregs <= 1)
10998 restore_regs_via_mov = true;
10999 else if (TARGET_EPILOGUE_USING_MOVE
11000 && cfun->machine->use_fast_prologue_epilogue
11001 && (frame.nregs > 1
11002 || m->fs.sp_offset != frame.reg_save_offset))
11003 restore_regs_via_mov = true;
11004 else if (frame_pointer_needed
11006 && m->fs.sp_offset != frame.reg_save_offset)
11007 restore_regs_via_mov = true;
11008 else if (frame_pointer_needed
11009 && TARGET_USE_LEAVE
11010 && cfun->machine->use_fast_prologue_epilogue
11011 && frame.nregs == 1)
11012 restore_regs_via_mov = true;
11014 restore_regs_via_mov = false;
11016 if (restore_regs_via_mov || frame.nsseregs)
11018 /* Ensure that the entire register save area is addressable via
11019 the stack pointer, if we will restore via sp. */
11021 && m->fs.sp_offset > 0x7fffffff
11022 && !(m->fs.fp_valid || m->fs.drap_valid)
11023 && (frame.nsseregs + frame.nregs) != 0)
11025 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11026 GEN_INT (m->fs.sp_offset
11027 - frame.sse_reg_save_offset),
11029 m->fs.cfa_reg == stack_pointer_rtx);
11033 /* If there are any SSE registers to restore, then we have to do it
11034 via moves, since there's obviously no pop for SSE regs. */
11035 if (frame.nsseregs)
11036 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
11039 if (restore_regs_via_mov)
11044 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
11046 /* eh_return epilogues need %ecx added to the stack pointer. */
11049 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
11051 /* Stack align doesn't work with eh_return. */
11052 gcc_assert (!stack_realign_drap);
      /* Neither do regparm nested functions.  */
11054 gcc_assert (!ix86_static_chain_on_stack);
11056 if (frame_pointer_needed)
11058 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
11059 t = plus_constant (t, m->fs.fp_offset - UNITS_PER_WORD);
11060 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
11062 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
11063 insn = emit_move_insn (hard_frame_pointer_rtx, t);
11065 /* Note that we use SA as a temporary CFA, as the return
11066 address is at the proper place relative to it. We
11067 pretend this happens at the FP restore insn because
11068 prior to this insn the FP would be stored at the wrong
11069 offset relative to SA, and after this insn we have no
11070 other reasonable register to use for the CFA. We don't
11071 bother resetting the CFA to the SP for the duration of
11072 the return insn. */
11073 add_reg_note (insn, REG_CFA_DEF_CFA,
11074 plus_constant (sa, UNITS_PER_WORD));
11075 ix86_add_queued_cfa_restore_notes (insn);
11076 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
11077 RTX_FRAME_RELATED_P (insn) = 1;
11079 m->fs.cfa_reg = sa;
11080 m->fs.cfa_offset = UNITS_PER_WORD;
11081 m->fs.fp_valid = false;
11083 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
11084 const0_rtx, style, false);
11088 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
11089 t = plus_constant (t, m->fs.sp_offset - UNITS_PER_WORD);
11090 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
11091 ix86_add_queued_cfa_restore_notes (insn);
11093 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
11094 if (m->fs.cfa_offset != UNITS_PER_WORD)
11096 m->fs.cfa_offset = UNITS_PER_WORD;
11097 add_reg_note (insn, REG_CFA_DEF_CFA,
11098 plus_constant (stack_pointer_rtx,
11100 RTX_FRAME_RELATED_P (insn) = 1;
11103 m->fs.sp_offset = UNITS_PER_WORD;
11104 m->fs.sp_valid = true;
11109 /* SEH requires that the function end with (1) a stack adjustment
11110 if necessary, (2) a sequence of pops, and (3) a return or
11111 jump instruction. Prevent insns from the function body from
11112 being scheduled into this sequence. */
      /* Prevent a catch region from being adjacent to the standard
	 epilogue sequence.  Unfortunately, neither crtl->uses_eh_lsda nor
	 several other flags that would be interesting to test are
11119 if (flag_non_call_exceptions)
11120 emit_insn (gen_nops (const1_rtx));
11122 emit_insn (gen_blockage ());
11125 /* First step is to deallocate the stack frame so that we can
11126 pop the registers. */
11127 if (!m->fs.sp_valid)
11129 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
11130 GEN_INT (m->fs.fp_offset
11131 - frame.reg_save_offset),
11134 else if (m->fs.sp_offset != frame.reg_save_offset)
11136 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11137 GEN_INT (m->fs.sp_offset
11138 - frame.reg_save_offset),
11140 m->fs.cfa_reg == stack_pointer_rtx);
11143 ix86_emit_restore_regs_using_pop ();
  /* If we used a frame pointer and haven't already got rid of it,
     then do so now.  */
11148 if (m->fs.fp_valid)
11150 /* If the stack pointer is valid and pointing at the frame
11151 pointer store address, then we only need a pop. */
11152 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
11153 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
11154 /* Leave results in shorter dependency chains on CPUs that are
11155 able to grok it fast. */
11156 else if (TARGET_USE_LEAVE
11157 || optimize_function_for_size_p (cfun)
11158 || !cfun->machine->use_fast_prologue_epilogue)
11159 ix86_emit_leave ();
11162 pro_epilogue_adjust_stack (stack_pointer_rtx,
11163 hard_frame_pointer_rtx,
11164 const0_rtx, style, !using_drap);
11165 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
11171 int param_ptr_offset = UNITS_PER_WORD;
11174 gcc_assert (stack_realign_drap);
11176 if (ix86_static_chain_on_stack)
11177 param_ptr_offset += UNITS_PER_WORD;
11178 if (!call_used_regs[REGNO (crtl->drap_reg)])
11179 param_ptr_offset += UNITS_PER_WORD;
11181 insn = emit_insn (gen_rtx_SET
11182 (VOIDmode, stack_pointer_rtx,
11183 gen_rtx_PLUS (Pmode,
11185 GEN_INT (-param_ptr_offset))));
11186 m->fs.cfa_reg = stack_pointer_rtx;
11187 m->fs.cfa_offset = param_ptr_offset;
11188 m->fs.sp_offset = param_ptr_offset;
11189 m->fs.realigned = false;
11191 add_reg_note (insn, REG_CFA_DEF_CFA,
11192 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11193 GEN_INT (param_ptr_offset)));
11194 RTX_FRAME_RELATED_P (insn) = 1;
11196 if (!call_used_regs[REGNO (crtl->drap_reg)])
11197 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
11200 /* At this point the stack pointer must be valid, and we must have
11201 restored all of the registers. We may not have deallocated the
11202 entire stack frame. We've delayed this until now because it may
11203 be possible to merge the local stack deallocation with the
11204 deallocation forced by ix86_static_chain_on_stack. */
11205 gcc_assert (m->fs.sp_valid);
11206 gcc_assert (!m->fs.fp_valid);
11207 gcc_assert (!m->fs.realigned);
11208 if (m->fs.sp_offset != UNITS_PER_WORD)
11210 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11211 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
11215 /* Sibcall epilogues don't want a return instruction. */
11218 m->fs = frame_state_save;
11222 /* Emit vzeroupper if needed. */
11223 if (TARGET_VZEROUPPER
11224 && !TREE_THIS_VOLATILE (cfun->decl)
11225 && !cfun->machine->caller_return_avx256_p)
11226 emit_insn (gen_avx_vzeroupper (GEN_INT (call_no_avx256)));
11228 if (crtl->args.pops_args && crtl->args.size)
11230 rtx popc = GEN_INT (crtl->args.pops_args);
      /* i386 can only pop 64K bytes.  If asked to pop more, pop the
	 return address, do an explicit add, and jump indirectly to the
	 caller.  */
11235 if (crtl->args.pops_args >= 65536)
11237 rtx ecx = gen_rtx_REG (SImode, CX_REG);
11240 /* There is no "pascal" calling convention in any 64bit ABI. */
11241 gcc_assert (!TARGET_64BIT);
11243 insn = emit_insn (gen_pop (ecx));
11244 m->fs.cfa_offset -= UNITS_PER_WORD;
11245 m->fs.sp_offset -= UNITS_PER_WORD;
11247 add_reg_note (insn, REG_CFA_ADJUST_CFA,
11248 copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
11249 add_reg_note (insn, REG_CFA_REGISTER,
11250 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
11251 RTX_FRAME_RELATED_P (insn) = 1;
11253 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11255 emit_jump_insn (gen_return_indirect_internal (ecx));
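	  /* Roughly, the sequence just emitted is (a sketch for ia32):

		popl	%ecx		# return address
		addl	$pops_args, %esp
		jmp	*%ecx  */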
11258 emit_jump_insn (gen_return_pop_internal (popc));
11261 emit_jump_insn (gen_return_internal ());
  /* Restore the frame state to the state at the end of the prologue,
     so that it's correct for the next epilogue.  */
11265 m->fs = frame_state_save;
11268 /* Reset from the function's potential modifications. */
11271 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
11272 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
11274 if (pic_offset_table_rtx)
11275 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
11277 /* Mach-O doesn't support labels at the end of objects, so if
11278 it looks like we might want one, insert a NOP. */
11280 rtx insn = get_last_insn ();
11283 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
11284 insn = PREV_INSN (insn);
11288 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
11289 fputs ("\tnop\n", file);
11295 /* Return a scratch register to use in the split stack prologue. The
11296 split stack prologue is used for -fsplit-stack. It is the first
11297 instructions in the function, even before the regular prologue.
11298 The scratch register can be any caller-saved register which is not
11299 used for parameters or for the static chain. */
11301 static unsigned int
11302 split_stack_prologue_scratch_regno (void)
11311 is_fastcall = (lookup_attribute ("fastcall",
11312 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
11314 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
11318 if (DECL_STATIC_CHAIN (cfun->decl))
11320 sorry ("-fsplit-stack does not support fastcall with "
11321 "nested function");
11322 return INVALID_REGNUM;
11326 else if (regparm < 3)
11328 if (!DECL_STATIC_CHAIN (cfun->decl))
11334 sorry ("-fsplit-stack does not support 2 register "
11335 " parameters for a nested function");
11336 return INVALID_REGNUM;
11343 /* FIXME: We could make this work by pushing a register
11344 around the addition and comparison. */
11345 sorry ("-fsplit-stack does not support 3 register parameters");
11346 return INVALID_REGNUM;
/* A SYMBOL_REF for the function which allocates new stack space for
11354 static GTY(()) rtx split_stack_fn;
11356 /* A SYMBOL_REF for the more stack function when using the large
11359 static GTY(()) rtx split_stack_fn_large;
11361 /* Handle -fsplit-stack. These are the first instructions in the
11362 function, even before the regular prologue. */
11365 ix86_expand_split_stack_prologue (void)
11367 struct ix86_frame frame;
11368 HOST_WIDE_INT allocate;
11369 unsigned HOST_WIDE_INT args_size;
11370 rtx label, limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
11371 rtx scratch_reg = NULL_RTX;
11372 rtx varargs_label = NULL_RTX;
11375 gcc_assert (flag_split_stack && reload_completed);
11377 ix86_finalize_stack_realign_flags ();
11378 ix86_compute_frame_layout (&frame);
11379 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
11381 /* This is the label we will branch to if we have enough stack
11382 space. We expect the basic block reordering pass to reverse this
11383 branch if optimizing, so that we branch in the unlikely case. */
11384 label = gen_label_rtx ();
11386 /* We need to compare the stack pointer minus the frame size with
11387 the stack boundary in the TCB. The stack boundary always gives
11388 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
11389 can compare directly. Otherwise we need to do an addition. */
11391 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
11392 UNSPEC_STACK_CHECK);
11393 limit = gen_rtx_CONST (Pmode, limit);
11394 limit = gen_rtx_MEM (Pmode, limit);
11395 if (allocate < SPLIT_STACK_AVAILABLE)
11396 current = stack_pointer_rtx;
11399 unsigned int scratch_regno;
11402 /* We need a scratch register to hold the stack pointer minus
11403 the required frame size. Since this is the very start of the
11404 function, the scratch register can be any caller-saved
11405 register which is not used for parameters. */
11406 offset = GEN_INT (- allocate);
11407 scratch_regno = split_stack_prologue_scratch_regno ();
11408 if (scratch_regno == INVALID_REGNUM)
11410 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11411 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
11413 /* We don't use ix86_gen_add3 in this case because it will
11414 want to split to lea, but when not optimizing the insn
11415 will not be split after this point. */
11416 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11417 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11422 emit_move_insn (scratch_reg, offset);
11423 emit_insn (gen_adddi3 (scratch_reg, scratch_reg,
11424 stack_pointer_rtx));
11426 current = scratch_reg;
11429 ix86_expand_branch (GEU, current, limit, label);
11430 jump_insn = get_last_insn ();
11431 JUMP_LABEL (jump_insn) = label;
11433 /* Mark the jump as very likely to be taken. */
11434 add_reg_note (jump_insn, REG_BR_PROB,
11435 GEN_INT (REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100));
11437 if (split_stack_fn == NULL_RTX)
11438 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
11439 fn = split_stack_fn;
11441 /* Get more stack space. We pass in the desired stack space and the
11442 size of the arguments to copy to the new stack. In 32-bit mode
11443 we push the parameters; __morestack will return on a new stack
11444 anyhow. In 64-bit mode we pass the parameters in r10 and
11446 allocate_rtx = GEN_INT (allocate);
11447 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
11448 call_fusage = NULL_RTX;
11453 reg10 = gen_rtx_REG (Pmode, R10_REG);
11454 reg11 = gen_rtx_REG (Pmode, R11_REG);
11456 /* If this function uses a static chain, it will be in %r10.
11457 Preserve it across the call to __morestack. */
11458 if (DECL_STATIC_CHAIN (cfun->decl))
11462 rax = gen_rtx_REG (Pmode, AX_REG);
11463 emit_move_insn (rax, reg10);
11464 use_reg (&call_fusage, rax);
11467 if (ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
11469 HOST_WIDE_INT argval;
11471 /* When using the large model we need to load the address
11472 into a register, and we've run out of registers. So we
	     switch to a different calling convention, and we call a
	     different function: __morestack_large_model.  We pass the
11475 argument size in the upper 32 bits of r10 and pass the
11476 frame size in the lower 32 bits. */
11477 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
11478 gcc_assert ((args_size & 0xffffffff) == args_size);
11480 if (split_stack_fn_large == NULL_RTX)
11481 split_stack_fn_large =
11482 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
11484 if (ix86_cmodel == CM_LARGE_PIC)
11488 label = gen_label_rtx ();
11489 emit_label (label);
11490 LABEL_PRESERVE_P (label) = 1;
11491 emit_insn (gen_set_rip_rex64 (reg10, label));
11492 emit_insn (gen_set_got_offset_rex64 (reg11, label));
11493 emit_insn (gen_adddi3 (reg10, reg10, reg11));
11494 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
11496 x = gen_rtx_CONST (Pmode, x);
11497 emit_move_insn (reg11, x);
11498 x = gen_rtx_PLUS (Pmode, reg10, reg11);
11499 x = gen_const_mem (Pmode, x);
11500 emit_move_insn (reg11, x);
11503 emit_move_insn (reg11, split_stack_fn_large);
11507 argval = ((args_size << 16) << 16) + allocate;
11508 emit_move_insn (reg10, GEN_INT (argval));
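	  /* E.g. args_size == 8 and allocate == 0x1000 yield
	     argval == 0x0000000800001000.  */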
11512 emit_move_insn (reg10, allocate_rtx);
11513 emit_move_insn (reg11, GEN_INT (args_size));
11514 use_reg (&call_fusage, reg11);
11517 use_reg (&call_fusage, reg10);
11521 emit_insn (gen_push (GEN_INT (args_size)));
11522 emit_insn (gen_push (allocate_rtx));
11524 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
11525 GEN_INT (UNITS_PER_WORD), constm1_rtx,
11527 add_function_usage_to (call_insn, call_fusage);
11529 /* In order to make call/return prediction work right, we now need
11530 to execute a return instruction. See
11531 libgcc/config/i386/morestack.S for the details on how this works.
11533 For flow purposes gcc must not see this as a return
11534 instruction--we need control flow to continue at the subsequent
11535 label. Therefore, we use an unspec. */
11536 gcc_assert (crtl->args.pops_args < 65536);
11537 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
11539 /* If we are in 64-bit mode and this function uses a static chain,
     we saved %r10 in %rax before calling __morestack.  */
11541 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
11542 emit_move_insn (gen_rtx_REG (Pmode, R10_REG),
11543 gen_rtx_REG (Pmode, AX_REG));
11545 /* If this function calls va_start, we need to store a pointer to
     the arguments on the old stack, because they may not all have been
     copied to the new stack.  At this point the old stack can be
11548 found at the frame pointer value used by __morestack, because
11549 __morestack has set that up before calling back to us. Here we
11550 store that pointer in a scratch register, and in
11551 ix86_expand_prologue we store the scratch register in a stack
11553 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11555 unsigned int scratch_regno;
11559 scratch_regno = split_stack_prologue_scratch_regno ();
11560 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11561 frame_reg = gen_rtx_REG (Pmode, BP_REG);
	  /* 64-bit:
	     fp -> old fp value
		   return address within this function
		   return address of caller of this function
		   stack arguments

	     So we add three words to get to the stack arguments.

	     32-bit:
	     fp -> old fp value
		   return address within this function
		   first argument to __morestack
		   second argument to __morestack
		   return address of caller of this function
		   stack arguments

	     So we add five words to get to the stack arguments.  */
11579 words = TARGET_64BIT ? 3 : 5;
11580 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11581 gen_rtx_PLUS (Pmode, frame_reg,
11582 GEN_INT (words * UNITS_PER_WORD))));
11584 varargs_label = gen_label_rtx ();
11585 emit_jump_insn (gen_jump (varargs_label));
11586 JUMP_LABEL (get_last_insn ()) = varargs_label;
11591 emit_label (label);
11592 LABEL_NUSES (label) = 1;
11594 /* If this function calls va_start, we now have to set the scratch
11595 register for the case where we do not call __morestack. In this
11596 case we need to set it based on the stack pointer. */
11597 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11599 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11600 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11601 GEN_INT (UNITS_PER_WORD))));
11603 emit_label (varargs_label);
11604 LABEL_NUSES (varargs_label) = 1;
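/* A sketch of the 32-bit check emitted above for a small frame (the
   TCB slot is shown symbolically; its offset is target-defined, and the
   <...> operands are placeholders):

	cmpl	%gs:<split-stack limit>, %esp
	jae	.Lenough		# likely taken
	pushl	$<args size>
	pushl	$<frame size>
	call	__morestack
	ret				# returns on the new stack
   .Lenough:
	...function body...

   For frames larger than SPLIT_STACK_AVAILABLE the comparison uses a
   scratch register holding sp minus the frame size instead of %esp.  */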
11608 /* We may have to tell the dataflow pass that the split stack prologue
11609 is initializing a scratch register. */
11612 ix86_live_on_entry (bitmap regs)
11614 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11616 gcc_assert (flag_split_stack);
11617 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
11621 /* Extract the parts of an RTL expression that is a valid memory address
11622 for an instruction. Return 0 if the structure of the address is
   grossly off.  Return -1 if the address contains ASHIFT, so it is not
   strictly valid, but is still used for computing the length of the lea
   instruction.  */
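/* For example, the canonical address for 8(%ebx,%ecx,4),
   (plus (plus (mult (reg %ecx) (const_int 4)) (reg %ebx)) (const_int 8)),
   decomposes into base == %ebx, index == %ecx, scale == 4 and
   disp == (const_int 8).  */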
11627 ix86_decompose_address (rtx addr, struct ix86_address *out)
11629 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
11630 rtx base_reg, index_reg;
11631 HOST_WIDE_INT scale = 1;
11632 rtx scale_rtx = NULL_RTX;
11635 enum ix86_address_seg seg = SEG_DEFAULT;
11637 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
11639 else if (GET_CODE (addr) == PLUS)
11641 rtx addends[4], op;
11649 addends[n++] = XEXP (op, 1);
11652 while (GET_CODE (op) == PLUS);
11657 for (i = n; i >= 0; --i)
11660 switch (GET_CODE (op))
11665 index = XEXP (op, 0);
11666 scale_rtx = XEXP (op, 1);
11672 index = XEXP (op, 0);
11673 tmp = XEXP (op, 1);
11674 if (!CONST_INT_P (tmp))
11676 scale = INTVAL (tmp);
11677 if ((unsigned HOST_WIDE_INT) scale > 3)
11679 scale = 1 << scale;
11683 if (XINT (op, 1) == UNSPEC_TP
11684 && TARGET_TLS_DIRECT_SEG_REFS
11685 && seg == SEG_DEFAULT)
11686 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
11715 else if (GET_CODE (addr) == MULT)
11717 index = XEXP (addr, 0); /* index*scale */
11718 scale_rtx = XEXP (addr, 1);
11720 else if (GET_CODE (addr) == ASHIFT)
11722 /* We're called for lea too, which implements ashift on occasion. */
11723 index = XEXP (addr, 0);
11724 tmp = XEXP (addr, 1);
11725 if (!CONST_INT_P (tmp))
11727 scale = INTVAL (tmp);
11728 if ((unsigned HOST_WIDE_INT) scale > 3)
11730 scale = 1 << scale;
11734 disp = addr; /* displacement */
11736 /* Extract the integral value of scale. */
11739 if (!CONST_INT_P (scale_rtx))
11741 scale = INTVAL (scale_rtx);
11744 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
11745 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
11747 /* Avoid useless 0 displacement. */
11748 if (disp == const0_rtx && (base || index))
  /* Allow arg pointer and stack pointer as index if there is no scaling.  */
11752 if (base_reg && index_reg && scale == 1
11753 && (index_reg == arg_pointer_rtx
11754 || index_reg == frame_pointer_rtx
11755 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
11758 tmp = base, base = index, index = tmp;
11759 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
11762 /* Special case: %ebp cannot be encoded as a base without a displacement.
11766 && (base_reg == hard_frame_pointer_rtx
11767 || base_reg == frame_pointer_rtx
11768 || base_reg == arg_pointer_rtx
11769 || (REG_P (base_reg)
11770 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
11771 || REGNO (base_reg) == R13_REG))))
11774 /* Special case: on K6, [%esi] makes the instruction vector decoded.
11775 Avoid this by transforming to [%esi+0].
11776 Reload calls address legitimization without cfun defined, so we need
11777 to test cfun for being non-NULL. */
11778 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
11779 && base_reg && !index_reg && !disp
11780 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
11783 /* Special case: encode reg+reg instead of reg*2. */
11784 if (!base && index && scale == 2)
11785 base = index, base_reg = index_reg, scale = 1;
11787 /* Special case: scaling cannot be encoded without base or displacement. */
11788 if (!base && !disp && index && scale != 1)
11792 out->index = index;
11794 out->scale = scale;
/* Return the cost of the memory address X.
   For i386, it is better to use a complex address than let gcc copy
   the address into a reg and make a new pseudo.  But not if the address
   requires two regs - that would mean more pseudos with longer
   lifetimes.  */
11806 ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
11808 struct ix86_address parts;
11810 int ok = ix86_decompose_address (x, &parts);
11814 if (parts.base && GET_CODE (parts.base) == SUBREG)
11815 parts.base = SUBREG_REG (parts.base);
11816 if (parts.index && GET_CODE (parts.index) == SUBREG)
11817 parts.index = SUBREG_REG (parts.index);
11819 /* Attempt to minimize number of registers in the address. */
11821 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
11823 && (!REG_P (parts.index)
11824 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
11828 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
11830 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
11831 && parts.base != parts.index)
  /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
     since its predecode logic can't detect the length of instructions
     and it degenerates to vector decoded.  Increase the cost of such
     addresses here.  The penalty is minimally 2 cycles.  It may be
     worthwhile to split such addresses or even refuse them at all.

     The following addressing modes are affected:
	[base+scale*index]
	[scale*index+disp]
	[base+index]

     The first and last case may be avoidable by explicitly coding the
     zero in the memory address, but I don't have an AMD-K6 machine
     handy to check this theory.  */
11850 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
11851 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
11852 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
11858 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
   this is used to form addresses to local data when -fPIC is in
   effect.  */
11863 darwin_local_data_pic (rtx disp)
11865 return (GET_CODE (disp) == UNSPEC
11866 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
11869 /* Determine if a given RTX is a valid constant. We already know this
11870 satisfies CONSTANT_P. */
11873 ix86_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
11875 switch (GET_CODE (x))
11880 if (GET_CODE (x) == PLUS)
11882 if (!CONST_INT_P (XEXP (x, 1)))
11887 if (TARGET_MACHO && darwin_local_data_pic (x))
11890 /* Only some unspecs are valid as "constants". */
11891 if (GET_CODE (x) == UNSPEC)
11892 switch (XINT (x, 1))
11895 case UNSPEC_GOTOFF:
11896 case UNSPEC_PLTOFF:
11897 return TARGET_64BIT;
11899 case UNSPEC_NTPOFF:
11900 x = XVECEXP (x, 0, 0);
11901 return (GET_CODE (x) == SYMBOL_REF
11902 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11903 case UNSPEC_DTPOFF:
11904 x = XVECEXP (x, 0, 0);
11905 return (GET_CODE (x) == SYMBOL_REF
11906 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
11911 /* We must have drilled down to a symbol. */
11912 if (GET_CODE (x) == LABEL_REF)
11914 if (GET_CODE (x) != SYMBOL_REF)
11919 /* TLS symbols are never valid. */
11920 if (SYMBOL_REF_TLS_MODEL (x))
11923 /* DLLIMPORT symbols are never valid. */
11924 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
11925 && SYMBOL_REF_DLLIMPORT_P (x))
11929 /* mdynamic-no-pic */
11930 if (MACHO_DYNAMIC_NO_PIC_P)
11931 return machopic_symbol_defined_p (x);
11936 if (GET_MODE (x) == TImode
11937 && x != CONST0_RTX (TImode)
11943 if (!standard_sse_constant_p (x))
11950 /* Otherwise we handle everything else in the move patterns. */
11954 /* Determine if it's legal to put X into the constant pool. This
11955 is not possible for the address of thread-local symbols, which
11956 is checked above. */
11959 ix86_cannot_force_const_mem (enum machine_mode mode, rtx x)
11961 /* We can always put integral constants and vectors in memory. */
11962 switch (GET_CODE (x))
11972 return !ix86_legitimate_constant_p (mode, x);
11976 /* Nonzero if the constant value X is a legitimate general operand
11977 when generating PIC code. It is given that flag_pic is on and
11978 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
11981 legitimate_pic_operand_p (rtx x)
11985 switch (GET_CODE (x))
11988 inner = XEXP (x, 0);
11989 if (GET_CODE (inner) == PLUS
11990 && CONST_INT_P (XEXP (inner, 1)))
11991 inner = XEXP (inner, 0);
11993 /* Only some unspecs are valid as "constants". */
11994 if (GET_CODE (inner) == UNSPEC)
11995 switch (XINT (inner, 1))
11998 case UNSPEC_GOTOFF:
11999 case UNSPEC_PLTOFF:
12000 return TARGET_64BIT;
12002 x = XVECEXP (inner, 0, 0);
12003 return (GET_CODE (x) == SYMBOL_REF
12004 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
12005 case UNSPEC_MACHOPIC_OFFSET:
12006 return legitimate_pic_address_disp_p (x);
12014 return legitimate_pic_address_disp_p (x);
12021 /* Determine if a given CONST RTX is a valid memory displacement
12025 legitimate_pic_address_disp_p (rtx disp)
12029 /* In 64bit mode we can allow direct addresses of symbols and labels
12030 when they are not dynamic symbols. */
12033 rtx op0 = disp, op1;
12035 switch (GET_CODE (disp))
12041 if (GET_CODE (XEXP (disp, 0)) != PLUS)
12043 op0 = XEXP (XEXP (disp, 0), 0);
12044 op1 = XEXP (XEXP (disp, 0), 1);
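	  /* The offset check below accepts e.g. sym + 0xffffff, while
	     sym + 0x1000000 (16MB) or more is rejected.  */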
12045 if (!CONST_INT_P (op1)
12046 || INTVAL (op1) >= 16*1024*1024
12047 || INTVAL (op1) < -16*1024*1024)
12049 if (GET_CODE (op0) == LABEL_REF)
12051 if (GET_CODE (op0) != SYMBOL_REF)
12056 /* TLS references should always be enclosed in UNSPEC. */
12057 if (SYMBOL_REF_TLS_MODEL (op0))
12059 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
12060 && ix86_cmodel != CM_LARGE_PIC)
12068 if (GET_CODE (disp) != CONST)
12070 disp = XEXP (disp, 0);
      /* It is unsafe to allow PLUS expressions here; that would exceed
	 the allowed distance of GOT references.  We should not need
	 these anyway.  */
12076 if (GET_CODE (disp) != UNSPEC
12077 || (XINT (disp, 1) != UNSPEC_GOTPCREL
12078 && XINT (disp, 1) != UNSPEC_GOTOFF
12079 && XINT (disp, 1) != UNSPEC_PCREL
12080 && XINT (disp, 1) != UNSPEC_PLTOFF))
12083 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
12084 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
12090 if (GET_CODE (disp) == PLUS)
12092 if (!CONST_INT_P (XEXP (disp, 1)))
12094 disp = XEXP (disp, 0);
12098 if (TARGET_MACHO && darwin_local_data_pic (disp))
12101 if (GET_CODE (disp) != UNSPEC)
12104 switch (XINT (disp, 1))
12109 /* We need to check for both symbols and labels because VxWorks loads
12110 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
12112 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
12113 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
12114 case UNSPEC_GOTOFF:
	/* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
	   While the ABI also specifies a 32bit relocation, we don't
	   produce it in the small PIC model at all.  */
12118 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
12119 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
12121 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
12123 case UNSPEC_GOTTPOFF:
12124 case UNSPEC_GOTNTPOFF:
12125 case UNSPEC_INDNTPOFF:
12128 disp = XVECEXP (disp, 0, 0);
12129 return (GET_CODE (disp) == SYMBOL_REF
12130 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
12131 case UNSPEC_NTPOFF:
12132 disp = XVECEXP (disp, 0, 0);
12133 return (GET_CODE (disp) == SYMBOL_REF
12134 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
12135 case UNSPEC_DTPOFF:
12136 disp = XVECEXP (disp, 0, 0);
12137 return (GET_CODE (disp) == SYMBOL_REF
12138 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
12144 /* Recognizes RTL expressions that are valid memory addresses for an
12145 instruction. The MODE argument is the machine mode for the MEM
12146 expression that wants to use this address.
   It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS
   should convert common non-canonical forms to canonical form so that
   they will be recognized.  */
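/* For example, on ia32 the address (plus (reg %ebp) (const_int -4)) is
   legitimate, while (mult (reg %eax) (const_int 4)) alone is not, since
   a scaled index cannot be encoded without a base or a displacement
   (ix86_decompose_address rejects it).  */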
12153 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
12154 rtx addr, bool strict)
12156 struct ix86_address parts;
12157 rtx base, index, disp;
12158 HOST_WIDE_INT scale;
12160 if (ix86_decompose_address (addr, &parts) <= 0)
12161 /* Decomposition failed. */
12165 index = parts.index;
12167 scale = parts.scale;
12169 /* Validate base register.
12171 Don't allow SUBREG's that span more than a word here. It can lead to spill
12172 failures when the base is one word out of a two word structure, which is
12173 represented internally as a DImode int. */
12181 else if (GET_CODE (base) == SUBREG
12182 && REG_P (SUBREG_REG (base))
12183 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
12185 reg = SUBREG_REG (base);
12187 /* Base is not a register. */
12190 if (GET_MODE (base) != Pmode)
12191 /* Base is not in Pmode. */
12194 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
12195 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
12196 /* Base is not valid. */
12200 /* Validate index register.
12202 Don't allow SUBREG's that span more than a word here -- same as above. */
12210 else if (GET_CODE (index) == SUBREG
12211 && REG_P (SUBREG_REG (index))
12212 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
12214 reg = SUBREG_REG (index);
12216 /* Index is not a register. */
12219 if (GET_MODE (index) != Pmode)
12220 /* Index is not in Pmode. */
12223 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
12224 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
12225 /* Index is not valid. */
12229 /* Validate scale factor. */
12233 /* Scale without index. */
12236 if (scale != 2 && scale != 4 && scale != 8)
12237 /* Scale is not a valid multiplier. */
12241 /* Validate displacement. */
12244 if (GET_CODE (disp) == CONST
12245 && GET_CODE (XEXP (disp, 0)) == UNSPEC
12246 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
12247 switch (XINT (XEXP (disp, 0), 1))
	  /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit
	     when used.  While the ABI also specifies 32bit relocations,
	     we don't produce them at all and use IP-relative addressing
	     instead.  */
12253 case UNSPEC_GOTOFF:
12254 gcc_assert (flag_pic);
12256 goto is_legitimate_pic;
12258 /* 64bit address unspec. */
12261 case UNSPEC_GOTPCREL:
12263 gcc_assert (flag_pic);
12264 goto is_legitimate_pic;
12266 case UNSPEC_GOTTPOFF:
12267 case UNSPEC_GOTNTPOFF:
12268 case UNSPEC_INDNTPOFF:
12269 case UNSPEC_NTPOFF:
12270 case UNSPEC_DTPOFF:
12273 case UNSPEC_STACK_CHECK:
12274 gcc_assert (flag_split_stack);
12278 /* Invalid address unspec. */
12282 else if (SYMBOLIC_CONST (disp)
12286 && MACHOPIC_INDIRECT
12287 && !machopic_operand_p (disp)
12293 if (TARGET_64BIT && (index || base))
12295 /* foo@dtpoff(%rX) is ok. */
12296 if (GET_CODE (disp) != CONST
12297 || GET_CODE (XEXP (disp, 0)) != PLUS
12298 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
12299 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
12300 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
12301 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
12302 /* Non-constant pic memory reference. */
12305 else if ((!TARGET_MACHO || flag_pic)
12306 && ! legitimate_pic_address_disp_p (disp))
12307 /* Displacement is an invalid pic construct. */
12310 else if (MACHO_DYNAMIC_NO_PIC_P
12311 && !ix86_legitimate_constant_p (Pmode, disp))
	  /* Displacement must be referenced via non_lazy_pointer.  */
12316 /* This code used to verify that a symbolic pic displacement
12317 includes the pic_offset_table_rtx register.
	 While this is a good idea, unfortunately these constructs may
	 be created by the "adds using lea" optimization for incorrect

	 This code is nonsensical, but results in addressing the
	 GOT table with a pic_offset_table_rtx base.  We can't
	 just refuse it easily, since it gets matched by the
	 "addsi3" pattern, which later gets split to lea when the
	 output register differs from the input.  While this
	 could be handled by a separate addsi pattern for this case
	 that never results in lea, disabling this test seems to be
	 the easier and correct fix for the crash.  */
12338 else if (GET_CODE (disp) != LABEL_REF
12339 && !CONST_INT_P (disp)
12340 && (GET_CODE (disp) != CONST
12341 || !ix86_legitimate_constant_p (Pmode, disp))
12342 && (GET_CODE (disp) != SYMBOL_REF
12343 || !ix86_legitimate_constant_p (Pmode, disp)))
12344 /* Displacement is not constant. */
12346 else if (TARGET_64BIT
12347 && !x86_64_immediate_operand (disp, VOIDmode))
12348 /* Displacement is out of range. */
12352 /* Everything looks valid. */
12356 /* Determine if a given RTX is a valid constant address. */
12359 constant_address_p (rtx x)
12361 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
12364 /* Return a unique alias set for the GOT. */
12366 static alias_set_type
12367 ix86_GOT_alias_set (void)
12369 static alias_set_type set = -1;
12371 set = new_alias_set ();
12375 /* Return a legitimate reference for ORIG (an address) using the
12376 register REG. If REG is 0, a new pseudo is generated.
12378 There are two types of references that must be handled:
12380 1. Global data references must load the address from the GOT, via
12381 the PIC reg. An insn is emitted to do this load, and the reg is
12384 2. Static data references, constant pool addresses, and code labels
12385 compute the address as an offset from the GOT, whose base is in
12386 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
12387 differentiate them from global data objects. The returned
12388 address is the PIC reg + an unspec constant.
12390 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
12391 reg also appears in the address. */
12394 legitimize_pic_address (rtx orig, rtx reg)
12397 rtx new_rtx = orig;
12401 if (TARGET_MACHO && !TARGET_64BIT)
12404 reg = gen_reg_rtx (Pmode);
12405 /* Use the generic Mach-O PIC machinery. */
12406 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
12410 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
12412 else if (TARGET_64BIT
12413 && ix86_cmodel != CM_SMALL_PIC
12414 && gotoff_operand (addr, Pmode))
12417 /* This symbol may be referenced via a displacement from the PIC
12418 base address (@GOTOFF). */
12420 if (reload_in_progress)
12421 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12422 if (GET_CODE (addr) == CONST)
12423 addr = XEXP (addr, 0);
12424 if (GET_CODE (addr) == PLUS)
12426 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
12428 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
12431 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
12432 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12434 tmpreg = gen_reg_rtx (Pmode);
12437 emit_move_insn (tmpreg, new_rtx);
12441 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
12442 tmpreg, 1, OPTAB_DIRECT);
12445 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
12447 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
12449 /* This symbol may be referenced via a displacement from the PIC
12450 base address (@GOTOFF). */
12452 if (reload_in_progress)
12453 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12454 if (GET_CODE (addr) == CONST)
12455 addr = XEXP (addr, 0);
12456 if (GET_CODE (addr) == PLUS)
12458 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
12460 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
12463 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
12464 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12465 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12469 emit_move_insn (reg, new_rtx);
12473 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
12474 /* We can't use @GOTOFF for text labels on VxWorks;
12475 see gotoff_operand. */
12476 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
12478 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
12480 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
12481 return legitimize_dllimport_symbol (addr, true);
12482 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
12483 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
12484 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
12486 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
12487 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
12491 /* For x64 PE-COFF there is no GOT table, so we use the address directly. */
12493 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
12495 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
12496 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12499 reg = gen_reg_rtx (Pmode);
12500 emit_move_insn (reg, new_rtx);
12503 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
12505 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
12506 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12507 new_rtx = gen_const_mem (Pmode, new_rtx);
12508 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
12511 reg = gen_reg_rtx (Pmode);
12512 /* Use gen_movsi directly, otherwise the address is loaded
12513 into a register for CSE. We don't want to CSE these addresses;
12514 instead we CSE addresses from the GOT table, so skip this. */
12515 emit_insn (gen_movsi (reg, new_rtx));
12520 /* This symbol must be referenced via a load from the
12521 Global Offset Table (@GOT). */
12523 if (reload_in_progress)
12524 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12525 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
12526 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12528 new_rtx = force_reg (Pmode, new_rtx);
12529 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12530 new_rtx = gen_const_mem (Pmode, new_rtx);
12531 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
12534 reg = gen_reg_rtx (Pmode);
12535 emit_move_insn (reg, new_rtx);
12541 if (CONST_INT_P (addr)
12542 && !x86_64_immediate_operand (addr, VOIDmode))
12546 emit_move_insn (reg, addr);
12550 new_rtx = force_reg (Pmode, addr);
12552 else if (GET_CODE (addr) == CONST)
12554 addr = XEXP (addr, 0);
12556 /* We must match stuff we generate before. Assume the only
12557 unspecs that can get here are ours. Not that we could do
12558 anything with them anyway.... */
12559 if (GET_CODE (addr) == UNSPEC
12560 || (GET_CODE (addr) == PLUS
12561 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
12563 gcc_assert (GET_CODE (addr) == PLUS);
12565 if (GET_CODE (addr) == PLUS)
12567 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
12569 /* Check first to see if this is a constant offset from a @GOTOFF
12570 symbol reference. */
12571 if (gotoff_operand (op0, Pmode)
12572 && CONST_INT_P (op1))
12576 if (reload_in_progress)
12577 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12578 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
12580 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
12581 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12582 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12586 emit_move_insn (reg, new_rtx);
12592 if (INTVAL (op1) < -16*1024*1024
12593 || INTVAL (op1) >= 16*1024*1024)
12595 if (!x86_64_immediate_operand (op1, Pmode))
12596 op1 = force_reg (Pmode, op1);
12597 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
12603 base = legitimize_pic_address (XEXP (addr, 0), reg);
12604 new_rtx = legitimize_pic_address (XEXP (addr, 1),
12605 base == reg ? NULL_RTX : reg);
12607 if (CONST_INT_P (new_rtx))
12608 new_rtx = plus_constant (base, INTVAL (new_rtx));
12611 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
12613 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
12614 new_rtx = XEXP (new_rtx, 1);
12616 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
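/* Illustrative sketch, not part of the original source: the two RTL
   shapes the routine above produces for 32bit PIC, composed with the
   same helpers it uses.  The function name is hypothetical and marked
   ATTRIBUTE_UNUSED; the real code additionally tags the GOT load with
   ix86_GOT_alias_set ().  */

static rtx ATTRIBUTE_UNUSED
example_pic_reference (rtx symbol, bool global)
{
  rtx off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol),
			    global ? UNSPEC_GOT : UNSPEC_GOTOFF);
  off = gen_rtx_CONST (Pmode, off);
  off = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, off);
  /* Global data: the GOT slot holds the address, so emit a load.
     Local data: PIC reg + @GOTOFF already is the address.  */
  return global ? gen_const_mem (Pmode, off) : off;
}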
12624 /* Load the thread pointer. If TO_REG is true, force it into a register. */
12627 get_thread_pointer (bool to_reg)
12631 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
12635 reg = gen_reg_rtx (Pmode);
12636 insn = gen_rtx_SET (VOIDmode, reg, tp);
12637 insn = emit_insn (insn);
12642 /* Construct the SYMBOL_REF for the tls_get_addr function. */
12644 static GTY(()) rtx ix86_tls_symbol;
12647 ix86_tls_get_addr (void)
12649 if (!ix86_tls_symbol)
12652 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
12653 ? "___tls_get_addr" : "__tls_get_addr");
12655 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
12658 return ix86_tls_symbol;
12661 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
12663 static GTY(()) rtx ix86_tls_module_base_symbol;
12666 ix86_tls_module_base (void)
12668 if (!ix86_tls_module_base_symbol)
12670 ix86_tls_module_base_symbol
12671 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
12673 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
12674 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
12677 return ix86_tls_module_base_symbol;
12680 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
12681 false if we expect this to be used for a memory address and true if
12682 we expect to load the address into a register. */
12685 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
12687 rtx dest, base, off;
12688 rtx pic = NULL_RTX, tp = NULL_RTX;
12693 case TLS_MODEL_GLOBAL_DYNAMIC:
12694 dest = gen_reg_rtx (Pmode);
12699 pic = pic_offset_table_rtx;
12702 pic = gen_reg_rtx (Pmode);
12703 emit_insn (gen_set_got (pic));
12707 if (TARGET_GNU2_TLS)
12710 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
12712 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
12714 tp = get_thread_pointer (true);
12715 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
12717 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
12721 rtx caddr = ix86_tls_get_addr ();
12725 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
12728 emit_call_insn (gen_tls_global_dynamic_64 (rax, x, caddr));
12729 insns = get_insns ();
12732 RTL_CONST_CALL_P (insns) = 1;
12733 emit_libcall_block (insns, dest, rax, x);
12736 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
12740 case TLS_MODEL_LOCAL_DYNAMIC:
12741 base = gen_reg_rtx (Pmode);
12746 pic = pic_offset_table_rtx;
12749 pic = gen_reg_rtx (Pmode);
12750 emit_insn (gen_set_got (pic));
12754 if (TARGET_GNU2_TLS)
12756 rtx tmp = ix86_tls_module_base ();
12759 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
12761 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
12763 tp = get_thread_pointer (true);
12764 set_unique_reg_note (get_last_insn (), REG_EQUIV,
12765 gen_rtx_MINUS (Pmode, tmp, tp));
12769 rtx caddr = ix86_tls_get_addr ();
12773 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, eqv;
12776 emit_call_insn (gen_tls_local_dynamic_base_64 (rax, caddr));
12777 insns = get_insns ();
12780 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
12781 share the LD_BASE result with other LD model accesses. */
12782 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
12783 UNSPEC_TLS_LD_BASE);
12785 RTL_CONST_CALL_P (insns) = 1;
12786 emit_libcall_block (insns, base, rax, eqv);
12789 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
12792 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
12793 off = gen_rtx_CONST (Pmode, off);
12795 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
12797 if (TARGET_GNU2_TLS)
12799 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
12801 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
12805 case TLS_MODEL_INITIAL_EXEC:
12808 if (TARGET_SUN_TLS)
12810 /* The Sun linker took the AMD64 TLS spec literally
12811 and can only handle %rax as destination of the
12812 initial executable code sequence. */
12814 dest = gen_reg_rtx (Pmode);
12815 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
12820 type = UNSPEC_GOTNTPOFF;
12824 if (reload_in_progress)
12825 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12826 pic = pic_offset_table_rtx;
12827 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
12829 else if (!TARGET_ANY_GNU_TLS)
12831 pic = gen_reg_rtx (Pmode);
12832 emit_insn (gen_set_got (pic));
12833 type = UNSPEC_GOTTPOFF;
12838 type = UNSPEC_INDNTPOFF;
12841 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
12842 off = gen_rtx_CONST (Pmode, off);
12844 off = gen_rtx_PLUS (Pmode, pic, off);
12845 off = gen_const_mem (Pmode, off);
12846 set_mem_alias_set (off, ix86_GOT_alias_set ());
12848 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12850 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
12851 off = force_reg (Pmode, off);
12852 return gen_rtx_PLUS (Pmode, base, off);
12856 base = get_thread_pointer (true);
12857 dest = gen_reg_rtx (Pmode);
12858 emit_insn (gen_subsi3 (dest, base, off));
12862 case TLS_MODEL_LOCAL_EXEC:
12863 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
12864 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12865 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
12866 off = gen_rtx_CONST (Pmode, off);
12868 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12870 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
12871 return gen_rtx_PLUS (Pmode, base, off);
12875 base = get_thread_pointer (true);
12876 dest = gen_reg_rtx (Pmode);
12877 emit_insn (gen_subsi3 (dest, base, off));
12882 gcc_unreachable ();
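/* Illustrative sketch, not part of the original source: the local-exec
   address shape built above when TARGET_64BIT || TARGET_ANY_GNU_TLS,
   i.e. thread pointer + x@ntpoff.  The function name is hypothetical;
   the real code also consults for_mov and TARGET_TLS_DIRECT_SEG_REFS
   before deciding whether the thread pointer needs a register.  */

static rtx ATTRIBUTE_UNUSED
example_tls_le_address (rtx x)
{
  rtx off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_NTPOFF);
  off = gen_rtx_CONST (Pmode, off);
  return gen_rtx_PLUS (Pmode, get_thread_pointer (true), off);
}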
12888 /* Create or return the unique __imp_DECL dllimport symbol corresponding
12889 to DECL. */
12891 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
12892 htab_t dllimport_map;
12895 get_dllimport_decl (tree decl)
12897 struct tree_map *h, in;
12900 const char *prefix;
12901 size_t namelen, prefixlen;
12906 if (!dllimport_map)
12907 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
12909 in.hash = htab_hash_pointer (decl);
12910 in.base.from = decl;
12911 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
12912 h = (struct tree_map *) *loc;
12916 *loc = h = ggc_alloc_tree_map ();
12918 h->base.from = decl;
12919 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
12920 VAR_DECL, NULL, ptr_type_node);
12921 DECL_ARTIFICIAL (to) = 1;
12922 DECL_IGNORED_P (to) = 1;
12923 DECL_EXTERNAL (to) = 1;
12924 TREE_READONLY (to) = 1;
12926 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
12927 name = targetm.strip_name_encoding (name);
12928 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
12929 ? "*__imp_" : "*__imp__";
12930 namelen = strlen (name);
12931 prefixlen = strlen (prefix);
12932 imp_name = (char *) alloca (namelen + prefixlen + 1);
12933 memcpy (imp_name, prefix, prefixlen);
12934 memcpy (imp_name + prefixlen, name, namelen + 1);
12936 name = ggc_alloc_string (imp_name, namelen + prefixlen);
12937 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
12938 SET_SYMBOL_REF_DECL (rtl, to);
12939 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
12941 rtl = gen_const_mem (Pmode, rtl);
12942 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
12944 SET_DECL_RTL (to, rtl);
12945 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
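/* Illustrative sketch, not part of the original source: the prefix
   selection above, isolated as a pure function with a hypothetical
   name.  For a symbol "foo" with a "_" user label prefix this picks
   "*__imp__"; the leading '*' suppresses further prefixing.  */

static const char * ATTRIBUTE_UNUSED
example_imp_prefix (const char *name)
{
  return (name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0)
	 ? "*__imp_" : "*__imp__";
}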
12950 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
12951 true if we require the result be a register. */
12954 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
12959 gcc_assert (SYMBOL_REF_DECL (symbol));
12960 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
12962 x = DECL_RTL (imp_decl);
12964 x = force_reg (Pmode, x);
12968 /* Try machine-dependent ways of modifying an illegitimate address
12969 to be legitimate. If we find one, return the new, valid address.
12970 This macro is used in only one place: `memory_address' in explow.c.
12972 OLDX is the address as it was before break_out_memory_refs was called.
12973 In some cases it is useful to look at this to decide what needs to be done.
12975 It is always safe for this macro to do nothing. It exists to recognize
12976 opportunities to optimize the output.
12978 For the 80386, we handle X+REG by loading X into a register R and
12979 using R+REG. R will go in a general reg and indexing will be used.
12980 However, if REG is a broken-out memory address or multiplication,
12981 nothing needs to be done because REG can certainly go in a general reg.
12983 When -fpic is used, special handling is needed for symbolic references.
12984 See comments by legitimize_pic_address in i386.c for details. */
12987 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
12988 enum machine_mode mode)
12993 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
12995 return legitimize_tls_address (x, (enum tls_model) log, false);
12996 if (GET_CODE (x) == CONST
12997 && GET_CODE (XEXP (x, 0)) == PLUS
12998 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
12999 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
13001 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
13002 (enum tls_model) log, false);
13003 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
13006 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13008 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
13009 return legitimize_dllimport_symbol (x, true);
13010 if (GET_CODE (x) == CONST
13011 && GET_CODE (XEXP (x, 0)) == PLUS
13012 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
13013 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
13015 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
13016 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
13020 if (flag_pic && SYMBOLIC_CONST (x))
13021 return legitimize_pic_address (x, 0);
13024 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
13025 return machopic_indirect_data_reference (x, 0);
13028 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
13029 if (GET_CODE (x) == ASHIFT
13030 && CONST_INT_P (XEXP (x, 1))
13031 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
13034 log = INTVAL (XEXP (x, 1));
13035 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
13036 GEN_INT (1 << log));
13039 if (GET_CODE (x) == PLUS)
13041 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
13043 if (GET_CODE (XEXP (x, 0)) == ASHIFT
13044 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
13045 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
13048 log = INTVAL (XEXP (XEXP (x, 0), 1));
13049 XEXP (x, 0) = gen_rtx_MULT (Pmode,
13050 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
13051 GEN_INT (1 << log));
13054 if (GET_CODE (XEXP (x, 1)) == ASHIFT
13055 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
13056 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
13059 log = INTVAL (XEXP (XEXP (x, 1), 1));
13060 XEXP (x, 1) = gen_rtx_MULT (Pmode,
13061 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
13062 GEN_INT (1 << log));
13065 /* Put multiply first if it isn't already. */
13066 if (GET_CODE (XEXP (x, 1)) == MULT)
13068 rtx tmp = XEXP (x, 0);
13069 XEXP (x, 0) = XEXP (x, 1);
13074 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
13075 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
13076 created by virtual register instantiation, register elimination, and
13077 similar optimizations. */
13078 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
13081 x = gen_rtx_PLUS (Pmode,
13082 gen_rtx_PLUS (Pmode, XEXP (x, 0),
13083 XEXP (XEXP (x, 1), 0)),
13084 XEXP (XEXP (x, 1), 1));
13088 /* Canonicalize (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
13089 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
13090 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
13091 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
13092 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
13093 && CONSTANT_P (XEXP (x, 1)))
13096 rtx other = NULL_RTX;
13098 if (CONST_INT_P (XEXP (x, 1)))
13100 constant = XEXP (x, 1);
13101 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
13103 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
13105 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
13106 other = XEXP (x, 1);
13114 x = gen_rtx_PLUS (Pmode,
13115 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
13116 XEXP (XEXP (XEXP (x, 0), 1), 0)),
13117 plus_constant (other, INTVAL (constant)));
13121 if (changed && ix86_legitimate_address_p (mode, x, false))
13124 if (GET_CODE (XEXP (x, 0)) == MULT)
13127 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
13130 if (GET_CODE (XEXP (x, 1)) == MULT)
13133 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
13137 && REG_P (XEXP (x, 1))
13138 && REG_P (XEXP (x, 0)))
13141 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
13144 x = legitimize_pic_address (x, 0);
13147 if (changed && ix86_legitimate_address_p (mode, x, false))
13150 if (REG_P (XEXP (x, 0)))
13152 rtx temp = gen_reg_rtx (Pmode);
13153 rtx val = force_operand (XEXP (x, 1), temp);
13155 emit_move_insn (temp, val);
13157 XEXP (x, 1) = temp;
13161 else if (REG_P (XEXP (x, 1)))
13163 rtx temp = gen_reg_rtx (Pmode);
13164 rtx val = force_operand (XEXP (x, 0), temp);
13166 emit_move_insn (temp, val);
13168 XEXP (x, 0) = temp;
13176 /* Print an integer constant expression in assembler syntax. Addition
13177 and subtraction are the only arithmetic that may appear in these
13178 expressions. FILE is the stdio stream to write to, X is the rtx, and
13179 CODE is the operand print code from the output string. */
13182 output_pic_addr_const (FILE *file, rtx x, int code)
13186 switch (GET_CODE (x))
13189 gcc_assert (flag_pic);
13194 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
13195 output_addr_const (file, x);
13198 const char *name = XSTR (x, 0);
13200 /* Mark the decl as referenced so that cgraph will
13201 output the function. */
13202 if (SYMBOL_REF_DECL (x))
13203 mark_decl_referenced (SYMBOL_REF_DECL (x));
13206 if (MACHOPIC_INDIRECT
13207 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
13208 name = machopic_indirection_name (x, /*stub_p=*/true);
13210 assemble_name (file, name);
13212 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
13213 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
13214 fputs ("@PLT", file);
13221 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
13222 assemble_name (asm_out_file, buf);
13226 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
13230 /* This used to output parentheses around the expression,
13231 but that does not work on the 386 (either ATT or BSD assembler). */
13232 output_pic_addr_const (file, XEXP (x, 0), code);
13236 if (GET_MODE (x) == VOIDmode)
13238 /* We can use %d if the number is <32 bits and positive. */
13239 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
13240 fprintf (file, "0x%lx%08lx",
13241 (unsigned long) CONST_DOUBLE_HIGH (x),
13242 (unsigned long) CONST_DOUBLE_LOW (x));
13244 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
13247 /* We can't handle floating point constants;
13248 TARGET_PRINT_OPERAND must handle them. */
13249 output_operand_lossage ("floating constant misused");
13253 /* Some assemblers need integer constants to appear first. */
13254 if (CONST_INT_P (XEXP (x, 0)))
13256 output_pic_addr_const (file, XEXP (x, 0), code);
13258 output_pic_addr_const (file, XEXP (x, 1), code);
13262 gcc_assert (CONST_INT_P (XEXP (x, 1)));
13263 output_pic_addr_const (file, XEXP (x, 1), code);
13265 output_pic_addr_const (file, XEXP (x, 0), code);
13271 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
13272 output_pic_addr_const (file, XEXP (x, 0), code);
13274 output_pic_addr_const (file, XEXP (x, 1), code);
13276 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
13280 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
13282 bool f = i386_asm_output_addr_const_extra (file, x);
13287 gcc_assert (XVECLEN (x, 0) == 1);
13288 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
13289 switch (XINT (x, 1))
13292 fputs ("@GOT", file);
13294 case UNSPEC_GOTOFF:
13295 fputs ("@GOTOFF", file);
13297 case UNSPEC_PLTOFF:
13298 fputs ("@PLTOFF", file);
13301 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13302 "(%rip)" : "[rip]", file);
13304 case UNSPEC_GOTPCREL:
13305 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13306 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
13308 case UNSPEC_GOTTPOFF:
13309 /* FIXME: This might be @TPOFF in Sun ld too. */
13310 fputs ("@gottpoff", file);
13313 fputs ("@tpoff", file);
13315 case UNSPEC_NTPOFF:
13317 fputs ("@tpoff", file);
13319 fputs ("@ntpoff", file);
13321 case UNSPEC_DTPOFF:
13322 fputs ("@dtpoff", file);
13324 case UNSPEC_GOTNTPOFF:
13326 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13327 "@gottpoff(%rip)": "@gottpoff[rip]", file);
13329 fputs ("@gotntpoff", file);
13331 case UNSPEC_INDNTPOFF:
13332 fputs ("@indntpoff", file);
13335 case UNSPEC_MACHOPIC_OFFSET:
13337 machopic_output_function_base_name (file);
13341 output_operand_lossage ("invalid UNSPEC as operand");
13347 output_operand_lossage ("invalid expression as operand");
13351 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
13352 We need to emit DTP-relative relocations. */
13354 static void ATTRIBUTE_UNUSED
13355 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
13357 fputs (ASM_LONG, file);
13358 output_addr_const (file, x);
13359 fputs ("@dtpoff", file);
13365 fputs (", 0", file);
13368 gcc_unreachable ();
13372 /* Return true if X is a representation of the PIC register. This copes
13373 with calls from ix86_find_base_term, where the register might have
13374 been replaced by a cselib value. */
13377 ix86_pic_register_p (rtx x)
13379 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
13380 return (pic_offset_table_rtx
13381 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
13383 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
13386 /* Helper function for ix86_delegitimize_address.
13387 Attempt to delegitimize TLS local-exec accesses. */
13390 ix86_delegitimize_tls_address (rtx orig_x)
13392 rtx x = orig_x, unspec;
13393 struct ix86_address addr;
13395 if (!TARGET_TLS_DIRECT_SEG_REFS)
13399 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
13401 if (ix86_decompose_address (x, &addr) == 0
13402 || addr.seg != (TARGET_64BIT ? SEG_FS : SEG_GS)
13403 || addr.disp == NULL_RTX
13404 || GET_CODE (addr.disp) != CONST)
13406 unspec = XEXP (addr.disp, 0);
13407 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
13408 unspec = XEXP (unspec, 0);
13409 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
13411 x = XVECEXP (unspec, 0, 0);
13412 gcc_assert (GET_CODE (x) == SYMBOL_REF);
13413 if (unspec != XEXP (addr.disp, 0))
13414 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
13417 rtx idx = addr.index;
13418 if (addr.scale != 1)
13419 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
13420 x = gen_rtx_PLUS (Pmode, idx, x);
13423 x = gen_rtx_PLUS (Pmode, addr.base, x);
13424 if (MEM_P (orig_x))
13425 x = replace_equiv_address_nv (orig_x, x);
13429 /* In the name of slightly smaller debug output, and to cater to
13430 general assembler lossage, recognize PIC+GOTOFF and turn it back
13431 into a direct symbol reference.
13433 On Darwin, this is necessary to avoid a crash, because Darwin
13434 has a different PIC label for each routine but the DWARF debugging
13435 information is not associated with any particular routine, so it's
13436 necessary to remove references to the PIC label from RTL stored by
13437 the DWARF output code. */
13440 ix86_delegitimize_address (rtx x)
13442 rtx orig_x = delegitimize_mem_from_attrs (x);
13443 /* addend is NULL or some rtx if x is something+GOTOFF where
13444 something doesn't include the PIC register. */
13445 rtx addend = NULL_RTX;
13446 /* reg_addend is NULL or a multiple of some register. */
13447 rtx reg_addend = NULL_RTX;
13448 /* const_addend is NULL or a const_int. */
13449 rtx const_addend = NULL_RTX;
13450 /* This is the result, or NULL. */
13451 rtx result = NULL_RTX;
13460 if (GET_CODE (x) != CONST
13461 || GET_CODE (XEXP (x, 0)) != UNSPEC
13462 || (XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
13463 && XINT (XEXP (x, 0), 1) != UNSPEC_PCREL)
13464 || !MEM_P (orig_x))
13465 return ix86_delegitimize_tls_address (orig_x);
13466 x = XVECEXP (XEXP (x, 0), 0, 0);
13467 if (GET_MODE (orig_x) != Pmode)
13469 x = simplify_gen_subreg (GET_MODE (orig_x), x, Pmode, 0);
13476 if (GET_CODE (x) != PLUS
13477 || GET_CODE (XEXP (x, 1)) != CONST)
13478 return ix86_delegitimize_tls_address (orig_x);
13480 if (ix86_pic_register_p (XEXP (x, 0)))
13481 /* %ebx + GOT/GOTOFF */
13483 else if (GET_CODE (XEXP (x, 0)) == PLUS)
13485 /* %ebx + %reg * scale + GOT/GOTOFF */
13486 reg_addend = XEXP (x, 0);
13487 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
13488 reg_addend = XEXP (reg_addend, 1);
13489 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
13490 reg_addend = XEXP (reg_addend, 0);
13493 reg_addend = NULL_RTX;
13494 addend = XEXP (x, 0);
13498 addend = XEXP (x, 0);
13500 x = XEXP (XEXP (x, 1), 0);
13501 if (GET_CODE (x) == PLUS
13502 && CONST_INT_P (XEXP (x, 1)))
13504 const_addend = XEXP (x, 1);
13508 if (GET_CODE (x) == UNSPEC
13509 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
13510 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
13511 result = XVECEXP (x, 0, 0);
13513 if (TARGET_MACHO && darwin_local_data_pic (x)
13514 && !MEM_P (orig_x))
13515 result = XVECEXP (x, 0, 0);
13518 return ix86_delegitimize_tls_address (orig_x);
13521 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
13523 result = gen_rtx_PLUS (Pmode, reg_addend, result);
13526 /* If the rest of the original X doesn't involve the PIC register, add
13527 the addend and subtract pic_offset_table_rtx. This can happen e.g. with
13529 leal (%ebx, %ecx, 4), %ecx
13531 movl foo@GOTOFF(%ecx), %edx
13532 in which case we return (%ecx - %ebx) + foo. */
13533 if (pic_offset_table_rtx)
13534 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
13535 pic_offset_table_rtx),
13540 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
13542 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
13543 if (result == NULL_RTX)
13549 /* If X is a machine specific address (i.e. a symbol or label being
13550 referenced as a displacement from the GOT implemented using an
13551 UNSPEC), then return the base term. Otherwise return X. */
13554 ix86_find_base_term (rtx x)
13560 if (GET_CODE (x) != CONST)
13562 term = XEXP (x, 0);
13563 if (GET_CODE (term) == PLUS
13564 && (CONST_INT_P (XEXP (term, 1))
13565 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
13566 term = XEXP (term, 0);
13567 if (GET_CODE (term) != UNSPEC
13568 || (XINT (term, 1) != UNSPEC_GOTPCREL
13569 && XINT (term, 1) != UNSPEC_PCREL))
13572 return XVECEXP (term, 0, 0);
13575 return ix86_delegitimize_address (x);
13579 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
13580 int fp, FILE *file)
13582 const char *suffix;
13584 if (mode == CCFPmode || mode == CCFPUmode)
13586 code = ix86_fp_compare_code_to_integer (code);
13590 code = reverse_condition (code);
13641 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
13645 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
13646 Those same assemblers have the same but opposite lossage on cmov. */
13647 if (mode == CCmode)
13648 suffix = fp ? "nbe" : "a";
13649 else if (mode == CCCmode)
13652 gcc_unreachable ();
13668 gcc_unreachable ();
13672 gcc_assert (mode == CCmode || mode == CCCmode);
13689 gcc_unreachable ();
13693 /* ??? As above. */
13694 gcc_assert (mode == CCmode || mode == CCCmode);
13695 suffix = fp ? "nb" : "ae";
13698 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
13702 /* ??? As above. */
13703 if (mode == CCmode)
13705 else if (mode == CCCmode)
13706 suffix = fp ? "nb" : "ae";
13708 gcc_unreachable ();
13711 suffix = fp ? "u" : "p";
13714 suffix = fp ? "nu" : "np";
13717 gcc_unreachable ();
13719 fputs (suffix, file);
13722 /* Print the name of register X to FILE based on its machine mode and number.
13723 If CODE is 'w', pretend the mode is HImode.
13724 If CODE is 'b', pretend the mode is QImode.
13725 If CODE is 'k', pretend the mode is SImode.
13726 If CODE is 'q', pretend the mode is DImode.
13727 If CODE is 'x', pretend the mode is V4SFmode.
13728 If CODE is 't', pretend the mode is V8SFmode.
13729 If CODE is 'h', pretend the reg is the 'high' byte register.
13730 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
13731 If CODE is 'd', duplicate the operand for an AVX instruction. */
13735 print_reg (rtx x, int code, FILE *file)
13738 bool duplicated = code == 'd' && TARGET_AVX;
13740 gcc_assert (x == pc_rtx
13741 || (REGNO (x) != ARG_POINTER_REGNUM
13742 && REGNO (x) != FRAME_POINTER_REGNUM
13743 && REGNO (x) != FLAGS_REG
13744 && REGNO (x) != FPSR_REG
13745 && REGNO (x) != FPCR_REG));
13747 if (ASSEMBLER_DIALECT == ASM_ATT)
13752 gcc_assert (TARGET_64BIT);
13753 fputs ("rip", file);
13757 if (code == 'w' || MMX_REG_P (x))
13759 else if (code == 'b')
13761 else if (code == 'k')
13763 else if (code == 'q')
13765 else if (code == 'y')
13767 else if (code == 'h')
13769 else if (code == 'x')
13771 else if (code == 't')
13774 code = GET_MODE_SIZE (GET_MODE (x));
13776 /* Irritatingly, AMD extended registers use a different naming
13777 convention from the normal registers. */
13778 if (REX_INT_REG_P (x))
13780 gcc_assert (TARGET_64BIT);
13784 error ("extended registers have no high halves");
13787 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
13790 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
13793 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
13796 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
13799 error ("unsupported operand size for extended register");
13809 if (STACK_TOP_P (x))
13818 if (! ANY_FP_REG_P (x))
13819 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
13824 reg = hi_reg_name[REGNO (x)];
13827 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
13829 reg = qi_reg_name[REGNO (x)];
13832 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
13834 reg = qi_high_reg_name[REGNO (x)];
13839 gcc_assert (!duplicated);
13841 fputs (hi_reg_name[REGNO (x)] + 1, file);
13846 gcc_unreachable ();
13852 if (ASSEMBLER_DIALECT == ASM_ATT)
13853 fprintf (file, ", %%%s", reg);
13855 fprintf (file, ", %s", reg);
13859 /* Locate some local-dynamic symbol still in use by this function
13860 so that we can print its name in some tls_local_dynamic_base
13861 pattern. */
13864 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
13868 if (GET_CODE (x) == SYMBOL_REF
13869 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
13871 cfun->machine->some_ld_name = XSTR (x, 0);
13878 static const char *
13879 get_some_local_dynamic_name (void)
13883 if (cfun->machine->some_ld_name)
13884 return cfun->machine->some_ld_name;
13886 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
13887 if (NONDEBUG_INSN_P (insn)
13888 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
13889 return cfun->machine->some_ld_name;
13894 /* Meaning of CODE:
13895 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
13896 C -- print opcode suffix for set/cmov insn.
13897 c -- like C, but print reversed condition
13898 F,f -- likewise, but for floating-point.
13899 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
13901 R -- print the prefix for register names.
13902 z -- print the opcode suffix for the size of the current operand.
13903 Z -- likewise, with special suffixes for x87 instructions.
13904 * -- print a star (in certain assembler syntax)
13905 A -- print an absolute memory reference.
13906 w -- print the operand as if it's a "word" (HImode) even if it isn't.
13907 s -- print a shift double count, followed by the assembler's argument
13909 b -- print the QImode name of the register for the indicated operand.
13910 %b0 would print %al if operands[0] is reg 0.
13911 w -- likewise, print the HImode name of the register.
13912 k -- likewise, print the SImode name of the register.
13913 q -- likewise, print the DImode name of the register.
13914 x -- likewise, print the V4SFmode name of the register.
13915 t -- likewise, print the V8SFmode name of the register.
13916 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
13917 y -- print "st(0)" instead of "st" as a register.
13918 d -- print duplicated register operand for AVX instruction.
13919 D -- print condition for SSE cmp instruction.
13920 P -- if PIC, print an @PLT suffix.
13921 X -- don't print any sort of PIC '@' suffix for a symbol.
13922 & -- print some in-use local-dynamic symbol name.
13923 H -- print a memory address offset by 8; used for sse high-parts
13924 Y -- print condition for XOP pcom* instruction.
13925 + -- print a branch hint as 'cs' or 'ds' prefix
13926 ; -- print a semicolon (after prefixes due to a bug in older gas).
13927 @ -- print a segment register of thread base pointer load. */
13931 ix86_print_operand (FILE *file, rtx x, int code)
13938 if (ASSEMBLER_DIALECT == ASM_ATT)
13944 const char *name = get_some_local_dynamic_name ();
13946 output_operand_lossage ("'%%&' used without any "
13947 "local dynamic TLS references");
13949 assemble_name (file, name);
13954 switch (ASSEMBLER_DIALECT)
13961 /* Intel syntax. For absolute addresses, registers should not
13962 be surrounded by braces. */
13966 ix86_print_operand (file, x, 0);
13973 gcc_unreachable ();
13976 ix86_print_operand (file, x, 0);
13981 if (ASSEMBLER_DIALECT == ASM_ATT)
13986 if (ASSEMBLER_DIALECT == ASM_ATT)
13991 if (ASSEMBLER_DIALECT == ASM_ATT)
13996 if (ASSEMBLER_DIALECT == ASM_ATT)
14001 if (ASSEMBLER_DIALECT == ASM_ATT)
14006 if (ASSEMBLER_DIALECT == ASM_ATT)
14011 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
14013 /* Opcodes don't get size suffixes if using Intel opcodes. */
14014 if (ASSEMBLER_DIALECT == ASM_INTEL)
14017 switch (GET_MODE_SIZE (GET_MODE (x)))
14036 output_operand_lossage
14037 ("invalid operand size for operand code '%c'", code);
14042 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14044 warning (0, "non-integer operand used with operand code '%c'", code);
14048 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
14049 if (ASSEMBLER_DIALECT == ASM_INTEL)
14052 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
14054 switch (GET_MODE_SIZE (GET_MODE (x)))
14057 #ifdef HAVE_AS_IX86_FILDS
14067 #ifdef HAVE_AS_IX86_FILDQ
14070 fputs ("ll", file);
14078 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14080 /* 387 opcodes don't get size suffixes
14081 if the operands are registers. */
14082 if (STACK_REG_P (x))
14085 switch (GET_MODE_SIZE (GET_MODE (x)))
14106 output_operand_lossage
14107 ("invalid operand type used with operand code '%c'", code);
14111 output_operand_lossage
14112 ("invalid operand size for operand code '%c'", code);
14129 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
14131 ix86_print_operand (file, x, 0);
14132 fputs (", ", file);
14137 /* A little bit of braindamage here: the SSE compare instructions
14138 use completely different names for the comparisons than the
14139 fp conditional moves do. */
14142 switch (GET_CODE (x))
14145 fputs ("eq", file);
14148 fputs ("eq_us", file);
14151 fputs ("lt", file);
14154 fputs ("nge", file);
14157 fputs ("le", file);
14160 fputs ("ngt", file);
14163 fputs ("unord", file);
14166 fputs ("neq", file);
14169 fputs ("neq_oq", file);
14172 fputs ("ge", file);
14175 fputs ("nlt", file);
14178 fputs ("gt", file);
14181 fputs ("nle", file);
14184 fputs ("ord", file);
14187 output_operand_lossage ("operand is not a condition code, "
14188 "invalid operand code 'D'");
14194 switch (GET_CODE (x))
14198 fputs ("eq", file);
14202 fputs ("lt", file);
14206 fputs ("le", file);
14209 fputs ("unord", file);
14213 fputs ("neq", file);
14217 fputs ("nlt", file);
14221 fputs ("nle", file);
14224 fputs ("ord", file);
14227 output_operand_lossage ("operand is not a condition code, "
14228 "invalid operand code 'D'");
14234 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14235 if (ASSEMBLER_DIALECT == ASM_ATT)
14237 switch (GET_MODE (x))
14239 case HImode: putc ('w', file); break;
14241 case SFmode: putc ('l', file); break;
14243 case DFmode: putc ('q', file); break;
14244 default: gcc_unreachable ();
14251 if (!COMPARISON_P (x))
14253 output_operand_lossage ("operand is neither a constant nor a "
14254 "condition code, invalid operand code "
14258 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
14261 if (!COMPARISON_P (x))
14263 output_operand_lossage ("operand is neither a constant nor a "
14264 "condition code, invalid operand code "
14268 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14269 if (ASSEMBLER_DIALECT == ASM_ATT)
14272 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
14275 /* Like above, but reverse condition */
14277 /* Check to see if argument to %c is really a constant
14278 and not a condition code which needs to be reversed. */
14279 if (!COMPARISON_P (x))
14281 output_operand_lossage ("operand is neither a constant nor a "
14282 "condition code, invalid operand "
14286 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
14289 if (!COMPARISON_P (x))
14291 output_operand_lossage ("operand is neither a constant nor a "
14292 "condition code, invalid operand "
14296 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14297 if (ASSEMBLER_DIALECT == ASM_ATT)
14300 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
14304 /* It doesn't actually matter what mode we use here, as we're
14305 only going to use this for printing. */
14306 x = adjust_address_nv (x, DImode, 8);
14314 || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS)
14317 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
14320 int pred_val = INTVAL (XEXP (x, 0));
14322 if (pred_val < REG_BR_PROB_BASE * 45 / 100
14323 || pred_val > REG_BR_PROB_BASE * 55 / 100)
14325 int taken = pred_val > REG_BR_PROB_BASE / 2;
14326 int cputaken = final_forward_branch_p (current_output_insn) == 0;
14328 /* Emit hints only when the default branch prediction
14329 heuristics would fail. */
14330 if (taken != cputaken)
14332 /* We use 3e (DS) prefix for taken branches and
14333 2e (CS) prefix for not taken branches. */
14335 fputs ("ds ; ", file);
14337 fputs ("cs ; ", file);
14345 switch (GET_CODE (x))
14348 fputs ("neq", file);
14351 fputs ("eq", file);
14355 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
14359 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
14363 fputs ("le", file);
14367 fputs ("lt", file);
14370 fputs ("unord", file);
14373 fputs ("ord", file);
14376 fputs ("ueq", file);
14379 fputs ("nlt", file);
14382 fputs ("nle", file);
14385 fputs ("ule", file);
14388 fputs ("ult", file);
14391 fputs ("une", file);
14394 output_operand_lossage ("operand is not a condition code, "
14395 "invalid operand code 'Y'");
14401 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
14407 if (ASSEMBLER_DIALECT == ASM_ATT)
14410 /* The kernel uses a different segment register for performance
14411 reasons; a system call would not have to trash the userspace
14412 segment register, which would be expensive. */
14413 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
14414 fputs ("fs", file);
14416 fputs ("gs", file);
14420 output_operand_lossage ("invalid operand code '%c'", code);
14425 print_reg (x, code, file);
14427 else if (MEM_P (x))
14429 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
14430 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
14431 && GET_MODE (x) != BLKmode)
14434 switch (GET_MODE_SIZE (GET_MODE (x)))
14436 case 1: size = "BYTE"; break;
14437 case 2: size = "WORD"; break;
14438 case 4: size = "DWORD"; break;
14439 case 8: size = "QWORD"; break;
14440 case 12: size = "TBYTE"; break;
14442 if (GET_MODE (x) == XFmode)
14447 case 32: size = "YMMWORD"; break;
14449 gcc_unreachable ();
14452 /* Check for explicit size override (codes 'b', 'w' and 'k') */
14455 else if (code == 'w')
14457 else if (code == 'k')
14460 fputs (size, file);
14461 fputs (" PTR ", file);
14465 /* Avoid (%rip) for call operands. */
14466 if (CONSTANT_ADDRESS_P (x) && code == 'P'
14467 && !CONST_INT_P (x))
14468 output_addr_const (file, x);
14469 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
14470 output_operand_lossage ("invalid constraints for operand");
14472 output_address (x);
14475 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
14480 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
14481 REAL_VALUE_TO_TARGET_SINGLE (r, l);
14483 if (ASSEMBLER_DIALECT == ASM_ATT)
14485 /* Sign extend 32bit SFmode immediate to 8 bytes. */
14487 fprintf (file, "0x%08llx", (unsigned long long) (int) l);
14489 fprintf (file, "0x%08x", (unsigned int) l);
14492 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
14497 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
14498 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
14500 if (ASSEMBLER_DIALECT == ASM_ATT)
14502 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
14505 /* These float cases don't actually occur as immediate operands. */
14506 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
14510 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
14511 fputs (dstr, file);
14516 /* We have patterns that allow zero sets of memory, for instance.
14517 In 64-bit mode, we should probably support all 8-byte vectors,
14518 since we can in fact encode that into an immediate. */
14519 if (GET_CODE (x) == CONST_VECTOR)
14521 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
14527 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
14529 if (ASSEMBLER_DIALECT == ASM_ATT)
14532 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
14533 || GET_CODE (x) == LABEL_REF)
14535 if (ASSEMBLER_DIALECT == ASM_ATT)
14538 fputs ("OFFSET FLAT:", file);
14541 if (CONST_INT_P (x))
14542 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14543 else if (flag_pic || MACHOPIC_INDIRECT)
14544 output_pic_addr_const (file, x, code);
14546 output_addr_const (file, x);
14551 ix86_print_operand_punct_valid_p (unsigned char code)
14553 return (code == '@' || code == '*' || code == '+'
14554 || code == '&' || code == ';');
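/* Illustrative example, not taken from the machine description: an
   insn template written against the operand codes documented above.
   "%z0" picks the integer size suffix from operands[0]'s mode, and
   the {att|intel} braces select the dialect, so for SImode register
   operands this prints e.g. "addl %edx, %eax" under ATT syntax.  The
   template is hypothetical and unused.  */

static const char * ATTRIBUTE_UNUSED example_add_template
  = "add%z0\t{%2, %0|%0, %2}";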
14557 /* Print a memory operand whose address is ADDR. */
14560 ix86_print_operand_address (FILE *file, rtx addr)
14562 struct ix86_address parts;
14563 rtx base, index, disp;
14565 int ok = ix86_decompose_address (addr, &parts);
14570 index = parts.index;
14572 scale = parts.scale;
14580 if (ASSEMBLER_DIALECT == ASM_ATT)
14582 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
14585 gcc_unreachable ();
14588 /* Use one byte shorter RIP relative addressing for 64bit mode. */
14589 if (TARGET_64BIT && !base && !index)
14593 if (GET_CODE (disp) == CONST
14594 && GET_CODE (XEXP (disp, 0)) == PLUS
14595 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
14596 symbol = XEXP (XEXP (disp, 0), 0);
14598 if (GET_CODE (symbol) == LABEL_REF
14599 || (GET_CODE (symbol) == SYMBOL_REF
14600 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
14603 if (!base && !index)
14605 /* A displacement-only address requires special attention. */
14607 if (CONST_INT_P (disp))
14609 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
14610 fputs ("ds:", file);
14611 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
14614 output_pic_addr_const (file, disp, 0);
14616 output_addr_const (file, disp);
14620 if (ASSEMBLER_DIALECT == ASM_ATT)
14625 output_pic_addr_const (file, disp, 0);
14626 else if (GET_CODE (disp) == LABEL_REF)
14627 output_asm_label (disp);
14629 output_addr_const (file, disp);
14634 print_reg (base, 0, file);
14638 print_reg (index, 0, file);
14640 fprintf (file, ",%d", scale);
14646 rtx offset = NULL_RTX;
14650 /* Pull out the offset of a symbol; print any symbol itself. */
14651 if (GET_CODE (disp) == CONST
14652 && GET_CODE (XEXP (disp, 0)) == PLUS
14653 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
14655 offset = XEXP (XEXP (disp, 0), 1);
14656 disp = gen_rtx_CONST (VOIDmode,
14657 XEXP (XEXP (disp, 0), 0));
14661 output_pic_addr_const (file, disp, 0);
14662 else if (GET_CODE (disp) == LABEL_REF)
14663 output_asm_label (disp);
14664 else if (CONST_INT_P (disp))
14667 output_addr_const (file, disp);
14673 print_reg (base, 0, file);
14676 if (INTVAL (offset) >= 0)
14678 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
14682 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
14689 print_reg (index, 0, file);
14691 fprintf (file, "*%d", scale);
14698 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
14701 i386_asm_output_addr_const_extra (FILE *file, rtx x)
14705 if (GET_CODE (x) != UNSPEC)
14708 op = XVECEXP (x, 0, 0);
14709 switch (XINT (x, 1))
14711 case UNSPEC_GOTTPOFF:
14712 output_addr_const (file, op);
14713 /* FIXME: This might be @TPOFF in Sun ld. */
14714 fputs ("@gottpoff", file);
14717 output_addr_const (file, op);
14718 fputs ("@tpoff", file);
14720 case UNSPEC_NTPOFF:
14721 output_addr_const (file, op);
14723 fputs ("@tpoff", file);
14725 fputs ("@ntpoff", file);
14727 case UNSPEC_DTPOFF:
14728 output_addr_const (file, op);
14729 fputs ("@dtpoff", file);
14731 case UNSPEC_GOTNTPOFF:
14732 output_addr_const (file, op);
14734 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14735 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
14737 fputs ("@gotntpoff", file);
14739 case UNSPEC_INDNTPOFF:
14740 output_addr_const (file, op);
14741 fputs ("@indntpoff", file);
14744 case UNSPEC_MACHOPIC_OFFSET:
14745 output_addr_const (file, op);
14747 machopic_output_function_base_name (file);
14751 case UNSPEC_STACK_CHECK:
14755 gcc_assert (flag_split_stack);
14757 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
14758 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
14760 gcc_unreachable ();
14763 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
14774 /* Split one or more double-mode RTL references into pairs of half-mode
14775 references. The RTL can be REG, offsettable MEM, integer constant, or
14776 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
14777 split and "num" is its length. lo_half and hi_half are output arrays
14778 that parallel "operands". */
14781 split_double_mode (enum machine_mode mode, rtx operands[],
14782 int num, rtx lo_half[], rtx hi_half[])
14784 enum machine_mode half_mode;
14790 half_mode = DImode;
14793 half_mode = SImode;
14796 gcc_unreachable ();
14799 byte = GET_MODE_SIZE (half_mode);
14803 rtx op = operands[num];
14805 /* simplify_subreg refuses to split volatile memory addresses,
14806 but we still have to handle them. */
14809 lo_half[num] = adjust_address (op, half_mode, 0);
14810 hi_half[num] = adjust_address (op, half_mode, byte);
14814 lo_half[num] = simplify_gen_subreg (half_mode, op,
14815 GET_MODE (op) == VOIDmode
14816 ? mode : GET_MODE (op), 0);
14817 hi_half[num] = simplify_gen_subreg (half_mode, op,
14818 GET_MODE (op) == VOIDmode
14819 ? mode : GET_MODE (op), byte);
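/* Illustrative usage sketch, not part of the original source:
   splitting a DImode register-to-register move into two SImode moves
   with the helper above.  The function name is hypothetical; real
   callers must also worry about overlapping halves and ordering.  */

static void ATTRIBUTE_UNUSED
example_split_di_move (rtx operands[])
{
  rtx lo[2], hi[2];

  split_double_mode (DImode, operands, 2, lo, hi);
  emit_move_insn (lo[0], lo[1]);
  emit_move_insn (hi[0], hi[1]);
}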
14824 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
14825 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
14826 is the expression of the binary operation. The output may either be
14827 emitted here, or returned to the caller, like all output_* functions.
14829 There is no guarantee that the operands are the same mode, as they
14830 might be within FLOAT or FLOAT_EXTEND expressions. */
14832 #ifndef SYSV386_COMPAT
14833 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
14834 wants to fix the assemblers because that causes incompatibility
14835 with gcc. No-one wants to fix gcc because that causes
14836 incompatibility with assemblers... You can use the option of
14837 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
14838 #define SYSV386_COMPAT 1
14842 output_387_binary_op (rtx insn, rtx *operands)
14844 static char buf[40];
14847 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
14849 #ifdef ENABLE_CHECKING
14850 /* Even if we do not want to check the inputs, this documents the input
14851 constraints, which helps in understanding the following code. */
14852 if (STACK_REG_P (operands[0])
14853 && ((REG_P (operands[1])
14854 && REGNO (operands[0]) == REGNO (operands[1])
14855 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
14856 || (REG_P (operands[2])
14857 && REGNO (operands[0]) == REGNO (operands[2])
14858 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
14859 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
14862 gcc_assert (is_sse);
14865 switch (GET_CODE (operands[3]))
14868 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14869 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14877 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14878 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14886 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14887 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14895 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14896 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14904 gcc_unreachable ();
14911 strcpy (buf, ssep);
14912 if (GET_MODE (operands[0]) == SFmode)
14913 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
14915 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
14919 strcpy (buf, ssep + 1);
14920 if (GET_MODE (operands[0]) == SFmode)
14921 strcat (buf, "ss\t{%2, %0|%0, %2}");
14923 strcat (buf, "sd\t{%2, %0|%0, %2}");
14929 switch (GET_CODE (operands[3]))
14933 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
14935 rtx temp = operands[2];
14936 operands[2] = operands[1];
14937 operands[1] = temp;
14940 /* We know operands[0] == operands[1]. */
14942 if (MEM_P (operands[2]))
14948 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
14950 if (STACK_TOP_P (operands[0]))
14951 /* How is it that we are storing to a dead operand[2]?
14952 Well, presumably operands[1] is dead too. We can't
14953 store the result to st(0) as st(0) gets popped on this
14954 instruction. Instead store to operands[2] (which I
14955 think has to be st(1)). st(1) will be popped later.
14956 gcc <= 2.8.1 didn't have this check and generated
14957 assembly code that the Unixware assembler rejected. */
14958 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
14960 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
14964 if (STACK_TOP_P (operands[0]))
14965 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
14967 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
14972 if (MEM_P (operands[1]))
14978 if (MEM_P (operands[2]))
14984 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
14987 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
14988 derived assemblers, confusingly reverse the direction of
14989 the operation for fsub{r} and fdiv{r} when the
14990 destination register is not st(0). The Intel assembler
14991 doesn't have this brain damage. Read !SYSV386_COMPAT to
14992 figure out what the hardware really does. */
14993 if (STACK_TOP_P (operands[0]))
14994 p = "{p\t%0, %2|rp\t%2, %0}";
14996 p = "{rp\t%2, %0|p\t%0, %2}";
14998 if (STACK_TOP_P (operands[0]))
14999 /* As above for fmul/fadd, we can't store to st(0). */
15000 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
15002 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
15007 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
15010 if (STACK_TOP_P (operands[0]))
15011 p = "{rp\t%0, %1|p\t%1, %0}";
15013 p = "{p\t%1, %0|rp\t%0, %1}";
15015 if (STACK_TOP_P (operands[0]))
15016 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
15018 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
15023 if (STACK_TOP_P (operands[0]))
15025 if (STACK_TOP_P (operands[1]))
15026 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
15028 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
15031 else if (STACK_TOP_P (operands[1]))
15034 p = "{\t%1, %0|r\t%0, %1}";
15036 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
15042 p = "{r\t%2, %0|\t%0, %2}";
15044 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
15050 gcc_unreachable ();
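/* Illustrative results, reconstructed from the logic above under the
   convention that ssep carries a leading 'v' (e.g. "vadd" for PLUS):
   for an SFmode PLUS the SSE branch composes
   "vaddss\t{%2, %1, %0|%0, %1, %2}" with AVX enabled and
   "addss\t{%2, %0|%0, %2}" without.  */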
15057 /* Return needed mode for entity in optimize_mode_switching pass. */
15060 ix86_mode_needed (int entity, rtx insn)
15062 enum attr_i387_cw mode;
15064 /* The mode UNINITIALIZED is used to store the control word after a
15065 function call or ASM pattern. The mode ANY specifies that the
15066 function has no requirements on the control word and makes no
15067 changes to the bits we are interested in. */
15070 || (NONJUMP_INSN_P (insn)
15071 && (asm_noperands (PATTERN (insn)) >= 0
15072 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
15073 return I387_CW_UNINITIALIZED;
15075 if (recog_memoized (insn) < 0)
15076 return I387_CW_ANY;
15078 mode = get_attr_i387_cw (insn);
15083 if (mode == I387_CW_TRUNC)
15088 if (mode == I387_CW_FLOOR)
15093 if (mode == I387_CW_CEIL)
15098 if (mode == I387_CW_MASK_PM)
15103 gcc_unreachable ();
15106 return I387_CW_ANY;
15109 /* Output code to initialize control word copies used by trunc?f?i and
15110 rounding patterns. CURRENT_MODE is set to the current control word,
15111 while NEW_MODE is set to the new control word. */
15114 emit_i387_cw_initialization (int mode)
15116 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
15119 enum ix86_stack_slot slot;
15121 rtx reg = gen_reg_rtx (HImode);
15123 emit_insn (gen_x86_fnstcw_1 (stored_mode));
15124 emit_move_insn (reg, copy_rtx (stored_mode));
15126 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
15127 || optimize_function_for_size_p (cfun))
15131 case I387_CW_TRUNC:
15132 /* round toward zero (truncate) */
15133 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
15134 slot = SLOT_CW_TRUNC;
15137 case I387_CW_FLOOR:
15138 /* round down toward -oo */
15139 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
15140 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
15141 slot = SLOT_CW_FLOOR;
15145 /* round up toward +oo */
15146 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
15147 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
15148 slot = SLOT_CW_CEIL;
15151 case I387_CW_MASK_PM:
15152 /* mask precision exception for nearbyint() */
15153 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
15154 slot = SLOT_CW_MASK_PM;
15158 gcc_unreachable ();
15165 case I387_CW_TRUNC:
15166 /* round toward zero (truncate) */
15167 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
15168 slot = SLOT_CW_TRUNC;
15171 case I387_CW_FLOOR:
15172 /* round down toward -oo */
15173 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
15174 slot = SLOT_CW_FLOOR;
15178 /* round up toward +oo */
15179 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
15180 slot = SLOT_CW_CEIL;
15183 case I387_CW_MASK_PM:
15184 /* mask precision exception for nearbyint() */
15185 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
15186 slot = SLOT_CW_MASK_PM;
15190 gcc_unreachable ();
15194 gcc_assert (slot < MAX_386_STACK_LOCALS);
15196 new_mode = assign_386_stack_local (HImode, slot);
15197 emit_move_insn (new_mode, reg);
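/* Illustrative sketch (not part of GCC): the control-word arithmetic
   emitted above, in plain C.  Bits 10-11 (mask 0x0c00) of the x87
   control word select the rounding mode; bit 5 (0x0020) masks the
   precision exception.  The initial value 0x037f is an assumption
   (the x87 power-up default), used only for this demonstration.  */
#if 0
#include <stdio.h>

int
main (void)
{
  unsigned short cw = 0x037f;			     /* assumed default */
  unsigned short cw_trunc = cw | 0x0c00;	     /* RC=11: toward zero */
  unsigned short cw_floor = (cw & ~0x0c00) | 0x0400; /* RC=01: toward -inf */
  unsigned short cw_ceil  = (cw & ~0x0c00) | 0x0800; /* RC=10: toward +inf */
  unsigned short cw_maskpm = cw | 0x0020;	     /* PM=1: mask precision */

  printf ("trunc=%04x floor=%04x ceil=%04x maskpm=%04x\n",
	  cw_trunc, cw_floor, cw_ceil, cw_maskpm);
  return 0;
}
#endif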
15200 /* Output code for INSN to convert a float to a signed int. OPERANDS
15201 are the insn operands. The output may be [HSD]Imode and the input
15202 operand may be [SDX]Fmode. */
15205 output_fix_trunc (rtx insn, rtx *operands, bool fisttp)
15207 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
15208 int dimode_p = GET_MODE (operands[0]) == DImode;
15209 int round_mode = get_attr_i387_cw (insn);
15211 /* Jump through a hoop or two for DImode, since the hardware has no
15212 non-popping instruction. We used to do this a different way, but
15213 that was somewhat fragile and broke with post-reload splitters. */
15214 if ((dimode_p || fisttp) && !stack_top_dies)
15215 output_asm_insn ("fld\t%y1", operands);
15217 gcc_assert (STACK_TOP_P (operands[1]));
15218 gcc_assert (MEM_P (operands[0]));
15219 gcc_assert (GET_MODE (operands[1]) != TFmode);
15222 output_asm_insn ("fisttp%Z0\t%0", operands);
15225 if (round_mode != I387_CW_ANY)
15226 output_asm_insn ("fldcw\t%3", operands);
15227 if (stack_top_dies || dimode_p)
15228 output_asm_insn ("fistp%Z0\t%0", operands);
15230 output_asm_insn ("fist%Z0\t%0", operands);
15231 if (round_mode != I387_CW_ANY)
15232 output_asm_insn ("fldcw\t%2", operands);
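/* Illustrative sketch (not part of GCC): what the fldcw/fistp/fldcw
   sequence above accomplishes, expressed with the C99 <fenv.h> API --
   switch the rounding mode, convert, then restore the old mode.  This
   is a model only; the compiler emits the raw instructions instead.  */
#if 0
#include <fenv.h>
#include <math.h>

static long
fix_trunc_model (double x)
{
  int old_mode = fegetround ();
  long result;

  fesetround (FE_TOWARDZERO);	/* the "fldcw %3" */
  result = lrint (x);		/* the "fistp"; rounds per current mode */
  fesetround (old_mode);	/* the "fldcw %2" */
  return result;
}
#endif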
15238 /* Output code for x87 ffreep insn. The OPNO argument, which may only
15239 have the values zero or one, indicates the ffreep insn's operand
15240 from the OPERANDS array. */
15242 static const char *
15243 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
15245 if (TARGET_USE_FFREEP)
15246 #ifdef HAVE_AS_IX86_FFREEP
15247 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
15250 static char retval[32];
15251 int regno = REGNO (operands[opno]);
15253 gcc_assert (FP_REGNO_P (regno));
15255 regno -= FIRST_STACK_REG;
15257 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
15262 return opno ? "fstp\t%y1" : "fstp\t%y0";
15266 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
15267 should be used. UNORDERED_P is true when fucom should be used. */
15270 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
15272 int stack_top_dies;
15273 rtx cmp_op0, cmp_op1;
15274 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
15278 cmp_op0 = operands[0];
15279 cmp_op1 = operands[1];
15283 cmp_op0 = operands[1];
15284 cmp_op1 = operands[2];
15289 static const char ucomiss[] = "vucomiss\t{%1, %0|%0, %1}";
15290 static const char ucomisd[] = "vucomisd\t{%1, %0|%0, %1}";
15291 static const char comiss[] = "vcomiss\t{%1, %0|%0, %1}";
15292 static const char comisd[] = "vcomisd\t{%1, %0|%0, %1}";
15294 if (GET_MODE (operands[0]) == SFmode)
15296 return &ucomiss[TARGET_AVX ? 0 : 1];
15298 return &comiss[TARGET_AVX ? 0 : 1];
15301 return &ucomisd[TARGET_AVX ? 0 : 1];
15303 return &comisd[TARGET_AVX ? 0 : 1];
15306 gcc_assert (STACK_TOP_P (cmp_op0));
15308 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
15310 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
15312 if (stack_top_dies)
15314 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
15315 return output_387_ffreep (operands, 1);
15318 return "ftst\n\tfnstsw\t%0";
15321 if (STACK_REG_P (cmp_op1)
15323 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
15324 && REGNO (cmp_op1) != FIRST_STACK_REG)
15326 /* If the top of the 387 stack dies, and the other operand
15327 is also a stack register that dies, then this must be a
15328 `fcompp' float compare. */
15332 /* There is no double popping fcomi variant. Fortunately,
15333 eflags is immune from the fstp's cc clobbering. */
15335 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
15337 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
15338 return output_387_ffreep (operands, 0);
15343 return "fucompp\n\tfnstsw\t%0";
15345 return "fcompp\n\tfnstsw\t%0";
15350 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
15352 static const char * const alt[16] =
15354 "fcom%Z2\t%y2\n\tfnstsw\t%0",
15355 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
15356 "fucom%Z2\t%y2\n\tfnstsw\t%0",
15357 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
15359 "ficom%Z2\t%y2\n\tfnstsw\t%0",
15360 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
15364 "fcomi\t{%y1, %0|%0, %y1}",
15365 "fcomip\t{%y1, %0|%0, %y1}",
15366 "fucomi\t{%y1, %0|%0, %y1}",
15367 "fucomip\t{%y1, %0|%0, %y1}",
15378 mask = eflags_p << 3;
15379 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
15380 mask |= unordered_p << 1;
15381 mask |= stack_top_dies;
15383 gcc_assert (mask < 16);
15392 ix86_output_addr_vec_elt (FILE *file, int value)
15394 const char *directive = ASM_LONG;
15398 directive = ASM_QUAD;
15400 gcc_assert (!TARGET_64BIT);
15403 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
15407 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
15409 const char *directive = ASM_LONG;
15412 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
15413 directive = ASM_QUAD;
15415 gcc_assert (!TARGET_64BIT);
15417 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
15418 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
15419 fprintf (file, "%s%s%d-%s%d\n",
15420 directive, LPREFIX, value, LPREFIX, rel);
15421 else if (HAVE_AS_GOTOFF_IN_DATA)
15422 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
15424 else if (TARGET_MACHO)
15426 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
15427 machopic_output_function_base_name (file);
15432 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
15433 GOT_SYMBOL_NAME, LPREFIX, value);
15436 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate. */
15440 ix86_expand_clear (rtx dest)
15444 /* We play register width games, which are only valid after reload. */
15445 gcc_assert (reload_completed);
15447 /* Avoid HImode and its attendant prefix byte. */
15448 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
15449 dest = gen_rtx_REG (SImode, REGNO (dest));
15450 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
15452 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
15453 if (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ())
15455 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15456 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
15462 /* X is an unchanging MEM. If it is a constant pool reference, return
15463 the constant pool rtx, else NULL. */
15466 maybe_get_pool_constant (rtx x)
15468 x = ix86_delegitimize_address (XEXP (x, 0));
15470 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
15471 return get_pool_constant (x);
15477 ix86_expand_move (enum machine_mode mode, rtx operands[])
15480 enum tls_model model;
15485 if (GET_CODE (op1) == SYMBOL_REF)
15487 model = SYMBOL_REF_TLS_MODEL (op1);
15490 op1 = legitimize_tls_address (op1, model, true);
15491 op1 = force_operand (op1, op0);
15495 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15496 && SYMBOL_REF_DLLIMPORT_P (op1))
15497 op1 = legitimize_dllimport_symbol (op1, false);
15499 else if (GET_CODE (op1) == CONST
15500 && GET_CODE (XEXP (op1, 0)) == PLUS
15501 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
15503 rtx addend = XEXP (XEXP (op1, 0), 1);
15504 rtx symbol = XEXP (XEXP (op1, 0), 0);
15507 model = SYMBOL_REF_TLS_MODEL (symbol);
15509 tmp = legitimize_tls_address (symbol, model, true);
15510 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15511 && SYMBOL_REF_DLLIMPORT_P (symbol))
15512 tmp = legitimize_dllimport_symbol (symbol, true);
15516 tmp = force_operand (tmp, NULL);
15517 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
15518 op0, 1, OPTAB_DIRECT);
15524 if ((flag_pic || MACHOPIC_INDIRECT)
15525 && mode == Pmode && symbolic_operand (op1, Pmode))
15527 if (TARGET_MACHO && !TARGET_64BIT)
15530 /* dynamic-no-pic */
15531 if (MACHOPIC_INDIRECT)
15533 rtx temp = ((reload_in_progress
15534 || ((op0 && REG_P (op0))
15536 ? op0 : gen_reg_rtx (Pmode));
15537 op1 = machopic_indirect_data_reference (op1, temp);
15539 op1 = machopic_legitimize_pic_address (op1, mode,
15540 temp == op1 ? 0 : temp);
15542 if (op0 != op1 && GET_CODE (op0) != MEM)
15544 rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
15548 if (GET_CODE (op0) == MEM)
15549 op1 = force_reg (Pmode, op1);
15553 if (GET_CODE (temp) != REG)
15554 temp = gen_reg_rtx (Pmode);
15555 temp = legitimize_pic_address (op1, temp);
15560 /* dynamic-no-pic */
15566 op1 = force_reg (Pmode, op1);
15567 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
15569 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
15570 op1 = legitimize_pic_address (op1, reg);
15579 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
15580 || !push_operand (op0, mode))
15582 op1 = force_reg (mode, op1);
15584 if (push_operand (op0, mode)
15585 && ! general_no_elim_operand (op1, mode))
15586 op1 = copy_to_mode_reg (mode, op1);
15588 /* Force large constants in 64bit compilation into a register
15589 to get them CSEed. */
15590 if (can_create_pseudo_p ()
15591 && (mode == DImode) && TARGET_64BIT
15592 && immediate_operand (op1, mode)
15593 && !x86_64_zext_immediate_operand (op1, VOIDmode)
15594 && !register_operand (op0, mode)
15596 op1 = copy_to_mode_reg (mode, op1);
15598 if (can_create_pseudo_p ()
15599 && FLOAT_MODE_P (mode)
15600 && GET_CODE (op1) == CONST_DOUBLE)
15602 /* If we are loading a floating point constant to a register,
15603 force the value to memory now, since we'll get better code
15604 out the back end. */
15606 op1 = validize_mem (force_const_mem (mode, op1));
15607 if (!register_operand (op0, mode))
15609 rtx temp = gen_reg_rtx (mode);
15610 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
15611 emit_move_insn (op0, temp);
15617 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
15621 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
15623 rtx op0 = operands[0], op1 = operands[1];
15624 unsigned int align = GET_MODE_ALIGNMENT (mode);
15626 /* Force constants other than zero into memory. We do not know how
15627 the instructions used to build constants modify the upper 64 bits
15628 of the register; once we have that information, we may be able
15629 to handle some of them more efficiently. */
15630 if (can_create_pseudo_p ()
15631 && register_operand (op0, mode)
15632 && (CONSTANT_P (op1)
15633 || (GET_CODE (op1) == SUBREG
15634 && CONSTANT_P (SUBREG_REG (op1))))
15635 && !standard_sse_constant_p (op1))
15636 op1 = validize_mem (force_const_mem (mode, op1));
15638 /* We need to check memory alignment for SSE mode since attributes
15639 can make operands unaligned. */
15640 if (can_create_pseudo_p ()
15641 && SSE_REG_MODE_P (mode)
15642 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
15643 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
15647 /* ix86_expand_vector_move_misalign() does not like constants ... */
15648 if (CONSTANT_P (op1)
15649 || (GET_CODE (op1) == SUBREG
15650 && CONSTANT_P (SUBREG_REG (op1))))
15651 op1 = validize_mem (force_const_mem (mode, op1));
15653 /* ... nor both arguments in memory. */
15654 if (!register_operand (op0, mode)
15655 && !register_operand (op1, mode))
15656 op1 = force_reg (mode, op1);
15658 tmp[0] = op0; tmp[1] = op1;
15659 ix86_expand_vector_move_misalign (mode, tmp);
15663 /* Make operand1 a register if it isn't already. */
15664 if (can_create_pseudo_p ()
15665 && !register_operand (op0, mode)
15666 && !register_operand (op1, mode))
15668 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
15672 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
15675 /* Split 32-byte AVX unaligned load and store if needed. */
15678 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
15681 rtx (*extract) (rtx, rtx, rtx);
15682 rtx (*move_unaligned) (rtx, rtx);
15683 enum machine_mode mode;
15685 switch (GET_MODE (op0))
15688 gcc_unreachable ();
15690 extract = gen_avx_vextractf128v32qi;
15691 move_unaligned = gen_avx_movdqu256;
15695 extract = gen_avx_vextractf128v8sf;
15696 move_unaligned = gen_avx_movups256;
15700 extract = gen_avx_vextractf128v4df;
15701 move_unaligned = gen_avx_movupd256;
15706 if (MEM_P (op1) && TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
15708 rtx r = gen_reg_rtx (mode);
15709 m = adjust_address (op1, mode, 0);
15710 emit_move_insn (r, m);
15711 m = adjust_address (op1, mode, 16);
15712 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
15713 emit_move_insn (op0, r);
15715 else if (MEM_P (op0) && TARGET_AVX256_SPLIT_UNALIGNED_STORE)
15717 m = adjust_address (op0, mode, 0);
15718 emit_insn (extract (m, op1, const0_rtx));
15719 m = adjust_address (op0, mode, 16);
15720 emit_insn (extract (m, op1, const1_rtx));
15723 emit_insn (move_unaligned (op0, op1));
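/* Illustrative sketch (not part of GCC): the effect of the split in
   byte terms.  One 32-byte unaligned access becomes two independent
   16-byte halves at offsets 0 and 16, which is what the vextractf128 /
   VEC_CONCAT pairs above produce.  */
#if 0
#include <string.h>

static void
split_unaligned_store_32 (unsigned char *dst, const unsigned char *src)
{
  memcpy (dst, src, 16);	   /* store the low 128-bit lane */
  memcpy (dst + 16, src + 16, 16); /* store the high 128-bit lane */
}
#endif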
15726 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
15727 straight to ix86_expand_vector_move. */
15728 /* Code generation for scalar reg-reg moves of single and double precision data:
15729 if (x86_sse_partial_reg_dependency == true || x86_sse_split_regs == true)
15733 if (x86_sse_partial_reg_dependency == true)
15738 Code generation for scalar loads of double precision data:
15739 if (x86_sse_split_regs == true)
15740 movlpd mem, reg (gas syntax)
15744 Code generation for unaligned packed loads of single precision data
15745 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
15746 if (x86_sse_unaligned_move_optimal)
15749 if (x86_sse_partial_reg_dependency == true)
15761 Code generation for unaligned packed loads of double precision data
15762 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
15763 if (x86_sse_unaligned_move_optimal)
15766 if (x86_sse_split_regs == true)
15779 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
15788 switch (GET_MODE_CLASS (mode))
15790 case MODE_VECTOR_INT:
15792 switch (GET_MODE_SIZE (mode))
15795 /* If we're optimizing for size, movups is the smallest. */
15796 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15798 op0 = gen_lowpart (V4SFmode, op0);
15799 op1 = gen_lowpart (V4SFmode, op1);
15800 emit_insn (gen_sse_movups (op0, op1));
15803 op0 = gen_lowpart (V16QImode, op0);
15804 op1 = gen_lowpart (V16QImode, op1);
15805 emit_insn (gen_sse2_movdqu (op0, op1));
15808 op0 = gen_lowpart (V32QImode, op0);
15809 op1 = gen_lowpart (V32QImode, op1);
15810 ix86_avx256_split_vector_move_misalign (op0, op1);
15813 gcc_unreachable ();
15816 case MODE_VECTOR_FLOAT:
15817 op0 = gen_lowpart (mode, op0);
15818 op1 = gen_lowpart (mode, op1);
15823 emit_insn (gen_sse_movups (op0, op1));
15826 ix86_avx256_split_vector_move_misalign (op0, op1);
15829 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15831 op0 = gen_lowpart (V4SFmode, op0);
15832 op1 = gen_lowpart (V4SFmode, op1);
15833 emit_insn (gen_sse_movups (op0, op1));
15836 emit_insn (gen_sse2_movupd (op0, op1));
15839 ix86_avx256_split_vector_move_misalign (op0, op1);
15842 gcc_unreachable ();
15847 gcc_unreachable ();
15855 /* If we're optimizing for size, movups is the smallest. */
15856 if (optimize_insn_for_size_p ()
15857 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15859 op0 = gen_lowpart (V4SFmode, op0);
15860 op1 = gen_lowpart (V4SFmode, op1);
15861 emit_insn (gen_sse_movups (op0, op1));
15865 /* ??? If we have typed data, then it would appear that using
15866 movdqu is the only way to get unaligned data loaded with integer type. */
15868 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
15870 op0 = gen_lowpart (V16QImode, op0);
15871 op1 = gen_lowpart (V16QImode, op1);
15872 emit_insn (gen_sse2_movdqu (op0, op1));
15876 if (TARGET_SSE2 && mode == V2DFmode)
15880 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
15882 op0 = gen_lowpart (V2DFmode, op0);
15883 op1 = gen_lowpart (V2DFmode, op1);
15884 emit_insn (gen_sse2_movupd (op0, op1));
15888 /* When SSE registers are split into halves, we can avoid
15889 writing to the top half twice. */
15890 if (TARGET_SSE_SPLIT_REGS)
15892 emit_clobber (op0);
15897 /* ??? Not sure about the best option for the Intel chips.
15898 The following would seem to satisfy; the register is
15899 entirely cleared, breaking the dependency chain. We
15900 then store to the upper half, with a dependency depth
15901 of one. A rumor has it that Intel recommends two movsd
15902 followed by an unpacklpd, but this is unconfirmed. And
15903 given that the dependency depth of the unpacklpd would
15904 still be one, I'm not sure why this would be better. */
15905 zero = CONST0_RTX (V2DFmode);
15908 m = adjust_address (op1, DFmode, 0);
15909 emit_insn (gen_sse2_loadlpd (op0, zero, m));
15910 m = adjust_address (op1, DFmode, 8);
15911 emit_insn (gen_sse2_loadhpd (op0, op0, m));
15915 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
15917 op0 = gen_lowpart (V4SFmode, op0);
15918 op1 = gen_lowpart (V4SFmode, op1);
15919 emit_insn (gen_sse_movups (op0, op1));
15923 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
15924 emit_move_insn (op0, CONST0_RTX (mode));
15926 emit_clobber (op0);
15928 if (mode != V4SFmode)
15929 op0 = gen_lowpart (V4SFmode, op0);
15930 m = adjust_address (op1, V2SFmode, 0);
15931 emit_insn (gen_sse_loadlps (op0, op0, m));
15932 m = adjust_address (op1, V2SFmode, 8);
15933 emit_insn (gen_sse_loadhps (op0, op0, m));
15936 else if (MEM_P (op0))
15938 /* If we're optimizing for size, movups is the smallest. */
15939 if (optimize_insn_for_size_p ()
15940 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15942 op0 = gen_lowpart (V4SFmode, op0);
15943 op1 = gen_lowpart (V4SFmode, op1);
15944 emit_insn (gen_sse_movups (op0, op1));
15948 /* ??? Similar to above, only less clear because of the
15949 "typeless stores" issue. */
15950 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
15951 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
15953 op0 = gen_lowpart (V16QImode, op0);
15954 op1 = gen_lowpart (V16QImode, op1);
15955 emit_insn (gen_sse2_movdqu (op0, op1));
15959 if (TARGET_SSE2 && mode == V2DFmode)
15961 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
15963 op0 = gen_lowpart (V2DFmode, op0);
15964 op1 = gen_lowpart (V2DFmode, op1);
15965 emit_insn (gen_sse2_movupd (op0, op1));
15969 m = adjust_address (op0, DFmode, 0);
15970 emit_insn (gen_sse2_storelpd (m, op1));
15971 m = adjust_address (op0, DFmode, 8);
15972 emit_insn (gen_sse2_storehpd (m, op1));
15977 if (mode != V4SFmode)
15978 op1 = gen_lowpart (V4SFmode, op1);
15980 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
15982 op0 = gen_lowpart (V4SFmode, op0);
15983 emit_insn (gen_sse_movups (op0, op1));
15987 m = adjust_address (op0, V2SFmode, 0);
15988 emit_insn (gen_sse_storelps (m, op1));
15989 m = adjust_address (op0, V2SFmode, 8);
15990 emit_insn (gen_sse_storehps (m, op1));
15995 gcc_unreachable ();
15998 /* Expand a push in MODE. This is some mode for which we do not support
15999 proper push instructions, at least from the registers that we expect
16000 the value to live in. */
16003 ix86_expand_push (enum machine_mode mode, rtx x)
16007 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
16008 GEN_INT (-GET_MODE_SIZE (mode)),
16009 stack_pointer_rtx, 1, OPTAB_DIRECT);
16010 if (tmp != stack_pointer_rtx)
16011 emit_move_insn (stack_pointer_rtx, tmp);
16013 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
16015 /* When we push an operand onto the stack, it has to be aligned at least
16016 at the function argument boundary. However, since we don't have
16017 the argument type, we can't determine the actual argument boundary. */
16019 emit_move_insn (tmp, x);
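/* Illustrative sketch (not part of GCC): the expander above in pointer
   terms -- pre-decrement the stack pointer by the operand size, then
   store through it.  The global "sp" is a stand-in for the stack
   pointer register.  */
#if 0
#include <string.h>

static unsigned char *sp;	/* stand-in for stack_pointer_rtx */

static void
push_value_model (const void *x, size_t size)
{
  sp -= size;			/* sp = sp + (-GET_MODE_SIZE (mode)) */
  memcpy (sp, x, size);		/* the emit_move_insn (tmp, x) */
}
#endif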
16022 /* Helper function of ix86_fixup_binary_operands to canonicalize
16023 operand order. Returns true if the operands should be swapped. */
16026 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
16029 rtx dst = operands[0];
16030 rtx src1 = operands[1];
16031 rtx src2 = operands[2];
16033 /* If the operation is not commutative, we can't do anything. */
16034 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
16037 /* Highest priority is that src1 should match dst. */
16038 if (rtx_equal_p (dst, src1))
16040 if (rtx_equal_p (dst, src2))
16043 /* Next highest priority is that immediate constants come second. */
16044 if (immediate_operand (src2, mode))
16046 if (immediate_operand (src1, mode))
16049 /* Lowest priority is that memory references should come second. */
16059 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
16060 destination to use for the operation. If different from the true
16061 destination in operands[0], a copy operation will be required. */
16064 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
16067 rtx dst = operands[0];
16068 rtx src1 = operands[1];
16069 rtx src2 = operands[2];
16071 /* Canonicalize operand order. */
16072 if (ix86_swap_binary_operands_p (code, mode, operands))
16076 /* It is invalid to swap operands of different modes. */
16077 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
16084 /* Both source operands cannot be in memory. */
16085 if (MEM_P (src1) && MEM_P (src2))
16087 /* Optimization: Only read from memory once. */
16088 if (rtx_equal_p (src1, src2))
16090 src2 = force_reg (mode, src2);
16094 src2 = force_reg (mode, src2);
16097 /* If the destination is memory, and we do not have matching source
16098 operands, do things in registers. */
16099 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
16100 dst = gen_reg_rtx (mode);
16102 /* Source 1 cannot be a constant. */
16103 if (CONSTANT_P (src1))
16104 src1 = force_reg (mode, src1);
16106 /* Source 1 cannot be a non-matching memory. */
16107 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
16108 src1 = force_reg (mode, src1);
16110 operands[1] = src1;
16111 operands[2] = src2;
16115 /* Similarly, but assume that the destination has already been
16116 set up properly. */
16119 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
16120 enum machine_mode mode, rtx operands[])
16122 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
16123 gcc_assert (dst == operands[0]);
16126 /* Attempt to expand a binary operator. Make the expansion closer to the
16127 actual machine than just general_operand, which will allow 3 separate
16128 memory references (one output, two input) in a single insn. */
16131 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
16134 rtx src1, src2, dst, op, clob;
16136 dst = ix86_fixup_binary_operands (code, mode, operands);
16137 src1 = operands[1];
16138 src2 = operands[2];
16140 /* Emit the instruction. */
16142 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
16143 if (reload_in_progress)
16145 /* Reload doesn't know about the flags register, and doesn't know that
16146 it doesn't want to clobber it. We can only do this with PLUS. */
16147 gcc_assert (code == PLUS);
16150 else if (reload_completed
16152 && !rtx_equal_p (dst, src1))
16154 /* This is going to be an LEA; avoid splitting it later. */
16159 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16160 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
16163 /* Fix up the destination if needed. */
16164 if (dst != operands[0])
16165 emit_move_insn (operands[0], dst);
16168 /* Return TRUE or FALSE depending on whether the binary operator meets the
16169 appropriate constraints. */
16172 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
16175 rtx dst = operands[0];
16176 rtx src1 = operands[1];
16177 rtx src2 = operands[2];
16179 /* Both source operands cannot be in memory. */
16180 if (MEM_P (src1) && MEM_P (src2))
16183 /* Canonicalize operand order for commutative operators. */
16184 if (ix86_swap_binary_operands_p (code, mode, operands))
16191 /* If the destination is memory, we must have a matching source operand. */
16192 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
16195 /* Source 1 cannot be a constant. */
16196 if (CONSTANT_P (src1))
16199 /* Source 1 cannot be a non-matching memory. */
16200 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
16202 /* Support "andhi/andsi/anddi" as a zero-extending move. */
16203 return (code == AND
16206 || (TARGET_64BIT && mode == DImode))
16207 && CONST_INT_P (src2)
16208 && (INTVAL (src2) == 0xff
16209 || INTVAL (src2) == 0xffff));
16215 /* Attempt to expand a unary operator. Make the expansion closer to the
16216 actual machine than just general_operand, which will allow 2 separate
16217 memory references (one output, one input) in a single insn. */
16220 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
16223 int matching_memory;
16224 rtx src, dst, op, clob;
16229 /* If the destination is memory, and we do not have matching source
16230 operands, do things in registers. */
16231 matching_memory = 0;
16234 if (rtx_equal_p (dst, src))
16235 matching_memory = 1;
16237 dst = gen_reg_rtx (mode);
16240 /* When source operand is memory, destination must match. */
16241 if (MEM_P (src) && !matching_memory)
16242 src = force_reg (mode, src);
16244 /* Emit the instruction. */
16246 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
16247 if (reload_in_progress || code == NOT)
16249 /* Reload doesn't know about the flags register, and doesn't know that
16250 it doesn't want to clobber it. */
16251 gcc_assert (code == NOT);
16256 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16257 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
16260 /* Fix up the destination if needed. */
16261 if (dst != operands[0])
16262 emit_move_insn (operands[0], dst);
16265 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
16266 divisor are within the range [0-255]. */
16269 ix86_split_idivmod (enum machine_mode mode, rtx operands[],
16272 rtx end_label, qimode_label;
16273 rtx insn, div, mod;
16274 rtx scratch, tmp0, tmp1, tmp2;
16275 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
16276 rtx (*gen_zero_extend) (rtx, rtx);
16277 rtx (*gen_test_ccno_1) (rtx, rtx);
16282 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
16283 gen_test_ccno_1 = gen_testsi_ccno_1;
16284 gen_zero_extend = gen_zero_extendqisi2;
16287 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
16288 gen_test_ccno_1 = gen_testdi_ccno_1;
16289 gen_zero_extend = gen_zero_extendqidi2;
16292 gcc_unreachable ();
16295 end_label = gen_label_rtx ();
16296 qimode_label = gen_label_rtx ();
16298 scratch = gen_reg_rtx (mode);
16300 /* Use 8bit unsigned divmod if dividend and divisor are within
16301 the range [0-255]. */
16302 emit_move_insn (scratch, operands[2]);
16303 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
16304 scratch, 1, OPTAB_DIRECT);
16305 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
16306 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
16307 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
16308 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
16309 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
16311 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
16312 predict_jump (REG_BR_PROB_BASE * 50 / 100);
16313 JUMP_LABEL (insn) = qimode_label;
16315 /* Generate original signed/unsigned divmod. */
16316 div = gen_divmod4_1 (operands[0], operands[1],
16317 operands[2], operands[3]);
16320 /* Branch to the end. */
16321 emit_jump_insn (gen_jump (end_label));
16324 /* Generate 8bit unsigned divide. */
16325 emit_label (qimode_label);
16326 /* Don't use operands[0] for result of 8bit divide since not all
16327 registers support QImode ZERO_EXTRACT. */
16328 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
16329 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
16330 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
16331 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
16335 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
16336 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
16340 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
16341 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
16344 /* Extract remainder from AH. */
16345 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
16346 if (REG_P (operands[1]))
16347 insn = emit_move_insn (operands[1], tmp1);
16350 /* Need a new scratch register since the old one has the result of the 8bit divide. */
16352 scratch = gen_reg_rtx (mode);
16353 emit_move_insn (scratch, tmp1);
16354 insn = emit_move_insn (operands[1], scratch);
16356 set_unique_reg_note (insn, REG_EQUAL, mod);
16358 /* Zero extend quotient from AL. */
16359 tmp1 = gen_lowpart (QImode, tmp0);
16360 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
16361 set_unique_reg_note (insn, REG_EQUAL, div);
16363 emit_label (end_label);
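/* Illustrative sketch (not part of GCC): a scalar model of the split.
   The "test $-0x100" above checks that (dividend | divisor) has no bits
   above the low 8; if so, an 8-bit unsigned divide (quotient in AL,
   remainder in AH) produces the same answers as the wide divide.  */
#if 0
static void
divmod_model (unsigned int a, unsigned int b,
	      unsigned int *quot, unsigned int *rem)
{
  if (((a | b) & ~0xffu) == 0)	/* the "test $-0x100" branch */
    {
      /* Model of "divb": 16-bit AX over an 8-bit divisor; the
	 quotient lands in AL, the remainder in AH.  */
      unsigned short ax = (unsigned short) a;
      unsigned char al = ax / (unsigned char) b;
      unsigned char ah = ax % (unsigned char) b;
      *quot = al;		/* then zero-extended from AL */
      *rem = ah;		/* then extracted from AH */
    }
  else
    {
      *quot = a / b;		/* original full-width divide */
      *rem = a % b;
    }
}
#endif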
16366 #define LEA_SEARCH_THRESHOLD 12
16368 /* Search backward for a non-agu definition of register number REGNO1
16369 or register number REGNO2 in INSN's basic block until we
16370 1. pass LEA_SEARCH_THRESHOLD instructions, or
16371 2. reach the BB boundary, or
16372 3. reach an agu definition.
16373 Returns the distance between the non-agu definition point and INSN.
16374 If there is no definition point, returns -1. */
16377 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
16380 basic_block bb = BLOCK_FOR_INSN (insn);
16383 enum attr_type insn_type;
16385 if (insn != BB_HEAD (bb))
16387 rtx prev = PREV_INSN (insn);
16388 while (prev && distance < LEA_SEARCH_THRESHOLD)
16390 if (NONDEBUG_INSN_P (prev))
16393 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
16394 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
16395 && !DF_REF_IS_ARTIFICIAL (*def_rec)
16396 && (regno1 == DF_REF_REGNO (*def_rec)
16397 || regno2 == DF_REF_REGNO (*def_rec)))
16399 insn_type = get_attr_type (prev);
16400 if (insn_type != TYPE_LEA)
16404 if (prev == BB_HEAD (bb))
16406 prev = PREV_INSN (prev);
16410 if (distance < LEA_SEARCH_THRESHOLD)
16414 bool simple_loop = false;
16416 FOR_EACH_EDGE (e, ei, bb->preds)
16419 simple_loop = true;
16425 rtx prev = BB_END (bb);
16428 && distance < LEA_SEARCH_THRESHOLD)
16430 if (NONDEBUG_INSN_P (prev))
16433 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
16434 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
16435 && !DF_REF_IS_ARTIFICIAL (*def_rec)
16436 && (regno1 == DF_REF_REGNO (*def_rec)
16437 || regno2 == DF_REF_REGNO (*def_rec)))
16439 insn_type = get_attr_type (prev);
16440 if (insn_type != TYPE_LEA)
16444 prev = PREV_INSN (prev);
16452 /* get_attr_type may modify recog data. We want to make sure
16453 that recog data is valid for instruction INSN, on which
16454 distance_non_agu_define is called. INSN is unchanged here. */
16455 extract_insn_cached (insn);
16459 /* Return the distance between INSN and the next insn that uses
16460 register number REGNO0 in a memory address. Return -1 if no such
16461 use is found within LEA_SEARCH_THRESHOLD or if REGNO0 is set. */
16464 distance_agu_use (unsigned int regno0, rtx insn)
16466 basic_block bb = BLOCK_FOR_INSN (insn);
16471 if (insn != BB_END (bb))
16473 rtx next = NEXT_INSN (insn);
16474 while (next && distance < LEA_SEARCH_THRESHOLD)
16476 if (NONDEBUG_INSN_P (next))
16480 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
16481 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
16482 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
16483 && regno0 == DF_REF_REGNO (*use_rec))
16485 /* Return DISTANCE if OP0 is used in memory
16486 address in NEXT. */
16490 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
16491 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
16492 && !DF_REF_IS_ARTIFICIAL (*def_rec)
16493 && regno0 == DF_REF_REGNO (*def_rec))
16495 /* Return -1 if OP0 is set in NEXT. */
16499 if (next == BB_END (bb))
16501 next = NEXT_INSN (next);
16505 if (distance < LEA_SEARCH_THRESHOLD)
16509 bool simple_loop = false;
16511 FOR_EACH_EDGE (e, ei, bb->succs)
16514 simple_loop = true;
16520 rtx next = BB_HEAD (bb);
16523 && distance < LEA_SEARCH_THRESHOLD)
16525 if (NONDEBUG_INSN_P (next))
16529 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
16530 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
16531 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
16532 && regno0 == DF_REF_REGNO (*use_rec))
16534 /* Return DISTANCE if OP0 is used in memory
16535 address in NEXT. */
16539 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
16540 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
16541 && !DF_REF_IS_ARTIFICIAL (*def_rec)
16542 && regno0 == DF_REF_REGNO (*def_rec))
16544 /* Return -1 if OP0 is set in NEXT. */
16549 next = NEXT_INSN (next);
16557 /* Define this macro to tune LEA priority vs. ADD; it takes effect when
16558 there is a choice between LEA and ADD.
16559 Negative value: ADD is preferred over LEA
16561 Positive value: LEA is preferred over ADD */
16562 #define IX86_LEA_PRIORITY 2
16564 /* Return true if it is ok to optimize an ADD operation to an LEA
16565 operation to avoid flag register consumption. For most processors,
16566 ADD is faster than LEA. For processors like ATOM, if the
16567 destination register of the LEA holds an actual address which will be
16568 used soon, LEA is better; otherwise ADD is better. */
16571 ix86_lea_for_add_ok (rtx insn, rtx operands[])
16573 unsigned int regno0 = true_regnum (operands[0]);
16574 unsigned int regno1 = true_regnum (operands[1]);
16575 unsigned int regno2 = true_regnum (operands[2]);
16577 /* If a = b + c, (a!=b && a!=c), must use lea form. */
16578 if (regno0 != regno1 && regno0 != regno2)
16581 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
16585 int dist_define, dist_use;
16587 /* Return false if REGNO0 isn't used in memory address. */
16588 dist_use = distance_agu_use (regno0, insn);
16592 dist_define = distance_non_agu_define (regno1, regno2, insn);
16593 if (dist_define <= 0)
16596 /* If this insn has both a backward non-agu dependence and a forward
16597 agu dependence, the one with the shorter distance takes effect. */
16598 if ((dist_define + IX86_LEA_PRIORITY) < dist_use)
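/* Illustrative sketch (not part of GCC): the decision rule of this
   function with the two distances already computed.  The returns that
   are elided in this excerpt are reconstructed from the comments
   above; treat this as a reading aid, not the exact source.  */
#if 0
#define IX86_LEA_PRIORITY 2

static int
lea_for_add_ok_model (int dist_define, int dist_use)
{
  if (dist_use <= 0)
    return 0;	/* result never feeds a memory address: use ADD */
  if (dist_define <= 0)
    return 1;	/* no close non-AGU producer of the inputs: LEA is fine */
  /* Both dependences exist; the shorter distance wins, with a bias of
     IX86_LEA_PRIORITY in favor of LEA.  */
  return (dist_define + IX86_LEA_PRIORITY) >= dist_use;
}
#endif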
16605 /* Return true if the destination reg of SET_BODY is the shift count of USE_BODY. */
16609 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
16615 /* Retrieve destination of SET_BODY. */
16616 switch (GET_CODE (set_body))
16619 set_dest = SET_DEST (set_body);
16620 if (!set_dest || !REG_P (set_dest))
16624 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
16625 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
16633 /* Retrieve shift count of USE_BODY. */
16634 switch (GET_CODE (use_body))
16637 shift_rtx = XEXP (use_body, 1);
16640 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
16641 if (ix86_dep_by_shift_count_body (set_body,
16642 XVECEXP (use_body, 0, i)))
16650 && (GET_CODE (shift_rtx) == ASHIFT
16651 || GET_CODE (shift_rtx) == LSHIFTRT
16652 || GET_CODE (shift_rtx) == ASHIFTRT
16653 || GET_CODE (shift_rtx) == ROTATE
16654 || GET_CODE (shift_rtx) == ROTATERT))
16656 rtx shift_count = XEXP (shift_rtx, 1);
16658 /* Return true if shift count is dest of SET_BODY. */
16659 if (REG_P (shift_count)
16660 && true_regnum (set_dest) == true_regnum (shift_count))
16667 /* Return true if the destination reg of SET_INSN is the shift count of USE_INSN. */
16671 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
16673 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
16674 PATTERN (use_insn));
16677 /* Return TRUE or FALSE depending on whether the unary operator meets the
16678 appropriate constraints. */
16681 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
16682 enum machine_mode mode ATTRIBUTE_UNUSED,
16683 rtx operands[2] ATTRIBUTE_UNUSED)
16685 /* If one of the operands is memory, source and destination must match. */
16686 if ((MEM_P (operands[0])
16687 || MEM_P (operands[1]))
16688 && ! rtx_equal_p (operands[0], operands[1]))
16693 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
16694 are ok, keeping in mind the possible movddup alternative. */
16697 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
16699 if (MEM_P (operands[0]))
16700 return rtx_equal_p (operands[0], operands[1 + high]);
16701 if (MEM_P (operands[1]) && MEM_P (operands[2]))
16702 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
16706 /* Post-reload splitter for converting an SF or DFmode value in an
16707 SSE register into an unsigned SImode. */
16710 ix86_split_convert_uns_si_sse (rtx operands[])
16712 enum machine_mode vecmode;
16713 rtx value, large, zero_or_two31, input, two31, x;
16715 large = operands[1];
16716 zero_or_two31 = operands[2];
16717 input = operands[3];
16718 two31 = operands[4];
16719 vecmode = GET_MODE (large);
16720 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
16722 /* Load up the value into the low element. We must ensure that the other
16723 elements are valid floats -- zero is the easiest such value. */
16726 if (vecmode == V4SFmode)
16727 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
16729 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
16733 input = gen_rtx_REG (vecmode, REGNO (input));
16734 emit_move_insn (value, CONST0_RTX (vecmode));
16735 if (vecmode == V4SFmode)
16736 emit_insn (gen_sse_movss (value, value, input));
16738 emit_insn (gen_sse2_movsd (value, value, input));
16741 emit_move_insn (large, two31);
16742 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
16744 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
16745 emit_insn (gen_rtx_SET (VOIDmode, large, x));
16747 x = gen_rtx_AND (vecmode, zero_or_two31, large);
16748 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
16750 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
16751 emit_insn (gen_rtx_SET (VOIDmode, value, x));
16753 large = gen_rtx_REG (V4SImode, REGNO (large));
16754 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
16756 x = gen_rtx_REG (V4SImode, REGNO (value));
16757 if (vecmode == V4SFmode)
16758 emit_insn (gen_sse2_cvttps2dq (x, value));
16760 emit_insn (gen_sse2_cvttpd2dq (x, value));
16763 emit_insn (gen_xorv4si3 (value, value, large));
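/* Illustrative sketch (not part of GCC): a scalar model of the vector
   sequence above.  Inputs >= 2^31 are reduced by 2^31 before the
   signed conversion, and the stripped sign is patched back in with the
   XOR of the compare mask shifted left by 31.  Requires C99 hex float
   literals.  */
#if 0
static unsigned int
double_to_uns32_model (double x)
{
  if (x >= 0x1.0p31)		/* the "large <= value" compare mask */
    return (unsigned int) (int) (x - 0x1.0p31) ^ 0x80000000u;
  return (unsigned int) (int) x;
}
#endif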
16766 /* Convert an unsigned DImode value into a DFmode, using only SSE.
16767 Expects the 64-bit DImode to be supplied in a pair of integral
16768 registers. Requires SSE2; will use SSE3 if available. For x86_32,
16769 -mfpmath=sse, !optimize_size only. */
16772 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
16774 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
16775 rtx int_xmm, fp_xmm;
16776 rtx biases, exponents;
16779 int_xmm = gen_reg_rtx (V4SImode);
16780 if (TARGET_INTER_UNIT_MOVES)
16781 emit_insn (gen_movdi_to_sse (int_xmm, input));
16782 else if (TARGET_SSE_SPLIT_REGS)
16784 emit_clobber (int_xmm);
16785 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
16789 x = gen_reg_rtx (V2DImode);
16790 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
16791 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
16794 x = gen_rtx_CONST_VECTOR (V4SImode,
16795 gen_rtvec (4, GEN_INT (0x43300000UL),
16796 GEN_INT (0x45300000UL),
16797 const0_rtx, const0_rtx));
16798 exponents = validize_mem (force_const_mem (V4SImode, x));
16800 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
16801 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
16803 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
16804 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
16805 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
16806 (0x1.0p84 + double(fp_value_hi_xmm)).
16807 Note these exponents differ by 32. */
16809 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
16811 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
16812 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
16813 real_ldexp (&bias_lo_rvt, &dconst1, 52);
16814 real_ldexp (&bias_hi_rvt, &dconst1, 84);
16815 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
16816 x = const_double_from_real_value (bias_hi_rvt, DFmode);
16817 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
16818 biases = validize_mem (force_const_mem (V2DFmode, biases));
16819 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
16821 /* Add the upper and lower DFmode values together. */
16823 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
16826 x = copy_to_mode_reg (V2DFmode, fp_xmm);
16827 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
16828 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
16831 ix86_expand_vector_extract (false, target, fp_xmm, 0);
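/* Illustrative sketch (not part of GCC): the exponent-juxtaposition
   trick in scalar C.  Gluing 0x43300000/0x45300000 above the two
   32-bit halves yields 2^52 + lo and 2^84 + hi * 2^32 exactly;
   subtracting the biases and adding the parts reconstructs the value
   with a single rounding.  Requires C99 hex float literals.  */
#if 0
#include <stdint.h>
#include <string.h>

static double
uns64_to_double_model (uint64_t x)
{
  uint64_t lo_bits = 0x4330000000000000ull | (uint32_t) x;
  uint64_t hi_bits = 0x4530000000000000ull | (x >> 32);
  double lo, hi;

  memcpy (&lo, &lo_bits, sizeof lo);	/* 2^52 + low half, exactly */
  memcpy (&hi, &hi_bits, sizeof hi);	/* 2^84 + high half * 2^32 */
  return (hi - 0x1.0p84) + (lo - 0x1.0p52);
}
#endif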
16834 /* Not used, but eases macroization of patterns. */
16836 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
16837 rtx input ATTRIBUTE_UNUSED)
16839 gcc_unreachable ();
16842 /* Convert an unsigned SImode value into a DFmode. Only currently used
16843 for SSE, but applicable anywhere. */
16846 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
16848 REAL_VALUE_TYPE TWO31r;
16851 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
16852 NULL, 1, OPTAB_DIRECT);
16854 fp = gen_reg_rtx (DFmode);
16855 emit_insn (gen_floatsidf2 (fp, x));
16857 real_ldexp (&TWO31r, &dconst1, 31);
16858 x = const_double_from_real_value (TWO31r, DFmode);
16860 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
16862 emit_move_insn (target, x);
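/* Illustrative sketch (not part of GCC): the bias trick above in
   scalar C.  Adding -2^31 wraps the value into signed range (two's
   complement wrap assumed); the signed conversion and the final +2^31
   are both exact in DFmode.  */
#if 0
static double
uns32_to_double_model (unsigned int x)
{
  int biased = (int) (x + 0x80000000u);	/* the PLUS of -2147483648 */
  return (double) biased + 0x1.0p31;	/* floatsidf2, then re-bias */
}
#endif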
16865 /* Convert a signed DImode value into a DFmode. Only used for SSE in
16866 32-bit mode; otherwise we have a direct convert instruction. */
16869 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
16871 REAL_VALUE_TYPE TWO32r;
16872 rtx fp_lo, fp_hi, x;
16874 fp_lo = gen_reg_rtx (DFmode);
16875 fp_hi = gen_reg_rtx (DFmode);
16877 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
16879 real_ldexp (&TWO32r, &dconst1, 32);
16880 x = const_double_from_real_value (TWO32r, DFmode);
16881 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
16883 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
16885 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
16888 emit_move_insn (target, x);
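/* Illustrative sketch (not part of GCC): scalar model of the signed
   split above.  The high half converts with its sign and is scaled by
   2^32 exactly; the low half goes through the unsigned SImode path.
   uns32_to_double_model is the sketch from the previous function, and
   an arithmetic right shift of the negative operand is assumed.  */
#if 0
static double
int64_to_double_model (long long x)
{
  double hi = (double) (int) (x >> 32);	 /* gen_highpart, floatsidf2 */
  double lo = uns32_to_double_model ((unsigned int) x);
  return hi * 0x1.0p32 + lo;		 /* MULT by TWO32r, then PLUS */
}
#endif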
16891 /* Convert an unsigned SImode value into a SFmode, using only SSE.
16892 For x86_32, -mfpmath=sse, !optimize_size only. */
16894 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
16896 REAL_VALUE_TYPE ONE16r;
16897 rtx fp_hi, fp_lo, int_hi, int_lo, x;
16899 real_ldexp (&ONE16r, &dconst1, 16);
16900 x = const_double_from_real_value (ONE16r, SFmode);
16901 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
16902 NULL, 0, OPTAB_DIRECT);
16903 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
16904 NULL, 0, OPTAB_DIRECT);
16905 fp_hi = gen_reg_rtx (SFmode);
16906 fp_lo = gen_reg_rtx (SFmode);
16907 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
16908 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
16909 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
16911 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
16913 if (!rtx_equal_p (target, fp_hi))
16914 emit_move_insn (target, fp_hi);
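/* Illustrative sketch (not part of GCC): the 16/16 split above in
   scalar C.  Each 16-bit half converts exactly, and the multiply by
   2^16 is also exact, so only the final add rounds.  */
#if 0
static float
uns32_to_float_model (unsigned int x)
{
  float hi = (float) (int) (x >> 16);	 /* LSHIFTRT 16, floatsisf2 */
  float lo = (float) (int) (x & 0xffff); /* AND 0xffff, floatsisf2 */
  return hi * 0x1.0p16f + lo;		 /* MULT by ONE16r, then PLUS */
}
#endif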
16917 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
16918 then replicate the value for all elements of the vector register. */
16922 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
16929 v = gen_rtvec (4, value, value, value, value);
16930 return gen_rtx_CONST_VECTOR (V4SImode, v);
16934 v = gen_rtvec (2, value, value);
16935 return gen_rtx_CONST_VECTOR (V2DImode, v);
16939 v = gen_rtvec (8, value, value, value, value,
16940 value, value, value, value);
16942 v = gen_rtvec (8, value, CONST0_RTX (SFmode),
16943 CONST0_RTX (SFmode), CONST0_RTX (SFmode),
16944 CONST0_RTX (SFmode), CONST0_RTX (SFmode),
16945 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
16946 return gen_rtx_CONST_VECTOR (V8SFmode, v);
16950 v = gen_rtvec (4, value, value, value, value);
16952 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
16953 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
16954 return gen_rtx_CONST_VECTOR (V4SFmode, v);
16958 v = gen_rtvec (4, value, value, value, value);
16960 v = gen_rtvec (4, value, CONST0_RTX (DFmode),
16961 CONST0_RTX (DFmode), CONST0_RTX (DFmode));
16962 return gen_rtx_CONST_VECTOR (V4DFmode, v);
16966 v = gen_rtvec (2, value, value);
16968 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
16969 return gen_rtx_CONST_VECTOR (V2DFmode, v);
16972 gcc_unreachable ();
16976 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
16977 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
16978 for an SSE register. If VECT is true, then replicate the mask for
16979 all elements of the vector register. If INVERT is true, then create
16980 a mask excluding the sign bit. */
16983 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
16985 enum machine_mode vec_mode, imode;
16986 HOST_WIDE_INT hi, lo;
16991 /* Find the sign bit, sign extended to 2*HWI. */
16998 mode = GET_MODE_INNER (mode);
17000 lo = 0x80000000, hi = lo < 0;
17007 mode = GET_MODE_INNER (mode);
17009 if (HOST_BITS_PER_WIDE_INT >= 64)
17010 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
17012 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
17017 vec_mode = VOIDmode;
17018 if (HOST_BITS_PER_WIDE_INT >= 64)
17021 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
17028 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
17032 lo = ~lo, hi = ~hi;
17038 mask = immed_double_const (lo, hi, imode);
17040 vec = gen_rtvec (2, v, mask);
17041 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
17042 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
17049 gcc_unreachable ();
17053 lo = ~lo, hi = ~hi;
17055 /* Force this value into the low part of a fp vector constant. */
17056 mask = immed_double_const (lo, hi, imode);
17057 mask = gen_lowpart (mode, mask);
17059 if (vec_mode == VOIDmode)
17060 return force_reg (mode, mask);
17062 v = ix86_build_const_vector (vec_mode, vect, mask);
17063 return force_reg (vec_mode, v);
17066 /* Generate code for floating point ABS or NEG. */
17069 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
17072 rtx mask, set, dst, src;
17073 bool use_sse = false;
17074 bool vector_mode = VECTOR_MODE_P (mode);
17075 enum machine_mode vmode = mode;
17079 else if (mode == TFmode)
17081 else if (TARGET_SSE_MATH)
17083 use_sse = SSE_FLOAT_MODE_P (mode);
17084 if (mode == SFmode)
17086 else if (mode == DFmode)
17090 /* NEG and ABS performed with SSE use bitwise mask operations.
17091 Create the appropriate mask now. */
17093 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
17100 set = gen_rtx_fmt_e (code, mode, src);
17101 set = gen_rtx_SET (VOIDmode, dst, set);
17108 use = gen_rtx_USE (VOIDmode, mask);
17110 par = gen_rtvec (2, set, use);
17113 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17114 par = gen_rtvec (3, set, use, clob);
17116 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
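/* Illustrative sketch (not part of GCC): the bitwise mask operations
   the expander sets up, shown on a scalar float.  NEG is XOR with the
   sign-bit mask; ABS is AND with the inverted mask (INVERT == true in
   ix86_build_signbit_mask).  */
#if 0
#include <stdint.h>
#include <string.h>

static float
sse_neg_model (float x)
{
  uint32_t bits;
  memcpy (&bits, &x, sizeof bits);
  bits ^= 0x80000000u;		/* flip the sign bit */
  memcpy (&x, &bits, sizeof x);
  return x;
}

static float
sse_abs_model (float x)
{
  uint32_t bits;
  memcpy (&bits, &x, sizeof bits);
  bits &= ~0x80000000u;		/* clear the sign bit */
  memcpy (&x, &bits, sizeof x);
  return x;
}
#endif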
17122 /* Expand a copysign operation. Special case operand 0 being a constant. */
17125 ix86_expand_copysign (rtx operands[])
17127 enum machine_mode mode, vmode;
17128 rtx dest, op0, op1, mask, nmask;
17130 dest = operands[0];
17134 mode = GET_MODE (dest);
17136 if (mode == SFmode)
17138 else if (mode == DFmode)
17143 if (GET_CODE (op0) == CONST_DOUBLE)
17145 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
17147 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
17148 op0 = simplify_unary_operation (ABS, mode, op0, mode);
17150 if (mode == SFmode || mode == DFmode)
17152 if (op0 == CONST0_RTX (mode))
17153 op0 = CONST0_RTX (vmode);
17156 rtx v = ix86_build_const_vector (vmode, false, op0);
17158 op0 = force_reg (vmode, v);
17161 else if (op0 != CONST0_RTX (mode))
17162 op0 = force_reg (mode, op0);
17164 mask = ix86_build_signbit_mask (vmode, 0, 0);
17166 if (mode == SFmode)
17167 copysign_insn = gen_copysignsf3_const;
17168 else if (mode == DFmode)
17169 copysign_insn = gen_copysigndf3_const;
17171 copysign_insn = gen_copysigntf3_const;
17173 emit_insn (copysign_insn (dest, op0, op1, mask));
17177 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
17179 nmask = ix86_build_signbit_mask (vmode, 0, 1);
17180 mask = ix86_build_signbit_mask (vmode, 0, 0);
17182 if (mode == SFmode)
17183 copysign_insn = gen_copysignsf3_var;
17184 else if (mode == DFmode)
17185 copysign_insn = gen_copysigndf3_var;
17187 copysign_insn = gen_copysigntf3_var;
17189 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
17193 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
17194 be a constant, and so has already been expanded into a vector constant. */
17197 ix86_split_copysign_const (rtx operands[])
17199 enum machine_mode mode, vmode;
17200 rtx dest, op0, mask, x;
17202 dest = operands[0];
17204 mask = operands[3];
17206 mode = GET_MODE (dest);
17207 vmode = GET_MODE (mask);
17209 dest = simplify_gen_subreg (vmode, dest, mode, 0);
17210 x = gen_rtx_AND (vmode, dest, mask);
17211 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17213 if (op0 != CONST0_RTX (vmode))
17215 x = gen_rtx_IOR (vmode, dest, op0);
17216 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17220 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
17221 so we have to do two masks. */
17224 ix86_split_copysign_var (rtx operands[])
17226 enum machine_mode mode, vmode;
17227 rtx dest, scratch, op0, op1, mask, nmask, x;
17229 dest = operands[0];
17230 scratch = operands[1];
17233 nmask = operands[4];
17234 mask = operands[5];
17236 mode = GET_MODE (dest);
17237 vmode = GET_MODE (mask);
17239 if (rtx_equal_p (op0, op1))
17241 /* Shouldn't happen often (it's useless, obviously), but when it does
17242 we'd generate incorrect code if we continue below. */
17243 emit_move_insn (dest, op0);
17247 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
17249 gcc_assert (REGNO (op1) == REGNO (scratch));
17251 x = gen_rtx_AND (vmode, scratch, mask);
17252 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
17255 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
17256 x = gen_rtx_NOT (vmode, dest);
17257 x = gen_rtx_AND (vmode, x, op0);
17258 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17262 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
17264 x = gen_rtx_AND (vmode, scratch, mask);
17266 else /* alternative 2,4 */
17268 gcc_assert (REGNO (mask) == REGNO (scratch));
17269 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
17270 x = gen_rtx_AND (vmode, scratch, op1);
17272 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
17274 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
17276 dest = simplify_gen_subreg (vmode, op0, mode, 0);
17277 x = gen_rtx_AND (vmode, dest, nmask);
17279 else /* alternative 3,4 */
17281 gcc_assert (REGNO (nmask) == REGNO (dest));
17283 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
17284 x = gen_rtx_AND (vmode, dest, op0);
17286 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17289 x = gen_rtx_IOR (vmode, dest, scratch);
17290 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
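/* Illustrative sketch (not part of GCC): the two-mask variant on a
   scalar double.  AND the magnitude with the inverted mask (nmask),
   AND the sign source with the mask, then IOR the pieces together.  */
#if 0
#include <stdint.h>
#include <string.h>

static double
copysign_model (double mag, double sgn)
{
  const uint64_t mask = 0x8000000000000000ull;	/* sign bit only */
  uint64_t m, s;

  memcpy (&m, &mag, sizeof m);
  memcpy (&s, &sgn, sizeof s);
  m = (m & ~mask) | (s & mask);
  memcpy (&mag, &m, sizeof mag);
  return mag;
}
#endif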
17293 /* Return TRUE or FALSE depending on whether the first SET in INSN
17294 has source and destination with matching CC modes, and whether the
17295 CC mode is at least as constrained as REQ_MODE. */
17298 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
17301 enum machine_mode set_mode;
17303 set = PATTERN (insn);
17304 if (GET_CODE (set) == PARALLEL)
17305 set = XVECEXP (set, 0, 0);
17306 gcc_assert (GET_CODE (set) == SET);
17307 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
17309 set_mode = GET_MODE (SET_DEST (set));
17313 if (req_mode != CCNOmode
17314 && (req_mode != CCmode
17315 || XEXP (SET_SRC (set), 1) != const0_rtx))
17319 if (req_mode == CCGCmode)
17323 if (req_mode == CCGOCmode || req_mode == CCNOmode)
17327 if (req_mode == CCZmode)
17337 if (set_mode != req_mode)
17342 gcc_unreachable ();
17345 return GET_MODE (SET_SRC (set)) == set_mode;
17348 /* Generate insn patterns to do an integer compare of OPERANDS. */
17351 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
17353 enum machine_mode cmpmode;
17356 cmpmode = SELECT_CC_MODE (code, op0, op1);
17357 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
17359 /* This is very simple, but making the interface the same as in the
17360 FP case makes the rest of the code easier. */
17361 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
17362 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
17364 /* Return the test that should be put into the flags user, i.e.
17365 the bcc, scc, or cmov instruction. */
17366 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
17369 /* Figure out whether to use ordered or unordered fp comparisons.
17370 Return the appropriate mode to use. */
17373 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
17375 /* ??? In order to make all comparisons reversible, we do all comparisons
17376 non-trapping when compiling for IEEE. Once gcc is able to distinguish
17377 all forms of trapping and nontrapping comparisons, we can make inequality
17378 comparisons trapping again, since it results in better code when using
17379 FCOM based compares. */
17380 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
17384 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
17386 enum machine_mode mode = GET_MODE (op0);
17388 if (SCALAR_FLOAT_MODE_P (mode))
17390 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
17391 return ix86_fp_compare_mode (code);
17396 /* Only zero flag is needed. */
17397 case EQ: /* ZF=0 */
17398 case NE: /* ZF!=0 */
17400 /* Codes needing carry flag. */
17401 case GEU: /* CF=0 */
17402 case LTU: /* CF=1 */
17403 /* Detect overflow checks. They need just the carry flag. */
17404 if (GET_CODE (op0) == PLUS
17405 && rtx_equal_p (op1, XEXP (op0, 0)))
17409 case GTU: /* CF=0 & ZF=0 */
17410 case LEU: /* CF=1 | ZF=1 */
17411 /* Detect overflow checks. They need just the carry flag. */
17412 if (GET_CODE (op0) == MINUS
17413 && rtx_equal_p (op1, XEXP (op0, 0)))
17417 /* Codes possibly doable only with sign flag when
17418 comparing against zero. */
17419 case GE: /* SF=OF or SF=0 */
17420 case LT: /* SF<>OF or SF=1 */
17421 if (op1 == const0_rtx)
17424 /* For other cases the carry flag is not required. */
17426 /* Codes doable only with the sign flag when comparing
17427 against zero, but we lack a jump instruction for it,
17428 so we need to use relational tests against overflow,
17429 which thus needs to be zero. */
17430 case GT: /* ZF=0 & SF=OF */
17431 case LE: /* ZF=1 | SF<>OF */
17432 if (op1 == const0_rtx)
17436 /* The strcmp pattern does (use flags), and combine may ask us for a proper mode. */
17441 gcc_unreachable ();
17445 /* Return the fixed registers used for condition codes. */
17448 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
17455 /* If two condition code modes are compatible, return a condition code
17456 mode which is compatible with both. Otherwise, return
17459 static enum machine_mode
17460 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
17465 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
17468 if ((m1 == CCGCmode && m2 == CCGOCmode)
17469 || (m1 == CCGOCmode && m2 == CCGCmode))
17475 gcc_unreachable ();
17505 /* These are only compatible with themselves, which we already checked above. */
17512 /* Return a comparison we can do that is equivalent to
17513 swap_condition (code), apart possibly from orderedness.
17514 But never change orderedness if TARGET_IEEE_FP, returning
17515 UNKNOWN in that case if necessary. */
17517 static enum rtx_code
17518 ix86_fp_swap_condition (enum rtx_code code)
17522 case GT: /* GTU - CF=0 & ZF=0 */
17523 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
17524 case GE: /* GEU - CF=0 */
17525 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
17526 case UNLT: /* LTU - CF=1 */
17527 return TARGET_IEEE_FP ? UNKNOWN : GT;
17528 case UNLE: /* LEU - CF=1 | ZF=1 */
17529 return TARGET_IEEE_FP ? UNKNOWN : GE;
17531 return swap_condition (code);
17535 /* Return the cost of comparison CODE using the best strategy for performance.
17536 All following functions use the number of instructions as the cost metric.
17537 In the future this should be tweaked to compute bytes for optimize_size and
17538 take into account the performance of various instructions on various CPUs. */
17541 ix86_fp_comparison_cost (enum rtx_code code)
17545 /* The cost of code using bit-twiddling on %ah. */
17562 arith_cost = TARGET_IEEE_FP ? 5 : 4;
17566 arith_cost = TARGET_IEEE_FP ? 6 : 4;
17569 gcc_unreachable ();
17572 switch (ix86_fp_comparison_strategy (code))
17574 case IX86_FPCMP_COMI:
17575 return arith_cost > 4 ? 3 : 2;
17576 case IX86_FPCMP_SAHF:
17577 return arith_cost > 4 ? 4 : 3;
17583 /* Return the strategy to use for floating-point comparisons. We assume that
17584 fcomi is always preferable where available, since that is also true when
17585 looking at size (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
17587 enum ix86_fpcmp_strategy
17588 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
17590 /* Do fcomi/sahf based test when profitable. */
17593 return IX86_FPCMP_COMI;
17595 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_function_for_size_p (cfun)))
17596 return IX86_FPCMP_SAHF;
17598 return IX86_FPCMP_ARITH;
17601 /* Swap, force into registers, or otherwise massage the two operands
17602 to a fp comparison. The operands are updated in place; the new
17603 comparison code is returned. */
17605 static enum rtx_code
17606 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
17608 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
17609 rtx op0 = *pop0, op1 = *pop1;
17610 enum machine_mode op_mode = GET_MODE (op0);
17611 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
17613 /* All of the unordered compare instructions only work on registers.
17614 The same is true of the fcomi compare instructions. The XFmode
17615 compare instructions require registers except when comparing
17616 against zero or when converting operand 1 from fixed point to floating point. */
17620 && (fpcmp_mode == CCFPUmode
17621 || (op_mode == XFmode
17622 && ! (standard_80387_constant_p (op0) == 1
17623 || standard_80387_constant_p (op1) == 1)
17624 && GET_CODE (op1) != FLOAT)
17625 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
17627 op0 = force_reg (op_mode, op0);
17628 op1 = force_reg (op_mode, op1);
17632 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
17633 things around if they appear profitable, otherwise force op0
17634 into a register. */
17636 if (standard_80387_constant_p (op0) == 0
17638 && ! (standard_80387_constant_p (op1) == 0
17641 enum rtx_code new_code = ix86_fp_swap_condition (code);
17642 if (new_code != UNKNOWN)
17645 tmp = op0, op0 = op1, op1 = tmp;
17651 op0 = force_reg (op_mode, op0);
17653 if (CONSTANT_P (op1))
17655 int tmp = standard_80387_constant_p (op1);
17657 op1 = validize_mem (force_const_mem (op_mode, op1));
17661 op1 = force_reg (op_mode, op1);
17664 op1 = force_reg (op_mode, op1);
17668 /* Try to rearrange the comparison to make it cheaper. */
17669 if (ix86_fp_comparison_cost (code)
17670 > ix86_fp_comparison_cost (swap_condition (code))
17671 && (REG_P (op1) || can_create_pseudo_p ()))
17674 tmp = op0, op0 = op1, op1 = tmp;
17675 code = swap_condition (code);
17677 op0 = force_reg (op_mode, op0);
17685 /* Convert the comparison codes we use to represent an FP comparison to the
17686 integer code that will result in a proper branch. Return UNKNOWN if no such code is available. */
17690 ix86_fp_compare_code_to_integer (enum rtx_code code)
17719 /* Generate insn patterns to do a floating point compare of OPERANDS. */
17722 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
17724 enum machine_mode fpcmp_mode, intcmp_mode;
17727 fpcmp_mode = ix86_fp_compare_mode (code);
17728 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
17730 /* Do fcomi/sahf based test when profitable. */
17731 switch (ix86_fp_comparison_strategy (code))
17733 case IX86_FPCMP_COMI:
17734 intcmp_mode = fpcmp_mode;
17735 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
17736 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
17741 case IX86_FPCMP_SAHF:
17742 intcmp_mode = fpcmp_mode;
17743 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
17744 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
17748 scratch = gen_reg_rtx (HImode);
17749 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
17750 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
17753 case IX86_FPCMP_ARITH:
17754 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
17755 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
17756 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
17758 scratch = gen_reg_rtx (HImode);
17759 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
17761 /* In the unordered case, we have to check C2 for NaNs, which
17762 doesn't happen to work out to anything nice combination-wise.
17763 So do some bit twiddling on the value we've got in AH to come
17764 up with an appropriate set of condition codes. */
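   /* After fnstsw, AH holds FPSW bits 8-15, so within AH the x87 condition
      bits map to C0 -> 0x01, C2 -> 0x04, C3 -> 0x40.  The masks below are
      combinations of these, e.g. 0x45 tests C3|C2|C0.  */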
17766 intcmp_mode = CCNOmode;
17771 if (code == GT || !TARGET_IEEE_FP)
17773 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
17778 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17779 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
17780 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
17781 intcmp_mode = CCmode;
17787 if (code == LT && TARGET_IEEE_FP)
17789 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17790 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
17791 intcmp_mode = CCmode;
17796 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
17802 if (code == GE || !TARGET_IEEE_FP)
17804 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
17809 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17810 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
17816 if (code == LE && TARGET_IEEE_FP)
17818 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17819 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
17820 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
17821 intcmp_mode = CCmode;
17826 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
17832 if (code == EQ && TARGET_IEEE_FP)
17834 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17835 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
17836 intcmp_mode = CCmode;
17841 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
17847 if (code == NE && TARGET_IEEE_FP)
17849 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17850 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
17856 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
17862 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
17866 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
17871 gcc_unreachable ();
17879 /* Return the test that should be put into the flags user, i.e.
17880 the bcc, scc, or cmov instruction. */
17881 return gen_rtx_fmt_ee (code, VOIDmode,
17882 gen_rtx_REG (intcmp_mode, FLAGS_REG),
17887 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
17891 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
17892 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
17894 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
17896 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
17897 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
17900 ret = ix86_expand_int_compare (code, op0, op1);
17906 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
17908 enum machine_mode mode = GET_MODE (op0);
17920 tmp = ix86_expand_compare (code, op0, op1);
17921 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
17922 gen_rtx_LABEL_REF (VOIDmode, label),
17924 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
17931 /* Expand DImode branch into multiple compare+branch. */
17933 rtx lo[2], hi[2], label2;
17934 enum rtx_code code1, code2, code3;
17935 enum machine_mode submode;
17937 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
17939 tmp = op0, op0 = op1, op1 = tmp;
17940 code = swap_condition (code);
17943 split_double_mode (mode, &op0, 1, lo+0, hi+0);
17944 split_double_mode (mode, &op1, 1, lo+1, hi+1);
17946 submode = mode == DImode ? SImode : DImode;
17948 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
17949 avoid two branches. This costs one extra insn, so disable when
17950 optimizing for size. */
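      /* I.e. a double-word "a == b" becomes
	   t = (hi(a) ^ hi(b)) | (lo(a) ^ lo(b));
	 followed by a single branch on t == 0.  */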
17952 if ((code == EQ || code == NE)
17953 && (!optimize_insn_for_size_p ()
17954 || hi[1] == const0_rtx || lo[1] == const0_rtx))
17959 if (hi[1] != const0_rtx)
17960 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
17961 NULL_RTX, 0, OPTAB_WIDEN);
17964 if (lo[1] != const0_rtx)
17965 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
17966 NULL_RTX, 0, OPTAB_WIDEN);
17968 tmp = expand_binop (submode, ior_optab, xor1, xor0,
17969 NULL_RTX, 0, OPTAB_WIDEN);
17971 ix86_expand_branch (code, tmp, const0_rtx, label);
17975 /* Otherwise, if we are doing a less-than or greater-or-equal-than
17976 comparison, op1 is a constant, and the low word is zero, then we can
17977 just examine the high word. Similarly for low word -1 and
17978 less-or-equal-than or greater-than. */
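      /* E.g. "a < C" with lo(C) == 0 holds iff "hi(a) < hi(C)", and
	 "a <= C" with lo(C) == -1 holds iff "hi(a) <= hi(C)".  */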
17980 if (CONST_INT_P (hi[1]))
17983 case LT: case LTU: case GE: case GEU:
17984 if (lo[1] == const0_rtx)
17986 ix86_expand_branch (code, hi[0], hi[1], label);
17990 case LE: case LEU: case GT: case GTU:
17991 if (lo[1] == constm1_rtx)
17993 ix86_expand_branch (code, hi[0], hi[1], label);
18001 /* Otherwise, we need two or three jumps. */
18003 label2 = gen_label_rtx ();
18006 code2 = swap_condition (code);
18007 code3 = unsigned_condition (code);
18011 case LT: case GT: case LTU: case GTU:
18014 case LE: code1 = LT; code2 = GT; break;
18015 case GE: code1 = GT; code2 = LT; break;
18016 case LEU: code1 = LTU; code2 = GTU; break;
18017 case GEU: code1 = GTU; code2 = LTU; break;
18019 case EQ: code1 = UNKNOWN; code2 = NE; break;
18020 case NE: code2 = UNKNOWN; break;
18023 gcc_unreachable ();
18028 * if (hi(a) < hi(b)) goto true;
18029 * if (hi(a) > hi(b)) goto false;
18030 * if (lo(a) < lo(b)) goto true;
18034 if (code1 != UNKNOWN)
18035 ix86_expand_branch (code1, hi[0], hi[1], label);
18036 if (code2 != UNKNOWN)
18037 ix86_expand_branch (code2, hi[0], hi[1], label2);
18039 ix86_expand_branch (code3, lo[0], lo[1], label);
18041 if (code2 != UNKNOWN)
18042 emit_label (label2);
18047 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
18052 /* Split branch based on floating point condition. */
18054 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
18055 rtx target1, rtx target2, rtx tmp, rtx pushed)
18060 if (target2 != pc_rtx)
18063 code = reverse_condition_maybe_unordered (code);
18068 condition = ix86_expand_fp_compare (code, op1, op2,
18071 /* Remove pushed operand from stack. */
18073 ix86_free_from_memory (GET_MODE (pushed));
18075 i = emit_jump_insn (gen_rtx_SET
18077 gen_rtx_IF_THEN_ELSE (VOIDmode,
18078 condition, target1, target2)));
18079 if (split_branch_probability >= 0)
18080 add_reg_note (i, REG_BR_PROB, GEN_INT (split_branch_probability));
18084 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
18088 gcc_assert (GET_MODE (dest) == QImode);
18090 ret = ix86_expand_compare (code, op0, op1);
18091 PUT_MODE (ret, QImode);
18092 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
18095 /* Expand a comparison setting or clearing the carry flag. Return true
18096 when successful and set *POP to the operation. */
18098 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
18100 enum machine_mode mode =
18101 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
18103 /* Do not handle double-mode compares that go through the special path. */
18104 if (mode == (TARGET_64BIT ? TImode : DImode))
18107 if (SCALAR_FLOAT_MODE_P (mode))
18109 rtx compare_op, compare_seq;
18111 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
18113 /* Shortcut: the following common codes never translate
18114 into carry-flag compares. */
18115 if (code == EQ || code == NE || code == UNEQ || code == LTGT
18116 || code == ORDERED || code == UNORDERED)
18119 /* These comparisons require the zero flag; swap operands so they won't need it. */
18120 if ((code == GT || code == UNLE || code == LE || code == UNGT)
18121 && !TARGET_IEEE_FP)
18126 code = swap_condition (code);
18129 /* Try to expand the comparison and verify that we end up with
18130 a carry-flag-based comparison. This fails only when we decide
18131 to expand the comparison using arithmetic, which is not a
18132 common scenario. */
18134 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
18135 compare_seq = get_insns ();
18138 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
18139 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
18140 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
18142 code = GET_CODE (compare_op);
18144 if (code != LTU && code != GEU)
18147 emit_insn (compare_seq);
18152 if (!INTEGRAL_MODE_P (mode))
18161 /* Convert a==0 into (unsigned)a<1. */
18164 if (op1 != const0_rtx)
18167 code = (code == EQ ? LTU : GEU);
18170 /* Convert a>b into b<a or a>=b-1. */
18173 if (CONST_INT_P (op1))
18175 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
18176 /* Bail out on overflow. We can still swap operands, but that
18177 would force loading of the constant into a register. */
18178 if (op1 == const0_rtx
18179 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
18181 code = (code == GTU ? GEU : LTU);
18188 code = (code == GTU ? LTU : GEU);
18192 /* Convert a>=0 into (unsigned)a<0x80000000. */
18195 if (mode == DImode || op1 != const0_rtx)
18197 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
18198 code = (code == LT ? GEU : LTU);
18202 if (mode == DImode || op1 != constm1_rtx)
18204 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
18205 code = (code == LE ? GEU : LTU);
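  /* Only LTU ("below", CF=1) and GEU ("above or equal", CF=0) read just the
     carry flag, so all of the conversions above funnel into one of them.  */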
18211 /* Swapping operands may cause a constant to appear as the first operand. */
18212 if (!nonimmediate_operand (op0, VOIDmode))
18214 if (!can_create_pseudo_p ())
18216 op0 = force_reg (mode, op0);
18218 *pop = ix86_expand_compare (code, op0, op1);
18219 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
18224 ix86_expand_int_movcc (rtx operands[])
18226 enum rtx_code code = GET_CODE (operands[1]), compare_code;
18227 rtx compare_seq, compare_op;
18228 enum machine_mode mode = GET_MODE (operands[0]);
18229 bool sign_bit_compare_p = false;
18230 rtx op0 = XEXP (operands[1], 0);
18231 rtx op1 = XEXP (operands[1], 1);
18234 compare_op = ix86_expand_compare (code, op0, op1);
18235 compare_seq = get_insns ();
18238 compare_code = GET_CODE (compare_op);
18240 if ((op1 == const0_rtx && (code == GE || code == LT))
18241 || (op1 == constm1_rtx && (code == GT || code == LE)))
18242 sign_bit_compare_p = true;
18244 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
18245 HImode insns, we'd be swallowed in word prefix ops. */
18247 if ((mode != HImode || TARGET_FAST_PREFIX)
18248 && (mode != (TARGET_64BIT ? TImode : DImode))
18249 && CONST_INT_P (operands[2])
18250 && CONST_INT_P (operands[3]))
18252 rtx out = operands[0];
18253 HOST_WIDE_INT ct = INTVAL (operands[2]);
18254 HOST_WIDE_INT cf = INTVAL (operands[3]);
18255 HOST_WIDE_INT diff;
18258 /* Sign bit compares are better done using shifts than we do by using sbb. */
18260 if (sign_bit_compare_p
18261 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
18263 /* Detect overlap between destination and compare sources. */
18266 if (!sign_bit_compare_p)
18269 bool fpcmp = false;
18271 compare_code = GET_CODE (compare_op);
18273 flags = XEXP (compare_op, 0);
18275 if (GET_MODE (flags) == CCFPmode
18276 || GET_MODE (flags) == CCFPUmode)
18280 = ix86_fp_compare_code_to_integer (compare_code);
18283 /* To simplify the rest of the code, restrict to the GEU case. */
18284 if (compare_code == LTU)
18286 HOST_WIDE_INT tmp = ct;
18289 compare_code = reverse_condition (compare_code);
18290 code = reverse_condition (code);
18295 PUT_CODE (compare_op,
18296 reverse_condition_maybe_unordered
18297 (GET_CODE (compare_op)));
18299 PUT_CODE (compare_op,
18300 reverse_condition (GET_CODE (compare_op)));
18304 if (reg_overlap_mentioned_p (out, op0)
18305 || reg_overlap_mentioned_p (out, op1))
18306 tmp = gen_reg_rtx (mode);
18308 if (mode == DImode)
18309 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
18311 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
18312 flags, compare_op));
18316 if (code == GT || code == GE)
18317 code = reverse_condition (code);
18320 HOST_WIDE_INT tmp = ct;
18325 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
18338 tmp = expand_simple_binop (mode, PLUS,
18340 copy_rtx (tmp), 1, OPTAB_DIRECT);
18351 tmp = expand_simple_binop (mode, IOR,
18353 copy_rtx (tmp), 1, OPTAB_DIRECT);
18355 else if (diff == -1 && ct)
18365 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
18367 tmp = expand_simple_binop (mode, PLUS,
18368 copy_rtx (tmp), GEN_INT (cf),
18369 copy_rtx (tmp), 1, OPTAB_DIRECT);
18377 * andl cf - ct, dest
18387 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
18390 tmp = expand_simple_binop (mode, AND,
18392 gen_int_mode (cf - ct, mode),
18393 copy_rtx (tmp), 1, OPTAB_DIRECT);
18395 tmp = expand_simple_binop (mode, PLUS,
18396 copy_rtx (tmp), GEN_INT (ct),
18397 copy_rtx (tmp), 1, OPTAB_DIRECT);
18400 if (!rtx_equal_p (tmp, out))
18401 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
18408 enum machine_mode cmp_mode = GET_MODE (op0);
18411 tmp = ct, ct = cf, cf = tmp;
18414 if (SCALAR_FLOAT_MODE_P (cmp_mode))
18416 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
18418 /* We may be reversing an unordered compare to a normal compare, which
18419 is not valid in general (we may convert a non-trapping condition
18420 into a trapping one); however, on i386 we currently emit all
18421 comparisons unordered. */
18422 compare_code = reverse_condition_maybe_unordered (compare_code);
18423 code = reverse_condition_maybe_unordered (code);
18427 compare_code = reverse_condition (compare_code);
18428 code = reverse_condition (code);
18432 compare_code = UNKNOWN;
18433 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
18434 && CONST_INT_P (op1))
18436 if (op1 == const0_rtx
18437 && (code == LT || code == GE))
18438 compare_code = code;
18439 else if (op1 == constm1_rtx)
18443 else if (code == GT)
18448 /* Optimize dest = (op0 < 0) ? -1 : cf. */
18449 if (compare_code != UNKNOWN
18450 && GET_MODE (op0) == GET_MODE (out)
18451 && (cf == -1 || ct == -1))
18453 /* If the lea code below could be used, only optimize
18454 if it results in a 2-insn sequence. */
18456 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
18457 || diff == 3 || diff == 5 || diff == 9)
18458 || (compare_code == LT && ct == -1)
18459 || (compare_code == GE && cf == -1))
18462 * notl op1 (if necessary)
18470 code = reverse_condition (code);
18473 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
18475 out = expand_simple_binop (mode, IOR,
18477 out, 1, OPTAB_DIRECT);
18478 if (out != operands[0])
18479 emit_move_insn (operands[0], out);
18486 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
18487 || diff == 3 || diff == 5 || diff == 9)
18488 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
18490 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
18496 * lea cf(dest*(ct-cf)),dest
18500 * This also catches the degenerate setcc-only case.
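	 /* Worked example (hypothetical constants): for ct = 5, cf = 2 we
	    get diff = 3, so
	      dest = setcc(cond);          -- dest is 0 or 1
	      lea 2(dest,dest,2), dest     -- dest = cf + dest*diff = 2 or 5.  */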
18506 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
18509 /* On x86_64 the lea instruction operates on Pmode, so we need
18510 the arithmetic done in the proper mode to match. */
18512 tmp = copy_rtx (out);
18516 out1 = copy_rtx (out);
18517 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
18521 tmp = gen_rtx_PLUS (mode, tmp, out1);
18527 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
18530 if (!rtx_equal_p (tmp, out))
18533 out = force_operand (tmp, copy_rtx (out));
18535 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
18537 if (!rtx_equal_p (out, operands[0]))
18538 emit_move_insn (operands[0], copy_rtx (out));
18544 * General case: Jumpful:
18545 * xorl dest,dest cmpl op1, op2
18546 * cmpl op1, op2 movl ct, dest
18547 * setcc dest jcc 1f
18548 * decl dest movl cf, dest
18549 * andl (cf-ct),dest 1:
18552 * Size 20. Size 14.
18554 * This is reasonably steep, but branch mispredict costs are
18555 * high on modern cpus, so consider failing only if optimizing for space. */
18559 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
18560 && BRANCH_COST (optimize_insn_for_speed_p (),
18565 enum machine_mode cmp_mode = GET_MODE (op0);
18570 if (SCALAR_FLOAT_MODE_P (cmp_mode))
18572 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
18574 /* We may be reversing an unordered compare to a normal compare,
18575 which is not valid in general (we may convert a non-trapping
18576 condition into a trapping one); however, on i386 we currently
18577 emit all comparisons unordered. */
18578 code = reverse_condition_maybe_unordered (code);
18582 code = reverse_condition (code);
18583 if (compare_code != UNKNOWN)
18584 compare_code = reverse_condition (compare_code);
18588 if (compare_code != UNKNOWN)
18590 /* notl op1 (if needed)
18595 For x < 0 (resp. x <= -1) there will be no notl,
18596 so if possible swap the constants to get rid of the complement.
18598 True/false will be -1/0 while code below (store flag
18599 followed by decrement) is 0/-1, so the constants need
18600 to be exchanged once more. */
18602 if (compare_code == GE || !cf)
18604 code = reverse_condition (code);
18609 HOST_WIDE_INT tmp = cf;
18614 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
18618 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
18620 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
18622 copy_rtx (out), 1, OPTAB_DIRECT);
18625 out = expand_simple_binop (mode, AND, copy_rtx (out),
18626 gen_int_mode (cf - ct, mode),
18627 copy_rtx (out), 1, OPTAB_DIRECT);
18629 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
18630 copy_rtx (out), 1, OPTAB_DIRECT);
18631 if (!rtx_equal_p (out, operands[0]))
18632 emit_move_insn (operands[0], copy_rtx (out));
18638 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
18640 /* Try a few things more with specific constants and a variable. */
18643 rtx var, orig_out, out, tmp;
18645 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
18648 /* If one of the two operands is an interesting constant, load a
18649 constant with the above and mask it in with a logical operation. */
18651 if (CONST_INT_P (operands[2]))
18654 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
18655 operands[3] = constm1_rtx, op = and_optab;
18656 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
18657 operands[3] = const0_rtx, op = ior_optab;
18661 else if (CONST_INT_P (operands[3]))
18664 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
18665 operands[2] = constm1_rtx, op = and_optab;
18666 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
18667 operands[2] = const0_rtx, op = ior_optab;
18674 orig_out = operands[0];
18675 tmp = gen_reg_rtx (mode);
18678 /* Recurse to get the constant loaded. */
18679 if (ix86_expand_int_movcc (operands) == 0)
18682 /* Mask in the interesting variable. */
18683 out = expand_binop (mode, op, var, tmp, orig_out, 0,
18685 if (!rtx_equal_p (out, orig_out))
18686 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
18692 * For comparison with above,
18702 if (! nonimmediate_operand (operands[2], mode))
18703 operands[2] = force_reg (mode, operands[2]);
18704 if (! nonimmediate_operand (operands[3], mode))
18705 operands[3] = force_reg (mode, operands[3]);
18707 if (! register_operand (operands[2], VOIDmode)
18709 || ! register_operand (operands[3], VOIDmode)))
18710 operands[2] = force_reg (mode, operands[2]);
18713 && ! register_operand (operands[3], VOIDmode))
18714 operands[3] = force_reg (mode, operands[3]);
18716 emit_insn (compare_seq);
18717 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
18718 gen_rtx_IF_THEN_ELSE (mode,
18719 compare_op, operands[2],
18724 /* Swap, force into registers, or otherwise massage the two operands
18725 to an sse comparison with a mask result. Thus we differ a bit from
18726 ix86_prepare_fp_compare_args which expects to produce a flags result.
18728 The DEST operand exists to help determine whether to commute commutative
18729 operators. The POP0/POP1 operands are updated in place. The new
18730 comparison code is returned, or UNKNOWN if not implementable. */
18732 static enum rtx_code
18733 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
18734 rtx *pop0, rtx *pop1)
18742 /* We have no LTGT as an operator. We could implement it with
18743 NE & ORDERED, but this requires an extra temporary. It's
18744 not clear that it's worth it. */
18751 /* These are supported directly. */
18758 /* For commutative operators, try to canonicalize the destination
18759 operand to be first in the comparison - this helps reload to
18760 avoid extra moves. */
18761 if (!dest || !rtx_equal_p (dest, *pop1))
18769 /* These are not supported directly. Swap the comparison operands
18770 to transform into something that is supported. */
18774 code = swap_condition (code);
18778 gcc_unreachable ();
18784 /* Detect conditional moves that exactly match min/max operational
18785 semantics. Note that this is IEEE safe, as long as we don't
18786 interchange the operands.
18788 Returns FALSE if this conditional move doesn't match a MIN/MAX,
18789 and TRUE if the operation is successful and instructions are emitted. */
18792 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
18793 rtx cmp_op1, rtx if_true, rtx if_false)
18795 enum machine_mode mode;
18801 else if (code == UNGE)
18804 if_true = if_false;
18810 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
18812 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
18817 mode = GET_MODE (dest);
18819 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
18820 but MODE may be a vector mode and thus not appropriate. */
18821 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
18823 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
18826 if_true = force_reg (mode, if_true);
18827 v = gen_rtvec (2, if_true, if_false);
18828 tmp = gen_rtx_UNSPEC (mode, v, u);
18832 code = is_min ? SMIN : SMAX;
18833 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
18836 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
18840 /* Expand an sse vector comparison. Return the register with the result. */
18843 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
18844 rtx op_true, rtx op_false)
18846 enum machine_mode mode = GET_MODE (dest);
18849 cmp_op0 = force_reg (mode, cmp_op0);
18850 if (!nonimmediate_operand (cmp_op1, mode))
18851 cmp_op1 = force_reg (mode, cmp_op1);
18854 || reg_overlap_mentioned_p (dest, op_true)
18855 || reg_overlap_mentioned_p (dest, op_false))
18856 dest = gen_reg_rtx (mode);
18858 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
18859 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18864 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
18865 operations. This is used for both scalar and vector conditional moves. */
18868 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
18870 enum machine_mode mode = GET_MODE (dest);
18873 if (op_false == CONST0_RTX (mode))
18875 op_true = force_reg (mode, op_true);
18876 x = gen_rtx_AND (mode, cmp, op_true);
18877 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18879 else if (op_true == CONST0_RTX (mode))
18881 op_false = force_reg (mode, op_false);
18882 x = gen_rtx_NOT (mode, cmp);
18883 x = gen_rtx_AND (mode, x, op_false);
18884 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18886 else if (TARGET_XOP)
18888 rtx pcmov = gen_rtx_SET (mode, dest,
18889 gen_rtx_IF_THEN_ELSE (mode, cmp,
18896 op_true = force_reg (mode, op_true);
18897 op_false = force_reg (mode, op_false);
18899 t2 = gen_reg_rtx (mode);
18901 t3 = gen_reg_rtx (mode);
18905 x = gen_rtx_AND (mode, op_true, cmp);
18906 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
18908 x = gen_rtx_NOT (mode, cmp);
18909 x = gen_rtx_AND (mode, x, op_false);
18910 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
18912 x = gen_rtx_IOR (mode, t3, t2);
18913 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18917 /* Expand a floating-point conditional move. Return true if successful. */
18920 ix86_expand_fp_movcc (rtx operands[])
18922 enum machine_mode mode = GET_MODE (operands[0]);
18923 enum rtx_code code = GET_CODE (operands[1]);
18924 rtx tmp, compare_op;
18925 rtx op0 = XEXP (operands[1], 0);
18926 rtx op1 = XEXP (operands[1], 1);
18928 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
18930 enum machine_mode cmode;
18932 /* Since we've no cmove for sse registers, don't force bad register
18933 allocation just to gain access to it. Deny movcc when the
18934 comparison mode doesn't match the move mode. */
18935 cmode = GET_MODE (op0);
18936 if (cmode == VOIDmode)
18937 cmode = GET_MODE (op1);
18941 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
18942 if (code == UNKNOWN)
18945 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
18946 operands[2], operands[3]))
18949 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
18950 operands[2], operands[3]);
18951 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
18955 /* The floating point conditional move instructions don't directly
18956 support conditions resulting from a signed integer comparison. */
18958 compare_op = ix86_expand_compare (code, op0, op1);
18959 if (!fcmov_comparison_operator (compare_op, VOIDmode))
18961 tmp = gen_reg_rtx (QImode);
18962 ix86_expand_setcc (tmp, code, op0, op1);
18964 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
18967 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
18968 gen_rtx_IF_THEN_ELSE (mode, compare_op,
18969 operands[2], operands[3])));
18974 /* Expand a floating-point vector conditional move; a vcond operation
18975 rather than a movcc operation. */
18978 ix86_expand_fp_vcond (rtx operands[])
18980 enum rtx_code code = GET_CODE (operands[3]);
18983 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
18984 &operands[4], &operands[5]);
18985 if (code == UNKNOWN)
18988 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
18989 operands[5], operands[1], operands[2]))
18992 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
18993 operands[1], operands[2]);
18994 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
18998 /* Expand a signed/unsigned integral vector conditional move. */
19001 ix86_expand_int_vcond (rtx operands[])
19003 enum machine_mode mode = GET_MODE (operands[0]);
19004 enum rtx_code code = GET_CODE (operands[3]);
19005 bool negate = false;
19008 cop0 = operands[4];
19009 cop1 = operands[5];
19011 /* XOP supports all of the comparisons on all vector int types. */
19014 /* Canonicalize the comparison to EQ, GT, GTU. */
19025 code = reverse_condition (code);
19031 code = reverse_condition (code);
19037 code = swap_condition (code);
19038 x = cop0, cop0 = cop1, cop1 = x;
19042 gcc_unreachable ();
19045 /* Only SSE4.1/SSE4.2 supports V2DImode. */
19046 if (mode == V2DImode)
19051 /* SSE4.1 supports EQ. */
19052 if (!TARGET_SSE4_1)
19058 /* SSE4.2 supports GT/GTU. */
19059 if (!TARGET_SSE4_2)
19064 gcc_unreachable ();
19068 /* Unsigned parallel compare is not supported by the hardware.
19069 Play some tricks to turn this into a signed comparison against 0. */
19073 cop0 = force_reg (mode, cop0);
19081 rtx (*gen_sub3) (rtx, rtx, rtx);
19083 /* Subtract (-(INT MAX) - 1) from both operands to make them signed. */
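	  /* Flipping the sign bit turns unsigned order into signed order:
	     x <u y  iff  (x - 0x80..0) <s (y - 0x80..0), and subtracting the
	     sign-bit mask is the same as XORing it in.  */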
19085 mask = ix86_build_signbit_mask (mode, true, false);
19086 gen_sub3 = (mode == V4SImode
19087 ? gen_subv4si3 : gen_subv2di3);
19088 t1 = gen_reg_rtx (mode);
19089 emit_insn (gen_sub3 (t1, cop0, mask));
19091 t2 = gen_reg_rtx (mode);
19092 emit_insn (gen_sub3 (t2, cop1, mask));
19102 /* Perform a parallel unsigned saturating subtraction. */
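	    /* x >u y  iff  the saturating difference (x -us y) is nonzero,
	       so the comparison reduces to a test of that result against
	       zero.  */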
19103 x = gen_reg_rtx (mode);
19104 emit_insn (gen_rtx_SET (VOIDmode, x,
19105 gen_rtx_US_MINUS (mode, cop0, cop1)));
19108 cop1 = CONST0_RTX (mode);
19114 gcc_unreachable ();
19119 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
19120 operands[1+negate], operands[2-negate]);
19122 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
19123 operands[2-negate]);
19127 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
19128 true if we should do zero extension, else sign extension. HIGH_P is
19129 true if we want the N/2 high elements, else the low elements. */
19132 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
19134 enum machine_mode imode = GET_MODE (operands[1]);
19139 rtx (*unpack)(rtx, rtx);
19145 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
19147 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
19151 unpack = gen_sse4_1_zero_extendv4hiv4si2;
19153 unpack = gen_sse4_1_sign_extendv4hiv4si2;
19157 unpack = gen_sse4_1_zero_extendv2siv2di2;
19159 unpack = gen_sse4_1_sign_extendv2siv2di2;
19162 gcc_unreachable ();
19167 /* Shift higher 8 bytes to lower 8 bytes. */
19168 tmp = gen_reg_rtx (imode);
19169 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, tmp),
19170 gen_lowpart (V1TImode, operands[1]),
19176 emit_insn (unpack (operands[0], tmp));
19180 rtx (*unpack)(rtx, rtx, rtx);
19186 unpack = gen_vec_interleave_highv16qi;
19188 unpack = gen_vec_interleave_lowv16qi;
19192 unpack = gen_vec_interleave_highv8hi;
19194 unpack = gen_vec_interleave_lowv8hi;
19198 unpack = gen_vec_interleave_highv4si;
19200 unpack = gen_vec_interleave_lowv4si;
19203 gcc_unreachable ();
19206 dest = gen_lowpart (imode, operands[0]);
19209 tmp = force_reg (imode, CONST0_RTX (imode));
19211 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
19212 operands[1], pc_rtx, pc_rtx);
19214 emit_insn (unpack (dest, operands[1], tmp));
19218 /* Expand conditional increment or decrement using adc/sbb instructions.
19219 The default case using setcc followed by the conditional move can be
19220 done by generic code. */
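/* E.g. "x = y + (a <u b)" becomes a compare leaving CF = (a <u b)
   followed by "adc $0" (x = y + 0 + CF); the decrement flavour uses
   sbb the same way.  */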
19222 ix86_expand_int_addcc (rtx operands[])
19224 enum rtx_code code = GET_CODE (operands[1]);
19226 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
19228 rtx val = const0_rtx;
19229 bool fpcmp = false;
19230 enum machine_mode mode;
19231 rtx op0 = XEXP (operands[1], 0);
19232 rtx op1 = XEXP (operands[1], 1);
19234 if (operands[3] != const1_rtx
19235 && operands[3] != constm1_rtx)
19237 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
19239 code = GET_CODE (compare_op);
19241 flags = XEXP (compare_op, 0);
19243 if (GET_MODE (flags) == CCFPmode
19244 || GET_MODE (flags) == CCFPUmode)
19247 code = ix86_fp_compare_code_to_integer (code);
19254 PUT_CODE (compare_op,
19255 reverse_condition_maybe_unordered
19256 (GET_CODE (compare_op)));
19258 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
19261 mode = GET_MODE (operands[0]);
19263 /* Construct either adc or sbb insn. */
19264 if ((code == LTU) == (operands[3] == constm1_rtx))
19269 insn = gen_subqi3_carry;
19272 insn = gen_subhi3_carry;
19275 insn = gen_subsi3_carry;
19278 insn = gen_subdi3_carry;
19281 gcc_unreachable ();
19289 insn = gen_addqi3_carry;
19292 insn = gen_addhi3_carry;
19295 insn = gen_addsi3_carry;
19298 insn = gen_adddi3_carry;
19301 gcc_unreachable ();
19304 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
19310 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
19311 but works for floating point parameters and non-offsettable memories.
19312 For pushes, it returns just stack offsets; the values will be saved
19313 in the right order. Maximally four parts are generated. */
19316 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
19321 size = mode == XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
19323 size = (GET_MODE_SIZE (mode) + 4) / 8;
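  /* E.g. on 32 bits DImode and DFmode split into 2 SImode parts, XFmode
     into 3 and TFmode into 4; on 64 bits XFmode and TFmode split into
     2 DImode parts.  */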
19325 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
19326 gcc_assert (size >= 2 && size <= 4);
19328 /* Optimize constant pool reference to immediates. This is used by fp
19329 moves, which force all constants to memory to allow combining. */
19330 if (MEM_P (operand) && MEM_READONLY_P (operand))
19332 rtx tmp = maybe_get_pool_constant (operand);
19337 if (MEM_P (operand) && !offsettable_memref_p (operand))
19339 /* The only non-offsettable memories we handle are pushes. */
19340 int ok = push_operand (operand, VOIDmode);
19344 operand = copy_rtx (operand);
19345 PUT_MODE (operand, Pmode);
19346 parts[0] = parts[1] = parts[2] = parts[3] = operand;
19350 if (GET_CODE (operand) == CONST_VECTOR)
19352 enum machine_mode imode = int_mode_for_mode (mode);
19353 /* Caution: if we looked through a constant pool memory above,
19354 the operand may actually have a different mode now. That's
19355 ok, since we want to pun this all the way back to an integer. */
19356 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
19357 gcc_assert (operand != NULL);
19363 if (mode == DImode)
19364 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
19369 if (REG_P (operand))
19371 gcc_assert (reload_completed);
19372 for (i = 0; i < size; i++)
19373 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
19375 else if (offsettable_memref_p (operand))
19377 operand = adjust_address (operand, SImode, 0);
19378 parts[0] = operand;
19379 for (i = 1; i < size; i++)
19380 parts[i] = adjust_address (operand, SImode, 4 * i);
19382 else if (GET_CODE (operand) == CONST_DOUBLE)
19387 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
19391 real_to_target (l, &r, mode);
19392 parts[3] = gen_int_mode (l[3], SImode);
19393 parts[2] = gen_int_mode (l[2], SImode);
19396 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
19397 parts[2] = gen_int_mode (l[2], SImode);
19400 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
19403 gcc_unreachable ();
19405 parts[1] = gen_int_mode (l[1], SImode);
19406 parts[0] = gen_int_mode (l[0], SImode);
19409 gcc_unreachable ();
19414 if (mode == TImode)
19415 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
19416 if (mode == XFmode || mode == TFmode)
19418 enum machine_mode upper_mode = mode == XFmode ? SImode : DImode;
19419 if (REG_P (operand))
19421 gcc_assert (reload_completed);
19422 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
19423 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
19425 else if (offsettable_memref_p (operand))
19427 operand = adjust_address (operand, DImode, 0);
19428 parts[0] = operand;
19429 parts[1] = adjust_address (operand, upper_mode, 8);
19431 else if (GET_CODE (operand) == CONST_DOUBLE)
19436 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
19437 real_to_target (l, &r, mode);
19439 /* Do not use shift by 32 to avoid warning on 32bit systems. */
19440 if (HOST_BITS_PER_WIDE_INT >= 64)
19443 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
19444 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
19447 parts[0] = immed_double_const (l[0], l[1], DImode);
19449 if (upper_mode == SImode)
19450 parts[1] = gen_int_mode (l[2], SImode);
19451 else if (HOST_BITS_PER_WIDE_INT >= 64)
19454 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
19455 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
19458 parts[1] = immed_double_const (l[2], l[3], DImode);
19461 gcc_unreachable ();
19468 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
19469 Return false when normal moves are needed; true when all required
19470 insns have been emitted. Operands 2-4 contain the input values
19471 in the correct order; operands 5-7 contain the output values. */
19474 ix86_split_long_move (rtx operands[])
19479 int collisions = 0;
19480 enum machine_mode mode = GET_MODE (operands[0]);
19481 bool collisionparts[4];
19483 /* The DFmode expanders may ask us to move double.
19484 For 64bit targets this is a single move. By hiding the fact
19485 here we simplify the i386.md splitters. */
19486 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
19488 /* Optimize constant pool reference to immediates. This is used by
19489 fp moves, which force all constants to memory to allow combining. */
19491 if (MEM_P (operands[1])
19492 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
19493 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
19494 operands[1] = get_pool_constant (XEXP (operands[1], 0));
19495 if (push_operand (operands[0], VOIDmode))
19497 operands[0] = copy_rtx (operands[0]);
19498 PUT_MODE (operands[0], Pmode);
19501 operands[0] = gen_lowpart (DImode, operands[0]);
19502 operands[1] = gen_lowpart (DImode, operands[1]);
19503 emit_move_insn (operands[0], operands[1]);
19507 /* The only non-offsettable memory we handle is push. */
19508 if (push_operand (operands[0], VOIDmode))
19511 gcc_assert (!MEM_P (operands[0])
19512 || offsettable_memref_p (operands[0]));
19514 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
19515 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
19517 /* When emitting push, take care for source operands on the stack. */
19518 if (push && MEM_P (operands[1])
19519 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
19521 rtx src_base = XEXP (part[1][nparts - 1], 0);
19523 /* Compensate for the stack decrement by 4. */
19524 if (!TARGET_64BIT && nparts == 3
19525 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
19526 src_base = plus_constant (src_base, 4);
19528 /* src_base refers to the stack pointer and is
19529 automatically decreased by emitted push. */
19530 for (i = 0; i < nparts; i++)
19531 part[1][i] = change_address (part[1][i],
19532 GET_MODE (part[1][i]), src_base);
19535 /* We need to do the copy in the right order in case an address
19536 register of the source overlaps the destination. */
19537 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
19541 for (i = 0; i < nparts; i++)
19544 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
19545 if (collisionparts[i])
19549 /* Collision in the middle part can be handled by reordering. */
19550 if (collisions == 1 && nparts == 3 && collisionparts [1])
19552 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
19553 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
19555 else if (collisions == 1
19557 && (collisionparts [1] || collisionparts [2]))
19559 if (collisionparts [1])
19561 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
19562 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
19566 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
19567 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
19571 /* If there are more collisions, we can't handle them by reordering.
19572 Do an lea to the last part and use only one colliding move. */
19573 else if (collisions > 1)
19579 base = part[0][nparts - 1];
19581 /* Handle the case when the last part isn't valid for lea.
19582 This happens in 64-bit mode storing the 12-byte XFmode. */
19583 if (GET_MODE (base) != Pmode)
19584 base = gen_rtx_REG (Pmode, REGNO (base));
19586 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
19587 part[1][0] = replace_equiv_address (part[1][0], base);
19588 for (i = 1; i < nparts; i++)
19590 tmp = plus_constant (base, UNITS_PER_WORD * i);
19591 part[1][i] = replace_equiv_address (part[1][i], tmp);
19602 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
19603 emit_insn (gen_addsi3 (stack_pointer_rtx,
19604 stack_pointer_rtx, GEN_INT (-4)));
19605 emit_move_insn (part[0][2], part[1][2]);
19607 else if (nparts == 4)
19609 emit_move_insn (part[0][3], part[1][3]);
19610 emit_move_insn (part[0][2], part[1][2]);
19615 /* In 64bit mode we don't have a 32bit push available. In case this is a
19616 register, it is OK - we will just use the larger counterpart. We also
19617 retype memory - this comes from an attempt to avoid the REX prefix on
19618 moving the second half of a TFmode value. */
19619 if (GET_MODE (part[1][1]) == SImode)
19621 switch (GET_CODE (part[1][1]))
19624 part[1][1] = adjust_address (part[1][1], DImode, 0);
19628 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
19632 gcc_unreachable ();
19635 if (GET_MODE (part[1][0]) == SImode)
19636 part[1][0] = part[1][1];
19639 emit_move_insn (part[0][1], part[1][1]);
19640 emit_move_insn (part[0][0], part[1][0]);
19644 /* Choose the correct order so as not to overwrite the source before it is copied. */
19645 if ((REG_P (part[0][0])
19646 && REG_P (part[1][1])
19647 && (REGNO (part[0][0]) == REGNO (part[1][1])
19649 && REGNO (part[0][0]) == REGNO (part[1][2]))
19651 && REGNO (part[0][0]) == REGNO (part[1][3]))))
19653 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
19655 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
19657 operands[2 + i] = part[0][j];
19658 operands[6 + i] = part[1][j];
19663 for (i = 0; i < nparts; i++)
19665 operands[2 + i] = part[0][i];
19666 operands[6 + i] = part[1][i];
19670 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
19671 if (optimize_insn_for_size_p ())
19673 for (j = 0; j < nparts - 1; j++)
19674 if (CONST_INT_P (operands[6 + j])
19675 && operands[6 + j] != const0_rtx
19676 && REG_P (operands[2 + j]))
19677 for (i = j; i < nparts - 1; i++)
19678 if (CONST_INT_P (operands[7 + i])
19679 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
19680 operands[7 + i] = operands[2 + j];
19683 for (i = 0; i < nparts; i++)
19684 emit_move_insn (operands[2 + i], operands[6 + i]);
19689 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
19690 left shift by a constant, either using a single shift or
19691 a sequence of add instructions. */
19694 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
19696 rtx (*insn)(rtx, rtx, rtx);
19699 || (count * ix86_cost->add <= ix86_cost->shift_const
19700 && !optimize_insn_for_size_p ()))
19702 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
19703 while (count-- > 0)
19704 emit_insn (insn (operand, operand, operand));
19708 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
19709 emit_insn (insn (operand, operand, GEN_INT (count)));
19714 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
19716 rtx (*gen_ashl3)(rtx, rtx, rtx);
19717 rtx (*gen_shld)(rtx, rtx, rtx);
19718 int half_width = GET_MODE_BITSIZE (mode) >> 1;
19720 rtx low[2], high[2];
19723 if (CONST_INT_P (operands[2]))
19725 split_double_mode (mode, operands, 2, low, high);
19726 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
19728 if (count >= half_width)
19730 emit_move_insn (high[0], low[1]);
19731 emit_move_insn (low[0], const0_rtx);
19733 if (count > half_width)
19734 ix86_expand_ashl_const (high[0], count - half_width, mode);
19738 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
19740 if (!rtx_equal_p (operands[0], operands[1]))
19741 emit_move_insn (operands[0], operands[1]);
19743 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
19744 ix86_expand_ashl_const (low[0], count, mode);
19749 split_double_mode (mode, operands, 1, low, high);
19751 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
19753 if (operands[1] == const1_rtx)
19755 /* Assuming we've chosen QImode-capable registers, 1 << N
19756 can be done with two 32/64-bit shifts, no branches, no cmoves. */
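	 /* Scheme: clear both halves, test bit 5 (bit 6 for 64-bit) of the
	    shift count, set low = (count & half_width) == 0 and
	    high = (count & half_width) != 0 via setcc, then shift both
	    halves left by the count (the hardware masks the shift count
	    modulo the half width).  */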
19757 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
19759 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
19761 ix86_expand_clear (low[0]);
19762 ix86_expand_clear (high[0]);
19763 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
19765 d = gen_lowpart (QImode, low[0]);
19766 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
19767 s = gen_rtx_EQ (QImode, flags, const0_rtx);
19768 emit_insn (gen_rtx_SET (VOIDmode, d, s));
19770 d = gen_lowpart (QImode, high[0]);
19771 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
19772 s = gen_rtx_NE (QImode, flags, const0_rtx);
19773 emit_insn (gen_rtx_SET (VOIDmode, d, s));
19776 /* Otherwise, we can get the same results by manually performing
19777 a bit extract operation on bit 5/6, and then performing the two
19778 shifts. The two methods of getting 0/1 into low/high are exactly
19779 the same size. Avoiding the shift in the bit extract case helps
19780 pentium4 a bit; no one else seems to care much either way. */
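	  /* I.e. high = (count >> 5) & 1 (shift by 6 in the 64-bit case),
	     low = high ^ 1, then both halves are shifted left by the count,
	     which the hardware masks modulo the half width.  */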
19783 enum machine_mode half_mode;
19784 rtx (*gen_lshr3)(rtx, rtx, rtx);
19785 rtx (*gen_and3)(rtx, rtx, rtx);
19786 rtx (*gen_xor3)(rtx, rtx, rtx);
19787 HOST_WIDE_INT bits;
19790 if (mode == DImode)
19792 half_mode = SImode;
19793 gen_lshr3 = gen_lshrsi3;
19794 gen_and3 = gen_andsi3;
19795 gen_xor3 = gen_xorsi3;
19800 half_mode = DImode;
19801 gen_lshr3 = gen_lshrdi3;
19802 gen_and3 = gen_anddi3;
19803 gen_xor3 = gen_xordi3;
19807 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
19808 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
19810 x = gen_lowpart (half_mode, operands[2]);
19811 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
19813 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
19814 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
19815 emit_move_insn (low[0], high[0]);
19816 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
19819 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
19820 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
19824 if (operands[1] == constm1_rtx)
19826 /* For -1 << N, we can avoid the shld instruction, because we
19827 know that we're shifting 0...31/63 ones into a -1. */
19828 emit_move_insn (low[0], constm1_rtx);
19829 if (optimize_insn_for_size_p ())
19830 emit_move_insn (high[0], low[0]);
19832 emit_move_insn (high[0], constm1_rtx);
19836 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
19838 if (!rtx_equal_p (operands[0], operands[1]))
19839 emit_move_insn (operands[0], operands[1]);
19841 split_double_mode (mode, operands, 1, low, high);
19842 emit_insn (gen_shld (high[0], low[0], operands[2]));
19845 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
19847 if (TARGET_CMOVE && scratch)
19849 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
19850 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
19852 ix86_expand_clear (scratch);
19853 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
19857 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
19858 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
19860 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
19865 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
19867 rtx (*gen_ashr3)(rtx, rtx, rtx)
19868 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
19869 rtx (*gen_shrd)(rtx, rtx, rtx);
19870 int half_width = GET_MODE_BITSIZE (mode) >> 1;
19872 rtx low[2], high[2];
19875 if (CONST_INT_P (operands[2]))
19877 split_double_mode (mode, operands, 2, low, high);
19878 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
19880 if (count == GET_MODE_BITSIZE (mode) - 1)
19882 emit_move_insn (high[0], high[1]);
19883 emit_insn (gen_ashr3 (high[0], high[0],
19884 GEN_INT (half_width - 1)));
19885 emit_move_insn (low[0], high[0]);
19888 else if (count >= half_width)
19890 emit_move_insn (low[0], high[1]);
19891 emit_move_insn (high[0], low[0]);
19892 emit_insn (gen_ashr3 (high[0], high[0],
19893 GEN_INT (half_width - 1)));
19895 if (count > half_width)
19896 emit_insn (gen_ashr3 (low[0], low[0],
19897 GEN_INT (count - half_width)));
19901 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19903 if (!rtx_equal_p (operands[0], operands[1]))
19904 emit_move_insn (operands[0], operands[1]);
19906 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
19907 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
19912 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19914 if (!rtx_equal_p (operands[0], operands[1]))
19915 emit_move_insn (operands[0], operands[1]);
19917 split_double_mode (mode, operands, 1, low, high);
19919 emit_insn (gen_shrd (low[0], high[0], operands[2]));
19920 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
19922 if (TARGET_CMOVE && scratch)
19924 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
19925 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
19927 emit_move_insn (scratch, high[0]);
19928 emit_insn (gen_ashr3 (scratch, scratch,
19929 GEN_INT (half_width - 1)));
19930 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
19935 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
19936 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
19938 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
19944 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
19946 rtx (*gen_lshr3)(rtx, rtx, rtx)
19947 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
19948 rtx (*gen_shrd)(rtx, rtx, rtx);
19949 int half_width = GET_MODE_BITSIZE (mode) >> 1;
19951 rtx low[2], high[2];
19954 if (CONST_INT_P (operands[2]))
19956 split_double_mode (mode, operands, 2, low, high);
19957 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
19959 if (count >= half_width)
19961 emit_move_insn (low[0], high[1]);
19962 ix86_expand_clear (high[0]);
19964 if (count > half_width)
19965 emit_insn (gen_lshr3 (low[0], low[0],
19966 GEN_INT (count - half_width)));
19970 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19972 if (!rtx_equal_p (operands[0], operands[1]))
19973 emit_move_insn (operands[0], operands[1]);
19975 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
19976 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
19981 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19983 if (!rtx_equal_p (operands[0], operands[1]))
19984 emit_move_insn (operands[0], operands[1]);
19986 split_double_mode (mode, operands, 1, low, high);
19988 emit_insn (gen_shrd (low[0], high[0], operands[2]));
19989 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
19991 if (TARGET_CMOVE && scratch)
19993 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
19994 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
19996 ix86_expand_clear (scratch);
19997 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
20002 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
20003 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
20005 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
20010 /* Predict the just-emitted jump instruction to be taken with probability PROB. */
20012 predict_jump (int prob)
20014 rtx insn = get_last_insn ();
20015 gcc_assert (JUMP_P (insn));
20016 add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
20019 /* Helper function for the string operations below. Test VARIABLE whether
20020 it is aligned to VALUE bytes. If true, jump to the label. */
20022 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
20024 rtx label = gen_label_rtx ();
20025 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
20026 if (GET_MODE (variable) == DImode)
20027 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
20029 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
20030 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
20033 predict_jump (REG_BR_PROB_BASE * 50 / 100);
20035 predict_jump (REG_BR_PROB_BASE * 90 / 100);
20039 /* Adjust COUNTER by the VALUE. */
20041 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
20043 rtx (*gen_add)(rtx, rtx, rtx)
20044 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
20046 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
20049 /* Zero extend possibly SImode EXP to Pmode register. */
20051 ix86_zero_extend_to_Pmode (rtx exp)
20054 if (GET_MODE (exp) == VOIDmode)
20055 return force_reg (Pmode, exp);
20056 if (GET_MODE (exp) == Pmode)
20057 return copy_to_mode_reg (Pmode, exp);
20058 r = gen_reg_rtx (Pmode);
20059 emit_insn (gen_zero_extendsidi2 (r, exp));
20063 /* Divide COUNTREG by SCALE. */
20065 scale_counter (rtx countreg, int scale)
20071 if (CONST_INT_P (countreg))
20072 return GEN_INT (INTVAL (countreg) / scale);
20073 gcc_assert (REG_P (countreg));
20075 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
20076 GEN_INT (exact_log2 (scale)),
20077 NULL, 1, OPTAB_DIRECT);
20081 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
20082 DImode for constant loop counts. */
20084 static enum machine_mode
20085 counter_mode (rtx count_exp)
20087 if (GET_MODE (count_exp) != VOIDmode)
20088 return GET_MODE (count_exp);
20089 if (!CONST_INT_P (count_exp))
20091 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
20096 /* When SRCPTR is non-NULL, output a simple loop to move memory pointed
20097 to by SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times;
20098 the overall size is COUNT, specified in bytes. When SRCPTR is NULL, output
20099 the equivalent loop to set memory by VALUE (supposed to be in MODE).
20101 The size is rounded down to a whole number of chunks moved at once.
20102 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
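/* E.g. a copy in DImode chunks unrolled twice moves 16 bytes per
   iteration, iterating while iter < (COUNT & ~15); any remainder is
   left for the caller's epilogue code.  */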
20106 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
20107 rtx destptr, rtx srcptr, rtx value,
20108 rtx count, enum machine_mode mode, int unroll,
20111 rtx out_label, top_label, iter, tmp;
20112 enum machine_mode iter_mode = counter_mode (count);
20113 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
20114 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
20120 top_label = gen_label_rtx ();
20121 out_label = gen_label_rtx ();
20122 iter = gen_reg_rtx (iter_mode);
20124 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
20125 NULL, 1, OPTAB_DIRECT);
20126 /* Those two should combine. */
20127 if (piece_size == const1_rtx)
20129 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
20131 predict_jump (REG_BR_PROB_BASE * 10 / 100);
20133 emit_move_insn (iter, const0_rtx);
20135 emit_label (top_label);
20137 tmp = convert_modes (Pmode, iter_mode, iter, true);
20138 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
20139 destmem = change_address (destmem, mode, x_addr);
20143 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
20144 srcmem = change_address (srcmem, mode, y_addr);
20146 /* When unrolling for chips that reorder memory reads and writes,
20147 we can save registers by using a single temporary.
20148 Using 4 temporaries is also overkill in 32bit mode. */
20149 if (!TARGET_64BIT && 0) /* This path is currently disabled. */
20151 for (i = 0; i < unroll; i++)
20156 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
20158 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
20160 emit_move_insn (destmem, srcmem);
20166 gcc_assert (unroll <= 4);
20167 for (i = 0; i < unroll; i++)
20169 tmpreg[i] = gen_reg_rtx (mode);
20173 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
20175 emit_move_insn (tmpreg[i], srcmem);
20177 for (i = 0; i < unroll; i++)
20182 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
20184 emit_move_insn (destmem, tmpreg[i]);
20189 for (i = 0; i < unroll; i++)
20193 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
20194 emit_move_insn (destmem, value);
20197 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
20198 true, OPTAB_LIB_WIDEN);
20200 emit_move_insn (iter, tmp);
20202 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
20204 if (expected_size != -1)
20206 expected_size /= GET_MODE_SIZE (mode) * unroll;
20207 if (expected_size == 0)
20209 else if (expected_size > REG_BR_PROB_BASE)
20210 predict_jump (REG_BR_PROB_BASE - 1);
20212 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
20215 predict_jump (REG_BR_PROB_BASE * 80 / 100);
20216 iter = ix86_zero_extend_to_Pmode (iter);
20217 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
20218 true, OPTAB_LIB_WIDEN);
20219 if (tmp != destptr)
20220 emit_move_insn (destptr, tmp);
20223 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
20224 true, OPTAB_LIB_WIDEN);
20226 emit_move_insn (srcptr, tmp);
20228 emit_label (out_label);
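/* The control flow emitted above corresponds roughly to this C shape
   (an illustrative sketch; the real expander additionally handles the
   memset case, unrolling, and temporary-register scheduling).  */
static inline void
copy_loop_sketch (char *dst, const char *src, unsigned long count,
		  unsigned long piece_size)
{
  /* Round the size down to a whole number of pieces, as above.  */
  unsigned long size = count & ~(piece_size - 1);
  unsigned long iter;

  for (iter = 0; iter < size; iter += piece_size)
    __builtin_memcpy (dst + iter, src + iter, piece_size);
  /* On exit the expander advances destptr and srcptr by SIZE.  */
}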
20231 /* Output a "rep; mov" instruction.
20232 Arguments have the same meaning as for the previous function. */
20234 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
20235 rtx destptr, rtx srcptr,
20237 enum machine_mode mode)
20243 /* If the size is known, it is shorter to use rep movs. */
20244 if (mode == QImode && CONST_INT_P (count)
20245 && !(INTVAL (count) & 3))
20246 mode = SImode;
20248 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
20249 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
20250 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
20251 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
20252 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
20253 if (mode != QImode)
20255 destexp = gen_rtx_ASHIFT (Pmode, countreg,
20256 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
20257 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
20258 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
20259 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
20260 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
20264 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
20265 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
20267 if (CONST_INT_P (count))
20269 count = GEN_INT (INTVAL (count)
20270 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
20271 destmem = shallow_copy_rtx (destmem);
20272 srcmem = shallow_copy_rtx (srcmem);
20273 set_mem_size (destmem, count);
20274 set_mem_size (srcmem, count);
20278 if (MEM_SIZE (destmem))
20279 set_mem_size (destmem, NULL_RTX);
20280 if (MEM_SIZE (srcmem))
20281 set_mem_size (srcmem, NULL_RTX);
20283 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
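/* In C terms, the "rep; mov" sequence emitted above behaves like this
   sketch (illustrative only): the count is pre-scaled to MODE-sized
   units, and DESTEXP/SRCEXP describe the final pointer values for the
   RTL pattern.  */
static inline void
rep_mov_sketch (char **destptr, const char **srcptr,
		unsigned long count, unsigned int mode_size)
{
  unsigned long units = count / mode_size;	/* scale_counter */

  __builtin_memcpy (*destptr, *srcptr, units * mode_size);
  *destptr += units * mode_size;	/* destexp */
  *srcptr += units * mode_size;		/* srcexp */
}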
20287 /* Output a "rep; stos" instruction.
20288 Arguments have the same meaning as for the previous function. */
20290 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
20291 rtx count, enum machine_mode mode,
20297 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
20298 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
20299 value = force_reg (mode, gen_lowpart (mode, value));
20300 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
20301 if (mode != QImode)
20303 destexp = gen_rtx_ASHIFT (Pmode, countreg,
20304 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
20305 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
20308 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
20309 if (orig_value == const0_rtx && CONST_INT_P (count))
20311 count = GEN_INT (INTVAL (count)
20312 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
20313 destmem = shallow_copy_rtx (destmem);
20314 set_mem_size (destmem, count);
20316 else if (MEM_SIZE (destmem))
20317 set_mem_size (destmem, NULL_RTX);
20318 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
20322 emit_strmov (rtx destmem, rtx srcmem,
20323 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
20325 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
20326 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
20327 emit_insn (gen_strmov (destptr, dest, srcptr, src));
20330 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
20332 expand_movmem_epilogue (rtx destmem, rtx srcmem,
20333 rtx destptr, rtx srcptr, rtx count, int max_size)
20336 if (CONST_INT_P (count))
20338 HOST_WIDE_INT countval = INTVAL (count);
20341 if ((countval & 0x10) && max_size > 16)
20345 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
20346 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
20349 gcc_unreachable ();
20352 if ((countval & 0x08) && max_size > 8)
20355 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
20358 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
20359 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
20363 if ((countval & 0x04) && max_size > 4)
20365 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
20368 if ((countval & 0x02) && max_size > 2)
20370 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
20373 if ((countval & 0x01) && max_size > 1)
20375 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
20382 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
20383 count, 1, OPTAB_DIRECT);
20384 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
20385 count, QImode, 1, 4);
20389 /* When single stringop instructions are available, we can cheaply advance
20390 the dest and src pointers. Otherwise we save code size by maintaining an
20391 offset (zero is readily available from the preceding rep operation) and
20392 using x86 addressing modes. */
20393 if (TARGET_SINGLE_STRINGOP)
20397 rtx label = ix86_expand_aligntest (count, 4, true);
20398 src = change_address (srcmem, SImode, srcptr);
20399 dest = change_address (destmem, SImode, destptr);
20400 emit_insn (gen_strmov (destptr, dest, srcptr, src));
20401 emit_label (label);
20402 LABEL_NUSES (label) = 1;
20406 rtx label = ix86_expand_aligntest (count, 2, true);
20407 src = change_address (srcmem, HImode, srcptr);
20408 dest = change_address (destmem, HImode, destptr);
20409 emit_insn (gen_strmov (destptr, dest, srcptr, src));
20410 emit_label (label);
20411 LABEL_NUSES (label) = 1;
20415 rtx label = ix86_expand_aligntest (count, 1, true);
20416 src = change_address (srcmem, QImode, srcptr);
20417 dest = change_address (destmem, QImode, destptr);
20418 emit_insn (gen_strmov (destptr, dest, srcptr, src));
20419 emit_label (label);
20420 LABEL_NUSES (label) = 1;
20425 rtx offset = force_reg (Pmode, const0_rtx);
20430 rtx label = ix86_expand_aligntest (count, 4, true);
20431 src = change_address (srcmem, SImode, srcptr);
20432 dest = change_address (destmem, SImode, destptr);
20433 emit_move_insn (dest, src);
20434 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
20435 true, OPTAB_LIB_WIDEN);
20437 emit_move_insn (offset, tmp);
20438 emit_label (label);
20439 LABEL_NUSES (label) = 1;
20443 rtx label = ix86_expand_aligntest (count, 2, true);
20444 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
20445 src = change_address (srcmem, HImode, tmp);
20446 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
20447 dest = change_address (destmem, HImode, tmp);
20448 emit_move_insn (dest, src);
20449 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
20450 true, OPTAB_LIB_WIDEN);
20452 emit_move_insn (offset, tmp);
20453 emit_label (label);
20454 LABEL_NUSES (label) = 1;
20458 rtx label = ix86_expand_aligntest (count, 1, true);
20459 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
20460 src = change_address (srcmem, QImode, tmp);
20461 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
20462 dest = change_address (destmem, QImode, tmp);
20463 emit_move_insn (dest, src);
20464 emit_label (label);
20465 LABEL_NUSES (label) = 1;
20470 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
20472 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
20473 rtx count, int max_size)
20476 expand_simple_binop (counter_mode (count), AND, count,
20477 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
20478 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
20479 gen_lowpart (QImode, value), count, QImode,
20483 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
20485 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
20489 if (CONST_INT_P (count))
20491 HOST_WIDE_INT countval = INTVAL (count);
20494 if ((countval & 0x10) && max_size > 16)
20498 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
20499 emit_insn (gen_strset (destptr, dest, value));
20500 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
20501 emit_insn (gen_strset (destptr, dest, value));
20504 gcc_unreachable ();
20507 if ((countval & 0x08) && max_size > 8)
20511 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
20512 emit_insn (gen_strset (destptr, dest, value));
20516 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
20517 emit_insn (gen_strset (destptr, dest, value));
20518 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
20519 emit_insn (gen_strset (destptr, dest, value));
20523 if ((countval & 0x04) && max_size > 4)
20525 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
20526 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
20529 if ((countval & 0x02) && max_size > 2)
20531 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
20532 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
20535 if ((countval & 0x01) && max_size > 1)
20537 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
20538 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
20545 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
20550 rtx label = ix86_expand_aligntest (count, 16, true);
20553 dest = change_address (destmem, DImode, destptr);
20554 emit_insn (gen_strset (destptr, dest, value));
20555 emit_insn (gen_strset (destptr, dest, value));
20559 dest = change_address (destmem, SImode, destptr);
20560 emit_insn (gen_strset (destptr, dest, value));
20561 emit_insn (gen_strset (destptr, dest, value));
20562 emit_insn (gen_strset (destptr, dest, value));
20563 emit_insn (gen_strset (destptr, dest, value));
20565 emit_label (label);
20566 LABEL_NUSES (label) = 1;
20570 rtx label = ix86_expand_aligntest (count, 8, true);
20573 dest = change_address (destmem, DImode, destptr);
20574 emit_insn (gen_strset (destptr, dest, value));
20578 dest = change_address (destmem, SImode, destptr);
20579 emit_insn (gen_strset (destptr, dest, value));
20580 emit_insn (gen_strset (destptr, dest, value));
20582 emit_label (label);
20583 LABEL_NUSES (label) = 1;
20587 rtx label = ix86_expand_aligntest (count, 4, true);
20588 dest = change_address (destmem, SImode, destptr);
20589 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
20590 emit_label (label);
20591 LABEL_NUSES (label) = 1;
20595 rtx label = ix86_expand_aligntest (count, 2, true);
20596 dest = change_address (destmem, HImode, destptr);
20597 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
20598 emit_label (label);
20599 LABEL_NUSES (label) = 1;
20603 rtx label = ix86_expand_aligntest (count, 1, true);
20604 dest = change_address (destmem, QImode, destptr);
20605 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
20606 emit_label (label);
20607 LABEL_NUSES (label) = 1;
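/* The constant-count branch above reduces to a jump tree over the low
   bits of COUNT; in plain C the idea is this sketch (illustrative only,
   shown for MAX_SIZE == 16).  */
static inline void
set_epilogue_sketch (char *dest, unsigned char value, unsigned long count)
{
  /* Each set bit of COUNT below MAX_SIZE selects one fixed-size store.  */
  if (count & 8) { __builtin_memset (dest, value, 8); dest += 8; }
  if (count & 4) { __builtin_memset (dest, value, 4); dest += 4; }
  if (count & 2) { __builtin_memset (dest, value, 2); dest += 2; }
  if (count & 1) *dest = value;
}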
20611 /* Copy enough bytes from SRC to DEST to raise DEST, known to be aligned
20612 to ALIGN, up to DESIRED_ALIGNMENT. */
20614 expand_movmem_prologue (rtx destmem, rtx srcmem,
20615 rtx destptr, rtx srcptr, rtx count,
20616 int align, int desired_alignment)
20618 if (align <= 1 && desired_alignment > 1)
20620 rtx label = ix86_expand_aligntest (destptr, 1, false);
20621 srcmem = change_address (srcmem, QImode, srcptr);
20622 destmem = change_address (destmem, QImode, destptr);
20623 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
20624 ix86_adjust_counter (count, 1);
20625 emit_label (label);
20626 LABEL_NUSES (label) = 1;
20628 if (align <= 2 && desired_alignment > 2)
20630 rtx label = ix86_expand_aligntest (destptr, 2, false);
20631 srcmem = change_address (srcmem, HImode, srcptr);
20632 destmem = change_address (destmem, HImode, destptr);
20633 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
20634 ix86_adjust_counter (count, 2);
20635 emit_label (label);
20636 LABEL_NUSES (label) = 1;
20638 if (align <= 4 && desired_alignment > 4)
20640 rtx label = ix86_expand_aligntest (destptr, 4, false);
20641 srcmem = change_address (srcmem, SImode, srcptr);
20642 destmem = change_address (destmem, SImode, destptr);
20643 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
20644 ix86_adjust_counter (count, 4);
20645 emit_label (label);
20646 LABEL_NUSES (label) = 1;
20648 gcc_assert (desired_alignment <= 8);
20651 /* Copy enough bytes from SRC to DST to align DST to DESIRED_ALIGN.
20652 ALIGN_BYTES is how many bytes need to be copied. */
20654 expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
20655 int desired_align, int align_bytes)
20658 rtx src_size, dst_size;
20660 int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
20661 if (src_align_bytes >= 0)
20662 src_align_bytes = desired_align - src_align_bytes;
20663 src_size = MEM_SIZE (src);
20664 dst_size = MEM_SIZE (dst);
20665 if (align_bytes & 1)
20667 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
20668 src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
20670 emit_insn (gen_strmov (destreg, dst, srcreg, src));
20672 if (align_bytes & 2)
20674 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
20675 src = adjust_automodify_address_nv (src, HImode, srcreg, off);
20676 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
20677 set_mem_align (dst, 2 * BITS_PER_UNIT);
20678 if (src_align_bytes >= 0
20679 && (src_align_bytes & 1) == (align_bytes & 1)
20680 && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
20681 set_mem_align (src, 2 * BITS_PER_UNIT);
20683 emit_insn (gen_strmov (destreg, dst, srcreg, src));
20685 if (align_bytes & 4)
20687 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
20688 src = adjust_automodify_address_nv (src, SImode, srcreg, off);
20689 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
20690 set_mem_align (dst, 4 * BITS_PER_UNIT);
20691 if (src_align_bytes >= 0)
20693 unsigned int src_align = 0;
20694 if ((src_align_bytes & 3) == (align_bytes & 3))
20695 src_align = 4;
20696 else if ((src_align_bytes & 1) == (align_bytes & 1))
20697 src_align = 2;
20698 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
20699 set_mem_align (src, src_align * BITS_PER_UNIT);
20702 emit_insn (gen_strmov (destreg, dst, srcreg, src));
20704 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
20705 src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
20706 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
20707 set_mem_align (dst, desired_align * BITS_PER_UNIT);
20708 if (src_align_bytes >= 0)
20710 unsigned int src_align = 0;
20711 if ((src_align_bytes & 7) == (align_bytes & 7))
20712 src_align = 8;
20713 else if ((src_align_bytes & 3) == (align_bytes & 3))
20714 src_align = 4;
20715 else if ((src_align_bytes & 1) == (align_bytes & 1))
20716 src_align = 2;
20717 if (src_align > (unsigned int) desired_align)
20718 src_align = desired_align;
20719 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
20720 set_mem_align (src, src_align * BITS_PER_UNIT);
20722 if (dst_size)
20723 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
20724 if (src_size)
20725 set_mem_size (src, GEN_INT (INTVAL (src_size) - align_bytes));
20730 /* Store enough bytes at DEST to raise its alignment, known to be ALIGN,
20731 up to DESIRED_ALIGNMENT. */
20733 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
20734 int align, int desired_alignment)
20736 if (align <= 1 && desired_alignment > 1)
20738 rtx label = ix86_expand_aligntest (destptr, 1, false);
20739 destmem = change_address (destmem, QImode, destptr);
20740 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
20741 ix86_adjust_counter (count, 1);
20742 emit_label (label);
20743 LABEL_NUSES (label) = 1;
20745 if (align <= 2 && desired_alignment > 2)
20747 rtx label = ix86_expand_aligntest (destptr, 2, false);
20748 destmem = change_address (destmem, HImode, destptr);
20749 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
20750 ix86_adjust_counter (count, 2);
20751 emit_label (label);
20752 LABEL_NUSES (label) = 1;
20754 if (align <= 4 && desired_alignment > 4)
20756 rtx label = ix86_expand_aligntest (destptr, 4, false);
20757 destmem = change_address (destmem, SImode, destptr);
20758 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
20759 ix86_adjust_counter (count, 4);
20760 emit_label (label);
20761 LABEL_NUSES (label) = 1;
20763 gcc_assert (desired_alignment <= 8);
20766 /* Store enough bytes at DST to raise its alignment up to DESIRED_ALIGN.
20767 ALIGN_BYTES is how many bytes need to be stored. */
20769 expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
20770 int desired_align, int align_bytes)
20773 rtx dst_size = MEM_SIZE (dst);
20774 if (align_bytes & 1)
20776 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
20778 emit_insn (gen_strset (destreg, dst,
20779 gen_lowpart (QImode, value)));
20781 if (align_bytes & 2)
20783 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
20784 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
20785 set_mem_align (dst, 2 * BITS_PER_UNIT);
20787 emit_insn (gen_strset (destreg, dst,
20788 gen_lowpart (HImode, value)));
20790 if (align_bytes & 4)
20792 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
20793 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
20794 set_mem_align (dst, 4 * BITS_PER_UNIT);
20796 emit_insn (gen_strset (destreg, dst,
20797 gen_lowpart (SImode, value)));
20799 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
20800 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
20801 set_mem_align (dst, desired_align * BITS_PER_UNIT);
20802 if (dst_size)
20803 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
20807 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
20808 static enum stringop_alg
20809 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
20810 int *dynamic_check)
20812 const struct stringop_algs * algs;
20813 bool optimize_for_speed;
20814 /* Algorithms using the rep prefix want at least edi and ecx;
20815 additionally, memset wants eax and memcpy wants esi. Don't
20816 consider such algorithms if the user has appropriated those
20817 registers for their own purposes. */
20818 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
20819 || (memset
20820 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
20822 #define ALG_USABLE_P(alg) (rep_prefix_usable \
20823 || (alg != rep_prefix_1_byte \
20824 && alg != rep_prefix_4_byte \
20825 && alg != rep_prefix_8_byte))
20826 const struct processor_costs *cost;
20828 /* Even if the string operation call is cold, we still might spend a lot
20829 of time processing large blocks. */
20830 if (optimize_function_for_size_p (cfun)
20831 || (optimize_insn_for_size_p ()
20832 && expected_size != -1 && expected_size < 256))
20833 optimize_for_speed = false;
20835 optimize_for_speed = true;
20837 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
20839 *dynamic_check = -1;
20840 if (memset)
20841 algs = &cost->memset[TARGET_64BIT != 0];
20842 else
20843 algs = &cost->memcpy[TARGET_64BIT != 0];
20844 if (ix86_stringop_alg != no_stringop && ALG_USABLE_P (ix86_stringop_alg))
20845 return ix86_stringop_alg;
20846 /* rep; movq or rep; movl is the smallest variant. */
20847 else if (!optimize_for_speed)
20849 if (!count || (count & 3))
20850 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
20852 return rep_prefix_usable ? rep_prefix_4_byte : loop;
20854 /* Very tiny blocks are best handled via the loop; REP is expensive to
20855 set up. */
20856 else if (expected_size != -1 && expected_size < 4)
20857 return loop_1_byte;
20858 else if (expected_size != -1)
20861 enum stringop_alg alg = libcall;
20862 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
20864 /* We get here if the algorithms that were not libcall-based
20865 were rep-prefix based and we are unable to use rep prefixes
20866 based on global register usage. Break out of the loop and
20867 use the heuristic below. */
20868 if (algs->size[i].max == 0)
20870 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
20872 enum stringop_alg candidate = algs->size[i].alg;
20874 if (candidate != libcall && ALG_USABLE_P (candidate))
20876 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
20877 last non-libcall inline algorithm. */
20878 if (TARGET_INLINE_ALL_STRINGOPS)
20880 /* When the current size is best copied by a libcall, but we are
20881 still forced to inline, run the heuristic below that will pick
20882 the code for medium-sized blocks. */
20883 if (alg != libcall)
20887 else if (ALG_USABLE_P (candidate))
20891 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
20893 /* When asked to inline the call anyway, try to pick a meaningful choice.
20894 We look for the maximal size of a block that is faster to copy by hand,
20895 and take blocks of at most that size, guessing that the average size
20896 will be roughly half of that maximum.
20898 If this turns out to be bad, we might simply specify the preferred
20899 choice in ix86_costs. */
20900 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
20901 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
20904 enum stringop_alg alg;
20906 bool any_alg_usable_p = true;
20908 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
20910 enum stringop_alg candidate = algs->size[i].alg;
20911 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
20913 if (candidate != libcall && candidate
20914 && ALG_USABLE_P (candidate))
20915 max = algs->size[i].max;
20917 /* If there aren't any usable algorithms, then recursing on
20918 smaller sizes isn't going to find anything. Just return the
20919 simple byte-at-a-time copy loop. */
20920 if (!any_alg_usable_p)
20922 /* Pick something reasonable. */
20923 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
20924 *dynamic_check = 128;
20925 return loop_1_byte;
20929 alg = decide_alg (count, max / 2, memset, dynamic_check);
20930 gcc_assert (*dynamic_check == -1);
20931 gcc_assert (alg != libcall);
20932 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
20933 *dynamic_check = max;
20936 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
20937 #undef ALG_USABLE_P
20940 /* Decide on alignment. We know that the operand is already aligned to ALIGN
20941 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
20943 decide_alignment (int align,
20944 enum stringop_alg alg,
20947 int desired_align = 0;
20951 gcc_unreachable ();
20953 case unrolled_loop:
20954 desired_align = GET_MODE_SIZE (Pmode);
20956 case rep_prefix_8_byte:
20959 case rep_prefix_4_byte:
20960 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
20961 copying a whole cache line at once. */
20962 if (TARGET_PENTIUMPRO)
20967 case rep_prefix_1_byte:
20968 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
20969 copying a whole cache line at once. */
20970 if (TARGET_PENTIUMPRO)
20984 if (desired_align < align)
20985 desired_align = align;
20986 if (expected_size != -1 && expected_size < 4)
20987 desired_align = align;
20988 return desired_align;
20991 /* Return the smallest power of 2 greater than VAL. */
20993 smallest_pow2_greater_than (int val)
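/* For reference, the smallest power of 2 greater than VAL can be computed
   as in this illustrative sketch (a straightforward implementation, not
   necessarily the exact body used here):  */
static inline int
smallest_pow2_greater_than_sketch (int val)
{
  int ret = 1;

  while (ret <= val)
    ret <<= 1;
  return ret;
}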
21001 /* Expand string move (memcpy) operation. Use i386 string operations
21002 when profitable. expand_setmem contains similar code. The code
21003 depends upon architecture, block size and alignment, but always has
21004 the same overall structure:
21006 1) Prologue guard: Conditional that jumps up to the epilogue for small
21007 blocks that can be handled by the epilogue alone. This is faster,
21008 but also needed for correctness, since the prologue assumes the block
21009 is larger than the desired alignment.
21011 Optional dynamic check for size and libcall for large
21012 blocks is emitted here too, with -minline-stringops-dynamically.
21014 2) Prologue: copy the first few bytes in order to get the destination
21015 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
21016 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
21017 copied. We emit either a jump tree handling power-of-two sized
21018 blocks, or a byte loop.
21020 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
21021 with specified algorithm.
21023 4) Epilogue: code copying tail of the block that is too small to be
21024 handled by main body (or up to size guarded by prologue guard). */
21027 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
21028 rtx expected_align_exp, rtx expected_size_exp)
21034 rtx jump_around_label = NULL;
21035 HOST_WIDE_INT align = 1;
21036 unsigned HOST_WIDE_INT count = 0;
21037 HOST_WIDE_INT expected_size = -1;
21038 int size_needed = 0, epilogue_size_needed;
21039 int desired_align = 0, align_bytes = 0;
21040 enum stringop_alg alg;
21042 bool need_zero_guard = false;
21044 if (CONST_INT_P (align_exp))
21045 align = INTVAL (align_exp);
21046 /* i386 can do misaligned access at a reasonably increased cost. */
21047 if (CONST_INT_P (expected_align_exp)
21048 && INTVAL (expected_align_exp) > align)
21049 align = INTVAL (expected_align_exp);
21050 /* ALIGN is the minimum of destination and source alignment, but we care here
21051 just about destination alignment. */
21052 else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
21053 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
21055 if (CONST_INT_P (count_exp))
21056 count = expected_size = INTVAL (count_exp);
21057 if (CONST_INT_P (expected_size_exp) && count == 0)
21058 expected_size = INTVAL (expected_size_exp);
21060 /* Make sure we don't need to care about overflow later on. */
21061 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
21064 /* Step 0: Decide on preferred algorithm, desired alignment and
21065 size of chunks to be copied by main loop. */
21067 alg = decide_alg (count, expected_size, false, &dynamic_check);
21068 desired_align = decide_alignment (align, alg, expected_size);
21070 if (!TARGET_ALIGN_STRINGOPS)
21071 align = desired_align;
21073 if (alg == libcall)
21075 gcc_assert (alg != no_stringop);
21077 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
21078 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
21079 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
21084 gcc_unreachable ();
21086 need_zero_guard = true;
21087 size_needed = GET_MODE_SIZE (Pmode);
21089 case unrolled_loop:
21090 need_zero_guard = true;
21091 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
21093 case rep_prefix_8_byte:
21096 case rep_prefix_4_byte:
21099 case rep_prefix_1_byte:
21103 need_zero_guard = true;
21108 epilogue_size_needed = size_needed;
21110 /* Step 1: Prologue guard. */
21112 /* Alignment code needs count to be in register. */
21113 if (CONST_INT_P (count_exp) && desired_align > align)
21115 if (INTVAL (count_exp) > desired_align
21116 && INTVAL (count_exp) > size_needed)
21119 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
21120 if (align_bytes <= 0)
21123 align_bytes = desired_align - align_bytes;
21125 if (align_bytes == 0)
21126 count_exp = force_reg (counter_mode (count_exp), count_exp);
21128 gcc_assert (desired_align >= 1 && align >= 1);
21130 /* Ensure that alignment prologue won't copy past end of block. */
21131 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
21133 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
21134 /* The epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
21135 Make sure it is a power of 2. */
21136 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
21140 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
21142 /* If main algorithm works on QImode, no epilogue is needed.
21143 For small sizes just don't align anything. */
21144 if (size_needed == 1)
21145 desired_align = align;
21152 label = gen_label_rtx ();
21153 emit_cmp_and_jump_insns (count_exp,
21154 GEN_INT (epilogue_size_needed),
21155 LTU, 0, counter_mode (count_exp), 1, label);
21156 if (expected_size == -1 || expected_size < epilogue_size_needed)
21157 predict_jump (REG_BR_PROB_BASE * 60 / 100);
21159 predict_jump (REG_BR_PROB_BASE * 20 / 100);
21163 /* Emit code to decide at runtime whether a library call or inline code
21164 should be used. */
21165 if (dynamic_check != -1)
21167 if (CONST_INT_P (count_exp))
21169 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
21171 emit_block_move_via_libcall (dst, src, count_exp, false);
21172 count_exp = const0_rtx;
21178 rtx hot_label = gen_label_rtx ();
21179 jump_around_label = gen_label_rtx ();
21180 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
21181 LEU, 0, GET_MODE (count_exp), 1, hot_label);
21182 predict_jump (REG_BR_PROB_BASE * 90 / 100);
21183 emit_block_move_via_libcall (dst, src, count_exp, false);
21184 emit_jump (jump_around_label);
21185 emit_label (hot_label);
21189 /* Step 2: Alignment prologue. */
21191 if (desired_align > align)
21193 if (align_bytes == 0)
21195 /* Except for the first move in the epilogue, we no longer know
21196 the constant offset in the aliasing info. It doesn't seem worth
21197 the pain to maintain it for the first move, so throw the info
21198 away early. */
21199 src = change_address (src, BLKmode, srcreg);
21200 dst = change_address (dst, BLKmode, destreg);
21201 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
21206 /* If we know how many bytes need to be stored before dst is
21207 sufficiently aligned, maintain aliasing info accurately. */
21208 dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
21209 desired_align, align_bytes);
21210 count_exp = plus_constant (count_exp, -align_bytes);
21211 count -= align_bytes;
21213 if (need_zero_guard
21214 && (count < (unsigned HOST_WIDE_INT) size_needed
21215 || (align_bytes == 0
21216 && count < ((unsigned HOST_WIDE_INT) size_needed
21217 + desired_align - align))))
21219 /* It is possible that we copied enough so that the main loop will not
21220 execute. */
21221 gcc_assert (size_needed > 1);
21222 if (label == NULL_RTX)
21223 label = gen_label_rtx ();
21224 emit_cmp_and_jump_insns (count_exp,
21225 GEN_INT (size_needed),
21226 LTU, 0, counter_mode (count_exp), 1, label);
21227 if (expected_size == -1
21228 || expected_size < (desired_align - align) / 2 + size_needed)
21229 predict_jump (REG_BR_PROB_BASE * 20 / 100);
21231 predict_jump (REG_BR_PROB_BASE * 60 / 100);
21234 if (label && size_needed == 1)
21236 emit_label (label);
21237 LABEL_NUSES (label) = 1;
21239 epilogue_size_needed = 1;
21241 else if (label == NULL_RTX)
21242 epilogue_size_needed = size_needed;
21244 /* Step 3: Main loop. */
21250 gcc_unreachable ();
21252 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
21253 count_exp, QImode, 1, expected_size);
21256 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
21257 count_exp, Pmode, 1, expected_size);
21259 case unrolled_loop:
21260 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
21261 registers for 4 temporaries anyway. */
21262 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
21263 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
21266 case rep_prefix_8_byte:
21267 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
21270 case rep_prefix_4_byte:
21271 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
21274 case rep_prefix_1_byte:
21275 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
21279 /* Properly adjust the offsets of the src and dest memory for aliasing. */
21280 if (CONST_INT_P (count_exp))
21282 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
21283 (count / size_needed) * size_needed);
21284 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
21285 (count / size_needed) * size_needed);
21289 src = change_address (src, BLKmode, srcreg);
21290 dst = change_address (dst, BLKmode, destreg);
21293 /* Step 4: Epilogue to copy the remaining bytes. */
21297 /* When the main loop is done, COUNT_EXP might hold the original count,
21298 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
21299 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
21300 bytes. Compensate if needed. */
21302 if (size_needed < epilogue_size_needed)
21305 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
21306 GEN_INT (size_needed - 1), count_exp, 1,
21308 if (tmp != count_exp)
21309 emit_move_insn (count_exp, tmp);
21311 emit_label (label);
21312 LABEL_NUSES (label) = 1;
21315 if (count_exp != const0_rtx && epilogue_size_needed > 1)
21316 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
21317 epilogue_size_needed);
21318 if (jump_around_label)
21319 emit_label (jump_around_label);
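/* For reference, the code emitted by ix86_expand_movmem corresponds
   roughly to this C shape (an illustrative sketch; the real expansion
   also covers the alignment prologue and the dynamic libcall check;
   SIZE_NEEDED is a power of two).  */
static inline void
movmem_shape_sketch (char *dst, const char *src, unsigned long count,
		     unsigned long size_needed)
{
  /* 1) Prologue guard: small blocks go straight to the epilogue.  */
  if (count >= size_needed)
    {
      /* 2) An alignment prologue would adjust DST, SRC and COUNT here.  */
      unsigned long main_bytes = count & ~(size_needed - 1);

      __builtin_memcpy (dst, src, main_bytes);	/* 3) main body */
      dst += main_bytes;
      src += main_bytes;
      count &= size_needed - 1;
    }
  __builtin_memcpy (dst, src, count);		/* 4) epilogue */
}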
21323 /* Helper function for memset. For a QImode value 0xXY, produce
21324 0xXYXYXYXY of the width specified by MODE. This is essentially
21325 a multiplication by 0x01010101, but we can do slightly better than
21326 synth_mult by unwinding the sequence by hand on CPUs with
21327 slow multiply. */
21329 promote_duplicated_reg (enum machine_mode mode, rtx val)
21331 enum machine_mode valmode = GET_MODE (val);
21333 int nops = mode == DImode ? 3 : 2;
21335 gcc_assert (mode == SImode || mode == DImode);
21336 if (val == const0_rtx)
21337 return copy_to_mode_reg (mode, const0_rtx);
21338 if (CONST_INT_P (val))
21340 HOST_WIDE_INT v = INTVAL (val) & 255;
21341 v |= v << 8;
21342 v |= v << 16;
21344 if (mode == DImode)
21345 v |= (v << 16) << 16;
21346 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
21349 if (valmode == VOIDmode)
21350 valmode = QImode;
21351 if (valmode != QImode)
21352 val = gen_lowpart (QImode, val);
21353 if (mode == QImode)
21354 return val;
21355 if (!TARGET_PARTIAL_REG_STALL)
21357 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
21358 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
21359 <= (ix86_cost->shift_const + ix86_cost->add) * nops
21360 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
21362 rtx reg = convert_modes (mode, QImode, val, true);
21363 tmp = promote_duplicated_reg (mode, const1_rtx);
21364 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
21369 rtx reg = convert_modes (mode, QImode, val, true);
21371 if (!TARGET_PARTIAL_REG_STALL)
21372 if (mode == SImode)
21373 emit_insn (gen_movsi_insv_1 (reg, reg));
21375 emit_insn (gen_movdi_insv_1 (reg, reg));
21378 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
21379 NULL, 1, OPTAB_DIRECT);
21381 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
21383 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
21384 NULL, 1, OPTAB_DIRECT);
21385 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
21386 if (mode == SImode)
21387 return reg;
21388 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
21389 NULL, 1, OPTAB_DIRECT);
21390 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
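/* The shift-and-or ladder above computes, in plain C terms, the classic
   byte replication below (an illustrative sketch; the SImode variant
   stops before the final 32-bit step).  */
static inline unsigned long long
replicate_byte_sketch (unsigned char byte)
{
  unsigned long long v = byte;

  v |= v << 8;		/* 0x000000XY -> 0x0000XYXY */
  v |= v << 16;		/* 0x0000XYXY -> 0xXYXYXYXY */
  v |= v << 32;		/* DImode only.  */
  return v;		/* == byte * 0x0101010101010101ULL */
}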
21395 /* Duplicate value VAL using promote_duplicated_reg into the maximal size
21396 that will be needed by the main loop copying SIZE_NEEDED chunks and by
21397 the prologue getting the alignment from ALIGN to DESIRED_ALIGN. */
21399 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
21404 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
21405 promoted_val = promote_duplicated_reg (DImode, val);
21406 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
21407 promoted_val = promote_duplicated_reg (SImode, val);
21408 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
21409 promoted_val = promote_duplicated_reg (HImode, val);
21411 promoted_val = val;
21413 return promoted_val;
21416 /* Expand string clear operation (bzero). Use i386 string operations when
21417 profitable. See expand_movmem comment for explanation of individual
21418 steps performed. */
21420 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
21421 rtx expected_align_exp, rtx expected_size_exp)
21426 rtx jump_around_label = NULL;
21427 HOST_WIDE_INT align = 1;
21428 unsigned HOST_WIDE_INT count = 0;
21429 HOST_WIDE_INT expected_size = -1;
21430 int size_needed = 0, epilogue_size_needed;
21431 int desired_align = 0, align_bytes = 0;
21432 enum stringop_alg alg;
21433 rtx promoted_val = NULL;
21434 bool force_loopy_epilogue = false;
21436 bool need_zero_guard = false;
21438 if (CONST_INT_P (align_exp))
21439 align = INTVAL (align_exp);
21440 /* i386 can do misaligned access at a reasonably increased cost. */
21441 if (CONST_INT_P (expected_align_exp)
21442 && INTVAL (expected_align_exp) > align)
21443 align = INTVAL (expected_align_exp);
21444 if (CONST_INT_P (count_exp))
21445 count = expected_size = INTVAL (count_exp);
21446 if (CONST_INT_P (expected_size_exp) && count == 0)
21447 expected_size = INTVAL (expected_size_exp);
21449 /* Make sure we don't need to care about overflow later on. */
21450 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
21453 /* Step 0: Decide on preferred algorithm, desired alignment and
21454 size of chunks to be copied by main loop. */
21456 alg = decide_alg (count, expected_size, true, &dynamic_check);
21457 desired_align = decide_alignment (align, alg, expected_size);
21459 if (!TARGET_ALIGN_STRINGOPS)
21460 align = desired_align;
21462 if (alg == libcall)
21464 gcc_assert (alg != no_stringop);
21466 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
21467 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
21472 gcc_unreachable ();
21474 need_zero_guard = true;
21475 size_needed = GET_MODE_SIZE (Pmode);
21477 case unrolled_loop:
21478 need_zero_guard = true;
21479 size_needed = GET_MODE_SIZE (Pmode) * 4;
21481 case rep_prefix_8_byte:
21484 case rep_prefix_4_byte:
21487 case rep_prefix_1_byte:
21491 need_zero_guard = true;
21495 epilogue_size_needed = size_needed;
21497 /* Step 1: Prologue guard. */
21499 /* Alignment code needs count to be in register. */
21500 if (CONST_INT_P (count_exp) && desired_align > align)
21502 if (INTVAL (count_exp) > desired_align
21503 && INTVAL (count_exp) > size_needed)
21506 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
21507 if (align_bytes <= 0)
21510 align_bytes = desired_align - align_bytes;
21512 if (align_bytes == 0)
21514 enum machine_mode mode = SImode;
21515 if (TARGET_64BIT && (count & ~0xffffffff))
21516 mode = DImode;
21517 count_exp = force_reg (mode, count_exp);
21520 /* Do the cheap promotion to allow better CSE across the
21521 main loop and epilogue (i.e. one load of the big constant in
21522 front of all code). */
21523 if (CONST_INT_P (val_exp))
21524 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
21525 desired_align, align);
21526 /* Ensure that alignment prologue won't copy past end of block. */
21527 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
21529 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
21530 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
21531 Make sure it is power of 2. */
21532 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
21534 /* To improve performance of small blocks, we jump around the VAL
21535 promoting code. This means that if the promoted VAL is not constant,
21536 we might not use it in the epilogue and have to use a byte
21537 loop instead. */
21538 if (epilogue_size_needed > 2 && !promoted_val)
21539 force_loopy_epilogue = true;
21542 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
21544 /* If main algorithm works on QImode, no epilogue is needed.
21545 For small sizes just don't align anything. */
21546 if (size_needed == 1)
21547 desired_align = align;
21554 label = gen_label_rtx ();
21555 emit_cmp_and_jump_insns (count_exp,
21556 GEN_INT (epilogue_size_needed),
21557 LTU, 0, counter_mode (count_exp), 1, label);
21558 if (expected_size == -1 || expected_size <= epilogue_size_needed)
21559 predict_jump (REG_BR_PROB_BASE * 60 / 100);
21561 predict_jump (REG_BR_PROB_BASE * 20 / 100);
21564 if (dynamic_check != -1)
21566 rtx hot_label = gen_label_rtx ();
21567 jump_around_label = gen_label_rtx ();
21568 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
21569 LEU, 0, counter_mode (count_exp), 1, hot_label);
21570 predict_jump (REG_BR_PROB_BASE * 90 / 100);
21571 set_storage_via_libcall (dst, count_exp, val_exp, false);
21572 emit_jump (jump_around_label);
21573 emit_label (hot_label);
21576 /* Step 2: Alignment prologue. */
21578 /* Do the expensive promotion once we branched off the small blocks. */
21580 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
21581 desired_align, align);
21582 gcc_assert (desired_align >= 1 && align >= 1);
21584 if (desired_align > align)
21586 if (align_bytes == 0)
21588 /* Except for the first move in the epilogue, we no longer know
21589 the constant offset in the aliasing info. It doesn't seem worth
21590 the pain to maintain it for the first move, so throw the info
21591 away early. */
21592 dst = change_address (dst, BLKmode, destreg);
21593 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
21598 /* If we know how many bytes need to be stored before dst is
21599 sufficiently aligned, maintain aliasing info accurately. */
21600 dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
21601 desired_align, align_bytes);
21602 count_exp = plus_constant (count_exp, -align_bytes);
21603 count -= align_bytes;
21605 if (need_zero_guard
21606 && (count < (unsigned HOST_WIDE_INT) size_needed
21607 || (align_bytes == 0
21608 && count < ((unsigned HOST_WIDE_INT) size_needed
21609 + desired_align - align))))
21611 /* It is possible that we copied enough so that the main loop will not
21612 execute. */
21613 gcc_assert (size_needed > 1);
21614 if (label == NULL_RTX)
21615 label = gen_label_rtx ();
21616 emit_cmp_and_jump_insns (count_exp,
21617 GEN_INT (size_needed),
21618 LTU, 0, counter_mode (count_exp), 1, label);
21619 if (expected_size == -1
21620 || expected_size < (desired_align - align) / 2 + size_needed)
21621 predict_jump (REG_BR_PROB_BASE * 20 / 100);
21623 predict_jump (REG_BR_PROB_BASE * 60 / 100);
21626 if (label && size_needed == 1)
21628 emit_label (label);
21629 LABEL_NUSES (label) = 1;
21631 promoted_val = val_exp;
21632 epilogue_size_needed = 1;
21634 else if (label == NULL_RTX)
21635 epilogue_size_needed = size_needed;
21637 /* Step 3: Main loop. */
21643 gcc_unreachable ();
21645 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
21646 count_exp, QImode, 1, expected_size);
21649 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
21650 count_exp, Pmode, 1, expected_size);
21652 case unrolled_loop:
21653 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
21654 count_exp, Pmode, 4, expected_size);
21656 case rep_prefix_8_byte:
21657 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
21660 case rep_prefix_4_byte:
21661 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
21664 case rep_prefix_1_byte:
21665 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
21669 /* Properly adjust the offset of the dest memory for aliasing. */
21670 if (CONST_INT_P (count_exp))
21671 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
21672 (count / size_needed) * size_needed);
21674 dst = change_address (dst, BLKmode, destreg);
21676 /* Step 4: Epilogue to copy the remaining bytes. */
21680 /* When the main loop is done, COUNT_EXP might hold the original count,
21681 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
21682 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
21683 bytes. Compensate if needed. */
21685 if (size_needed < epilogue_size_needed)
21688 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
21689 GEN_INT (size_needed - 1), count_exp, 1,
21691 if (tmp != count_exp)
21692 emit_move_insn (count_exp, tmp);
21694 emit_label (label);
21695 LABEL_NUSES (label) = 1;
21698 if (count_exp != const0_rtx && epilogue_size_needed > 1)
21700 if (force_loopy_epilogue)
21701 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
21702 epilogue_size_needed);
21704 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
21705 epilogue_size_needed);
21707 if (jump_around_label)
21708 emit_label (jump_around_label);
21712 /* Expand the appropriate insns for doing strlen if not just doing
21713 repnz; scasb
21715 out = result, initialized with the start address
21716 align_rtx = alignment of the address.
21717 scratch = scratch register, initialized with the start address when
21718 not aligned, otherwise undefined
21720 This is just the body. It needs the initializations mentioned above and
21721 some address computing at the end. These things are done in i386.md. */
21724 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
21728 rtx align_2_label = NULL_RTX;
21729 rtx align_3_label = NULL_RTX;
21730 rtx align_4_label = gen_label_rtx ();
21731 rtx end_0_label = gen_label_rtx ();
21733 rtx tmpreg = gen_reg_rtx (SImode);
21734 rtx scratch = gen_reg_rtx (SImode);
21738 if (CONST_INT_P (align_rtx))
21739 align = INTVAL (align_rtx);
21741 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
21743 /* Is there a known alignment and is it less than 4? */
21746 rtx scratch1 = gen_reg_rtx (Pmode);
21747 emit_move_insn (scratch1, out);
21748 /* Is there a known alignment and is it not 2? */
21751 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
21752 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
21754 /* Leave just the 3 lower bits. */
21755 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
21756 NULL_RTX, 0, OPTAB_WIDEN);
21758 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
21759 Pmode, 1, align_4_label);
21760 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
21761 Pmode, 1, align_2_label);
21762 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
21763 Pmode, 1, align_3_label);
21767 /* Since the alignment is 2, we have to check 2 or 0 bytes;
21768 test whether the pointer is 4-byte aligned. */
21770 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
21771 NULL_RTX, 0, OPTAB_WIDEN);
21773 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
21774 Pmode, 1, align_4_label);
21777 mem = change_address (src, QImode, out);
21779 /* Now compare the bytes. */
21781 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
21782 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
21783 QImode, 1, end_0_label);
21785 /* Increment the address. */
21786 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
21788 /* Not needed with an alignment of 2. */
21791 emit_label (align_2_label);
21793 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
21796 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
21798 emit_label (align_3_label);
21801 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
21804 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
21807 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
21808 align this loop; that only bloats the code without improving speed. */
21810 emit_label (align_4_label);
21812 mem = change_address (src, SImode, out);
21813 emit_move_insn (scratch, mem);
21814 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
21816 /* This formula yields a nonzero result iff one of the bytes is zero.
21817 This saves three branches inside the loop and many cycles. */
21819 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
21820 emit_insn (gen_one_cmplsi2 (scratch, scratch));
21821 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
21822 emit_insn (gen_andsi3 (tmpreg, tmpreg,
21823 gen_int_mode (0x80808080, SImode)));
21824 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
21829 rtx reg = gen_reg_rtx (SImode);
21830 rtx reg2 = gen_reg_rtx (Pmode);
21831 emit_move_insn (reg, tmpreg);
21832 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
21834 /* If zero is not in the first two bytes, move two bytes forward. */
21835 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
21836 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
21837 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
21838 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
21839 gen_rtx_IF_THEN_ELSE (SImode, tmp,
21842 /* Emit lea manually to avoid clobbering of flags. */
21843 emit_insn (gen_rtx_SET (SImode, reg2,
21844 gen_rtx_PLUS (Pmode, out, const2_rtx)));
21846 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
21847 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
21848 emit_insn (gen_rtx_SET (VOIDmode, out,
21849 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
21855 rtx end_2_label = gen_label_rtx ();
21856 /* Is zero in the first two bytes? */
21858 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
21859 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
21860 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
21861 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
21862 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
21864 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
21865 JUMP_LABEL (tmp) = end_2_label;
21867 /* Not in the first two. Move two bytes forward. */
21868 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
21869 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
21871 emit_label (end_2_label);
21875 /* Avoid a branch when fixing up the byte position. */
21876 tmpreg = gen_lowpart (QImode, tmpreg);
21877 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
21878 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
21879 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
21880 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
21882 emit_label (end_0_label);
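/* A self-contained C rendering (illustrative sketch) of the zero-byte
   test that the word loop above relies on.  */
static inline int
has_zero_byte_sketch (unsigned int word)
{
  /* A byte's 0x80 bit survives the AND only if that byte was 0x00:
     the subtraction borrows through it while ~word keeps the bit set.  */
  return ((word - 0x01010101u) & ~word & 0x80808080u) != 0;
}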
21885 /* Expand strlen. */
21888 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
21890 rtx addr, scratch1, scratch2, scratch3, scratch4;
21892 /* The generic case of the strlen expander is long. Avoid expanding
21893 it unless TARGET_INLINE_ALL_STRINGOPS. */
21895 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
21896 && !TARGET_INLINE_ALL_STRINGOPS
21897 && !optimize_insn_for_size_p ()
21898 && (!CONST_INT_P (align) || INTVAL (align) < 4))
21901 addr = force_reg (Pmode, XEXP (src, 0));
21902 scratch1 = gen_reg_rtx (Pmode);
21904 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
21905 && !optimize_insn_for_size_p ())
21907 /* Well it seems that some optimizer does not combine a call like
21908 foo (strlen (bar), strlen (bar));
21909 when the move and the subtraction are done here. It does calculate
21910 the length just once when these instructions are done inside of
21911 output_strlen_unroll (). But since &bar[strlen (bar)] is often used
21912 and one fewer register is live for the lifetime of
21913 output_strlen_unroll (), this is better. */
21915 emit_move_insn (out, addr);
21917 ix86_expand_strlensi_unroll_1 (out, src, align);
21919 /* strlensi_unroll_1 returns the address of the zero at the end of
21920 the string, like memchr(), so compute the length by subtracting
21921 the start address. */
21922 emit_insn (ix86_gen_sub3 (out, out, addr));
21928 /* Can't use this if the user has appropriated eax, ecx, or edi. */
21929 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
21932 scratch2 = gen_reg_rtx (Pmode);
21933 scratch3 = gen_reg_rtx (Pmode);
21934 scratch4 = force_reg (Pmode, constm1_rtx);
21936 emit_move_insn (scratch3, addr);
21937 eoschar = force_reg (QImode, eoschar);
21939 src = replace_equiv_address_nv (src, scratch3);
21941 /* If .md starts supporting :P, this can be done in .md. */
21942 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
21943 scratch4), UNSPEC_SCAS);
21944 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
21945 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
21946 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
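/* The arithmetic of the scas-based path above, in C terms (illustrative
   sketch): ECX starts at -1, "repnz scasb" decrements it once per byte
   scanned, including the terminator, and the length is then ~ECX - 1.  */
static inline unsigned long
scas_strlen_sketch (const char *s)
{
  unsigned long ecx = (unsigned long) -1;

  do
    ecx--;			/* one decrement per byte scanned */
  while (*s++ != 0);
  return ~ecx - 1;		/* gen_one_cmpl2, then add -1 */
}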
21951 /* For a given symbol (function), construct code to compute the address
21952 of its PLT entry in the large x86-64 PIC model. */
21954 construct_plt_address (rtx symbol)
21956 rtx tmp = gen_reg_rtx (Pmode);
21957 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
21959 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
21960 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
21962 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
21963 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
21968 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
21970 rtx pop, bool sibcall)
21972 rtx use = NULL, call;
21974 if (pop == const0_rtx)
21976 gcc_assert (!TARGET_64BIT || !pop);
21978 if (TARGET_MACHO && !TARGET_64BIT)
21981 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
21982 fnaddr = machopic_indirect_call_target (fnaddr);
21987 /* Static functions and indirect calls don't need the pic register. */
21988 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
21989 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
21990 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
21991 use_reg (&use, pic_offset_table_rtx);
21994 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
21996 rtx al = gen_rtx_REG (QImode, AX_REG);
21997 emit_move_insn (al, callarg2);
21998 use_reg (&use, al);
22001 if (ix86_cmodel == CM_LARGE_PIC
22003 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
22004 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
22005 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
22007 ? !sibcall_insn_operand (XEXP (fnaddr, 0), Pmode)
22008 : !call_insn_operand (XEXP (fnaddr, 0), Pmode))
22010 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
22011 fnaddr = gen_rtx_MEM (QImode, fnaddr);
22014 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
22016 call = gen_rtx_SET (VOIDmode, retval, call);
22019 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
22020 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
22021 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
22023 if (TARGET_64BIT_MS_ABI
22024 && (!callarg2 || INTVAL (callarg2) != -2))
22026 /* We need to represent that the SI and DI registers are clobbered
22027 by SYSV calls. */
22028 static int clobbered_registers[] = {
22029 XMM6_REG, XMM7_REG, XMM8_REG,
22030 XMM9_REG, XMM10_REG, XMM11_REG,
22031 XMM12_REG, XMM13_REG, XMM14_REG,
22032 XMM15_REG, SI_REG, DI_REG
22035 rtx vec[ARRAY_SIZE (clobbered_registers) + 2];
22036 rtx unspec = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
22037 UNSPEC_MS_TO_SYSV_CALL);
22041 for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
22042 vec[i + 2] = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
22045 (SSE_REGNO_P (clobbered_registers[i])
22047 clobbered_registers[i]));
22049 call = gen_rtx_PARALLEL (VOIDmode,
22050 gen_rtvec_v (ARRAY_SIZE (clobbered_registers)
22054 /* Add UNSPEC_CALL_NEEDS_VZEROUPPER decoration. */
22055 if (TARGET_VZEROUPPER)
22060 if (cfun->machine->callee_pass_avx256_p)
22062 if (cfun->machine->callee_return_avx256_p)
22063 avx256 = callee_return_pass_avx256;
22065 avx256 = callee_pass_avx256;
22067 else if (cfun->machine->callee_return_avx256_p)
22068 avx256 = callee_return_avx256;
22070 avx256 = call_no_avx256;
22072 if (reload_completed)
22073 emit_insn (gen_avx_vzeroupper (GEN_INT (avx256)));
22076 unspec = gen_rtx_UNSPEC (VOIDmode,
22077 gen_rtvec (1, GEN_INT (avx256)),
22078 UNSPEC_CALL_NEEDS_VZEROUPPER);
22079 call = gen_rtx_PARALLEL (VOIDmode,
22080 gen_rtvec (2, call, unspec));
22084 call = emit_call_insn (call);
22086 CALL_INSN_FUNCTION_USAGE (call) = use;
22092 ix86_split_call_vzeroupper (rtx insn, rtx vzeroupper)
22094 rtx call = XVECEXP (PATTERN (insn), 0, 0);
22095 emit_insn (gen_avx_vzeroupper (vzeroupper));
22096 emit_call_insn (call);
22099 /* Output the assembly for a call instruction. */
22102 ix86_output_call_insn (rtx insn, rtx call_op)
22104 bool direct_p = constant_call_address_operand (call_op, Pmode);
22105 bool seh_nop_p = false;
22106 const char *xasm;
22108 if (SIBLING_CALL_P (insn))
22110 if (direct_p)
22111 xasm = "jmp\t%P0";
22112 /* SEH epilogue detection requires the indirect branch case
22113 to include REX.W. */
22114 else if (TARGET_SEH)
22115 xasm = "rex.W jmp %A0";
22119 output_asm_insn (xasm, &call_op);
22123 /* SEH unwinding can require an extra nop to be emitted in several
22124 circumstances. Determine if we have one of those. */
22129 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
22131 /* If we get to another real insn, we don't need the nop. */
22135 /* If we get to the epilogue note, prevent a catch region from
22136 being adjacent to the standard epilogue sequence. If non-
22137 call-exceptions, we'll have done this during epilogue emission. */
22138 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
22139 && !flag_non_call_exceptions
22140 && !can_throw_internal (insn))
22142 seh_nop_p = true;
22143 break;
22147 /* If we didn't find a real insn following the call, prevent the
22148 unwinder from looking into the next function. */
22149 if (i == NULL)
22150 seh_nop_p = true;
22153 if (direct_p)
22154 xasm = "call\t%P0";
22156 xasm = "call\t%A0";
22158 output_asm_insn (xasm, &call_op);
22160 if (seh_nop_p)
22161 return "nop";
22163 return "";
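/* Illustrative outputs: the direct form "call\t%P0" prints e.g.
   `call func', the indirect form "call\t%A0" prints e.g. `call *%rax',
   and the sibcall cases above print `jmp' instead of `call'.  */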
22166 /* Clear stack slot assignments remembered from previous functions.
22167 This is called from INIT_EXPANDERS once before RTL is emitted for each
22168 function. */
22170 static struct machine_function *
22171 ix86_init_machine_status (void)
22173 struct machine_function *f;
22175 f = ggc_alloc_cleared_machine_function ();
22176 f->use_fast_prologue_epilogue_nregs = -1;
22177 f->tls_descriptor_call_expanded_p = 0;
22178 f->call_abi = ix86_abi;
22180 return f;
22183 /* Return a MEM corresponding to a stack slot with mode MODE.
22184 Allocate a new slot if necessary.
22186 The RTL for a function can have several slots available: N is
22187 which slot to use. */
22190 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
22192 struct stack_local_entry *s;
22194 gcc_assert (n < MAX_386_STACK_LOCALS);
22196 /* Virtual slot is valid only before vregs are instantiated. */
22197 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
22199 for (s = ix86_stack_locals; s; s = s->next)
22200 if (s->mode == mode && s->n == n)
22201 return copy_rtx (s->rtl);
22203 s = ggc_alloc_stack_local_entry ();
22204 s->n = n;
22205 s->mode = mode;
22206 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
22208 s->next = ix86_stack_locals;
22209 ix86_stack_locals = s;
22213 /* Calculate the length of the memory address in the instruction
22214 encoding. Does not include the one-byte modrm, opcode, or prefix. */
22217 memory_address_length (rtx addr)
22219 struct ix86_address parts;
22220 rtx base, index, disp;
22224 if (GET_CODE (addr) == PRE_DEC
22225 || GET_CODE (addr) == POST_INC
22226 || GET_CODE (addr) == PRE_MODIFY
22227 || GET_CODE (addr) == POST_MODIFY)
22230 ok = ix86_decompose_address (addr, &parts);
22231 gcc_assert (ok);
22233 if (parts.base && GET_CODE (parts.base) == SUBREG)
22234 parts.base = SUBREG_REG (parts.base);
22235 if (parts.index && GET_CODE (parts.index) == SUBREG)
22236 parts.index = SUBREG_REG (parts.index);
22239 index = parts.index;
22240 disp = parts.disp;
22241 len = 0;
22243 /* Rule of thumb:
22244 - esp as the base always wants an index,
22245 - ebp as the base always wants a displacement,
22246 - r12 as the base always wants an index,
22247 - r13 as the base always wants a displacement. */
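/* Concrete examples of the rules above (the one-byte modrm itself is not
   counted by this function): (%eax) needs no extra bytes, (%esp) needs a
   SIB byte, (%ebp) needs a disp8 even for a zero displacement, and in
   64-bit mode (%r12) and (%r13) behave like (%rsp) and (%rbp).  */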
22249 /* Register Indirect. */
22250 if (base && !index && !disp)
22252 /* esp (for its index) and ebp (for its displacement) need
22253 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
22254 mode. */
22255 if (REG_P (addr)
22256 && (addr == arg_pointer_rtx
22257 || addr == frame_pointer_rtx
22258 || REGNO (addr) == SP_REG
22259 || REGNO (addr) == BP_REG
22260 || REGNO (addr) == R12_REG
22261 || REGNO (addr) == R13_REG))
22265 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
22266 is not disp32, but disp32(%rip), so for disp32
22267 SIB byte is needed, unless print_operand_address
22268 optimizes it into disp32(%rip) or (%rip) is implied by UNSPEC. */
22270 else if (disp && !base && !index)
22277 if (GET_CODE (disp) == CONST)
22278 symbol = XEXP (disp, 0);
22279 if (GET_CODE (symbol) == PLUS
22280 && CONST_INT_P (XEXP (symbol, 1)))
22281 symbol = XEXP (symbol, 0);
22283 if (GET_CODE (symbol) != LABEL_REF
22284 && (GET_CODE (symbol) != SYMBOL_REF
22285 || SYMBOL_REF_TLS_MODEL (symbol) != 0)
22286 && (GET_CODE (symbol) != UNSPEC
22287 || (XINT (symbol, 1) != UNSPEC_GOTPCREL
22288 && XINT (symbol, 1) != UNSPEC_PCREL
22289 && XINT (symbol, 1) != UNSPEC_GOTNTPOFF)))
22296 /* Find the length of the displacement constant. */
22299 if (base && satisfies_constraint_K (disp))
22304 /* ebp always wants a displacement. Similarly r13. */
22305 else if (base && REG_P (base)
22306 && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
22309 /* An index requires the two-byte modrm form.... */
22311 /* ...like esp (or r12), which always wants an index. */
22312 || base == arg_pointer_rtx
22313 || base == frame_pointer_rtx
22314 || (base && REG_P (base)
22315 && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
22316 len += 1;
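/* Worked examples (illustrative): `8(%ebp)' yields 1 (disp8), `(%esp)'
   yields 1 (SIB byte), `4(%esp,%eax,2)' yields 2 (SIB byte plus disp8),
   and a bare symbolic address yields 4 (disp32), plus one more byte in
   64-bit mode when a SIB byte is required as described above.  */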
22332 /* Compute default value for "length_immediate" attribute. When SHORTFORM
22333 is set, expect that the insn has an 8-bit immediate alternative. */
22335 ix86_attr_length_immediate_default (rtx insn, bool shortform)
22339 extract_insn_cached (insn);
22340 for (i = recog_data.n_operands - 1; i >= 0; --i)
22341 if (CONSTANT_P (recog_data.operand[i]))
22343 enum attr_mode mode = get_attr_mode (insn);
22346 if (shortform && CONST_INT_P (recog_data.operand[i]))
22348 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
22355 ival = trunc_int_for_mode (ival, HImode);
22358 ival = trunc_int_for_mode (ival, SImode);
22363 if (IN_RANGE (ival, -128, 127))
22380 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
22385 fatal_insn ("unknown insn mode", insn);
22388 return len;
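/* For example: `add $3, %eax' has an imm8 alternative, so with SHORTFORM
   the immediate counts as 1 byte; `mov $0x12345678, %eax' needs a full
   imm32, giving 4; DImode immediates also give 4 because of the 32-bit
   sign-extended encoding noted above.  */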
22390 /* Compute default value for "length_address" attribute. */
22392 ix86_attr_length_address_default (rtx insn)
22396 if (get_attr_type (insn) == TYPE_LEA)
22398 rtx set = PATTERN (insn), addr;
22400 if (GET_CODE (set) == PARALLEL)
22401 set = XVECEXP (set, 0, 0);
22403 gcc_assert (GET_CODE (set) == SET);
22405 addr = SET_SRC (set);
22406 if (TARGET_64BIT && get_attr_mode (insn) == MODE_SI)
22408 if (GET_CODE (addr) == ZERO_EXTEND)
22409 addr = XEXP (addr, 0);
22410 if (GET_CODE (addr) == SUBREG)
22411 addr = SUBREG_REG (addr);
22414 return memory_address_length (addr);
22417 extract_insn_cached (insn);
22418 for (i = recog_data.n_operands - 1; i >= 0; --i)
22419 if (MEM_P (recog_data.operand[i]))
22421 constrain_operands_cached (reload_completed);
22422 if (which_alternative != -1)
22424 const char *constraints = recog_data.constraints[i];
22425 int alt = which_alternative;
22427 while (*constraints == '=' || *constraints == '+')
22428 constraints++;
22429 while (alt-- > 0)
22430 while (*constraints++ != ',')
22431 ;
22432 /* Skip ignored operands. */
22433 if (*constraints == 'X')
22434 continue;
22436 return memory_address_length (XEXP (recog_data.operand[i], 0));
22439 return 0;
22441 /* Compute default value for "length_vex" attribute. It includes
22442 2 or 3 byte VEX prefix and 1 opcode byte. */
22445 ix86_attr_length_vex_default (rtx insn, bool has_0f_opcode, bool has_vex_w)
22449 /* Only the 0f opcode map can use the 2-byte VEX prefix, and the VEX W bit
22450 requires the 3-byte VEX prefix. */
22451 if (!has_0f_opcode || has_vex_w)
22452 return 3 + 1;
22454 /* We can always use the 2-byte VEX prefix in 32-bit mode. */
22455 if (!TARGET_64BIT)
22456 return 2 + 1;
22458 extract_insn_cached (insn);
22460 for (i = recog_data.n_operands - 1; i >= 0; --i)
22461 if (REG_P (recog_data.operand[i]))
22463 /* REX.W bit uses 3 byte VEX prefix. */
22464 if (GET_MODE (recog_data.operand[i]) == DImode
22465 && GENERAL_REG_P (recog_data.operand[i]))
22466 return 3 + 1;
22470 /* REX.X or REX.B bits use 3 byte VEX prefix. */
22471 if (MEM_P (recog_data.operand[i])
22472 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
22473 return 3 + 1;
22476 return 2 + 1;
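/* Examples (illustrative): in 32-bit mode every VEX insn measured here
   counts as the 2-byte (0xc5) prefix plus one opcode byte, i.e. 3; REX.W,
   a non-0f opcode map, or an extended (r8-r15/xmm8-xmm15) base or index
   register forces the 3-byte (0xc4) prefix, giving 4.  */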
22479 /* Return the maximum number of instructions a cpu can issue. */
22482 ix86_issue_rate (void)
22486 case PROCESSOR_PENTIUM:
22487 case PROCESSOR_ATOM:
22488 case PROCESSOR_K6:
22489 return 2;
22491 case PROCESSOR_PENTIUMPRO:
22492 case PROCESSOR_PENTIUM4:
22493 case PROCESSOR_CORE2_32:
22494 case PROCESSOR_CORE2_64:
22495 case PROCESSOR_COREI7_32:
22496 case PROCESSOR_COREI7_64:
22497 case PROCESSOR_ATHLON:
22498 case PROCESSOR_K8:
22499 case PROCESSOR_AMDFAM10:
22500 case PROCESSOR_NOCONA:
22501 case PROCESSOR_GENERIC32:
22502 case PROCESSOR_GENERIC64:
22503 case PROCESSOR_BDVER1:
22504 case PROCESSOR_BTVER1:
22505 return 3;
22507 default:
22508 return 1;
22512 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
22513 by DEP_INSN and nothing set by DEP_INSN. */
22516 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
22520 /* Simplify the test for uninteresting insns. */
22521 if (insn_type != TYPE_SETCC
22522 && insn_type != TYPE_ICMOV
22523 && insn_type != TYPE_FCMOV
22524 && insn_type != TYPE_IBR)
22527 if ((set = single_set (dep_insn)) != 0)
22529 set = SET_DEST (set);
22530 set2 = NULL_RTX;
22532 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
22533 && XVECLEN (PATTERN (dep_insn), 0) == 2
22534 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
22535 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
22537 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
22538 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
22543 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
22546 /* This test is true if the dependent insn reads the flags but
22547 not any other potentially set register. */
22548 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
22551 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
22557 /* Return true iff USE_INSN has a memory address with operands set by
22558 SET_INSN. */
22560 static bool
22561 ix86_agi_dependent (rtx set_insn, rtx use_insn)
22564 extract_insn_cached (use_insn);
22565 for (i = recog_data.n_operands - 1; i >= 0; --i)
22566 if (MEM_P (recog_data.operand[i]))
22568 rtx addr = XEXP (recog_data.operand[i], 0);
22569 return modified_in_p (addr, set_insn) != 0;
22571 return false;
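/* Classic Pentium AGI example: `addl $4, %ebx' immediately followed by
   `movl (%ebx), %eax' stalls because the load's address register is still
   being written; the PROCESSOR_PENTIUM case below charges one extra cycle
   for exactly this situation.  */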
22575 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
22577 enum attr_type insn_type, dep_insn_type;
22578 enum attr_memory memory;
22580 int dep_insn_code_number;
22582 /* Anti and output dependencies have zero cost on all CPUs. */
22583 if (REG_NOTE_KIND (link) != 0)
22584 return cost;
22586 dep_insn_code_number = recog_memoized (dep_insn);
22588 /* If we can't recognize the insns, we can't really do anything. */
22589 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
22590 return cost;
22592 insn_type = get_attr_type (insn);
22593 dep_insn_type = get_attr_type (dep_insn);
22597 case PROCESSOR_PENTIUM:
22598 /* Address Generation Interlock adds a cycle of latency. */
22599 if (insn_type == TYPE_LEA)
22601 rtx addr = PATTERN (insn);
22603 if (GET_CODE (addr) == PARALLEL)
22604 addr = XVECEXP (addr, 0, 0);
22606 gcc_assert (GET_CODE (addr) == SET);
22608 addr = SET_SRC (addr);
22609 if (modified_in_p (addr, dep_insn))
22610 cost += 1;
22612 else if (ix86_agi_dependent (dep_insn, insn))
22613 cost += 1;
22615 /* ??? Compares pair with jump/setcc. */
22616 if (ix86_flags_dependent (insn, dep_insn, insn_type))
22617 cost = 0;
22619 /* Floating point stores require value to be ready one cycle earlier. */
22620 if (insn_type == TYPE_FMOV
22621 && get_attr_memory (insn) == MEMORY_STORE
22622 && !ix86_agi_dependent (dep_insn, insn))
22623 cost += 1;
22624 break;
22626 case PROCESSOR_PENTIUMPRO:
22627 memory = get_attr_memory (insn);
22629 /* INT->FP conversion is expensive. */
22630 if (get_attr_fp_int_src (dep_insn))
22633 /* There is one cycle extra latency between an FP op and a store. */
22634 if (insn_type == TYPE_FMOV
22635 && (set = single_set (dep_insn)) != NULL_RTX
22636 && (set2 = single_set (insn)) != NULL_RTX
22637 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
22638 && MEM_P (SET_DEST (set2)))
22639 cost += 1;
22641 /* Show the ability of the reorder buffer to hide the latency of a load by
22642 executing it in parallel with the previous instruction, when the
22643 previous instruction is not needed to compute the address. */
22644 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
22645 && !ix86_agi_dependent (dep_insn, insn))
22647 /* Claim moves to take one cycle, as the core can issue one load
22648 at a time and the next load can start a cycle later. */
22649 if (dep_insn_type == TYPE_IMOV
22650 || dep_insn_type == TYPE_FMOV)
22651 cost = 1;
22652 else if (cost > 1)
22653 cost--;
22655 break;
22657 case PROCESSOR_K6:
22658 memory = get_attr_memory (insn);
22660 /* The esp dependency is resolved before the instruction is really
22662 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
22663 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
22664 return 1;
22666 /* INT->FP conversion is expensive. */
22667 if (get_attr_fp_int_src (dep_insn))
22670 /* Show the ability of the reorder buffer to hide the latency of a load by
22671 executing it in parallel with the previous instruction, when the
22672 previous instruction is not needed to compute the address. */
22673 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
22674 && !ix86_agi_dependent (dep_insn, insn))
22676 /* Claim moves to take one cycle, as the core can issue one load
22677 at a time and the next load can start a cycle later. */
22678 if (dep_insn_type == TYPE_IMOV
22679 || dep_insn_type == TYPE_FMOV)
22680 cost = 1;
22681 else if (cost > 2)
22682 cost -= 2;
22683 else
22684 cost = 1;
22686 break;
22688 case PROCESSOR_ATHLON:
22689 case PROCESSOR_K8:
22690 case PROCESSOR_AMDFAM10:
22691 case PROCESSOR_BDVER1:
22692 case PROCESSOR_BTVER1:
22693 case PROCESSOR_ATOM:
22694 case PROCESSOR_GENERIC32:
22695 case PROCESSOR_GENERIC64:
22696 memory = get_attr_memory (insn);
22698 /* Show the ability of the reorder buffer to hide the latency of a load by
22699 executing it in parallel with the previous instruction, when the
22700 previous instruction is not needed to compute the address. */
22701 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
22702 && !ix86_agi_dependent (dep_insn, insn))
22704 enum attr_unit unit = get_attr_unit (insn);
22707 /* Because of the difference between the length of integer and
22708 floating unit pipeline preparation stages, the memory operands
22709 for floating point are cheaper.
22711 ??? For Athlon the difference is most probably 2. */
22712 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
22713 loadcost = 3;
22714 else
22715 loadcost = TARGET_ATHLON ? 2 : 0;
22717 if (cost >= loadcost)
22718 cost -= loadcost;
22719 else
22720 cost = 0;
22723 break;
22725 default:
22726 break;
22729 return cost;
22730 /* How many alternative schedules to try. This should be as wide as the
22731 scheduling freedom in the DFA, but no wider. Making this value too
22732 large results in extra work for the scheduler. */
22735 ia32_multipass_dfa_lookahead (void)
22739 case PROCESSOR_PENTIUM:
22740 return 2;
22742 case PROCESSOR_PENTIUMPRO:
22743 case PROCESSOR_K6:
22744 return 1;
22746 case PROCESSOR_CORE2_32:
22747 case PROCESSOR_CORE2_64:
22748 case PROCESSOR_COREI7_32:
22749 case PROCESSOR_COREI7_64:
22750 /* Generally, we want haifa-sched:max_issue() to look ahead as far
22751 as the number of instructions that can be executed in a cycle, i.e.,
22752 issue_rate. I wonder why tuning for many CPUs does not do this. */
22753 return ix86_issue_rate ();
22755 default:
22756 return 0;
22762 /* Model decoder of Core 2/i7.
22763 Below hooks for multipass scheduling (see haifa-sched.c:max_issue)
22764 track the instruction fetch block boundaries and make sure that long
22765 (9+ bytes) instructions are assigned to D0. */
22767 /* Maximum length of an insn that can be handled by
22768 a secondary decoder unit. '8' for Core 2/i7. */
22769 static int core2i7_secondary_decoder_max_insn_size;
22771 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
22772 '16' for Core 2/i7. */
22773 static int core2i7_ifetch_block_size;
22775 /* Maximum number of instructions decoder can handle per cycle.
22776 '6' for Core 2/i7. */
22777 static int core2i7_ifetch_block_max_insns;
22779 typedef struct ix86_first_cycle_multipass_data_ *
22780 ix86_first_cycle_multipass_data_t;
22781 typedef const struct ix86_first_cycle_multipass_data_ *
22782 const_ix86_first_cycle_multipass_data_t;
22784 /* A variable to store target state across calls to max_issue within
22785 one cycle. */
22786 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
22787 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
22789 /* Initialize DATA. */
22791 core2i7_first_cycle_multipass_init (void *_data)
22793 ix86_first_cycle_multipass_data_t data
22794 = (ix86_first_cycle_multipass_data_t) _data;
22796 data->ifetch_block_len = 0;
22797 data->ifetch_block_n_insns = 0;
22798 data->ready_try_change = NULL;
22799 data->ready_try_change_size = 0;
22802 /* Advancing the cycle; reset ifetch block counts. */
22804 core2i7_dfa_post_advance_cycle (void)
22806 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
22808 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
22810 data->ifetch_block_len = 0;
22811 data->ifetch_block_n_insns = 0;
22814 static int min_insn_size (rtx);
22816 /* Filter out insns from ready_try that the core will not be able to issue
22817 on the current cycle due to decoder limitations. */
22819 core2i7_first_cycle_multipass_filter_ready_try
22820 (const_ix86_first_cycle_multipass_data_t data,
22821 char *ready_try, int n_ready, bool first_cycle_insn_p)
22828 if (ready_try[n_ready])
22831 insn = get_ready_element (n_ready);
22832 insn_size = min_insn_size (insn);
22834 if (/* If this insn is too long for a secondary decoder ... */
22835 (!first_cycle_insn_p
22836 && insn_size > core2i7_secondary_decoder_max_insn_size)
22837 /* ... or it would not fit into the ifetch block ... */
22838 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
22839 /* ... or the decoder is full already ... */
22840 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
22841 /* ... mask the insn out. */
22843 ready_try[n_ready] = 1;
22845 if (data->ready_try_change)
22846 SET_BIT (data->ready_try_change, n_ready);
22851 /* Prepare for a new round of multipass lookahead scheduling. */
22853 core2i7_first_cycle_multipass_begin (void *_data, char *ready_try, int n_ready,
22854 bool first_cycle_insn_p)
22856 ix86_first_cycle_multipass_data_t data
22857 = (ix86_first_cycle_multipass_data_t) _data;
22858 const_ix86_first_cycle_multipass_data_t prev_data
22859 = ix86_first_cycle_multipass_data;
22861 /* Restore the state from the end of the previous round. */
22862 data->ifetch_block_len = prev_data->ifetch_block_len;
22863 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
22865 /* Filter instructions that cannot be issued on current cycle due to
22866 decoder restrictions. */
22867 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
22868 first_cycle_insn_p);
22871 /* INSN is being issued in the current solution. Account for its impact on
22872 the decoder model. */
22874 core2i7_first_cycle_multipass_issue (void *_data, char *ready_try, int n_ready,
22875 rtx insn, const void *_prev_data)
22877 ix86_first_cycle_multipass_data_t data
22878 = (ix86_first_cycle_multipass_data_t) _data;
22879 const_ix86_first_cycle_multipass_data_t prev_data
22880 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
22882 int insn_size = min_insn_size (insn);
22884 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
22885 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
22886 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
22887 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
22889 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
22890 if (!data->ready_try_change)
22892 data->ready_try_change = sbitmap_alloc (n_ready);
22893 data->ready_try_change_size = n_ready;
22895 else if (data->ready_try_change_size < n_ready)
22897 data->ready_try_change = sbitmap_resize (data->ready_try_change,
22899 data->ready_try_change_size = n_ready;
22901 sbitmap_zero (data->ready_try_change);
22903 /* Filter out insns from ready_try that the core will not be able to issue
22904 on the current cycle due to decoder limitations. */
22905 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
22909 /* Revert the effect on ready_try. */
22911 core2i7_first_cycle_multipass_backtrack (const void *_data,
22913 int n_ready ATTRIBUTE_UNUSED)
22915 const_ix86_first_cycle_multipass_data_t data
22916 = (const_ix86_first_cycle_multipass_data_t) _data;
22917 unsigned int i = 0;
22918 sbitmap_iterator sbi;
22920 gcc_assert (sbitmap_last_set_bit (data->ready_try_change) < n_ready);
22921 EXECUTE_IF_SET_IN_SBITMAP (data->ready_try_change, 0, i, sbi)
22927 /* Save the result of multipass lookahead scheduling for the next round. */
22929 core2i7_first_cycle_multipass_end (const void *_data)
22931 const_ix86_first_cycle_multipass_data_t data
22932 = (const_ix86_first_cycle_multipass_data_t) _data;
22933 ix86_first_cycle_multipass_data_t next_data
22934 = ix86_first_cycle_multipass_data;
22938 next_data->ifetch_block_len = data->ifetch_block_len;
22939 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
22943 /* Deallocate target data. */
22945 core2i7_first_cycle_multipass_fini (void *_data)
22947 ix86_first_cycle_multipass_data_t data
22948 = (ix86_first_cycle_multipass_data_t) _data;
22950 if (data->ready_try_change)
22952 sbitmap_free (data->ready_try_change);
22953 data->ready_try_change = NULL;
22954 data->ready_try_change_size = 0;
22958 /* Prepare for scheduling pass. */
22960 ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
22961 int verbose ATTRIBUTE_UNUSED,
22962 int max_uid ATTRIBUTE_UNUSED)
22964 /* Install scheduling hooks for current CPU. Some of these hooks are used
22965 in time-critical parts of the scheduler, so we only set them up when
22966 they are actually used. */
22969 case PROCESSOR_CORE2_32:
22970 case PROCESSOR_CORE2_64:
22971 case PROCESSOR_COREI7_32:
22972 case PROCESSOR_COREI7_64:
22973 targetm.sched.dfa_post_advance_cycle
22974 = core2i7_dfa_post_advance_cycle;
22975 targetm.sched.first_cycle_multipass_init
22976 = core2i7_first_cycle_multipass_init;
22977 targetm.sched.first_cycle_multipass_begin
22978 = core2i7_first_cycle_multipass_begin;
22979 targetm.sched.first_cycle_multipass_issue
22980 = core2i7_first_cycle_multipass_issue;
22981 targetm.sched.first_cycle_multipass_backtrack
22982 = core2i7_first_cycle_multipass_backtrack;
22983 targetm.sched.first_cycle_multipass_end
22984 = core2i7_first_cycle_multipass_end;
22985 targetm.sched.first_cycle_multipass_fini
22986 = core2i7_first_cycle_multipass_fini;
22988 /* Set decoder parameters. */
22989 core2i7_secondary_decoder_max_insn_size = 8;
22990 core2i7_ifetch_block_size = 16;
22991 core2i7_ifetch_block_max_insns = 6;
22995 targetm.sched.dfa_post_advance_cycle = NULL;
22996 targetm.sched.first_cycle_multipass_init = NULL;
22997 targetm.sched.first_cycle_multipass_begin = NULL;
22998 targetm.sched.first_cycle_multipass_issue = NULL;
22999 targetm.sched.first_cycle_multipass_backtrack = NULL;
23000 targetm.sched.first_cycle_multipass_end = NULL;
23001 targetm.sched.first_cycle_multipass_fini = NULL;
23007 /* Compute the alignment given to a constant that is being placed in memory.
23008 EXP is the constant and ALIGN is the alignment that the object would
23009 ordinarily have.
23010 The value of this function is used instead of that alignment to align
23011 the object. */
23014 ix86_constant_alignment (tree exp, int align)
23016 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
23017 || TREE_CODE (exp) == INTEGER_CST)
23019 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
23021 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
23024 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
23025 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
23026 return BITS_PER_WORD;
23028 return align;
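/* For example, a DFmode constant in the pool is aligned to 64 bits even
   with -m32, and a long string constant is word-aligned so that word-sized
   block moves can be used to copy it.  */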
23031 /* Compute the alignment for a static variable.
23032 TYPE is the data type, and ALIGN is the alignment that
23033 the object would ordinarily have. The value of this function is used
23034 instead of that alignment to align the object. */
23037 ix86_data_alignment (tree type, int align)
23039 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
23041 if (AGGREGATE_TYPE_P (type)
23042 && TYPE_SIZE (type)
23043 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
23044 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
23045 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
23046 && align < max_align)
23049 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
23050 to a 16-byte boundary. */
23053 if (AGGREGATE_TYPE_P (type)
23054 && TYPE_SIZE (type)
23055 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
23056 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
23057 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
23061 if (TREE_CODE (type) == ARRAY_TYPE)
23063 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
23065 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
23068 else if (TREE_CODE (type) == COMPLEX_TYPE)
23071 if (TYPE_MODE (type) == DCmode && align < 64)
23073 if ((TYPE_MODE (type) == XCmode
23074 || TYPE_MODE (type) == TCmode) && align < 128)
23077 else if ((TREE_CODE (type) == RECORD_TYPE
23078 || TREE_CODE (type) == UNION_TYPE
23079 || TREE_CODE (type) == QUAL_UNION_TYPE)
23080 && TYPE_FIELDS (type))
23082 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
23084 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
23087 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
23088 || TREE_CODE (type) == INTEGER_TYPE)
23090 if (TYPE_MODE (type) == DFmode && align < 64)
23092 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
23093 return 128;
23096 return align;
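/* For example (illustrative): `static double d' is aligned to 64 bits; on
   x86-64 a 16-byte static array gets 128-bit alignment per the ABI rule
   above; and, unless optimizing for size, sufficiently large aggregates
   may be aligned up to 256 bits to help vector accesses.  */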
23099 /* Compute the alignment for a local variable or a stack slot. EXP is
23100 the data type or decl itself, MODE is the widest mode available and
23101 ALIGN is the alignment that the object would ordinarily have. The
23102 value of this macro is used instead of that alignment to align the
23103 object. */
23106 ix86_local_alignment (tree exp, enum machine_mode mode,
23107 unsigned int align)
23111 if (exp && DECL_P (exp))
23113 type = TREE_TYPE (exp);
23122 /* Don't do dynamic stack realignment for long long objects with
23123 -mpreferred-stack-boundary=2. */
23124 if (!TARGET_64BIT
23125 && align == 64
23126 && ix86_preferred_stack_boundary < 64
23127 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
23128 && (!type || !TYPE_USER_ALIGN (type))
23129 && (!decl || !DECL_USER_ALIGN (decl)))
23130 align = 32;
23132 /* If TYPE is NULL, we are allocating a stack slot for caller-save
23133 register in MODE. We will return the largest alignment of XF
23134 and DF. */
23135 if (!type)
23137 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
23138 align = GET_MODE_ALIGNMENT (DFmode);
23139 return align;
23142 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
23143 to a 16-byte boundary. The exact wording is:
23145 An array uses the same alignment as its elements, except that a local or
23146 global array variable of length at least 16 bytes or
23147 a C99 variable-length array variable always has alignment of at least 16 bytes.
23149 This was added to allow use of aligned SSE instructions on arrays. This
23150 rule is meant for static storage (where the compiler cannot do the
23151 analysis by itself). We follow it for automatic variables only when
23152 convenient. We fully control everything in the function being compiled,
23153 and functions from other units cannot rely on the alignment.
23155 Exclude the va_list type. It is the common case of a local array where
23156 we cannot benefit from the alignment. */
23157 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
23160 if (AGGREGATE_TYPE_P (type)
23161 && (va_list_type_node == NULL_TREE
23162 || (TYPE_MAIN_VARIANT (type)
23163 != TYPE_MAIN_VARIANT (va_list_type_node)))
23164 && TYPE_SIZE (type)
23165 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
23166 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
23167 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
23170 if (TREE_CODE (type) == ARRAY_TYPE)
23172 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
23174 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
23177 else if (TREE_CODE (type) == COMPLEX_TYPE)
23179 if (TYPE_MODE (type) == DCmode && align < 64)
23181 if ((TYPE_MODE (type) == XCmode
23182 || TYPE_MODE (type) == TCmode) && align < 128)
23185 else if ((TREE_CODE (type) == RECORD_TYPE
23186 || TREE_CODE (type) == UNION_TYPE
23187 || TREE_CODE (type) == QUAL_UNION_TYPE)
23188 && TYPE_FIELDS (type))
23190 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
23192 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
23195 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
23196 || TREE_CODE (type) == INTEGER_TYPE)
23199 if (TYPE_MODE (type) == DFmode && align < 64)
23201 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
23202 return 128;
23205 return align;
23207 /* Compute the minimum required alignment for dynamic stack realignment
23208 purposes for a local variable, parameter or a stack slot. EXP is
23209 the data type or decl itself, MODE is its mode and ALIGN is the
23210 alignment that the object would ordinarily have. */
23213 ix86_minimum_alignment (tree exp, enum machine_mode mode,
23214 unsigned int align)
23218 if (exp && DECL_P (exp))
23220 type = TREE_TYPE (exp);
23229 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
23230 return align;
23232 /* Don't do dynamic stack realignment for long long objects with
23233 -mpreferred-stack-boundary=2. */
23234 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
23235 && (!type || !TYPE_USER_ALIGN (type))
23236 && (!decl || !DECL_USER_ALIGN (decl)))
23237 return 64;
23239 return align;
23242 /* Find a location for the static chain incoming to a nested function.
23243 This is a register, unless all free registers are used by arguments. */
23246 ix86_static_chain (const_tree fndecl, bool incoming_p)
23250 if (!DECL_STATIC_CHAIN (fndecl))
23251 return NULL;
23253 if (TARGET_64BIT)
23255 /* We always use R10 in 64-bit mode. */
23256 regno = R10_REG;
23263 /* By default in 32-bit mode we use ECX to pass the static chain. */
23266 fntype = TREE_TYPE (fndecl);
23267 ccvt = ix86_get_callcvt (fntype);
23268 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) != 0)
23270 /* Fastcall functions use ecx/edx for arguments, which leaves
23271 us with EAX for the static chain.
23272 Thiscall functions use ecx for arguments, which also
23273 leaves us with EAX for the static chain. */
23276 else if (ix86_function_regparm (fntype, fndecl) == 3)
23278 /* For regparm 3, we have no free call-clobbered registers in
23279 which to store the static chain. In order to implement this,
23280 we have the trampoline push the static chain to the stack.
23281 However, we can't push a value below the return address when
23282 we call the nested function directly, so we have to use an
23283 alternate entry point. For this we use ESI, and have the
23284 alternate entry point push ESI, so that things appear the
23285 same once we're executing the nested function. */
23288 if (fndecl == current_function_decl)
23289 ix86_static_chain_on_stack = true;
23290 return gen_frame_mem (SImode,
23291 plus_constant (arg_pointer_rtx, -8));
23297 return gen_rtx_REG (Pmode, regno);
23300 /* Emit RTL insns to initialize the variable parts of a trampoline.
23301 FNDECL is the decl of the target address; M_TRAMP is a MEM for
23302 the trampoline, and CHAIN_VALUE is an RTX for the static chain
23303 to be passed to the target function. */
23306 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
23310 fnaddr = XEXP (DECL_RTL (fndecl), 0);
23317 /* Depending on the static chain location, either load a register
23318 with a constant, or push the constant to the stack. All of the
23319 instructions are the same size. */
23320 chain = ix86_static_chain (fndecl, true);
23323 if (REGNO (chain) == CX_REG)
23324 opcode = 0xb9;
23325 else if (REGNO (chain) == AX_REG)
23326 opcode = 0xb8;
23327 else
23328 gcc_unreachable ();
23333 mem = adjust_address (m_tramp, QImode, 0);
23334 emit_move_insn (mem, gen_int_mode (opcode, QImode));
23336 mem = adjust_address (m_tramp, SImode, 1);
23337 emit_move_insn (mem, chain_value);
23339 /* Compute offset from the end of the jmp to the target function.
23340 In the case in which the trampoline stores the static chain on
23341 the stack, we need to skip the first insn which pushes the
23342 (call-saved) register static chain; this push is 1 byte. */
23343 disp = expand_binop (SImode, sub_optab, fnaddr,
23344 plus_constant (XEXP (m_tramp, 0),
23345 MEM_P (chain) ? 9 : 10),
23346 NULL_RTX, 1, OPTAB_DIRECT);
23348 mem = adjust_address (m_tramp, QImode, 5);
23349 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
23351 mem = adjust_address (m_tramp, SImode, 6);
23352 emit_move_insn (mem, disp);
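/* Resulting 32-bit trampoline layout:
       byte 0: 0xb9 (mov $chain, %ecx) or 0xb8 (mov $chain, %eax), or
               presumably 0x68 (push imm32) when the chain goes on the stack;
       bytes 1-4: the static chain value;
       byte 5: 0xe9 (jmp rel32);
       bytes 6-9: displacement to the target function.  */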
23358 /* Load the function address to r11. Try to load address using
23359 the shorter movl instead of movabs. We may want to support
23360 movq for kernel mode, but the kernel does not use trampolines at
23361 the moment. */
23362 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
23364 fnaddr = copy_to_mode_reg (DImode, fnaddr);
23366 mem = adjust_address (m_tramp, HImode, offset);
23367 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
23369 mem = adjust_address (m_tramp, SImode, offset + 2);
23370 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
23375 mem = adjust_address (m_tramp, HImode, offset);
23376 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
23378 mem = adjust_address (m_tramp, DImode, offset + 2);
23379 emit_move_insn (mem, fnaddr);
23383 /* Load static chain using movabs to r10. */
23384 mem = adjust_address (m_tramp, HImode, offset);
23385 emit_move_insn (mem, gen_int_mode (0xba49, HImode));
23387 mem = adjust_address (m_tramp, DImode, offset + 2);
23388 emit_move_insn (mem, chain_value);
23391 /* Jump to r11; the last (unused) byte is a nop, only there to
23392 pad the write out to a single 32-bit store. */
23393 mem = adjust_address (m_tramp, SImode, offset);
23394 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
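/* Byte sequences stored above, in execution order: 41 bb imm32
   (movl $fn, %r11d) or 49 bb imm64 (movabs $fn, %r11), then 49 ba imm64
   (movabs $chain, %r10), then 49 ff e3 (rex.W jmp *%r11) padded with a
   0x90 nop so the final write is a full 32-bit store.  */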
23397 gcc_assert (offset <= TRAMPOLINE_SIZE);
23400 #ifdef ENABLE_EXECUTE_STACK
23401 #ifdef CHECK_EXECUTE_STACK_ENABLED
23402 if (CHECK_EXECUTE_STACK_ENABLED)
23404 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
23405 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
23409 /* The following file contains several enumerations and data structures
23410 built from the definitions in i386-builtin-types.def. */
23412 #include "i386-builtin-types.inc"
23414 /* Table for the ix86 builtin non-function types. */
23415 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
23417 /* Retrieve an element from the above table, building some of
23418 the types lazily. */
23421 ix86_get_builtin_type (enum ix86_builtin_type tcode)
23423 unsigned int index;
23426 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
23428 type = ix86_builtin_type_tab[(int) tcode];
23429 if (type)
23430 return type;
23432 gcc_assert (tcode > IX86_BT_LAST_PRIM);
23433 if (tcode <= IX86_BT_LAST_VECT)
23435 enum machine_mode mode;
23437 index = tcode - IX86_BT_LAST_PRIM - 1;
23438 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
23439 mode = ix86_builtin_type_vect_mode[index];
23441 type = build_vector_type_for_mode (itype, mode);
23447 index = tcode - IX86_BT_LAST_VECT - 1;
23448 if (tcode <= IX86_BT_LAST_PTR)
23449 quals = TYPE_UNQUALIFIED;
23451 quals = TYPE_QUAL_CONST;
23453 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
23454 if (quals != TYPE_UNQUALIFIED)
23455 itype = build_qualified_type (itype, quals);
23457 type = build_pointer_type (itype);
23460 ix86_builtin_type_tab[(int) tcode] = type;
23461 return type;
23464 /* Table for the ix86 builtin function types. */
23465 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
23467 /* Retrieve an element from the above table, building some of
23468 the types lazily. */
23471 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
23475 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
23477 type = ix86_builtin_func_type_tab[(int) tcode];
23478 if (type)
23479 return type;
23481 if (tcode <= IX86_BT_LAST_FUNC)
23483 unsigned start = ix86_builtin_func_start[(int) tcode];
23484 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
23485 tree rtype, atype, args = void_list_node;
23488 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
23489 for (i = after - 1; i > start; --i)
23491 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
23492 args = tree_cons (NULL, atype, args);
23495 type = build_function_type (rtype, args);
23499 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
23500 enum ix86_builtin_func_type icode;
23502 icode = ix86_builtin_func_alias_base[index];
23503 type = ix86_get_builtin_func_type (icode);
23506 ix86_builtin_func_type_tab[(int) tcode] = type;
23507 return type;
23511 /* Codes for all the SSE/MMX builtins. */
23512 enum ix86_builtins
23513 {
23514 IX86_BUILTIN_ADDPS,
23515 IX86_BUILTIN_ADDSS,
23516 IX86_BUILTIN_DIVPS,
23517 IX86_BUILTIN_DIVSS,
23518 IX86_BUILTIN_MULPS,
23519 IX86_BUILTIN_MULSS,
23520 IX86_BUILTIN_SUBPS,
23521 IX86_BUILTIN_SUBSS,
23523 IX86_BUILTIN_CMPEQPS,
23524 IX86_BUILTIN_CMPLTPS,
23525 IX86_BUILTIN_CMPLEPS,
23526 IX86_BUILTIN_CMPGTPS,
23527 IX86_BUILTIN_CMPGEPS,
23528 IX86_BUILTIN_CMPNEQPS,
23529 IX86_BUILTIN_CMPNLTPS,
23530 IX86_BUILTIN_CMPNLEPS,
23531 IX86_BUILTIN_CMPNGTPS,
23532 IX86_BUILTIN_CMPNGEPS,
23533 IX86_BUILTIN_CMPORDPS,
23534 IX86_BUILTIN_CMPUNORDPS,
23535 IX86_BUILTIN_CMPEQSS,
23536 IX86_BUILTIN_CMPLTSS,
23537 IX86_BUILTIN_CMPLESS,
23538 IX86_BUILTIN_CMPNEQSS,
23539 IX86_BUILTIN_CMPNLTSS,
23540 IX86_BUILTIN_CMPNLESS,
23541 IX86_BUILTIN_CMPNGTSS,
23542 IX86_BUILTIN_CMPNGESS,
23543 IX86_BUILTIN_CMPORDSS,
23544 IX86_BUILTIN_CMPUNORDSS,
23546 IX86_BUILTIN_COMIEQSS,
23547 IX86_BUILTIN_COMILTSS,
23548 IX86_BUILTIN_COMILESS,
23549 IX86_BUILTIN_COMIGTSS,
23550 IX86_BUILTIN_COMIGESS,
23551 IX86_BUILTIN_COMINEQSS,
23552 IX86_BUILTIN_UCOMIEQSS,
23553 IX86_BUILTIN_UCOMILTSS,
23554 IX86_BUILTIN_UCOMILESS,
23555 IX86_BUILTIN_UCOMIGTSS,
23556 IX86_BUILTIN_UCOMIGESS,
23557 IX86_BUILTIN_UCOMINEQSS,
23559 IX86_BUILTIN_CVTPI2PS,
23560 IX86_BUILTIN_CVTPS2PI,
23561 IX86_BUILTIN_CVTSI2SS,
23562 IX86_BUILTIN_CVTSI642SS,
23563 IX86_BUILTIN_CVTSS2SI,
23564 IX86_BUILTIN_CVTSS2SI64,
23565 IX86_BUILTIN_CVTTPS2PI,
23566 IX86_BUILTIN_CVTTSS2SI,
23567 IX86_BUILTIN_CVTTSS2SI64,
23569 IX86_BUILTIN_MAXPS,
23570 IX86_BUILTIN_MAXSS,
23571 IX86_BUILTIN_MINPS,
23572 IX86_BUILTIN_MINSS,
23574 IX86_BUILTIN_LOADUPS,
23575 IX86_BUILTIN_STOREUPS,
23576 IX86_BUILTIN_MOVSS,
23578 IX86_BUILTIN_MOVHLPS,
23579 IX86_BUILTIN_MOVLHPS,
23580 IX86_BUILTIN_LOADHPS,
23581 IX86_BUILTIN_LOADLPS,
23582 IX86_BUILTIN_STOREHPS,
23583 IX86_BUILTIN_STORELPS,
23585 IX86_BUILTIN_MASKMOVQ,
23586 IX86_BUILTIN_MOVMSKPS,
23587 IX86_BUILTIN_PMOVMSKB,
23589 IX86_BUILTIN_MOVNTPS,
23590 IX86_BUILTIN_MOVNTQ,
23592 IX86_BUILTIN_LOADDQU,
23593 IX86_BUILTIN_STOREDQU,
23595 IX86_BUILTIN_PACKSSWB,
23596 IX86_BUILTIN_PACKSSDW,
23597 IX86_BUILTIN_PACKUSWB,
23599 IX86_BUILTIN_PADDB,
23600 IX86_BUILTIN_PADDW,
23601 IX86_BUILTIN_PADDD,
23602 IX86_BUILTIN_PADDQ,
23603 IX86_BUILTIN_PADDSB,
23604 IX86_BUILTIN_PADDSW,
23605 IX86_BUILTIN_PADDUSB,
23606 IX86_BUILTIN_PADDUSW,
23607 IX86_BUILTIN_PSUBB,
23608 IX86_BUILTIN_PSUBW,
23609 IX86_BUILTIN_PSUBD,
23610 IX86_BUILTIN_PSUBQ,
23611 IX86_BUILTIN_PSUBSB,
23612 IX86_BUILTIN_PSUBSW,
23613 IX86_BUILTIN_PSUBUSB,
23614 IX86_BUILTIN_PSUBUSW,
23617 IX86_BUILTIN_PANDN,
23621 IX86_BUILTIN_PAVGB,
23622 IX86_BUILTIN_PAVGW,
23624 IX86_BUILTIN_PCMPEQB,
23625 IX86_BUILTIN_PCMPEQW,
23626 IX86_BUILTIN_PCMPEQD,
23627 IX86_BUILTIN_PCMPGTB,
23628 IX86_BUILTIN_PCMPGTW,
23629 IX86_BUILTIN_PCMPGTD,
23631 IX86_BUILTIN_PMADDWD,
23633 IX86_BUILTIN_PMAXSW,
23634 IX86_BUILTIN_PMAXUB,
23635 IX86_BUILTIN_PMINSW,
23636 IX86_BUILTIN_PMINUB,
23638 IX86_BUILTIN_PMULHUW,
23639 IX86_BUILTIN_PMULHW,
23640 IX86_BUILTIN_PMULLW,
23642 IX86_BUILTIN_PSADBW,
23643 IX86_BUILTIN_PSHUFW,
23645 IX86_BUILTIN_PSLLW,
23646 IX86_BUILTIN_PSLLD,
23647 IX86_BUILTIN_PSLLQ,
23648 IX86_BUILTIN_PSRAW,
23649 IX86_BUILTIN_PSRAD,
23650 IX86_BUILTIN_PSRLW,
23651 IX86_BUILTIN_PSRLD,
23652 IX86_BUILTIN_PSRLQ,
23653 IX86_BUILTIN_PSLLWI,
23654 IX86_BUILTIN_PSLLDI,
23655 IX86_BUILTIN_PSLLQI,
23656 IX86_BUILTIN_PSRAWI,
23657 IX86_BUILTIN_PSRADI,
23658 IX86_BUILTIN_PSRLWI,
23659 IX86_BUILTIN_PSRLDI,
23660 IX86_BUILTIN_PSRLQI,
23662 IX86_BUILTIN_PUNPCKHBW,
23663 IX86_BUILTIN_PUNPCKHWD,
23664 IX86_BUILTIN_PUNPCKHDQ,
23665 IX86_BUILTIN_PUNPCKLBW,
23666 IX86_BUILTIN_PUNPCKLWD,
23667 IX86_BUILTIN_PUNPCKLDQ,
23669 IX86_BUILTIN_SHUFPS,
23671 IX86_BUILTIN_RCPPS,
23672 IX86_BUILTIN_RCPSS,
23673 IX86_BUILTIN_RSQRTPS,
23674 IX86_BUILTIN_RSQRTPS_NR,
23675 IX86_BUILTIN_RSQRTSS,
23676 IX86_BUILTIN_RSQRTF,
23677 IX86_BUILTIN_SQRTPS,
23678 IX86_BUILTIN_SQRTPS_NR,
23679 IX86_BUILTIN_SQRTSS,
23681 IX86_BUILTIN_UNPCKHPS,
23682 IX86_BUILTIN_UNPCKLPS,
23684 IX86_BUILTIN_ANDPS,
23685 IX86_BUILTIN_ANDNPS,
23687 IX86_BUILTIN_XORPS,
23690 IX86_BUILTIN_LDMXCSR,
23691 IX86_BUILTIN_STMXCSR,
23692 IX86_BUILTIN_SFENCE,
23694 /* 3DNow! Original */
23695 IX86_BUILTIN_FEMMS,
23696 IX86_BUILTIN_PAVGUSB,
23697 IX86_BUILTIN_PF2ID,
23698 IX86_BUILTIN_PFACC,
23699 IX86_BUILTIN_PFADD,
23700 IX86_BUILTIN_PFCMPEQ,
23701 IX86_BUILTIN_PFCMPGE,
23702 IX86_BUILTIN_PFCMPGT,
23703 IX86_BUILTIN_PFMAX,
23704 IX86_BUILTIN_PFMIN,
23705 IX86_BUILTIN_PFMUL,
23706 IX86_BUILTIN_PFRCP,
23707 IX86_BUILTIN_PFRCPIT1,
23708 IX86_BUILTIN_PFRCPIT2,
23709 IX86_BUILTIN_PFRSQIT1,
23710 IX86_BUILTIN_PFRSQRT,
23711 IX86_BUILTIN_PFSUB,
23712 IX86_BUILTIN_PFSUBR,
23713 IX86_BUILTIN_PI2FD,
23714 IX86_BUILTIN_PMULHRW,
23716 /* 3DNow! Athlon Extensions */
23717 IX86_BUILTIN_PF2IW,
23718 IX86_BUILTIN_PFNACC,
23719 IX86_BUILTIN_PFPNACC,
23720 IX86_BUILTIN_PI2FW,
23721 IX86_BUILTIN_PSWAPDSI,
23722 IX86_BUILTIN_PSWAPDSF,
23725 IX86_BUILTIN_ADDPD,
23726 IX86_BUILTIN_ADDSD,
23727 IX86_BUILTIN_DIVPD,
23728 IX86_BUILTIN_DIVSD,
23729 IX86_BUILTIN_MULPD,
23730 IX86_BUILTIN_MULSD,
23731 IX86_BUILTIN_SUBPD,
23732 IX86_BUILTIN_SUBSD,
23734 IX86_BUILTIN_CMPEQPD,
23735 IX86_BUILTIN_CMPLTPD,
23736 IX86_BUILTIN_CMPLEPD,
23737 IX86_BUILTIN_CMPGTPD,
23738 IX86_BUILTIN_CMPGEPD,
23739 IX86_BUILTIN_CMPNEQPD,
23740 IX86_BUILTIN_CMPNLTPD,
23741 IX86_BUILTIN_CMPNLEPD,
23742 IX86_BUILTIN_CMPNGTPD,
23743 IX86_BUILTIN_CMPNGEPD,
23744 IX86_BUILTIN_CMPORDPD,
23745 IX86_BUILTIN_CMPUNORDPD,
23746 IX86_BUILTIN_CMPEQSD,
23747 IX86_BUILTIN_CMPLTSD,
23748 IX86_BUILTIN_CMPLESD,
23749 IX86_BUILTIN_CMPNEQSD,
23750 IX86_BUILTIN_CMPNLTSD,
23751 IX86_BUILTIN_CMPNLESD,
23752 IX86_BUILTIN_CMPORDSD,
23753 IX86_BUILTIN_CMPUNORDSD,
23755 IX86_BUILTIN_COMIEQSD,
23756 IX86_BUILTIN_COMILTSD,
23757 IX86_BUILTIN_COMILESD,
23758 IX86_BUILTIN_COMIGTSD,
23759 IX86_BUILTIN_COMIGESD,
23760 IX86_BUILTIN_COMINEQSD,
23761 IX86_BUILTIN_UCOMIEQSD,
23762 IX86_BUILTIN_UCOMILTSD,
23763 IX86_BUILTIN_UCOMILESD,
23764 IX86_BUILTIN_UCOMIGTSD,
23765 IX86_BUILTIN_UCOMIGESD,
23766 IX86_BUILTIN_UCOMINEQSD,
23768 IX86_BUILTIN_MAXPD,
23769 IX86_BUILTIN_MAXSD,
23770 IX86_BUILTIN_MINPD,
23771 IX86_BUILTIN_MINSD,
23773 IX86_BUILTIN_ANDPD,
23774 IX86_BUILTIN_ANDNPD,
23776 IX86_BUILTIN_XORPD,
23778 IX86_BUILTIN_SQRTPD,
23779 IX86_BUILTIN_SQRTSD,
23781 IX86_BUILTIN_UNPCKHPD,
23782 IX86_BUILTIN_UNPCKLPD,
23784 IX86_BUILTIN_SHUFPD,
23786 IX86_BUILTIN_LOADUPD,
23787 IX86_BUILTIN_STOREUPD,
23788 IX86_BUILTIN_MOVSD,
23790 IX86_BUILTIN_LOADHPD,
23791 IX86_BUILTIN_LOADLPD,
23793 IX86_BUILTIN_CVTDQ2PD,
23794 IX86_BUILTIN_CVTDQ2PS,
23796 IX86_BUILTIN_CVTPD2DQ,
23797 IX86_BUILTIN_CVTPD2PI,
23798 IX86_BUILTIN_CVTPD2PS,
23799 IX86_BUILTIN_CVTTPD2DQ,
23800 IX86_BUILTIN_CVTTPD2PI,
23802 IX86_BUILTIN_CVTPI2PD,
23803 IX86_BUILTIN_CVTSI2SD,
23804 IX86_BUILTIN_CVTSI642SD,
23806 IX86_BUILTIN_CVTSD2SI,
23807 IX86_BUILTIN_CVTSD2SI64,
23808 IX86_BUILTIN_CVTSD2SS,
23809 IX86_BUILTIN_CVTSS2SD,
23810 IX86_BUILTIN_CVTTSD2SI,
23811 IX86_BUILTIN_CVTTSD2SI64,
23813 IX86_BUILTIN_CVTPS2DQ,
23814 IX86_BUILTIN_CVTPS2PD,
23815 IX86_BUILTIN_CVTTPS2DQ,
23817 IX86_BUILTIN_MOVNTI,
23818 IX86_BUILTIN_MOVNTPD,
23819 IX86_BUILTIN_MOVNTDQ,
23821 IX86_BUILTIN_MOVQ128,
23824 IX86_BUILTIN_MASKMOVDQU,
23825 IX86_BUILTIN_MOVMSKPD,
23826 IX86_BUILTIN_PMOVMSKB128,
23828 IX86_BUILTIN_PACKSSWB128,
23829 IX86_BUILTIN_PACKSSDW128,
23830 IX86_BUILTIN_PACKUSWB128,
23832 IX86_BUILTIN_PADDB128,
23833 IX86_BUILTIN_PADDW128,
23834 IX86_BUILTIN_PADDD128,
23835 IX86_BUILTIN_PADDQ128,
23836 IX86_BUILTIN_PADDSB128,
23837 IX86_BUILTIN_PADDSW128,
23838 IX86_BUILTIN_PADDUSB128,
23839 IX86_BUILTIN_PADDUSW128,
23840 IX86_BUILTIN_PSUBB128,
23841 IX86_BUILTIN_PSUBW128,
23842 IX86_BUILTIN_PSUBD128,
23843 IX86_BUILTIN_PSUBQ128,
23844 IX86_BUILTIN_PSUBSB128,
23845 IX86_BUILTIN_PSUBSW128,
23846 IX86_BUILTIN_PSUBUSB128,
23847 IX86_BUILTIN_PSUBUSW128,
23849 IX86_BUILTIN_PAND128,
23850 IX86_BUILTIN_PANDN128,
23851 IX86_BUILTIN_POR128,
23852 IX86_BUILTIN_PXOR128,
23854 IX86_BUILTIN_PAVGB128,
23855 IX86_BUILTIN_PAVGW128,
23857 IX86_BUILTIN_PCMPEQB128,
23858 IX86_BUILTIN_PCMPEQW128,
23859 IX86_BUILTIN_PCMPEQD128,
23860 IX86_BUILTIN_PCMPGTB128,
23861 IX86_BUILTIN_PCMPGTW128,
23862 IX86_BUILTIN_PCMPGTD128,
23864 IX86_BUILTIN_PMADDWD128,
23866 IX86_BUILTIN_PMAXSW128,
23867 IX86_BUILTIN_PMAXUB128,
23868 IX86_BUILTIN_PMINSW128,
23869 IX86_BUILTIN_PMINUB128,
23871 IX86_BUILTIN_PMULUDQ,
23872 IX86_BUILTIN_PMULUDQ128,
23873 IX86_BUILTIN_PMULHUW128,
23874 IX86_BUILTIN_PMULHW128,
23875 IX86_BUILTIN_PMULLW128,
23877 IX86_BUILTIN_PSADBW128,
23878 IX86_BUILTIN_PSHUFHW,
23879 IX86_BUILTIN_PSHUFLW,
23880 IX86_BUILTIN_PSHUFD,
23882 IX86_BUILTIN_PSLLDQI128,
23883 IX86_BUILTIN_PSLLWI128,
23884 IX86_BUILTIN_PSLLDI128,
23885 IX86_BUILTIN_PSLLQI128,
23886 IX86_BUILTIN_PSRAWI128,
23887 IX86_BUILTIN_PSRADI128,
23888 IX86_BUILTIN_PSRLDQI128,
23889 IX86_BUILTIN_PSRLWI128,
23890 IX86_BUILTIN_PSRLDI128,
23891 IX86_BUILTIN_PSRLQI128,
23893 IX86_BUILTIN_PSLLDQ128,
23894 IX86_BUILTIN_PSLLW128,
23895 IX86_BUILTIN_PSLLD128,
23896 IX86_BUILTIN_PSLLQ128,
23897 IX86_BUILTIN_PSRAW128,
23898 IX86_BUILTIN_PSRAD128,
23899 IX86_BUILTIN_PSRLW128,
23900 IX86_BUILTIN_PSRLD128,
23901 IX86_BUILTIN_PSRLQ128,
23903 IX86_BUILTIN_PUNPCKHBW128,
23904 IX86_BUILTIN_PUNPCKHWD128,
23905 IX86_BUILTIN_PUNPCKHDQ128,
23906 IX86_BUILTIN_PUNPCKHQDQ128,
23907 IX86_BUILTIN_PUNPCKLBW128,
23908 IX86_BUILTIN_PUNPCKLWD128,
23909 IX86_BUILTIN_PUNPCKLDQ128,
23910 IX86_BUILTIN_PUNPCKLQDQ128,
23912 IX86_BUILTIN_CLFLUSH,
23913 IX86_BUILTIN_MFENCE,
23914 IX86_BUILTIN_LFENCE,
23916 IX86_BUILTIN_BSRSI,
23917 IX86_BUILTIN_BSRDI,
23918 IX86_BUILTIN_RDPMC,
23919 IX86_BUILTIN_RDTSC,
23920 IX86_BUILTIN_RDTSCP,
23921 IX86_BUILTIN_ROLQI,
23922 IX86_BUILTIN_ROLHI,
23923 IX86_BUILTIN_RORQI,
23924 IX86_BUILTIN_RORHI,
23927 IX86_BUILTIN_ADDSUBPS,
23928 IX86_BUILTIN_HADDPS,
23929 IX86_BUILTIN_HSUBPS,
23930 IX86_BUILTIN_MOVSHDUP,
23931 IX86_BUILTIN_MOVSLDUP,
23932 IX86_BUILTIN_ADDSUBPD,
23933 IX86_BUILTIN_HADDPD,
23934 IX86_BUILTIN_HSUBPD,
23935 IX86_BUILTIN_LDDQU,
23937 IX86_BUILTIN_MONITOR,
23938 IX86_BUILTIN_MWAIT,
23941 IX86_BUILTIN_PHADDW,
23942 IX86_BUILTIN_PHADDD,
23943 IX86_BUILTIN_PHADDSW,
23944 IX86_BUILTIN_PHSUBW,
23945 IX86_BUILTIN_PHSUBD,
23946 IX86_BUILTIN_PHSUBSW,
23947 IX86_BUILTIN_PMADDUBSW,
23948 IX86_BUILTIN_PMULHRSW,
23949 IX86_BUILTIN_PSHUFB,
23950 IX86_BUILTIN_PSIGNB,
23951 IX86_BUILTIN_PSIGNW,
23952 IX86_BUILTIN_PSIGND,
23953 IX86_BUILTIN_PALIGNR,
23954 IX86_BUILTIN_PABSB,
23955 IX86_BUILTIN_PABSW,
23956 IX86_BUILTIN_PABSD,
23958 IX86_BUILTIN_PHADDW128,
23959 IX86_BUILTIN_PHADDD128,
23960 IX86_BUILTIN_PHADDSW128,
23961 IX86_BUILTIN_PHSUBW128,
23962 IX86_BUILTIN_PHSUBD128,
23963 IX86_BUILTIN_PHSUBSW128,
23964 IX86_BUILTIN_PMADDUBSW128,
23965 IX86_BUILTIN_PMULHRSW128,
23966 IX86_BUILTIN_PSHUFB128,
23967 IX86_BUILTIN_PSIGNB128,
23968 IX86_BUILTIN_PSIGNW128,
23969 IX86_BUILTIN_PSIGND128,
23970 IX86_BUILTIN_PALIGNR128,
23971 IX86_BUILTIN_PABSB128,
23972 IX86_BUILTIN_PABSW128,
23973 IX86_BUILTIN_PABSD128,
23975 /* AMDFAM10 - SSE4A New Instructions. */
23976 IX86_BUILTIN_MOVNTSD,
23977 IX86_BUILTIN_MOVNTSS,
23978 IX86_BUILTIN_EXTRQI,
23979 IX86_BUILTIN_EXTRQ,
23980 IX86_BUILTIN_INSERTQI,
23981 IX86_BUILTIN_INSERTQ,
23984 IX86_BUILTIN_BLENDPD,
23985 IX86_BUILTIN_BLENDPS,
23986 IX86_BUILTIN_BLENDVPD,
23987 IX86_BUILTIN_BLENDVPS,
23988 IX86_BUILTIN_PBLENDVB128,
23989 IX86_BUILTIN_PBLENDW128,
23994 IX86_BUILTIN_INSERTPS128,
23996 IX86_BUILTIN_MOVNTDQA,
23997 IX86_BUILTIN_MPSADBW128,
23998 IX86_BUILTIN_PACKUSDW128,
23999 IX86_BUILTIN_PCMPEQQ,
24000 IX86_BUILTIN_PHMINPOSUW128,
24002 IX86_BUILTIN_PMAXSB128,
24003 IX86_BUILTIN_PMAXSD128,
24004 IX86_BUILTIN_PMAXUD128,
24005 IX86_BUILTIN_PMAXUW128,
24007 IX86_BUILTIN_PMINSB128,
24008 IX86_BUILTIN_PMINSD128,
24009 IX86_BUILTIN_PMINUD128,
24010 IX86_BUILTIN_PMINUW128,
24012 IX86_BUILTIN_PMOVSXBW128,
24013 IX86_BUILTIN_PMOVSXBD128,
24014 IX86_BUILTIN_PMOVSXBQ128,
24015 IX86_BUILTIN_PMOVSXWD128,
24016 IX86_BUILTIN_PMOVSXWQ128,
24017 IX86_BUILTIN_PMOVSXDQ128,
24019 IX86_BUILTIN_PMOVZXBW128,
24020 IX86_BUILTIN_PMOVZXBD128,
24021 IX86_BUILTIN_PMOVZXBQ128,
24022 IX86_BUILTIN_PMOVZXWD128,
24023 IX86_BUILTIN_PMOVZXWQ128,
24024 IX86_BUILTIN_PMOVZXDQ128,
24026 IX86_BUILTIN_PMULDQ128,
24027 IX86_BUILTIN_PMULLD128,
24029 IX86_BUILTIN_ROUNDPD,
24030 IX86_BUILTIN_ROUNDPS,
24031 IX86_BUILTIN_ROUNDSD,
24032 IX86_BUILTIN_ROUNDSS,
24034 IX86_BUILTIN_FLOORPD,
24035 IX86_BUILTIN_CEILPD,
24036 IX86_BUILTIN_TRUNCPD,
24037 IX86_BUILTIN_RINTPD,
24038 IX86_BUILTIN_FLOORPS,
24039 IX86_BUILTIN_CEILPS,
24040 IX86_BUILTIN_TRUNCPS,
24041 IX86_BUILTIN_RINTPS,
24043 IX86_BUILTIN_PTESTZ,
24044 IX86_BUILTIN_PTESTC,
24045 IX86_BUILTIN_PTESTNZC,
24047 IX86_BUILTIN_VEC_INIT_V2SI,
24048 IX86_BUILTIN_VEC_INIT_V4HI,
24049 IX86_BUILTIN_VEC_INIT_V8QI,
24050 IX86_BUILTIN_VEC_EXT_V2DF,
24051 IX86_BUILTIN_VEC_EXT_V2DI,
24052 IX86_BUILTIN_VEC_EXT_V4SF,
24053 IX86_BUILTIN_VEC_EXT_V4SI,
24054 IX86_BUILTIN_VEC_EXT_V8HI,
24055 IX86_BUILTIN_VEC_EXT_V2SI,
24056 IX86_BUILTIN_VEC_EXT_V4HI,
24057 IX86_BUILTIN_VEC_EXT_V16QI,
24058 IX86_BUILTIN_VEC_SET_V2DI,
24059 IX86_BUILTIN_VEC_SET_V4SF,
24060 IX86_BUILTIN_VEC_SET_V4SI,
24061 IX86_BUILTIN_VEC_SET_V8HI,
24062 IX86_BUILTIN_VEC_SET_V4HI,
24063 IX86_BUILTIN_VEC_SET_V16QI,
24065 IX86_BUILTIN_VEC_PACK_SFIX,
24068 IX86_BUILTIN_CRC32QI,
24069 IX86_BUILTIN_CRC32HI,
24070 IX86_BUILTIN_CRC32SI,
24071 IX86_BUILTIN_CRC32DI,
24073 IX86_BUILTIN_PCMPESTRI128,
24074 IX86_BUILTIN_PCMPESTRM128,
24075 IX86_BUILTIN_PCMPESTRA128,
24076 IX86_BUILTIN_PCMPESTRC128,
24077 IX86_BUILTIN_PCMPESTRO128,
24078 IX86_BUILTIN_PCMPESTRS128,
24079 IX86_BUILTIN_PCMPESTRZ128,
24080 IX86_BUILTIN_PCMPISTRI128,
24081 IX86_BUILTIN_PCMPISTRM128,
24082 IX86_BUILTIN_PCMPISTRA128,
24083 IX86_BUILTIN_PCMPISTRC128,
24084 IX86_BUILTIN_PCMPISTRO128,
24085 IX86_BUILTIN_PCMPISTRS128,
24086 IX86_BUILTIN_PCMPISTRZ128,
24088 IX86_BUILTIN_PCMPGTQ,
24090 /* AES instructions */
24091 IX86_BUILTIN_AESENC128,
24092 IX86_BUILTIN_AESENCLAST128,
24093 IX86_BUILTIN_AESDEC128,
24094 IX86_BUILTIN_AESDECLAST128,
24095 IX86_BUILTIN_AESIMC128,
24096 IX86_BUILTIN_AESKEYGENASSIST128,
24098 /* PCLMUL instruction */
24099 IX86_BUILTIN_PCLMULQDQ128,
24102 IX86_BUILTIN_ADDPD256,
24103 IX86_BUILTIN_ADDPS256,
24104 IX86_BUILTIN_ADDSUBPD256,
24105 IX86_BUILTIN_ADDSUBPS256,
24106 IX86_BUILTIN_ANDPD256,
24107 IX86_BUILTIN_ANDPS256,
24108 IX86_BUILTIN_ANDNPD256,
24109 IX86_BUILTIN_ANDNPS256,
24110 IX86_BUILTIN_BLENDPD256,
24111 IX86_BUILTIN_BLENDPS256,
24112 IX86_BUILTIN_BLENDVPD256,
24113 IX86_BUILTIN_BLENDVPS256,
24114 IX86_BUILTIN_DIVPD256,
24115 IX86_BUILTIN_DIVPS256,
24116 IX86_BUILTIN_DPPS256,
24117 IX86_BUILTIN_HADDPD256,
24118 IX86_BUILTIN_HADDPS256,
24119 IX86_BUILTIN_HSUBPD256,
24120 IX86_BUILTIN_HSUBPS256,
24121 IX86_BUILTIN_MAXPD256,
24122 IX86_BUILTIN_MAXPS256,
24123 IX86_BUILTIN_MINPD256,
24124 IX86_BUILTIN_MINPS256,
24125 IX86_BUILTIN_MULPD256,
24126 IX86_BUILTIN_MULPS256,
24127 IX86_BUILTIN_ORPD256,
24128 IX86_BUILTIN_ORPS256,
24129 IX86_BUILTIN_SHUFPD256,
24130 IX86_BUILTIN_SHUFPS256,
24131 IX86_BUILTIN_SUBPD256,
24132 IX86_BUILTIN_SUBPS256,
24133 IX86_BUILTIN_XORPD256,
24134 IX86_BUILTIN_XORPS256,
24135 IX86_BUILTIN_CMPSD,
24136 IX86_BUILTIN_CMPSS,
24137 IX86_BUILTIN_CMPPD,
24138 IX86_BUILTIN_CMPPS,
24139 IX86_BUILTIN_CMPPD256,
24140 IX86_BUILTIN_CMPPS256,
24141 IX86_BUILTIN_CVTDQ2PD256,
24142 IX86_BUILTIN_CVTDQ2PS256,
24143 IX86_BUILTIN_CVTPD2PS256,
24144 IX86_BUILTIN_CVTPS2DQ256,
24145 IX86_BUILTIN_CVTPS2PD256,
24146 IX86_BUILTIN_CVTTPD2DQ256,
24147 IX86_BUILTIN_CVTPD2DQ256,
24148 IX86_BUILTIN_CVTTPS2DQ256,
24149 IX86_BUILTIN_EXTRACTF128PD256,
24150 IX86_BUILTIN_EXTRACTF128PS256,
24151 IX86_BUILTIN_EXTRACTF128SI256,
24152 IX86_BUILTIN_VZEROALL,
24153 IX86_BUILTIN_VZEROUPPER,
24154 IX86_BUILTIN_VPERMILVARPD,
24155 IX86_BUILTIN_VPERMILVARPS,
24156 IX86_BUILTIN_VPERMILVARPD256,
24157 IX86_BUILTIN_VPERMILVARPS256,
24158 IX86_BUILTIN_VPERMILPD,
24159 IX86_BUILTIN_VPERMILPS,
24160 IX86_BUILTIN_VPERMILPD256,
24161 IX86_BUILTIN_VPERMILPS256,
24162 IX86_BUILTIN_VPERMIL2PD,
24163 IX86_BUILTIN_VPERMIL2PS,
24164 IX86_BUILTIN_VPERMIL2PD256,
24165 IX86_BUILTIN_VPERMIL2PS256,
24166 IX86_BUILTIN_VPERM2F128PD256,
24167 IX86_BUILTIN_VPERM2F128PS256,
24168 IX86_BUILTIN_VPERM2F128SI256,
24169 IX86_BUILTIN_VBROADCASTSS,
24170 IX86_BUILTIN_VBROADCASTSD256,
24171 IX86_BUILTIN_VBROADCASTSS256,
24172 IX86_BUILTIN_VBROADCASTPD256,
24173 IX86_BUILTIN_VBROADCASTPS256,
24174 IX86_BUILTIN_VINSERTF128PD256,
24175 IX86_BUILTIN_VINSERTF128PS256,
24176 IX86_BUILTIN_VINSERTF128SI256,
24177 IX86_BUILTIN_LOADUPD256,
24178 IX86_BUILTIN_LOADUPS256,
24179 IX86_BUILTIN_STOREUPD256,
24180 IX86_BUILTIN_STOREUPS256,
24181 IX86_BUILTIN_LDDQU256,
24182 IX86_BUILTIN_MOVNTDQ256,
24183 IX86_BUILTIN_MOVNTPD256,
24184 IX86_BUILTIN_MOVNTPS256,
24185 IX86_BUILTIN_LOADDQU256,
24186 IX86_BUILTIN_STOREDQU256,
24187 IX86_BUILTIN_MASKLOADPD,
24188 IX86_BUILTIN_MASKLOADPS,
24189 IX86_BUILTIN_MASKSTOREPD,
24190 IX86_BUILTIN_MASKSTOREPS,
24191 IX86_BUILTIN_MASKLOADPD256,
24192 IX86_BUILTIN_MASKLOADPS256,
24193 IX86_BUILTIN_MASKSTOREPD256,
24194 IX86_BUILTIN_MASKSTOREPS256,
  IX86_BUILTIN_MOVSHDUP256,
  IX86_BUILTIN_MOVSLDUP256,
  IX86_BUILTIN_MOVDDUP256,

  IX86_BUILTIN_SQRTPD256,
  IX86_BUILTIN_SQRTPS256,
  IX86_BUILTIN_SQRTPS_NR256,
  IX86_BUILTIN_RSQRTPS256,
  IX86_BUILTIN_RSQRTPS_NR256,

  IX86_BUILTIN_RCPPS256,

  IX86_BUILTIN_ROUNDPD256,
  IX86_BUILTIN_ROUNDPS256,

  IX86_BUILTIN_FLOORPD256,
  IX86_BUILTIN_CEILPD256,
  IX86_BUILTIN_TRUNCPD256,
  IX86_BUILTIN_RINTPD256,
  IX86_BUILTIN_FLOORPS256,
  IX86_BUILTIN_CEILPS256,
  IX86_BUILTIN_TRUNCPS256,
  IX86_BUILTIN_RINTPS256,

  IX86_BUILTIN_UNPCKHPD256,
  IX86_BUILTIN_UNPCKLPD256,
  IX86_BUILTIN_UNPCKHPS256,
  IX86_BUILTIN_UNPCKLPS256,

  IX86_BUILTIN_SI256_SI,
  IX86_BUILTIN_PS256_PS,
  IX86_BUILTIN_PD256_PD,
  IX86_BUILTIN_SI_SI256,
  IX86_BUILTIN_PS_PS256,
  IX86_BUILTIN_PD_PD256,

  IX86_BUILTIN_VTESTZPD,
  IX86_BUILTIN_VTESTCPD,
  IX86_BUILTIN_VTESTNZCPD,
  IX86_BUILTIN_VTESTZPS,
  IX86_BUILTIN_VTESTCPS,
  IX86_BUILTIN_VTESTNZCPS,
  IX86_BUILTIN_VTESTZPD256,
  IX86_BUILTIN_VTESTCPD256,
  IX86_BUILTIN_VTESTNZCPD256,
  IX86_BUILTIN_VTESTZPS256,
  IX86_BUILTIN_VTESTCPS256,
  IX86_BUILTIN_VTESTNZCPS256,
  IX86_BUILTIN_PTESTZ256,
  IX86_BUILTIN_PTESTC256,
  IX86_BUILTIN_PTESTNZC256,

  IX86_BUILTIN_MOVMSKPD256,
  IX86_BUILTIN_MOVMSKPS256,

  /* TFmode support builtins.  */
  IX86_BUILTIN_HUGE_VALQ,
  IX86_BUILTIN_FABSQ,
  IX86_BUILTIN_COPYSIGNQ,

  /* Vectorizer support builtins.  */
  IX86_BUILTIN_CPYSGNPS,
  IX86_BUILTIN_CPYSGNPD,
  IX86_BUILTIN_CPYSGNPS256,
  IX86_BUILTIN_CPYSGNPD256,

  IX86_BUILTIN_CVTUDQ2PS,

  IX86_BUILTIN_VEC_PERM_V2DF,
  IX86_BUILTIN_VEC_PERM_V4SF,
  IX86_BUILTIN_VEC_PERM_V2DI,
  IX86_BUILTIN_VEC_PERM_V4SI,
  IX86_BUILTIN_VEC_PERM_V8HI,
  IX86_BUILTIN_VEC_PERM_V16QI,
  IX86_BUILTIN_VEC_PERM_V2DI_U,
  IX86_BUILTIN_VEC_PERM_V4SI_U,
  IX86_BUILTIN_VEC_PERM_V8HI_U,
  IX86_BUILTIN_VEC_PERM_V16QI_U,
  IX86_BUILTIN_VEC_PERM_V4DF,
  IX86_BUILTIN_VEC_PERM_V8SF,

  /* FMA4 and XOP instructions.  */
  IX86_BUILTIN_VFMADDSS,
  IX86_BUILTIN_VFMADDSD,
  IX86_BUILTIN_VFMADDPS,
  IX86_BUILTIN_VFMADDPD,
  IX86_BUILTIN_VFMADDPS256,
  IX86_BUILTIN_VFMADDPD256,
  IX86_BUILTIN_VFMADDSUBPS,
  IX86_BUILTIN_VFMADDSUBPD,
  IX86_BUILTIN_VFMADDSUBPS256,
  IX86_BUILTIN_VFMADDSUBPD256,

  IX86_BUILTIN_VPCMOV,
  IX86_BUILTIN_VPCMOV_V2DI,
  IX86_BUILTIN_VPCMOV_V4SI,
  IX86_BUILTIN_VPCMOV_V8HI,
  IX86_BUILTIN_VPCMOV_V16QI,
  IX86_BUILTIN_VPCMOV_V4SF,
  IX86_BUILTIN_VPCMOV_V2DF,
  IX86_BUILTIN_VPCMOV256,
  IX86_BUILTIN_VPCMOV_V4DI256,
  IX86_BUILTIN_VPCMOV_V8SI256,
  IX86_BUILTIN_VPCMOV_V16HI256,
  IX86_BUILTIN_VPCMOV_V32QI256,
  IX86_BUILTIN_VPCMOV_V8SF256,
  IX86_BUILTIN_VPCMOV_V4DF256,

  IX86_BUILTIN_VPPERM,

  IX86_BUILTIN_VPMACSSWW,
  IX86_BUILTIN_VPMACSWW,
  IX86_BUILTIN_VPMACSSWD,
  IX86_BUILTIN_VPMACSWD,
  IX86_BUILTIN_VPMACSSDD,
  IX86_BUILTIN_VPMACSDD,
  IX86_BUILTIN_VPMACSSDQL,
  IX86_BUILTIN_VPMACSSDQH,
  IX86_BUILTIN_VPMACSDQL,
  IX86_BUILTIN_VPMACSDQH,
  IX86_BUILTIN_VPMADCSSWD,
  IX86_BUILTIN_VPMADCSWD,

  IX86_BUILTIN_VPHADDBW,
  IX86_BUILTIN_VPHADDBD,
  IX86_BUILTIN_VPHADDBQ,
  IX86_BUILTIN_VPHADDWD,
  IX86_BUILTIN_VPHADDWQ,
  IX86_BUILTIN_VPHADDDQ,
  IX86_BUILTIN_VPHADDUBW,
  IX86_BUILTIN_VPHADDUBD,
  IX86_BUILTIN_VPHADDUBQ,
  IX86_BUILTIN_VPHADDUWD,
  IX86_BUILTIN_VPHADDUWQ,
  IX86_BUILTIN_VPHADDUDQ,
  IX86_BUILTIN_VPHSUBBW,
  IX86_BUILTIN_VPHSUBWD,
  IX86_BUILTIN_VPHSUBDQ,

  IX86_BUILTIN_VPROTB,
  IX86_BUILTIN_VPROTW,
  IX86_BUILTIN_VPROTD,
  IX86_BUILTIN_VPROTQ,
  IX86_BUILTIN_VPROTB_IMM,
  IX86_BUILTIN_VPROTW_IMM,
  IX86_BUILTIN_VPROTD_IMM,
  IX86_BUILTIN_VPROTQ_IMM,

  IX86_BUILTIN_VPSHLB,
  IX86_BUILTIN_VPSHLW,
  IX86_BUILTIN_VPSHLD,
  IX86_BUILTIN_VPSHLQ,
  IX86_BUILTIN_VPSHAB,
  IX86_BUILTIN_VPSHAW,
  IX86_BUILTIN_VPSHAD,
  IX86_BUILTIN_VPSHAQ,

  IX86_BUILTIN_VFRCZSS,
  IX86_BUILTIN_VFRCZSD,
  IX86_BUILTIN_VFRCZPS,
  IX86_BUILTIN_VFRCZPD,
  IX86_BUILTIN_VFRCZPS256,
  IX86_BUILTIN_VFRCZPD256,

  IX86_BUILTIN_VPCOMEQUB,
  IX86_BUILTIN_VPCOMNEUB,
  IX86_BUILTIN_VPCOMLTUB,
  IX86_BUILTIN_VPCOMLEUB,
  IX86_BUILTIN_VPCOMGTUB,
  IX86_BUILTIN_VPCOMGEUB,
  IX86_BUILTIN_VPCOMFALSEUB,
  IX86_BUILTIN_VPCOMTRUEUB,

  IX86_BUILTIN_VPCOMEQUW,
  IX86_BUILTIN_VPCOMNEUW,
  IX86_BUILTIN_VPCOMLTUW,
  IX86_BUILTIN_VPCOMLEUW,
  IX86_BUILTIN_VPCOMGTUW,
  IX86_BUILTIN_VPCOMGEUW,
  IX86_BUILTIN_VPCOMFALSEUW,
  IX86_BUILTIN_VPCOMTRUEUW,

  IX86_BUILTIN_VPCOMEQUD,
  IX86_BUILTIN_VPCOMNEUD,
  IX86_BUILTIN_VPCOMLTUD,
  IX86_BUILTIN_VPCOMLEUD,
  IX86_BUILTIN_VPCOMGTUD,
  IX86_BUILTIN_VPCOMGEUD,
  IX86_BUILTIN_VPCOMFALSEUD,
  IX86_BUILTIN_VPCOMTRUEUD,

  IX86_BUILTIN_VPCOMEQUQ,
  IX86_BUILTIN_VPCOMNEUQ,
  IX86_BUILTIN_VPCOMLTUQ,
  IX86_BUILTIN_VPCOMLEUQ,
  IX86_BUILTIN_VPCOMGTUQ,
  IX86_BUILTIN_VPCOMGEUQ,
  IX86_BUILTIN_VPCOMFALSEUQ,
  IX86_BUILTIN_VPCOMTRUEUQ,

  IX86_BUILTIN_VPCOMEQB,
  IX86_BUILTIN_VPCOMNEB,
  IX86_BUILTIN_VPCOMLTB,
  IX86_BUILTIN_VPCOMLEB,
  IX86_BUILTIN_VPCOMGTB,
  IX86_BUILTIN_VPCOMGEB,
  IX86_BUILTIN_VPCOMFALSEB,
  IX86_BUILTIN_VPCOMTRUEB,

  IX86_BUILTIN_VPCOMEQW,
  IX86_BUILTIN_VPCOMNEW,
  IX86_BUILTIN_VPCOMLTW,
  IX86_BUILTIN_VPCOMLEW,
  IX86_BUILTIN_VPCOMGTW,
  IX86_BUILTIN_VPCOMGEW,
  IX86_BUILTIN_VPCOMFALSEW,
  IX86_BUILTIN_VPCOMTRUEW,

  IX86_BUILTIN_VPCOMEQD,
  IX86_BUILTIN_VPCOMNED,
  IX86_BUILTIN_VPCOMLTD,
  IX86_BUILTIN_VPCOMLED,
  IX86_BUILTIN_VPCOMGTD,
  IX86_BUILTIN_VPCOMGED,
  IX86_BUILTIN_VPCOMFALSED,
  IX86_BUILTIN_VPCOMTRUED,

  IX86_BUILTIN_VPCOMEQQ,
  IX86_BUILTIN_VPCOMNEQ,
  IX86_BUILTIN_VPCOMLTQ,
  IX86_BUILTIN_VPCOMLEQ,
  IX86_BUILTIN_VPCOMGTQ,
  IX86_BUILTIN_VPCOMGEQ,
  IX86_BUILTIN_VPCOMFALSEQ,
  IX86_BUILTIN_VPCOMTRUEQ,

  /* LWP instructions.  */
  IX86_BUILTIN_LLWPCB,
  IX86_BUILTIN_SLWPCB,
  IX86_BUILTIN_LWPVAL32,
  IX86_BUILTIN_LWPVAL64,
  IX86_BUILTIN_LWPINS32,
  IX86_BUILTIN_LWPINS64,

  /* BMI instructions.  */
  IX86_BUILTIN_BEXTR32,
  IX86_BUILTIN_BEXTR64,

  /* TBM instructions.  */
  IX86_BUILTIN_BEXTRI32,
  IX86_BUILTIN_BEXTRI64,

  /* FSGSBASE instructions.  */
  IX86_BUILTIN_RDFSBASE32,
  IX86_BUILTIN_RDFSBASE64,
  IX86_BUILTIN_RDGSBASE32,
  IX86_BUILTIN_RDGSBASE64,
  IX86_BUILTIN_WRFSBASE32,
  IX86_BUILTIN_WRFSBASE64,
  IX86_BUILTIN_WRGSBASE32,
  IX86_BUILTIN_WRGSBASE64,

  /* RDRND instructions.  */
  IX86_BUILTIN_RDRAND16_STEP,
  IX86_BUILTIN_RDRAND32_STEP,
  IX86_BUILTIN_RDRAND64_STEP,

  /* F16C instructions.  */
  IX86_BUILTIN_CVTPH2PS,
  IX86_BUILTIN_CVTPH2PS256,
  IX86_BUILTIN_CVTPS2PH,
  IX86_BUILTIN_CVTPS2PH256,

  /* CFString built-in for darwin */
  IX86_BUILTIN_CFSTRING,

  IX86_BUILTIN_MAX
};
/* Table for the ix86 builtin decls.  */
static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];

/* Table of all of the builtin functions that are possible with different ISA's
   but are waiting to be built until a function is declared to use that
   ISA.  */
struct builtin_isa {
  const char *name;		     /* function name */
  enum ix86_builtin_func_type tcode; /* type to use in the declaration */
  int isa;			     /* isa_flags this builtin is defined for */
  bool const_p;			     /* true if the declaration is constant */
  bool set_and_not_built_p;
};

static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
/* Add an ix86 target builtin function with CODE, NAME and TYPE.  Save the
   MASK of which isa_flags to use in the ix86_builtins_isa array.  Stores the
   function decl in the ix86_builtins array.  Returns the function decl or
   NULL_TREE, if the builtin was not added.

   If the front end has a special hook for builtin functions, delay adding
   builtin functions that aren't in the current ISA until the ISA is changed
   with function specific optimization.  Doing so can save about 300K for the
   default compiler.  When the builtin is expanded, check at that time whether
   it is valid.

   If the front end doesn't have a special hook, record all builtins, even if
   they aren't in the current ISA, in case the user uses function specific
   options for a different ISA, so that we don't get scope errors if a builtin
   is added in the middle of a function scope.  */
static tree
def_builtin (int mask, const char *name, enum ix86_builtin_func_type tcode,
	     enum ix86_builtins code)
{
  tree decl = NULL_TREE;

  if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
    {
      ix86_builtins_isa[(int) code].isa = mask;

      mask &= ~OPTION_MASK_ISA_64BIT;
      if (mask == 0
	  || (mask & ix86_isa_flags) != 0
	  || (lang_hooks.builtin_function
	      == lang_hooks.builtin_function_ext_scope))
	{
	  tree type = ix86_get_builtin_func_type (tcode);
	  decl = add_builtin_function (name, type, code, BUILT_IN_MD,
				       NULL, NULL_TREE);
	  ix86_builtins[(int) code] = decl;
	  ix86_builtins_isa[(int) code].set_and_not_built_p = false;
	}
      else
	{
	  ix86_builtins[(int) code] = NULL_TREE;
	  ix86_builtins_isa[(int) code].tcode = tcode;
	  ix86_builtins_isa[(int) code].name = name;
	  ix86_builtins_isa[(int) code].const_p = false;
	  ix86_builtins_isa[(int) code].set_and_not_built_p = true;
	}
    }

  return decl;
}
/* Like def_builtin, but also marks the function decl "const".  */

static tree
def_builtin_const (int mask, const char *name,
		   enum ix86_builtin_func_type tcode, enum ix86_builtins code)
{
  tree decl = def_builtin (mask, name, tcode, code);
  if (decl)
    TREE_READONLY (decl) = 1;
  else
    ix86_builtins_isa[(int) code].const_p = true;

  return decl;
}
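/* Editorial sketch (added in editing, not part of the original source):
   the two helpers above are normally driven by the bdesc_* tables that
   follow.  A minimal, hypothetical registration loop would look like:

     size_t i;
     const struct builtin_description *d;

     for (i = 0, d = bdesc_args; i < ARRAY_SIZE (bdesc_args); i++, d++)
       if (d->name)
	 def_builtin_const (d->mask, d->name,
			    (enum ix86_builtin_func_type) d->flag,
			    d->code);

   This assumes the table's flag field carries the function-type enum, as
   the bdesc_args entries below encode it via the (int) FTYPE casts.  */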
/* Add any new builtin functions for a given ISA that may not have been
   declared.  This saves a bit of space compared to adding all of the
   declarations to the tree, even if we didn't use them.  */

static void
ix86_add_new_builtins (int isa)
{
  int i;

  for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
    {
      if ((ix86_builtins_isa[i].isa & isa) != 0
	  && ix86_builtins_isa[i].set_and_not_built_p)
	{
	  tree decl, type;

	  /* Don't define the builtin again.  */
	  ix86_builtins_isa[i].set_and_not_built_p = false;

	  type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
	  decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
						 type, i, BUILT_IN_MD, NULL,
						 NULL_TREE);

	  ix86_builtins[i] = decl;
	  if (ix86_builtins_isa[i].const_p)
	    TREE_READONLY (decl) = 1;
	}
    }
}
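/* Editorial note (assumption, not in the original source): the deferred
   builtins recorded by def_builtin are typically materialized from the
   target-attribute/pragma handling code once the active ISA set changes,
   along the lines of:

     ix86_add_new_builtins (ix86_isa_flags);

   so that only builtins whose ISA bits have just been enabled get real
   declarations added to the tree.  */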
/* Bits for builtin_description.flag.  */

/* Set when we don't support the comparison natively, and should
   swap_comparison in order to support it.  */
#define BUILTIN_DESC_SWAP_OPERANDS 1

struct builtin_description
{
  const unsigned int mask;
  const enum insn_code icode;
  const char *const name;
  const enum ix86_builtins code;
  const enum rtx_code comparison;
  const int flag;
};
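/* Editorial reading of one table row (annotation added in editing, not part
   of the original source).  Taking an entry from bdesc_args below:

     { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3,
       "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
       (int) V4SF_FTYPE_V4SF_V4SF_SWAP },

   the builtin is available when SSE is enabled, expands through the
   sse_maskcmpv4sf3 insn pattern, and implements "greater than" by emitting
   the LT comparison with its operands swapped, as the _SWAP suffix in the
   function type indicates.  */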
static const struct builtin_description bdesc_comi[] =
{
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
};
static const struct builtin_description bdesc_pcmpestr[] =
{
  /* SSE4.2 */
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
};
static const struct builtin_description bdesc_pcmpistr[] =
{
  /* SSE4.2 */
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
};
/* Special builtins with variable number of arguments.  */
static const struct builtin_description bdesc_special_args[] =
{
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtsc, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtscp, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },

  /* MMX */
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },

  /* 3DNow! */
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },

  /* SSE */
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },

  /* SSE or 3DNow!A  */
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },

  /* SSE2 */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },

  /* SSE3 */
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },

  /* SSE4.1 */
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },

  /* SSE4A */
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },

  /* AVX */
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },

  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },

  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },

  /* FSGSBASE */
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
};
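/* Editorial usage sketch (assumption, not in the original source): user
   code normally reaches these special builtins through the intrinsic
   header wrappers rather than by name, e.g.

     #include <x86intrin.h>
     unsigned long long t = __rdtsc ();

   where __rdtsc expands to __builtin_ia32_rdtsc and so resolves against
   the IX86_BUILTIN_RDTSC entry above.  */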
/* Builtins with variable number of arguments.  */
static const struct builtin_description bdesc_args[] =
{
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
  { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdpmc, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },

  /* MMX */
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },

  /* 3DNow! */
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },

  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  /* 3DNow!A */
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },

  /* SSE */
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },

  { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },

  /* SSE MMX or 3Dnow!A */
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },

  /* SSE2 */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2df", IX86_BUILTIN_VEC_PERM_V2DF, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4sf", IX86_BUILTIN_VEC_PERM_V4SF, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di", IX86_BUILTIN_VEC_PERM_V2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si", IX86_BUILTIN_VEC_PERM_V4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi", IX86_BUILTIN_VEC_PERM_V8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi", IX86_BUILTIN_VEC_PERM_V16QI, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di_u", IX86_BUILTIN_VEC_PERM_V2DI_U, UNKNOWN, (int) V2UDI_FTYPE_V2UDI_V2UDI_V2UDI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si_u", IX86_BUILTIN_VEC_PERM_V4SI_U, UNKNOWN, (int) V4USI_FTYPE_V4USI_V4USI_V4USI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi_u", IX86_BUILTIN_VEC_PERM_V8HI_U, UNKNOWN, (int) V8UHI_FTYPE_V8UHI_V8UHI_V8UHI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi_u", IX86_BUILTIN_VEC_PERM_V16QI_U, UNKNOWN, (int) V16UQI_FTYPE_V16UQI_V16UQI_V16UQI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4df", IX86_BUILTIN_VEC_PERM_V4DF, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8sf", IX86_BUILTIN_VEC_PERM_V8SF, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtudq2ps, "__builtin_ia32_cvtudq2ps", IX86_BUILTIN_CVTUDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25086 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25087 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25088 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25089 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25090 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25091 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25092 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25093 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25095 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
25096 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
25097 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
25099 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25100 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
25102 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
25103 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
25105 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
25107 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
25108 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
25109 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
25110 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
25112 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
25113 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
25114 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
25115 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
25116 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
25117 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
25118 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
25120 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
25121 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
25122 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
25123 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
25124 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
25125 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
25126 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
25128 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
25129 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
25130 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
25131 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
25133 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
25134 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
25135 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
25137 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
25139 { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
25140 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
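
  /* SSE2 MMX */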
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
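
  /* SSE3 */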
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
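
  /* SSSE3 */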
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },

  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
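
  /* SSE4.1 */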
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
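
  /* SSE4.1 rounding and ptest. */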
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
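
  /* SSE4.2 */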
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
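
  /* SSE4A */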
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
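
  /* AES */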
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
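
  /* PCLMUL */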
  { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
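
  /* AVX */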
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },

  { OPTION_MASK_ISA_ABM, CODE_FOR_clzhi2_abm, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
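
  /* BMI */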
  { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
  { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
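
  /* TBM */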
  { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
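
  /* F16C */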
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
};

/* FMA4 and XOP. */
#define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
#define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
#define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
#define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
#define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
#define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
#define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
#define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
#define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
#define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
#define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
#define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
#define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
#define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
#define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
#define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
#define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
#define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
#define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
#define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
#define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
#define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
#define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
#define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
#define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
#define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
#define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
#define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
#define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
#define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
#define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
#define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
#define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
#define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
#define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
#define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
#define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
#define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
#define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
#define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
#define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
#define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
#define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
#define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
#define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
#define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
#define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
#define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
#define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
#define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
#define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
#define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI

static const struct builtin_description bdesc_multi_arg[] =
{
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
    "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
    UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
    "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
    UNKNOWN, (int)MULTI_ARG_3_DF },

  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
    "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
    UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
    "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
    UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
    "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
    UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
    "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
    UNKNOWN, (int)MULTI_ARG_3_DF2 },

  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
    "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
    UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
    "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
    UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
    "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
    UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
    "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
    UNKNOWN, (int)MULTI_ARG_3_DF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
25609 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
25610 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
25611 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
25612 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
25613 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
25614 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
25615 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
25617 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
25618 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
25619 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
25620 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
25621 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
25622 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
25623 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
25625 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
25626 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
25627 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
25628 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
25629 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
25630 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
25631 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
25633 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
25634 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
25635 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
25636 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
25637 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
25638 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
25639 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
25641 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
25642 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
25643 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
25644 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
25645 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
25646 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
25647 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
25649 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
25650 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
25651 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
25652 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
25653 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub", IX86_BUILTIN_VPCOMFALSEUB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
25654 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw", IX86_BUILTIN_VPCOMFALSEUW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
25655 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud", IX86_BUILTIN_VPCOMFALSEUD, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
25656 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq", IX86_BUILTIN_VPCOMFALSEUQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
25658 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
25659 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
25660 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
25661 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
25662 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
25663 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
25664 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
25665 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
25667 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
25668 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
25669 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
25670 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
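/* Editorial usage sketch (not from the original source): the table above
   backs the XOP intrinsics.  For example, the vpcomeqb entry makes the
   following compile with -mxop; the EQ code stored in the entry selects
   the comparison emitted through CODE_FOR_xop_maskcmpv16qi3:

     typedef char __v16qi __attribute__ ((vector_size (16)));

     __v16qi
     eq_bytes (__v16qi a, __v16qi b)
     {
       return __builtin_ia32_vpcomeqb (a, b);
     }
*/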
25674 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
25675 in the current target ISA to allow the user to compile particular modules
25676 with different target specific options that differ from the command line options.  */
25679 ix86_init_mmx_sse_builtins (void)
25681 const struct builtin_description * d;
25682 enum ix86_builtin_func_type ftype;
25685 /* Add all special builtins with a variable number of operands.  */
25686 for (i = 0, d = bdesc_special_args;
25687 i < ARRAY_SIZE (bdesc_special_args);
25693 ftype = (enum ix86_builtin_func_type) d->flag;
25694 def_builtin (d->mask, d->name, ftype, d->code);
25697 /* Add all builtins with a variable number of operands.  */
25698 for (i = 0, d = bdesc_args;
25699 i < ARRAY_SIZE (bdesc_args);
25705 ftype = (enum ix86_builtin_func_type) d->flag;
25706 def_builtin_const (d->mask, d->name, ftype, d->code);
25709 /* pcmpestr[im] insns. */
25710 for (i = 0, d = bdesc_pcmpestr;
25711 i < ARRAY_SIZE (bdesc_pcmpestr);
25714 if (d->code == IX86_BUILTIN_PCMPESTRM128)
25715 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
25717 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
25718 def_builtin_const (d->mask, d->name, ftype, d->code);
25721 /* pcmpistr[im] insns. */
25722 for (i = 0, d = bdesc_pcmpistr;
25723 i < ARRAY_SIZE (bdesc_pcmpistr);
25726 if (d->code == IX86_BUILTIN_PCMPISTRM128)
25727 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
25729 ftype = INT_FTYPE_V16QI_V16QI_INT;
25730 def_builtin_const (d->mask, d->name, ftype, d->code);
25733 /* comi/ucomi insns. */
25734 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
25736 if (d->mask == OPTION_MASK_ISA_SSE2)
25737 ftype = INT_FTYPE_V2DF_V2DF;
25739 ftype = INT_FTYPE_V4SF_V4SF;
25740 def_builtin_const (d->mask, d->name, ftype, d->code);
25744 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
25745 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
25746 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
25747 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
25749 /* SSE or 3DNow!A */
25750 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
25751 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
25752 IX86_BUILTIN_MASKMOVQ);
25755 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
25756 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
25758 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
25759 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
25760 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
25761 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
25764 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
25765 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
25766 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
25767 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
25770 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
25771 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
25772 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
25773 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
25774 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
25775 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
25776 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
25777 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
25778 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
25779 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
25780 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
25781 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
25784 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
25785 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
25788 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
25789 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
25790 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
25791 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
25792 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
25793 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
25794 IX86_BUILTIN_RDRAND64_STEP);
25796 /* MMX access to the vec_init patterns. */
25797 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
25798 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
25800 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
25801 V4HI_FTYPE_HI_HI_HI_HI,
25802 IX86_BUILTIN_VEC_INIT_V4HI);
25804 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
25805 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
25806 IX86_BUILTIN_VEC_INIT_V8QI);
25808 /* Access to the vec_extract patterns. */
25809 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
25810 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
25811 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
25812 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
25813 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
25814 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
25815 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
25816 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
25817 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
25818 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
25820 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
25821 "__builtin_ia32_vec_ext_v4hi",
25822 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
25824 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
25825 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
25827 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
25828 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
25830 /* Access to the vec_set patterns. */
25831 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
25832 "__builtin_ia32_vec_set_v2di",
25833 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
25835 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
25836 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
25838 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
25839 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
25841 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
25842 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
25844 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
25845 "__builtin_ia32_vec_set_v4hi",
25846 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
25848 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
25849 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
25851 /* Add FMA4 and XOP multi-arg instructions.  */
25852 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
25857 ftype = (enum ix86_builtin_func_type) d->flag;
25858 def_builtin_const (d->mask, d->name, ftype, d->code);
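/* Editorial usage sketch: because every builtin is registered here even
   when the command-line ISA lacks it, a per-function target attribute is
   enough to use one; the availability check happens later, in
   ix86_expand_builtin.  A minimal example, assuming a translation unit
   compiled without -msse4.1 (0 selects round-to-nearest):

     typedef double __v2df __attribute__ ((vector_size (16)));

     __attribute__ ((target ("sse4.1"))) __v2df
     round_nearest (__v2df x)
     {
       return __builtin_ia32_roundpd (x, 0);
     }
*/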
25862 /* Internal method for ix86_init_builtins. */
25865 ix86_init_builtins_va_builtins_abi (void)
25867 tree ms_va_ref, sysv_va_ref;
25868 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
25869 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
25870 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
25871 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
25875 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
25876 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
25877 ms_va_ref = build_reference_type (ms_va_list_type_node);
25879 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
25882 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
25883 fnvoid_va_start_ms =
25884 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
25885 fnvoid_va_end_sysv =
25886 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
25887 fnvoid_va_start_sysv =
25888 build_varargs_function_type_list (void_type_node, sysv_va_ref,
25890 fnvoid_va_copy_ms =
25891 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
25893 fnvoid_va_copy_sysv =
25894 build_function_type_list (void_type_node, sysv_va_ref,
25895 sysv_va_ref, NULL_TREE);
25897 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
25898 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
25899 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
25900 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
25901 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
25902 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
25903 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
25904 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
25905 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
25906 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
25907 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
25908 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
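/* Editorial usage sketch: on x86-64 these builtins expose both varargs
   conventions explicitly, e.g.

     int __attribute__ ((ms_abi))
     sum_ms (int n, ...)
     {
       __builtin_ms_va_list ap;
       int s = 0;
       __builtin_ms_va_start (ap, n);
       while (n-- > 0)
         s += __builtin_va_arg (ap, int);
       __builtin_ms_va_end (ap);
       return s;
     }
*/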
25912 ix86_init_builtin_types (void)
25914 tree float128_type_node, float80_type_node;
25916 /* The __float80 type. */
25917 float80_type_node = long_double_type_node;
25918 if (TYPE_MODE (float80_type_node) != XFmode)
25920 /* long double does not have XFmode here; build a separate 80-bit type.  */
25921 float80_type_node = make_node (REAL_TYPE);
25923 TYPE_PRECISION (float80_type_node) = 80;
25924 layout_type (float80_type_node);
25926 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
25928 /* The __float128 type. */
25929 float128_type_node = make_node (REAL_TYPE);
25930 TYPE_PRECISION (float128_type_node) = 128;
25931 layout_type (float128_type_node);
25932 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
25934 /* This macro is built by i386-builtin-types.awk. */
25935 DEFINE_BUILTIN_PRIMITIVE_TYPES;
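/* Editorial usage sketch: once registered, the type names are usable
   directly in C.  The literal suffixes below are GCC extensions, stated
   from memory: 'w' for the 80-bit XFmode type, 'q' for the 128-bit
   TFmode type.

     __float80  e = 1.0w;
     __float128 q = 1.0q;
*/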
25939 ix86_init_builtins (void)
25943 ix86_init_builtin_types ();
25945 /* TFmode support builtins. */
25946 def_builtin_const (0, "__builtin_infq",
25947 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
25948 def_builtin_const (0, "__builtin_huge_valq",
25949 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
25951 /* We will expand these to normal calls if SSE2 isn't available, since
25952 they are used by libgcc. */
25953 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
25954 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
25955 BUILT_IN_MD, "__fabstf2", NULL_TREE);
25956 TREE_READONLY (t) = 1;
25957 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
25959 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
25960 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
25961 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
25962 TREE_READONLY (t) = 1;
25963 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
25965 ix86_init_mmx_sse_builtins ();
25968 ix86_init_builtins_va_builtins_abi ();
25970 #ifdef SUBTARGET_INIT_BUILTINS
25971 SUBTARGET_INIT_BUILTINS;
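/* Editorial usage sketch: the TFmode builtins registered above compose as
   usual; without SSE2 they become calls to the libgcc routines named
   above (__fabstf2, __copysigntf3):

     __float128
     magnitude_with_sign (__float128 x, __float128 s)
     {
       return __builtin_copysignq (__builtin_fabsq (x), s);
     }
*/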
25975 /* Return the ix86 builtin for CODE. */
25978 ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
25980 if (code >= IX86_BUILTIN_MAX)
25981 return error_mark_node;
25983 return ix86_builtins[code];
25986 /* Errors in the source file can cause expand_expr to return const0_rtx
25987 where we expect a vector. To avoid crashing, use one of the vector
25988 clear instructions. */
25990 safe_vector_operand (rtx x, enum machine_mode mode)
25992 if (x == const0_rtx)
25993 x = CONST0_RTX (mode);
25997 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
26000 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
26003 tree arg0 = CALL_EXPR_ARG (exp, 0);
26004 tree arg1 = CALL_EXPR_ARG (exp, 1);
26005 rtx op0 = expand_normal (arg0);
26006 rtx op1 = expand_normal (arg1);
26007 enum machine_mode tmode = insn_data[icode].operand[0].mode;
26008 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
26009 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
26011 if (VECTOR_MODE_P (mode0))
26012 op0 = safe_vector_operand (op0, mode0);
26013 if (VECTOR_MODE_P (mode1))
26014 op1 = safe_vector_operand (op1, mode1);
26016 if (optimize || !target
26017 || GET_MODE (target) != tmode
26018 || !insn_data[icode].operand[0].predicate (target, tmode))
26019 target = gen_reg_rtx (tmode);
26021 if (GET_MODE (op1) == SImode && mode1 == TImode)
26023 rtx x = gen_reg_rtx (V4SImode);
26024 emit_insn (gen_sse2_loadd (x, op1));
26025 op1 = gen_lowpart (TImode, x);
26028 if (!insn_data[icode].operand[1].predicate (op0, mode0))
26029 op0 = copy_to_mode_reg (mode0, op0);
26030 if (!insn_data[icode].operand[2].predicate (op1, mode1))
26031 op1 = copy_to_mode_reg (mode1, op1);
26033 pat = GEN_FCN (icode) (target, op0, op1);
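/* Editorial usage sketch: a typical caller of this path is a plain
   two-operand SSE2 builtin such as the one behind _mm_add_epi16 (name
   assumed from the usual ia32 convention):

     typedef short __v8hi __attribute__ ((vector_size (16)));

     __v8hi
     add16 (__v8hi a, __v8hi b)
     {
       return __builtin_ia32_paddw128 (a, b);
     }
*/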
26042 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
26045 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
26046 enum ix86_builtin_func_type m_type,
26047 enum rtx_code sub_code)
26052 bool comparison_p = false;
26054 bool last_arg_constant = false;
26055 int num_memory = 0;
26058 enum machine_mode mode;
26061 enum machine_mode tmode = insn_data[icode].operand[0].mode;
26065 case MULTI_ARG_4_DF2_DI_I:
26066 case MULTI_ARG_4_DF2_DI_I1:
26067 case MULTI_ARG_4_SF2_SI_I:
26068 case MULTI_ARG_4_SF2_SI_I1:
26070 last_arg_constant = true;
26073 case MULTI_ARG_3_SF:
26074 case MULTI_ARG_3_DF:
26075 case MULTI_ARG_3_SF2:
26076 case MULTI_ARG_3_DF2:
26077 case MULTI_ARG_3_DI:
26078 case MULTI_ARG_3_SI:
26079 case MULTI_ARG_3_SI_DI:
26080 case MULTI_ARG_3_HI:
26081 case MULTI_ARG_3_HI_SI:
26082 case MULTI_ARG_3_QI:
26083 case MULTI_ARG_3_DI2:
26084 case MULTI_ARG_3_SI2:
26085 case MULTI_ARG_3_HI2:
26086 case MULTI_ARG_3_QI2:
26090 case MULTI_ARG_2_SF:
26091 case MULTI_ARG_2_DF:
26092 case MULTI_ARG_2_DI:
26093 case MULTI_ARG_2_SI:
26094 case MULTI_ARG_2_HI:
26095 case MULTI_ARG_2_QI:
26099 case MULTI_ARG_2_DI_IMM:
26100 case MULTI_ARG_2_SI_IMM:
26101 case MULTI_ARG_2_HI_IMM:
26102 case MULTI_ARG_2_QI_IMM:
26104 last_arg_constant = true;
26107 case MULTI_ARG_1_SF:
26108 case MULTI_ARG_1_DF:
26109 case MULTI_ARG_1_SF2:
26110 case MULTI_ARG_1_DF2:
26111 case MULTI_ARG_1_DI:
26112 case MULTI_ARG_1_SI:
26113 case MULTI_ARG_1_HI:
26114 case MULTI_ARG_1_QI:
26115 case MULTI_ARG_1_SI_DI:
26116 case MULTI_ARG_1_HI_DI:
26117 case MULTI_ARG_1_HI_SI:
26118 case MULTI_ARG_1_QI_DI:
26119 case MULTI_ARG_1_QI_SI:
26120 case MULTI_ARG_1_QI_HI:
26124 case MULTI_ARG_2_DI_CMP:
26125 case MULTI_ARG_2_SI_CMP:
26126 case MULTI_ARG_2_HI_CMP:
26127 case MULTI_ARG_2_QI_CMP:
26129 comparison_p = true;
26132 case MULTI_ARG_2_SF_TF:
26133 case MULTI_ARG_2_DF_TF:
26134 case MULTI_ARG_2_DI_TF:
26135 case MULTI_ARG_2_SI_TF:
26136 case MULTI_ARG_2_HI_TF:
26137 case MULTI_ARG_2_QI_TF:
26143 gcc_unreachable ();
26146 if (optimize || !target
26147 || GET_MODE (target) != tmode
26148 || !insn_data[icode].operand[0].predicate (target, tmode))
26149 target = gen_reg_rtx (tmode);
26151 gcc_assert (nargs <= 4);
26153 for (i = 0; i < nargs; i++)
26155 tree arg = CALL_EXPR_ARG (exp, i);
26156 rtx op = expand_normal (arg);
26157 int adjust = (comparison_p) ? 1 : 0;
26158 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
26160 if (last_arg_constant && i == nargs-1)
26162 if (!CONST_INT_P (op))
26164 error ("last argument must be an immediate");
26165 return gen_reg_rtx (tmode);
26170 if (VECTOR_MODE_P (mode))
26171 op = safe_vector_operand (op, mode);
26173 /* If we aren't optimizing, only allow one memory operand to be generated.  */
26175 if (memory_operand (op, mode))
26178 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
26181 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
26183 op = force_reg (mode, op);
26187 args[i].mode = mode;
26193 pat = GEN_FCN (icode) (target, args[0].op);
26198 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
26199 GEN_INT ((int)sub_code));
26200 else if (! comparison_p)
26201 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
26204 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
26208 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
26213 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
26217 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
26221 gcc_unreachable ();
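/* Editorial usage sketch: the MULTI_ARG_2_*_IMM classes set
   last_arg_constant, so the rotate count below must be a literal; a
   variable count instead trips the "last argument must be an immediate"
   error above.  Requires -mxop:

     typedef int __v4si __attribute__ ((vector_size (16)));

     __v4si
     rot3 (__v4si x)
     {
       return __builtin_ia32_vprotdi (x, 3);
     }
*/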
26231 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
26232 insns with vec_merge. */
26235 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
26239 tree arg0 = CALL_EXPR_ARG (exp, 0);
26240 rtx op1, op0 = expand_normal (arg0);
26241 enum machine_mode tmode = insn_data[icode].operand[0].mode;
26242 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
26244 if (optimize || !target
26245 || GET_MODE (target) != tmode
26246 || !insn_data[icode].operand[0].predicate (target, tmode))
26247 target = gen_reg_rtx (tmode);
26249 if (VECTOR_MODE_P (mode0))
26250 op0 = safe_vector_operand (op0, mode0);
26252 if ((optimize && !register_operand (op0, mode0))
26253 || !insn_data[icode].operand[1].predicate (op0, mode0))
26254 op0 = copy_to_mode_reg (mode0, op0);
26257 if (!insn_data[icode].operand[2].predicate (op1, mode0))
26258 op1 = copy_to_mode_reg (mode0, op1);
26260 pat = GEN_FCN (icode) (target, op0, op1);
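/* Editorial usage sketch (hedged): the vec_merge unop path serves scalar
   insns whose upper elements pass through unchanged; sqrtsd is believed
   to be one such case, taking the square root of element 0 and keeping
   element 1:

     typedef double __v2df __attribute__ ((vector_size (16)));

     __v2df
     sqrt_low (__v2df x)
     {
       return __builtin_ia32_sqrtsd (x);
     }
*/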
26267 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
26270 ix86_expand_sse_compare (const struct builtin_description *d,
26271 tree exp, rtx target, bool swap)
26274 tree arg0 = CALL_EXPR_ARG (exp, 0);
26275 tree arg1 = CALL_EXPR_ARG (exp, 1);
26276 rtx op0 = expand_normal (arg0);
26277 rtx op1 = expand_normal (arg1);
26279 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
26280 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
26281 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
26282 enum rtx_code comparison = d->comparison;
26284 if (VECTOR_MODE_P (mode0))
26285 op0 = safe_vector_operand (op0, mode0);
26286 if (VECTOR_MODE_P (mode1))
26287 op1 = safe_vector_operand (op1, mode1);
26289 /* Swap operands if we have a comparison that isn't available in hardware.  */
26293 rtx tmp = gen_reg_rtx (mode1);
26294 emit_move_insn (tmp, op1);
26299 if (optimize || !target
26300 || GET_MODE (target) != tmode
26301 || !insn_data[d->icode].operand[0].predicate (target, tmode))
26302 target = gen_reg_rtx (tmode);
26304 if ((optimize && !register_operand (op0, mode0))
26305 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
26306 op0 = copy_to_mode_reg (mode0, op0);
26307 if ((optimize && !register_operand (op1, mode1))
26308 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
26309 op1 = copy_to_mode_reg (mode1, op1);
26311 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
26312 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
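/* Editorial usage sketch (hedged): when SWAP is set the descriptor's
   comparison is emitted with the operands exchanged; cmpgtps, for
   instance, is commonly implemented as cmpltps with swapped inputs, so
   the following returns the same mask as cmpltps (b, a):

     typedef float __v4sf __attribute__ ((vector_size (16)));

     __v4sf
     gt_mask (__v4sf a, __v4sf b)
     {
       return __builtin_ia32_cmpgtps (a, b);
     }
*/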
26319 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
26322 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
26326 tree arg0 = CALL_EXPR_ARG (exp, 0);
26327 tree arg1 = CALL_EXPR_ARG (exp, 1);
26328 rtx op0 = expand_normal (arg0);
26329 rtx op1 = expand_normal (arg1);
26330 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
26331 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
26332 enum rtx_code comparison = d->comparison;
26334 if (VECTOR_MODE_P (mode0))
26335 op0 = safe_vector_operand (op0, mode0);
26336 if (VECTOR_MODE_P (mode1))
26337 op1 = safe_vector_operand (op1, mode1);
26339 /* Swap operands if we have a comparison that isn't available in hardware.  */
26341 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
26348 target = gen_reg_rtx (SImode);
26349 emit_move_insn (target, const0_rtx);
26350 target = gen_rtx_SUBREG (QImode, target, 0);
26352 if ((optimize && !register_operand (op0, mode0))
26353 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
26354 op0 = copy_to_mode_reg (mode0, op0);
26355 if ((optimize && !register_operand (op1, mode1))
26356 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
26357 op1 = copy_to_mode_reg (mode1, op1);
26359 pat = GEN_FCN (d->icode) (op0, op1);
26363 emit_insn (gen_rtx_SET (VOIDmode,
26364 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
26365 gen_rtx_fmt_ee (comparison, QImode,
26369 return SUBREG_REG (target);
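/* Editorial usage sketch: comi builtins produce a scalar flag rather than
   a mask; the QImode SUBREG dance above is what turns the flags register
   into the int result of, e.g., the builtin behind _mm_comilt_ss:

     typedef float __v4sf __attribute__ ((vector_size (16)));

     int
     low_less (__v4sf a, __v4sf b)
     {
       return __builtin_ia32_comilt (a, b);
     }
*/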
26372 /* Subroutine of ix86_expand_args_builtin to take care of round insns. */
26375 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
26379 tree arg0 = CALL_EXPR_ARG (exp, 0);
26380 rtx op1, op0 = expand_normal (arg0);
26381 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
26382 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
26384 if (optimize || target == 0
26385 || GET_MODE (target) != tmode
26386 || !insn_data[d->icode].operand[0].predicate (target, tmode))
26387 target = gen_reg_rtx (tmode);
26389 if (VECTOR_MODE_P (mode0))
26390 op0 = safe_vector_operand (op0, mode0);
26392 if ((optimize && !register_operand (op0, mode0))
26393 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
26394 op0 = copy_to_mode_reg (mode0, op0);
26396 op1 = GEN_INT (d->comparison);
26398 pat = GEN_FCN (d->icode) (target, op0, op1);
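/* Editorial usage sketch (hedged): d->comparison carries the
   rounding-mode immediate here, so the user-visible builtin takes no
   mode argument; assuming the floor/ceil entries in bdesc_args are wired
   through this path:

     typedef double __v2df __attribute__ ((vector_size (16)));

     __v2df
     floor2 (__v2df x)
     {
       return __builtin_ia32_floorpd (x);
     }
*/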
26405 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
26408 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
26412 tree arg0 = CALL_EXPR_ARG (exp, 0);
26413 tree arg1 = CALL_EXPR_ARG (exp, 1);
26414 rtx op0 = expand_normal (arg0);
26415 rtx op1 = expand_normal (arg1);
26416 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
26417 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
26418 enum rtx_code comparison = d->comparison;
26420 if (VECTOR_MODE_P (mode0))
26421 op0 = safe_vector_operand (op0, mode0);
26422 if (VECTOR_MODE_P (mode1))
26423 op1 = safe_vector_operand (op1, mode1);
26425 target = gen_reg_rtx (SImode);
26426 emit_move_insn (target, const0_rtx);
26427 target = gen_rtx_SUBREG (QImode, target, 0);
26429 if ((optimize && !register_operand (op0, mode0))
26430 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
26431 op0 = copy_to_mode_reg (mode0, op0);
26432 if ((optimize && !register_operand (op1, mode1))
26433 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
26434 op1 = copy_to_mode_reg (mode1, op1);
26436 pat = GEN_FCN (d->icode) (op0, op1);
26440 emit_insn (gen_rtx_SET (VOIDmode,
26441 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
26442 gen_rtx_fmt_ee (comparison, QImode,
26446 return SUBREG_REG (target);
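/* Editorial usage sketch: the builtins listed under INT_FTYPE_*_PTEST
   come out as int flags, e.g. the builtin behind _mm_testz_si128 (name
   assumed from the ia32 convention):

     typedef long long __v2di __attribute__ ((vector_size (16)));

     int
     all_masked_zero (__v2di a, __v2di mask)
     {
       return __builtin_ia32_ptestz128 (a, mask);
     }
*/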
26449 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
26452 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
26453 tree exp, rtx target)
26456 tree arg0 = CALL_EXPR_ARG (exp, 0);
26457 tree arg1 = CALL_EXPR_ARG (exp, 1);
26458 tree arg2 = CALL_EXPR_ARG (exp, 2);
26459 tree arg3 = CALL_EXPR_ARG (exp, 3);
26460 tree arg4 = CALL_EXPR_ARG (exp, 4);
26461 rtx scratch0, scratch1;
26462 rtx op0 = expand_normal (arg0);
26463 rtx op1 = expand_normal (arg1);
26464 rtx op2 = expand_normal (arg2);
26465 rtx op3 = expand_normal (arg3);
26466 rtx op4 = expand_normal (arg4);
26467 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
26469 tmode0 = insn_data[d->icode].operand[0].mode;
26470 tmode1 = insn_data[d->icode].operand[1].mode;
26471 modev2 = insn_data[d->icode].operand[2].mode;
26472 modei3 = insn_data[d->icode].operand[3].mode;
26473 modev4 = insn_data[d->icode].operand[4].mode;
26474 modei5 = insn_data[d->icode].operand[5].mode;
26475 modeimm = insn_data[d->icode].operand[6].mode;
26477 if (VECTOR_MODE_P (modev2))
26478 op0 = safe_vector_operand (op0, modev2);
26479 if (VECTOR_MODE_P (modev4))
26480 op2 = safe_vector_operand (op2, modev4);
26482 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
26483 op0 = copy_to_mode_reg (modev2, op0);
26484 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
26485 op1 = copy_to_mode_reg (modei3, op1);
26486 if ((optimize && !register_operand (op2, modev4))
26487 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
26488 op2 = copy_to_mode_reg (modev4, op2);
26489 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
26490 op3 = copy_to_mode_reg (modei5, op3);
26492 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
26494 error ("the fifth argument must be an 8-bit immediate");
26498 if (d->code == IX86_BUILTIN_PCMPESTRI128)
26500 if (optimize || !target
26501 || GET_MODE (target) != tmode0
26502 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
26503 target = gen_reg_rtx (tmode0);
26505 scratch1 = gen_reg_rtx (tmode1);
26507 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
26509 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
26511 if (optimize || !target
26512 || GET_MODE (target) != tmode1
26513 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
26514 target = gen_reg_rtx (tmode1);
26516 scratch0 = gen_reg_rtx (tmode0);
26518 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
26522 gcc_assert (d->flag);
26524 scratch0 = gen_reg_rtx (tmode0);
26525 scratch1 = gen_reg_rtx (tmode1);
26527 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
26537 target = gen_reg_rtx (SImode);
26538 emit_move_insn (target, const0_rtx);
26539 target = gen_rtx_SUBREG (QImode, target, 0);
26542 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
26543 gen_rtx_fmt_ee (EQ, QImode,
26544 gen_rtx_REG ((enum machine_mode) d->flag,
26547 return SUBREG_REG (target);
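/* Editorial usage sketch (builtin name assumed from the ia32 convention):
   the index form of the explicit-length compare; the mode byte (0 here)
   must be a constant, as enforced above:

     typedef char __v16qi __attribute__ ((vector_size (16)));

     int
     find_any (__v16qi a, int la, __v16qi b, int lb)
     {
       return __builtin_ia32_pcmpestri128 (a, la, b, lb, 0);
     }
*/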
26554 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
26557 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
26558 tree exp, rtx target)
26561 tree arg0 = CALL_EXPR_ARG (exp, 0);
26562 tree arg1 = CALL_EXPR_ARG (exp, 1);
26563 tree arg2 = CALL_EXPR_ARG (exp, 2);
26564 rtx scratch0, scratch1;
26565 rtx op0 = expand_normal (arg0);
26566 rtx op1 = expand_normal (arg1);
26567 rtx op2 = expand_normal (arg2);
26568 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
26570 tmode0 = insn_data[d->icode].operand[0].mode;
26571 tmode1 = insn_data[d->icode].operand[1].mode;
26572 modev2 = insn_data[d->icode].operand[2].mode;
26573 modev3 = insn_data[d->icode].operand[3].mode;
26574 modeimm = insn_data[d->icode].operand[4].mode;
26576 if (VECTOR_MODE_P (modev2))
26577 op0 = safe_vector_operand (op0, modev2);
26578 if (VECTOR_MODE_P (modev3))
26579 op1 = safe_vector_operand (op1, modev3);
26581 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
26582 op0 = copy_to_mode_reg (modev2, op0);
26583 if ((optimize && !register_operand (op1, modev3))
26584 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
26585 op1 = copy_to_mode_reg (modev3, op1);
26587 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
26589 error ("the third argument must be an 8-bit immediate");
26593 if (d->code == IX86_BUILTIN_PCMPISTRI128)
26595 if (optimize || !target
26596 || GET_MODE (target) != tmode0
26597 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
26598 target = gen_reg_rtx (tmode0);
26600 scratch1 = gen_reg_rtx (tmode1);
26602 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
26604 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
26606 if (optimize || !target
26607 || GET_MODE (target) != tmode1
26608 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
26609 target = gen_reg_rtx (tmode1);
26611 scratch0 = gen_reg_rtx (tmode0);
26613 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
26617 gcc_assert (d->flag);
26619 scratch0 = gen_reg_rtx (tmode0);
26620 scratch1 = gen_reg_rtx (tmode1);
26622 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
26632 target = gen_reg_rtx (SImode);
26633 emit_move_insn (target, const0_rtx);
26634 target = gen_rtx_SUBREG (QImode, target, 0);
26637 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
26638 gen_rtx_fmt_ee (EQ, QImode,
26639 gen_rtx_REG ((enum machine_mode) d->flag,
26642 return SUBREG_REG (target);
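/* Editorial usage sketch: the implicit-length variant drops the two
   length operands (builtin name assumed from the ia32 convention):

     typedef char __v16qi __attribute__ ((vector_size (16)));

     int
     find_any0 (__v16qi a, __v16qi b)
     {
       return __builtin_ia32_pcmpistri128 (a, b, 0);
     }
*/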
26648 /* Subroutine of ix86_expand_builtin to take care of insns with
26649 a variable number of operands.  */
26652 ix86_expand_args_builtin (const struct builtin_description *d,
26653 tree exp, rtx target)
26655 rtx pat, real_target;
26656 unsigned int i, nargs;
26657 unsigned int nargs_constant = 0;
26658 int num_memory = 0;
26662 enum machine_mode mode;
26664 bool last_arg_count = false;
26665 enum insn_code icode = d->icode;
26666 const struct insn_data_d *insn_p = &insn_data[icode];
26667 enum machine_mode tmode = insn_p->operand[0].mode;
26668 enum machine_mode rmode = VOIDmode;
26670 enum rtx_code comparison = d->comparison;
26672 switch ((enum ix86_builtin_func_type) d->flag)
26674 case V2DF_FTYPE_V2DF_ROUND:
26675 case V4DF_FTYPE_V4DF_ROUND:
26676 case V4SF_FTYPE_V4SF_ROUND:
26677 case V8SF_FTYPE_V8SF_ROUND:
26678 return ix86_expand_sse_round (d, exp, target);
26679 case INT_FTYPE_V8SF_V8SF_PTEST:
26680 case INT_FTYPE_V4DI_V4DI_PTEST:
26681 case INT_FTYPE_V4DF_V4DF_PTEST:
26682 case INT_FTYPE_V4SF_V4SF_PTEST:
26683 case INT_FTYPE_V2DI_V2DI_PTEST:
26684 case INT_FTYPE_V2DF_V2DF_PTEST:
26685 return ix86_expand_sse_ptest (d, exp, target);
26686 case FLOAT128_FTYPE_FLOAT128:
26687 case FLOAT_FTYPE_FLOAT:
26688 case INT_FTYPE_INT:
26689 case UINT64_FTYPE_INT:
26690 case UINT16_FTYPE_UINT16:
26691 case INT64_FTYPE_INT64:
26692 case INT64_FTYPE_V4SF:
26693 case INT64_FTYPE_V2DF:
26694 case INT_FTYPE_V16QI:
26695 case INT_FTYPE_V8QI:
26696 case INT_FTYPE_V8SF:
26697 case INT_FTYPE_V4DF:
26698 case INT_FTYPE_V4SF:
26699 case INT_FTYPE_V2DF:
26700 case V16QI_FTYPE_V16QI:
26701 case V8SI_FTYPE_V8SF:
26702 case V8SI_FTYPE_V4SI:
26703 case V8HI_FTYPE_V8HI:
26704 case V8HI_FTYPE_V16QI:
26705 case V8QI_FTYPE_V8QI:
26706 case V8SF_FTYPE_V8SF:
26707 case V8SF_FTYPE_V8SI:
26708 case V8SF_FTYPE_V4SF:
26709 case V8SF_FTYPE_V8HI:
26710 case V4SI_FTYPE_V4SI:
26711 case V4SI_FTYPE_V16QI:
26712 case V4SI_FTYPE_V4SF:
26713 case V4SI_FTYPE_V8SI:
26714 case V4SI_FTYPE_V8HI:
26715 case V4SI_FTYPE_V4DF:
26716 case V4SI_FTYPE_V2DF:
26717 case V4HI_FTYPE_V4HI:
26718 case V4DF_FTYPE_V4DF:
26719 case V4DF_FTYPE_V4SI:
26720 case V4DF_FTYPE_V4SF:
26721 case V4DF_FTYPE_V2DF:
26722 case V4SF_FTYPE_V4SF:
26723 case V4SF_FTYPE_V4SI:
26724 case V4SF_FTYPE_V8SF:
26725 case V4SF_FTYPE_V4DF:
26726 case V4SF_FTYPE_V8HI:
26727 case V4SF_FTYPE_V2DF:
26728 case V2DI_FTYPE_V2DI:
26729 case V2DI_FTYPE_V16QI:
26730 case V2DI_FTYPE_V8HI:
26731 case V2DI_FTYPE_V4SI:
26732 case V2DF_FTYPE_V2DF:
26733 case V2DF_FTYPE_V4SI:
26734 case V2DF_FTYPE_V4DF:
26735 case V2DF_FTYPE_V4SF:
26736 case V2DF_FTYPE_V2SI:
26737 case V2SI_FTYPE_V2SI:
26738 case V2SI_FTYPE_V4SF:
26739 case V2SI_FTYPE_V2SF:
26740 case V2SI_FTYPE_V2DF:
26741 case V2SF_FTYPE_V2SF:
26742 case V2SF_FTYPE_V2SI:
26745 case V4SF_FTYPE_V4SF_VEC_MERGE:
26746 case V2DF_FTYPE_V2DF_VEC_MERGE:
26747 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
26748 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
26749 case V16QI_FTYPE_V16QI_V16QI:
26750 case V16QI_FTYPE_V8HI_V8HI:
26751 case V8QI_FTYPE_V8QI_V8QI:
26752 case V8QI_FTYPE_V4HI_V4HI:
26753 case V8HI_FTYPE_V8HI_V8HI:
26754 case V8HI_FTYPE_V16QI_V16QI:
26755 case V8HI_FTYPE_V4SI_V4SI:
26756 case V8SF_FTYPE_V8SF_V8SF:
26757 case V8SF_FTYPE_V8SF_V8SI:
26758 case V4SI_FTYPE_V4SI_V4SI:
26759 case V4SI_FTYPE_V8HI_V8HI:
26760 case V4SI_FTYPE_V4SF_V4SF:
26761 case V4SI_FTYPE_V2DF_V2DF:
26762 case V4HI_FTYPE_V4HI_V4HI:
26763 case V4HI_FTYPE_V8QI_V8QI:
26764 case V4HI_FTYPE_V2SI_V2SI:
26765 case V4DF_FTYPE_V4DF_V4DF:
26766 case V4DF_FTYPE_V4DF_V4DI:
26767 case V4SF_FTYPE_V4SF_V4SF:
26768 case V4SF_FTYPE_V4SF_V4SI:
26769 case V4SF_FTYPE_V4SF_V2SI:
26770 case V4SF_FTYPE_V4SF_V2DF:
26771 case V4SF_FTYPE_V4SF_DI:
26772 case V4SF_FTYPE_V4SF_SI:
26773 case V2DI_FTYPE_V2DI_V2DI:
26774 case V2DI_FTYPE_V16QI_V16QI:
26775 case V2DI_FTYPE_V4SI_V4SI:
26776 case V2DI_FTYPE_V2DI_V16QI:
26777 case V2DI_FTYPE_V2DF_V2DF:
26778 case V2SI_FTYPE_V2SI_V2SI:
26779 case V2SI_FTYPE_V4HI_V4HI:
26780 case V2SI_FTYPE_V2SF_V2SF:
26781 case V2DF_FTYPE_V2DF_V2DF:
26782 case V2DF_FTYPE_V2DF_V4SF:
26783 case V2DF_FTYPE_V2DF_V2DI:
26784 case V2DF_FTYPE_V2DF_DI:
26785 case V2DF_FTYPE_V2DF_SI:
26786 case V2SF_FTYPE_V2SF_V2SF:
26787 case V1DI_FTYPE_V1DI_V1DI:
26788 case V1DI_FTYPE_V8QI_V8QI:
26789 case V1DI_FTYPE_V2SI_V2SI:
26790 if (comparison == UNKNOWN)
26791 return ix86_expand_binop_builtin (icode, exp, target);
26794 case V4SF_FTYPE_V4SF_V4SF_SWAP:
26795 case V2DF_FTYPE_V2DF_V2DF_SWAP:
26796 gcc_assert (comparison != UNKNOWN);
26800 case V8HI_FTYPE_V8HI_V8HI_COUNT:
26801 case V8HI_FTYPE_V8HI_SI_COUNT:
26802 case V4SI_FTYPE_V4SI_V4SI_COUNT:
26803 case V4SI_FTYPE_V4SI_SI_COUNT:
26804 case V4HI_FTYPE_V4HI_V4HI_COUNT:
26805 case V4HI_FTYPE_V4HI_SI_COUNT:
26806 case V2DI_FTYPE_V2DI_V2DI_COUNT:
26807 case V2DI_FTYPE_V2DI_SI_COUNT:
26808 case V2SI_FTYPE_V2SI_V2SI_COUNT:
26809 case V2SI_FTYPE_V2SI_SI_COUNT:
26810 case V1DI_FTYPE_V1DI_V1DI_COUNT:
26811 case V1DI_FTYPE_V1DI_SI_COUNT:
26813 last_arg_count = true;
26815 case UINT64_FTYPE_UINT64_UINT64:
26816 case UINT_FTYPE_UINT_UINT:
26817 case UINT_FTYPE_UINT_USHORT:
26818 case UINT_FTYPE_UINT_UCHAR:
26819 case UINT16_FTYPE_UINT16_INT:
26820 case UINT8_FTYPE_UINT8_INT:
26823 case V2DI_FTYPE_V2DI_INT_CONVERT:
26826 nargs_constant = 1;
26828 case V8HI_FTYPE_V8HI_INT:
26829 case V8HI_FTYPE_V8SF_INT:
26830 case V8HI_FTYPE_V4SF_INT:
26831 case V8SF_FTYPE_V8SF_INT:
26832 case V4SI_FTYPE_V4SI_INT:
26833 case V4SI_FTYPE_V8SI_INT:
26834 case V4HI_FTYPE_V4HI_INT:
26835 case V4DF_FTYPE_V4DF_INT:
26836 case V4SF_FTYPE_V4SF_INT:
26837 case V4SF_FTYPE_V8SF_INT:
26838 case V2DI_FTYPE_V2DI_INT:
26839 case V2DF_FTYPE_V2DF_INT:
26840 case V2DF_FTYPE_V4DF_INT:
26842 nargs_constant = 1;
26844 case V16QI_FTYPE_V16QI_V16QI_V16QI:
26845 case V8SF_FTYPE_V8SF_V8SF_V8SF:
26846 case V4DF_FTYPE_V4DF_V4DF_V4DF:
26847 case V4SF_FTYPE_V4SF_V4SF_V4SF:
26848 case V2DF_FTYPE_V2DF_V2DF_V2DF:
26851 case V16QI_FTYPE_V16QI_V16QI_INT:
26852 case V8HI_FTYPE_V8HI_V8HI_INT:
26853 case V8SI_FTYPE_V8SI_V8SI_INT:
26854 case V8SI_FTYPE_V8SI_V4SI_INT:
26855 case V8SF_FTYPE_V8SF_V8SF_INT:
26856 case V8SF_FTYPE_V8SF_V4SF_INT:
26857 case V4SI_FTYPE_V4SI_V4SI_INT:
26858 case V4DF_FTYPE_V4DF_V4DF_INT:
26859 case V4DF_FTYPE_V4DF_V2DF_INT:
26860 case V4SF_FTYPE_V4SF_V4SF_INT:
26861 case V2DI_FTYPE_V2DI_V2DI_INT:
26862 case V2DF_FTYPE_V2DF_V2DF_INT:
26864 nargs_constant = 1;
26866 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
26869 nargs_constant = 1;
26871 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
26874 nargs_constant = 1;
26876 case V2DI_FTYPE_V2DI_UINT_UINT:
26878 nargs_constant = 2;
26880 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
26881 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
26882 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
26883 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
26885 nargs_constant = 1;
26887 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
26889 nargs_constant = 2;
26892 gcc_unreachable ();
26895 gcc_assert (nargs <= ARRAY_SIZE (args));
26897 if (comparison != UNKNOWN)
26899 gcc_assert (nargs == 2);
26900 return ix86_expand_sse_compare (d, exp, target, swap);
26903 if (rmode == VOIDmode || rmode == tmode)
26907 || GET_MODE (target) != tmode
26908 || !insn_p->operand[0].predicate (target, tmode))
26909 target = gen_reg_rtx (tmode);
26910 real_target = target;
26914 target = gen_reg_rtx (rmode);
26915 real_target = simplify_gen_subreg (tmode, target, rmode, 0);
26918 for (i = 0; i < nargs; i++)
26920 tree arg = CALL_EXPR_ARG (exp, i);
26921 rtx op = expand_normal (arg);
26922 enum machine_mode mode = insn_p->operand[i + 1].mode;
26923 bool match = insn_p->operand[i + 1].predicate (op, mode);
26925 if (last_arg_count && (i + 1) == nargs)
26927 /* SIMD shift insns take either an 8-bit immediate or a
26928 register as the count.  But builtin functions take an int as
26929 the count.  If the count doesn't match, we put it in a register.  */
26932 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
26933 if (!insn_p->operand[i + 1].predicate (op, mode))
26934 op = copy_to_reg (op);
26937 else if ((nargs - i) <= nargs_constant)
26942 case CODE_FOR_sse4_1_roundpd:
26943 case CODE_FOR_sse4_1_roundps:
26944 case CODE_FOR_sse4_1_roundsd:
26945 case CODE_FOR_sse4_1_roundss:
26946 case CODE_FOR_sse4_1_blendps:
26947 case CODE_FOR_avx_blendpd256:
26948 case CODE_FOR_avx_vpermilv4df:
26949 case CODE_FOR_avx_roundpd256:
26950 case CODE_FOR_avx_roundps256:
26951 error ("the last argument must be a 4-bit immediate");
26954 case CODE_FOR_sse4_1_blendpd:
26955 case CODE_FOR_avx_vpermilv2df:
26956 case CODE_FOR_xop_vpermil2v2df3:
26957 case CODE_FOR_xop_vpermil2v4sf3:
26958 case CODE_FOR_xop_vpermil2v4df3:
26959 case CODE_FOR_xop_vpermil2v8sf3:
26960 error ("the last argument must be a 2-bit immediate");
26963 case CODE_FOR_avx_vextractf128v4df:
26964 case CODE_FOR_avx_vextractf128v8sf:
26965 case CODE_FOR_avx_vextractf128v8si:
26966 case CODE_FOR_avx_vinsertf128v4df:
26967 case CODE_FOR_avx_vinsertf128v8sf:
26968 case CODE_FOR_avx_vinsertf128v8si:
26969 error ("the last argument must be a 1-bit immediate");
26972 case CODE_FOR_avx_vmcmpv2df3:
26973 case CODE_FOR_avx_vmcmpv4sf3:
26974 case CODE_FOR_avx_cmpv2df3:
26975 case CODE_FOR_avx_cmpv4sf3:
26976 case CODE_FOR_avx_cmpv4df3:
26977 case CODE_FOR_avx_cmpv8sf3:
26978 error ("the last argument must be a 5-bit immediate");
26982 switch (nargs_constant)
26985 if ((nargs - i) == nargs_constant)
26987 error ("the next to last argument must be an 8-bit immediate");
26991 error ("the last argument must be an 8-bit immediate");
26994 gcc_unreachable ();
27001 if (VECTOR_MODE_P (mode))
27002 op = safe_vector_operand (op, mode);
27004 /* If we aren't optimizing, only allow one memory operand to be generated.  */
27006 if (memory_operand (op, mode))
27009 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
27011 if (optimize || !match || num_memory > 1)
27012 op = copy_to_mode_reg (mode, op);
27016 op = copy_to_reg (op);
27017 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
27022 args[i].mode = mode;
27028 pat = GEN_FCN (icode) (real_target, args[0].op);
27031 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
27034 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
27038 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
27039 args[2].op, args[3].op);
27042 gcc_unreachable ();
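/* Editorial usage sketch: the per-icode diagnostics above are what bad
   constants produce; with -msse4.1, the call below is rejected through
   the CODE_FOR_sse4_1_roundpd arm with "the last argument must be a
   4-bit immediate", because 42 does not fit in 4 bits:

     typedef double __v2df __attribute__ ((vector_size (16)));

     __v2df
     bad (__v2df x)
     {
       return __builtin_ia32_roundpd (x, 42);
     }
*/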
27052 /* Subroutine of ix86_expand_builtin to take care of special insns
27053 with a variable number of operands.  */
27056 ix86_expand_special_args_builtin (const struct builtin_description *d,
27057 tree exp, rtx target)
27061 unsigned int i, nargs, arg_adjust, memory;
27065 enum machine_mode mode;
27067 enum insn_code icode = d->icode;
27068 bool last_arg_constant = false;
27069 const struct insn_data_d *insn_p = &insn_data[icode];
27070 enum machine_mode tmode = insn_p->operand[0].mode;
27071 enum { load, store } klass;
27073 switch ((enum ix86_builtin_func_type) d->flag)
27075 case VOID_FTYPE_VOID:
27076 if (icode == CODE_FOR_avx_vzeroupper)
27077 target = GEN_INT (vzeroupper_intrinsic);
27078 emit_insn (GEN_FCN (icode) (target));
27080 case VOID_FTYPE_UINT64:
27081 case VOID_FTYPE_UNSIGNED:
27087 case UINT64_FTYPE_VOID:
27088 case UNSIGNED_FTYPE_VOID:
27093 case UINT64_FTYPE_PUNSIGNED:
27094 case V2DI_FTYPE_PV2DI:
27095 case V32QI_FTYPE_PCCHAR:
27096 case V16QI_FTYPE_PCCHAR:
27097 case V8SF_FTYPE_PCV4SF:
27098 case V8SF_FTYPE_PCFLOAT:
27099 case V4SF_FTYPE_PCFLOAT:
27100 case V4DF_FTYPE_PCV2DF:
27101 case V4DF_FTYPE_PCDOUBLE:
27102 case V2DF_FTYPE_PCDOUBLE:
27103 case VOID_FTYPE_PVOID:
27108 case VOID_FTYPE_PV2SF_V4SF:
27109 case VOID_FTYPE_PV4DI_V4DI:
27110 case VOID_FTYPE_PV2DI_V2DI:
27111 case VOID_FTYPE_PCHAR_V32QI:
27112 case VOID_FTYPE_PCHAR_V16QI:
27113 case VOID_FTYPE_PFLOAT_V8SF:
27114 case VOID_FTYPE_PFLOAT_V4SF:
27115 case VOID_FTYPE_PDOUBLE_V4DF:
27116 case VOID_FTYPE_PDOUBLE_V2DF:
27117 case VOID_FTYPE_PULONGLONG_ULONGLONG:
27118 case VOID_FTYPE_PINT_INT:
27121 /* Reserve memory operand for target. */
27122 memory = ARRAY_SIZE (args);
27124 case V4SF_FTYPE_V4SF_PCV2SF:
27125 case V2DF_FTYPE_V2DF_PCDOUBLE:
27130 case V8SF_FTYPE_PCV8SF_V8SI:
27131 case V4DF_FTYPE_PCV4DF_V4DI:
27132 case V4SF_FTYPE_PCV4SF_V4SI:
27133 case V2DF_FTYPE_PCV2DF_V2DI:
27138 case VOID_FTYPE_PV8SF_V8SI_V8SF:
27139 case VOID_FTYPE_PV4DF_V4DI_V4DF:
27140 case VOID_FTYPE_PV4SF_V4SI_V4SF:
27141 case VOID_FTYPE_PV2DF_V2DI_V2DF:
27144 /* Reserve memory operand for target. */
27145 memory = ARRAY_SIZE (args);
27147 case VOID_FTYPE_UINT_UINT_UINT:
27148 case VOID_FTYPE_UINT64_UINT_UINT:
27149 case UCHAR_FTYPE_UINT_UINT_UINT:
27150 case UCHAR_FTYPE_UINT64_UINT_UINT:
27153 memory = ARRAY_SIZE (args);
27154 last_arg_constant = true;
27157 gcc_unreachable ();
27160 gcc_assert (nargs <= ARRAY_SIZE (args));
27162 if (klass == store)
27164 arg = CALL_EXPR_ARG (exp, 0);
27165 op = expand_normal (arg);
27166 gcc_assert (target == 0);
27168 target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
27170 target = force_reg (tmode, op);
27178 || GET_MODE (target) != tmode
27179 || !insn_p->operand[0].predicate (target, tmode))
27180 target = gen_reg_rtx (tmode);
27183 for (i = 0; i < nargs; i++)
27185 enum machine_mode mode = insn_p->operand[i + 1].mode;
27188 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
27189 op = expand_normal (arg);
27190 match = insn_p->operand[i + 1].predicate (op, mode);
27192 if (last_arg_constant && (i + 1) == nargs)
27196 if (icode == CODE_FOR_lwp_lwpvalsi3
27197 || icode == CODE_FOR_lwp_lwpinssi3
27198 || icode == CODE_FOR_lwp_lwpvaldi3
27199 || icode == CODE_FOR_lwp_lwpinsdi3)
27200 error ("the last argument must be a 32-bit immediate");
27202 error ("the last argument must be an 8-bit immediate");
27210 /* This must be the memory operand. */
27211 op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
27212 gcc_assert (GET_MODE (op) == mode
27213 || GET_MODE (op) == VOIDmode);
27217 /* This must be a register.  */
27218 if (VECTOR_MODE_P (mode))
27219 op = safe_vector_operand (op, mode);
27221 gcc_assert (GET_MODE (op) == mode
27222 || GET_MODE (op) == VOIDmode);
27223 op = copy_to_mode_reg (mode, op);
27228 args[i].mode = mode;
27234 pat = GEN_FCN (icode) (target);
27237 pat = GEN_FCN (icode) (target, args[0].op);
27240 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
27243 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
27246 gcc_unreachable ();
27252 return klass == store ? 0 : target;
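/* Editorial usage sketch: for the load klass the pointer argument is
   wrapped in a MEM above, which is how unaligned loads come out, e.g.
   the builtin behind _mm_loadu_pd:

     typedef double __v2df __attribute__ ((vector_size (16)));

     __v2df
     loadu (const double *p)
     {
       return __builtin_ia32_loadupd (p);
     }
*/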
27255 /* Return the integer constant in ARG. Constrain it to be in the range
27256 of the subparts of VEC_TYPE; issue an error if not. */
27259 get_element_number (tree vec_type, tree arg)
27261 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
27263 if (!host_integerp (arg, 1)
27264 || (elt = tree_low_cst (arg, 1), elt > max))
27266 error ("selector must be an integer constant in the range 0..%wi", max);
27273 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
27274 ix86_expand_vector_init. We DO have language-level syntax for this, in
27275 the form of (type){ init-list }. Except that since we can't place emms
27276 instructions from inside the compiler, we can't allow the use of MMX
27277 registers unless the user explicitly asks for it. So we do *not* define
27278 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
27279 we have builtins invoked by mmintrin.h that give us license to emit
27280 these sorts of instructions. */
27283 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
27285 enum machine_mode tmode = TYPE_MODE (type);
27286 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
27287 int i, n_elt = GET_MODE_NUNITS (tmode);
27288 rtvec v = rtvec_alloc (n_elt);
27290 gcc_assert (VECTOR_MODE_P (tmode));
27291 gcc_assert (call_expr_nargs (exp) == n_elt);
27293 for (i = 0; i < n_elt; ++i)
27295 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
27296 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
27299 if (!target || !register_operand (target, tmode))
27300 target = gen_reg_rtx (tmode);
27302 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
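/* Editorial usage sketch: the MMX constructors in mmintrin.h funnel into
   this routine; requires -mmmx:

     typedef int __v2si __attribute__ ((vector_size (8)));

     __v2si
     make_pair (int a, int b)
     {
       return __builtin_ia32_vec_init_v2si (a, b);
     }
*/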
27306 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
27307 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
27308 had a language-level syntax for referencing vector elements. */
27311 ix86_expand_vec_ext_builtin (tree exp, rtx target)
27313 enum machine_mode tmode, mode0;
27318 arg0 = CALL_EXPR_ARG (exp, 0);
27319 arg1 = CALL_EXPR_ARG (exp, 1);
27321 op0 = expand_normal (arg0);
27322 elt = get_element_number (TREE_TYPE (arg0), arg1);
27324 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
27325 mode0 = TYPE_MODE (TREE_TYPE (arg0));
27326 gcc_assert (VECTOR_MODE_P (mode0));
27328 op0 = force_reg (mode0, op0);
27330 if (optimize || !target || !register_operand (target, tmode))
27331 target = gen_reg_rtx (tmode);
27333 ix86_expand_vector_extract (true, target, op0, elt);
27338 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
27339 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
27340 a language-level syntax for referencing vector elements. */
27343 ix86_expand_vec_set_builtin (tree exp)
27345 enum machine_mode tmode, mode1;
27346 tree arg0, arg1, arg2;
27348 rtx op0, op1, target;
27350 arg0 = CALL_EXPR_ARG (exp, 0);
27351 arg1 = CALL_EXPR_ARG (exp, 1);
27352 arg2 = CALL_EXPR_ARG (exp, 2);
27354 tmode = TYPE_MODE (TREE_TYPE (arg0));
27355 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
27356 gcc_assert (VECTOR_MODE_P (tmode));
27358 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
27359 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
27360 elt = get_element_number (TREE_TYPE (arg0), arg2);
27362 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
27363 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
27365 op0 = force_reg (tmode, op0);
27366 op1 = force_reg (mode1, op1);
27368 /* OP0 is the source of these builtin functions and shouldn't be
27369 modified. Create a copy, use it, and return it as the target. */
27370 target = gen_reg_rtx (tmode);
27371 emit_move_insn (target, op0);
27372 ix86_expand_vector_set (true, target, op1, elt);
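/* Usage sketch (illustrative): the copy-and-return behaviour above is
   what makes the emmintrin.h wrapper non-destructive at the user level:

     #include <emmintrin.h>

     static __m128i
     set_lane3 (__m128i v, short x)
     {
       return _mm_insert_epi16 (v, x, 3);  // vec_set_v8hi; V unchanged
     }
 */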
27377 /* Expand an expression EXP that calls a built-in function,
27378 with result going to TARGET if that's convenient
27379 (and in mode MODE if that's convenient).
27380 SUBTARGET may be used as the target for computing one of EXP's operands.
27381 IGNORE is nonzero if the value is to be ignored. */
27384 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
27385 enum machine_mode mode ATTRIBUTE_UNUSED,
27386 int ignore ATTRIBUTE_UNUSED)
27388 const struct builtin_description *d;
27390 enum insn_code icode;
27391 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
27392 tree arg0, arg1, arg2;
27393 rtx op0, op1, op2, pat;
27394 enum machine_mode mode0, mode1, mode2;
27395 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
27397 /* Determine whether the builtin function is available under the current ISA.
27398 Originally the builtin was not created if it wasn't applicable to the
27399 current ISA based on the command line switches. With function specific
27400 options, we need to check in the context of the function making the call
27401 whether it is supported. */
27402 if (ix86_builtins_isa[fcode].isa
27403 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
27405 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
27406 NULL, (enum fpmath_unit) 0, false);
27409 error ("%qE needs unknown isa option", fndecl);
27412 gcc_assert (opts != NULL);
27413 error ("%qE needs isa option %s", fndecl, opts);
27421 case IX86_BUILTIN_MASKMOVQ:
27422 case IX86_BUILTIN_MASKMOVDQU:
27423 icode = (fcode == IX86_BUILTIN_MASKMOVQ
27424 ? CODE_FOR_mmx_maskmovq
27425 : CODE_FOR_sse2_maskmovdqu);
27426 /* Note the arg order is different from the operand order. */
27427 arg1 = CALL_EXPR_ARG (exp, 0);
27428 arg2 = CALL_EXPR_ARG (exp, 1);
27429 arg0 = CALL_EXPR_ARG (exp, 2);
27430 op0 = expand_normal (arg0);
27431 op1 = expand_normal (arg1);
27432 op2 = expand_normal (arg2);
27433 mode0 = insn_data[icode].operand[0].mode;
27434 mode1 = insn_data[icode].operand[1].mode;
27435 mode2 = insn_data[icode].operand[2].mode;
27437 op0 = force_reg (Pmode, op0);
27438 op0 = gen_rtx_MEM (mode1, op0);
27440 if (!insn_data[icode].operand[0].predicate (op0, mode0))
27441 op0 = copy_to_mode_reg (mode0, op0);
27442 if (!insn_data[icode].operand[1].predicate (op1, mode1))
27443 op1 = copy_to_mode_reg (mode1, op1);
27444 if (!insn_data[icode].operand[2].predicate (op2, mode2))
27445 op2 = copy_to_mode_reg (mode2, op2);
27446 pat = GEN_FCN (icode) (op0, op1, op2);
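/* Usage sketch (illustrative): the MASKMOVDQU path is reached via
   _mm_maskmoveu_si128, which stores only the bytes of A whose
   corresponding mask byte has its top bit set; note the user-level
   argument order (data, mask, address) versus the insn's operands:

     #include <emmintrin.h>

     static void
     store_selected (__m128i a, __m128i mask, char *p)
     {
       _mm_maskmoveu_si128 (a, mask, p);
     }
 */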
27452 case IX86_BUILTIN_LDMXCSR:
27453 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
27454 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
27455 emit_move_insn (target, op0);
27456 emit_insn (gen_sse_ldmxcsr (target));
27459 case IX86_BUILTIN_STMXCSR:
27460 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
27461 emit_insn (gen_sse_stmxcsr (target));
27462 return copy_to_mode_reg (SImode, target);
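/* Usage sketch (illustrative): these two builtins back _mm_getcsr and
   _mm_setcsr, typically used in a save/modify/restore pattern
   (0x8000 is the MXCSR flush-to-zero bit):

     #include <xmmintrin.h>

     static unsigned int
     enable_ftz (void)
     {
       unsigned int saved = _mm_getcsr ();  // stmxcsr
       _mm_setcsr (saved | 0x8000);         // ldmxcsr
       return saved;                        // caller restores later
     }
 */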
27464 case IX86_BUILTIN_CLFLUSH:
27465 arg0 = CALL_EXPR_ARG (exp, 0);
27466 op0 = expand_normal (arg0);
27467 icode = CODE_FOR_sse2_clflush;
27468 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
27469 op0 = copy_to_mode_reg (Pmode, op0);
27471 emit_insn (gen_sse2_clflush (op0));
27474 case IX86_BUILTIN_MONITOR:
27475 arg0 = CALL_EXPR_ARG (exp, 0);
27476 arg1 = CALL_EXPR_ARG (exp, 1);
27477 arg2 = CALL_EXPR_ARG (exp, 2);
27478 op0 = expand_normal (arg0);
27479 op1 = expand_normal (arg1);
27480 op2 = expand_normal (arg2);
27482 op0 = copy_to_mode_reg (Pmode, op0);
27484 op1 = copy_to_mode_reg (SImode, op1);
27486 op2 = copy_to_mode_reg (SImode, op2);
27487 emit_insn (ix86_gen_monitor (op0, op1, op2));
27490 case IX86_BUILTIN_MWAIT:
27491 arg0 = CALL_EXPR_ARG (exp, 0);
27492 arg1 = CALL_EXPR_ARG (exp, 1);
27493 op0 = expand_normal (arg0);
27494 op1 = expand_normal (arg1);
27496 op0 = copy_to_mode_reg (SImode, op0);
27498 op1 = copy_to_mode_reg (SImode, op1);
27499 emit_insn (gen_sse3_mwait (op0, op1));
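/* Usage sketch (illustrative): pmmintrin.h wraps these as
   _mm_monitor/_mm_mwait; MONITOR/MWAIT normally require privilege
   level 0, so this is kernel-style code:

     #include <pmmintrin.h>

     static void
     wait_for_write (void const *p)
     {
       _mm_monitor (p, 0, 0);  // arm the address-range monitor
       _mm_mwait (0, 0);       // sleep until a write (or interrupt)
     }
 */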
27502 case IX86_BUILTIN_VEC_INIT_V2SI:
27503 case IX86_BUILTIN_VEC_INIT_V4HI:
27504 case IX86_BUILTIN_VEC_INIT_V8QI:
27505 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
27507 case IX86_BUILTIN_VEC_EXT_V2DF:
27508 case IX86_BUILTIN_VEC_EXT_V2DI:
27509 case IX86_BUILTIN_VEC_EXT_V4SF:
27510 case IX86_BUILTIN_VEC_EXT_V4SI:
27511 case IX86_BUILTIN_VEC_EXT_V8HI:
27512 case IX86_BUILTIN_VEC_EXT_V2SI:
27513 case IX86_BUILTIN_VEC_EXT_V4HI:
27514 case IX86_BUILTIN_VEC_EXT_V16QI:
27515 return ix86_expand_vec_ext_builtin (exp, target);
27517 case IX86_BUILTIN_VEC_SET_V2DI:
27518 case IX86_BUILTIN_VEC_SET_V4SF:
27519 case IX86_BUILTIN_VEC_SET_V4SI:
27520 case IX86_BUILTIN_VEC_SET_V8HI:
27521 case IX86_BUILTIN_VEC_SET_V4HI:
27522 case IX86_BUILTIN_VEC_SET_V16QI:
27523 return ix86_expand_vec_set_builtin (exp);
27525 case IX86_BUILTIN_VEC_PERM_V2DF:
27526 case IX86_BUILTIN_VEC_PERM_V4SF:
27527 case IX86_BUILTIN_VEC_PERM_V2DI:
27528 case IX86_BUILTIN_VEC_PERM_V4SI:
27529 case IX86_BUILTIN_VEC_PERM_V8HI:
27530 case IX86_BUILTIN_VEC_PERM_V16QI:
27531 case IX86_BUILTIN_VEC_PERM_V2DI_U:
27532 case IX86_BUILTIN_VEC_PERM_V4SI_U:
27533 case IX86_BUILTIN_VEC_PERM_V8HI_U:
27534 case IX86_BUILTIN_VEC_PERM_V16QI_U:
27535 case IX86_BUILTIN_VEC_PERM_V4DF:
27536 case IX86_BUILTIN_VEC_PERM_V8SF:
27537 return ix86_expand_vec_perm_builtin (exp);
27539 case IX86_BUILTIN_INFQ:
27540 case IX86_BUILTIN_HUGE_VALQ:
27542 REAL_VALUE_TYPE inf;
27546 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
27548 tmp = validize_mem (force_const_mem (mode, tmp));
27551 target = gen_reg_rtx (mode);
27553 emit_move_insn (target, tmp);
27557 case IX86_BUILTIN_LLWPCB:
27558 arg0 = CALL_EXPR_ARG (exp, 0);
27559 op0 = expand_normal (arg0);
27560 icode = CODE_FOR_lwp_llwpcb;
27561 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
27562 op0 = copy_to_mode_reg (Pmode, op0);
27563 emit_insn (gen_lwp_llwpcb (op0));
27566 case IX86_BUILTIN_SLWPCB:
27567 icode = CODE_FOR_lwp_slwpcb;
27569 || !insn_data[icode].operand[0].predicate (target, Pmode))
27570 target = gen_reg_rtx (Pmode);
27571 emit_insn (gen_lwp_slwpcb (target));
27574 case IX86_BUILTIN_BEXTRI32:
27575 case IX86_BUILTIN_BEXTRI64:
27576 arg0 = CALL_EXPR_ARG (exp, 0);
27577 arg1 = CALL_EXPR_ARG (exp, 1);
27578 op0 = expand_normal (arg0);
27579 op1 = expand_normal (arg1);
27580 icode = (fcode == IX86_BUILTIN_BEXTRI32
27581 ? CODE_FOR_tbm_bextri_si
27582 : CODE_FOR_tbm_bextri_di);
27583 if (!CONST_INT_P (op1))
27585 error ("last argument must be an immediate");
27590 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
27591 unsigned char lsb_index = INTVAL (op1) & 0xFF;
27592 op1 = GEN_INT (length);
27593 op2 = GEN_INT (lsb_index);
27594 pat = GEN_FCN (icode) (target, op0, op1, op2);
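/* Semantics sketch (illustrative) of the imm16 split above: bits 0-7
   give the starting (lsb) index and bits 8-15 the field length,
   matching TBM BEXTRI / BMI BEXTR:

     static unsigned int
     bextr32 (unsigned int src, unsigned int imm16)
     {
       unsigned int lsb = imm16 & 0xff;
       unsigned int len = (imm16 >> 8) & 0xff;
       if (lsb >= 32)
         return 0;                 // all selected bits shifted out
       src >>= lsb;
       return len >= 32 ? src : src & ((1u << len) - 1u);
     }
 */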
27600 case IX86_BUILTIN_RDRAND16_STEP:
27601 icode = CODE_FOR_rdrandhi_1;
27605 case IX86_BUILTIN_RDRAND32_STEP:
27606 icode = CODE_FOR_rdrandsi_1;
27610 case IX86_BUILTIN_RDRAND64_STEP:
27611 icode = CODE_FOR_rdranddi_1;
27615 op0 = gen_reg_rtx (mode0);
27616 emit_insn (GEN_FCN (icode) (op0));
27618 arg0 = CALL_EXPR_ARG (exp, 0);
27619 op1 = expand_normal (arg0);
27620 if (!address_operand (op1, VOIDmode))
27621 op1 = copy_addr_to_reg (op1);
27622 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
27624 op1 = gen_reg_rtx (SImode);
27625 emit_move_insn (op1, CONST1_RTX (SImode));
27627 /* Emit SImode conditional move. */
27628 if (mode0 == HImode)
27630 op2 = gen_reg_rtx (SImode);
27631 emit_insn (gen_zero_extendhisi2 (op2, op0));
27633 else if (mode0 == SImode)
27636 op2 = gen_rtx_SUBREG (SImode, op0, 0);
27639 target = gen_reg_rtx (SImode);
27641 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
27643 emit_insn (gen_rtx_SET (VOIDmode, target,
27644 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
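/* Usage sketch (illustrative): the conditional move above materializes
   the carry flag as the *_step return value, 1 on success and 0 if the
   hardware RNG was not ready, so callers retry:

     #include <immintrin.h>

     static int
     get_random_u32 (unsigned int *out)  // build with -mrdrnd
     {
       int i;
       for (i = 0; i < 16; i++)          // retry bound is arbitrary
         if (_rdrand32_step (out))
           return 1;
       return 0;
     }
 */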
27651 for (i = 0, d = bdesc_special_args;
27652 i < ARRAY_SIZE (bdesc_special_args);
i++, d++)
27654 if (d->code == fcode)
27655 return ix86_expand_special_args_builtin (d, exp, target);
27657 for (i = 0, d = bdesc_args;
27658 i < ARRAY_SIZE (bdesc_args);
i++, d++)
27660 if (d->code == fcode)
switch (fcode)
{
27663 case IX86_BUILTIN_FABSQ:
27664 case IX86_BUILTIN_COPYSIGNQ:
if (!TARGET_SSE2)
27666 /* Emit a normal call if SSE2 isn't available. */
27667 return expand_call (exp, target, ignore);
default:
27669 return ix86_expand_args_builtin (d, exp, target);
}
27672 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
27673 if (d->code == fcode)
27674 return ix86_expand_sse_comi (d, exp, target);
27676 for (i = 0, d = bdesc_pcmpestr;
27677 i < ARRAY_SIZE (bdesc_pcmpestr);
i++, d++)
27679 if (d->code == fcode)
27680 return ix86_expand_sse_pcmpestr (d, exp, target);
27682 for (i = 0, d = bdesc_pcmpistr;
27683 i < ARRAY_SIZE (bdesc_pcmpistr);
i++, d++)
27685 if (d->code == fcode)
27686 return ix86_expand_sse_pcmpistr (d, exp, target);
27688 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
27689 if (d->code == fcode)
27690 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
27691 (enum ix86_builtin_func_type)
27692 d->flag, d->comparison);
27694 gcc_unreachable ();
27697 /* Returns a function decl for a vectorized version of the builtin function
27698 with builtin function code FN and the result vector type TYPE, or NULL_TREE
27699 if it is not available. */
27702 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
27705 enum machine_mode in_mode, out_mode;
27707 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
27709 if (TREE_CODE (type_out) != VECTOR_TYPE
27710 || TREE_CODE (type_in) != VECTOR_TYPE
27711 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
27714 out_mode = TYPE_MODE (TREE_TYPE (type_out));
27715 out_n = TYPE_VECTOR_SUBPARTS (type_out);
27716 in_mode = TYPE_MODE (TREE_TYPE (type_in));
27717 in_n = TYPE_VECTOR_SUBPARTS (type_in);
27721 case BUILT_IN_SQRT:
27722 if (out_mode == DFmode && in_mode == DFmode)
27724 if (out_n == 2 && in_n == 2)
27725 return ix86_builtins[IX86_BUILTIN_SQRTPD];
27726 else if (out_n == 4 && in_n == 4)
27727 return ix86_builtins[IX86_BUILTIN_SQRTPD256];
27731 case BUILT_IN_SQRTF:
27732 if (out_mode == SFmode && in_mode == SFmode)
27734 if (out_n == 4 && in_n == 4)
27735 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
27736 else if (out_n == 8 && in_n == 8)
27737 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR256];
27741 case BUILT_IN_LRINT:
27742 if (out_mode == SImode && out_n == 4
27743 && in_mode == DFmode && in_n == 2)
27744 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
27747 case BUILT_IN_LRINTF:
27748 if (out_mode == SImode && in_mode == SFmode)
27750 if (out_n == 4 && in_n == 4)
27751 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
27752 else if (out_n == 8 && in_n == 8)
27753 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ256];
27757 case BUILT_IN_COPYSIGN:
27758 if (out_mode == DFmode && in_mode == DFmode)
27760 if (out_n == 2 && in_n == 2)
27761 return ix86_builtins[IX86_BUILTIN_CPYSGNPD];
27762 else if (out_n == 4 && in_n == 4)
27763 return ix86_builtins[IX86_BUILTIN_CPYSGNPD256];
27767 case BUILT_IN_COPYSIGNF:
27768 if (out_mode == SFmode && in_mode == SFmode)
27770 if (out_n == 4 && in_n == 4)
27771 return ix86_builtins[IX86_BUILTIN_CPYSGNPS];
27772 else if (out_n == 8 && in_n == 8)
27773 return ix86_builtins[IX86_BUILTIN_CPYSGNPS256];
27777 case BUILT_IN_FLOOR:
27778 /* The round insn does not trap on denormals. */
27779 if (flag_trapping_math || !TARGET_ROUND)
27782 if (out_mode == DFmode && in_mode == DFmode)
27784 if (out_n == 2 && in_n == 2)
27785 return ix86_builtins[IX86_BUILTIN_FLOORPD];
27786 else if (out_n == 4 && in_n == 4)
27787 return ix86_builtins[IX86_BUILTIN_FLOORPD256];
27791 case BUILT_IN_FLOORF:
27792 /* The round insn does not trap on denormals. */
27793 if (flag_trapping_math || !TARGET_ROUND)
27796 if (out_mode == SFmode && in_mode == SFmode)
27798 if (out_n == 4 && in_n == 4)
27799 return ix86_builtins[IX86_BUILTIN_FLOORPS];
27800 else if (out_n == 8 && in_n == 8)
27801 return ix86_builtins[IX86_BUILTIN_FLOORPS256];
27805 case BUILT_IN_CEIL:
27806 /* The round insn does not trap on denormals. */
27807 if (flag_trapping_math || !TARGET_ROUND)
27810 if (out_mode == DFmode && in_mode == DFmode)
27812 if (out_n == 2 && in_n == 2)
27813 return ix86_builtins[IX86_BUILTIN_CEILPD];
27814 else if (out_n == 4 && in_n == 4)
27815 return ix86_builtins[IX86_BUILTIN_CEILPD256];
27819 case BUILT_IN_CEILF:
27820 /* The round insn does not trap on denormals. */
27821 if (flag_trapping_math || !TARGET_ROUND)
27824 if (out_mode == SFmode && in_mode == SFmode)
27826 if (out_n == 4 && in_n == 4)
27827 return ix86_builtins[IX86_BUILTIN_CEILPS];
27828 else if (out_n == 8 && in_n == 8)
27829 return ix86_builtins[IX86_BUILTIN_CEILPS256];
27833 case BUILT_IN_TRUNC:
27834 /* The round insn does not trap on denormals. */
27835 if (flag_trapping_math || !TARGET_ROUND)
27838 if (out_mode == DFmode && in_mode == DFmode)
27840 if (out_n == 2 && in_n == 2)
27841 return ix86_builtins[IX86_BUILTIN_TRUNCPD];
27842 else if (out_n == 4 && in_n == 4)
27843 return ix86_builtins[IX86_BUILTIN_TRUNCPD256];
27847 case BUILT_IN_TRUNCF:
27848 /* The round insn does not trap on denormals. */
27849 if (flag_trapping_math || !TARGET_ROUND)
27852 if (out_mode == SFmode && in_mode == SFmode)
27854 if (out_n == 4 && in_n == 4)
27855 return ix86_builtins[IX86_BUILTIN_TRUNCPS];
27856 else if (out_n == 8 && in_n == 8)
27857 return ix86_builtins[IX86_BUILTIN_TRUNCPS256];
27861 case BUILT_IN_RINT:
27862 /* The round insn does not trap on denormals. */
27863 if (flag_trapping_math || !TARGET_ROUND)
27866 if (out_mode == DFmode && in_mode == DFmode)
27868 if (out_n == 2 && in_n == 2)
27869 return ix86_builtins[IX86_BUILTIN_RINTPD];
27870 else if (out_n == 4 && in_n == 4)
27871 return ix86_builtins[IX86_BUILTIN_RINTPD256];
27875 case BUILT_IN_RINTF:
27876 /* The round insn does not trap on denormals. */
27877 if (flag_trapping_math || !TARGET_ROUND)
27880 if (out_mode == SFmode && in_mode == SFmode)
27882 if (out_n == 4 && in_n == 4)
27883 return ix86_builtins[IX86_BUILTIN_RINTPS];
27884 else if (out_n == 8 && in_n == 8)
27885 return ix86_builtins[IX86_BUILTIN_RINTPS256];
27890 if (out_mode == DFmode && in_mode == DFmode)
27892 if (out_n == 2 && in_n == 2)
27893 return ix86_builtins[IX86_BUILTIN_VFMADDPD];
27894 if (out_n == 4 && in_n == 4)
27895 return ix86_builtins[IX86_BUILTIN_VFMADDPD256];
27899 case BUILT_IN_FMAF:
27900 if (out_mode == SFmode && in_mode == SFmode)
27902 if (out_n == 4 && in_n == 4)
27903 return ix86_builtins[IX86_BUILTIN_VFMADDPS];
27904 if (out_n == 8 && in_n == 8)
27905 return ix86_builtins[IX86_BUILTIN_VFMADDPS256];
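/* Example (illustrative) of the scalar code this hook lets the
   vectorizer rewrite: with -O3 -ffast-math -mavx, the sqrt calls
   below can map to the IX86_BUILTIN_SQRTPD256 entry above:

     #include <math.h>

     void
     vsqrt (double *restrict a, const double *restrict b, int n)
     {
       int i;
       for (i = 0; i < n; i++)
         a[i] = sqrt (b[i]);
     }
 */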
27913 /* Dispatch to a handler for a vectorization library. */
27914 if (ix86_veclib_handler)
27915 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
27921 /* Handler for an SVML-style interface to
27922 a library with vectorized intrinsics. */
27925 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
27928 tree fntype, new_fndecl, args;
27931 enum machine_mode el_mode, in_mode;
27934 /* The SVML is suitable for unsafe math only. */
27935 if (!flag_unsafe_math_optimizations)
27938 el_mode = TYPE_MODE (TREE_TYPE (type_out));
27939 n = TYPE_VECTOR_SUBPARTS (type_out);
27940 in_mode = TYPE_MODE (TREE_TYPE (type_in));
27941 in_n = TYPE_VECTOR_SUBPARTS (type_in);
27942 if (el_mode != in_mode
27950 case BUILT_IN_LOG10:
27952 case BUILT_IN_TANH:
27954 case BUILT_IN_ATAN:
27955 case BUILT_IN_ATAN2:
27956 case BUILT_IN_ATANH:
27957 case BUILT_IN_CBRT:
27958 case BUILT_IN_SINH:
27960 case BUILT_IN_ASINH:
27961 case BUILT_IN_ASIN:
27962 case BUILT_IN_COSH:
27964 case BUILT_IN_ACOSH:
27965 case BUILT_IN_ACOS:
27966 if (el_mode != DFmode || n != 2)
27970 case BUILT_IN_EXPF:
27971 case BUILT_IN_LOGF:
27972 case BUILT_IN_LOG10F:
27973 case BUILT_IN_POWF:
27974 case BUILT_IN_TANHF:
27975 case BUILT_IN_TANF:
27976 case BUILT_IN_ATANF:
27977 case BUILT_IN_ATAN2F:
27978 case BUILT_IN_ATANHF:
27979 case BUILT_IN_CBRTF:
27980 case BUILT_IN_SINHF:
27981 case BUILT_IN_SINF:
27982 case BUILT_IN_ASINHF:
27983 case BUILT_IN_ASINF:
27984 case BUILT_IN_COSHF:
27985 case BUILT_IN_COSF:
27986 case BUILT_IN_ACOSHF:
27987 case BUILT_IN_ACOSF:
27988 if (el_mode != SFmode || n != 4)
27996 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
27998 if (fn == BUILT_IN_LOGF)
27999 strcpy (name, "vmlsLn4");
28000 else if (fn == BUILT_IN_LOG)
28001 strcpy (name, "vmldLn2");
28004 sprintf (name, "vmls%s", bname+10);
28005 name[strlen (name)-1] = '4';
28008 sprintf (name, "vmld%s2", bname+10);
28010 /* Convert to uppercase. */
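/* Worked examples of the mangling above, assuming the elided statement
   upcases name[4], the first letter of the stripped name ("bname + 10"
   skips the "__builtin_" prefix): sin -> vmldSin2, sinf -> vmlsSin4,
   atan2 -> vmldAtan22, plus the special cases log -> vmldLn2 and
   logf -> vmlsLn4. */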
28014 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
28015 args = TREE_CHAIN (args))
28019 fntype = build_function_type_list (type_out, type_in, NULL);
28021 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
28023 /* Build a function declaration for the vectorized function. */
28024 new_fndecl = build_decl (BUILTINS_LOCATION,
28025 FUNCTION_DECL, get_identifier (name), fntype);
28026 TREE_PUBLIC (new_fndecl) = 1;
28027 DECL_EXTERNAL (new_fndecl) = 1;
28028 DECL_IS_NOVOPS (new_fndecl) = 1;
28029 TREE_READONLY (new_fndecl) = 1;
28034 /* Handler for an ACML-style interface to
28035 a library with vectorized intrinsics. */
28038 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
28040 char name[20] = "__vr.._";
28041 tree fntype, new_fndecl, args;
28044 enum machine_mode el_mode, in_mode;
28047 /* The ACML is 64-bit only and suitable for unsafe math only, as
28048 it does not correctly support parts of IEEE arithmetic with the
28049 required precision, such as denormals. */
if (!TARGET_64BIT
28051 || !flag_unsafe_math_optimizations)
28054 el_mode = TYPE_MODE (TREE_TYPE (type_out));
28055 n = TYPE_VECTOR_SUBPARTS (type_out);
28056 in_mode = TYPE_MODE (TREE_TYPE (type_in));
28057 in_n = TYPE_VECTOR_SUBPARTS (type_in);
28058 if (el_mode != in_mode
28068 case BUILT_IN_LOG2:
28069 case BUILT_IN_LOG10:
28072 if (el_mode != DFmode
28077 case BUILT_IN_SINF:
28078 case BUILT_IN_COSF:
28079 case BUILT_IN_EXPF:
28080 case BUILT_IN_POWF:
28081 case BUILT_IN_LOGF:
28082 case BUILT_IN_LOG2F:
28083 case BUILT_IN_LOG10F:
28086 if (el_mode != SFmode
28095 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
28096 sprintf (name + 7, "%s", bname+10);
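/* Worked examples, assuming the elided lines fill the "__vr.._"
   template's name[4] (precision letter) and name[5] (vector width):
   sin -> __vrd2_sin, sinf -> __vrs4_sinf, log10 -> __vrd2_log10.
   As before, "bname + 10" drops the "__builtin_" prefix. */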
28099 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
28100 args = TREE_CHAIN (args))
28104 fntype = build_function_type_list (type_out, type_in, NULL);
28106 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
28108 /* Build a function declaration for the vectorized function. */
28109 new_fndecl = build_decl (BUILTINS_LOCATION,
28110 FUNCTION_DECL, get_identifier (name), fntype);
28111 TREE_PUBLIC (new_fndecl) = 1;
28112 DECL_EXTERNAL (new_fndecl) = 1;
28113 DECL_IS_NOVOPS (new_fndecl) = 1;
28114 TREE_READONLY (new_fndecl) = 1;
28120 /* Returns a decl of a function that implements conversion of an integer vector
28121 into a floating-point vector, or vice-versa. DEST_TYPE and SRC_TYPE
28122 are the types involved when converting according to CODE.
28123 Return NULL_TREE if it is not available. */
28126 ix86_vectorize_builtin_conversion (unsigned int code,
28127 tree dest_type, tree src_type)
28135 switch (TYPE_MODE (src_type))
28138 switch (TYPE_MODE (dest_type))
28141 return (TYPE_UNSIGNED (src_type)
28142 ? ix86_builtins[IX86_BUILTIN_CVTUDQ2PS]
28143 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS]);
28145 return (TYPE_UNSIGNED (src_type)
28147 : ix86_builtins[IX86_BUILTIN_CVTDQ2PD256]);
28153 switch (TYPE_MODE (dest_type))
28156 return (TYPE_UNSIGNED (src_type)
28158 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS256]);
28167 case FIX_TRUNC_EXPR:
28168 switch (TYPE_MODE (dest_type))
28171 switch (TYPE_MODE (src_type))
28174 return (TYPE_UNSIGNED (dest_type)
28176 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ]);
28178 return (TYPE_UNSIGNED (dest_type)
28180 : ix86_builtins[IX86_BUILTIN_CVTTPD2DQ256]);
28187 switch (TYPE_MODE (src_type))
28190 return (TYPE_UNSIGNED (dest_type)
28192 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ256]);
28209 /* Returns a decl of a target-specific builtin that implements the
28210 reciprocal of the function, or NULL_TREE if not available. */
28213 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
28214 bool sqrt ATTRIBUTE_UNUSED)
28216 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
28217 && flag_finite_math_only && !flag_trapping_math
28218 && flag_unsafe_math_optimizations))
28222 /* Machine dependent builtins. */
28225 /* Vectorized version of sqrt to rsqrt conversion. */
28226 case IX86_BUILTIN_SQRTPS_NR:
28227 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
28229 case IX86_BUILTIN_SQRTPS_NR256:
28230 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR256];
28236 /* Normal builtins. */
28239 /* Sqrt to rsqrt conversion. */
28240 case BUILT_IN_SQRTF:
28241 return ix86_builtins[IX86_BUILTIN_RSQRTF];
28248 /* Helper for avx_vpermilps256_operand et al. This is also used by
28249 the expansion functions to turn the parallel back into a mask.
28250 The return value is 0 for no match and the imm8+1 for a match. */
28253 avx_vpermilp_parallel (rtx par, enum machine_mode mode)
28255 unsigned i, nelt = GET_MODE_NUNITS (mode);
28257 unsigned char ipar[8];
28259 if (XVECLEN (par, 0) != (int) nelt)
28262 /* Validate that all of the elements are constants, and not totally
28263 out of range. Copy the data into an integral array to make the
28264 subsequent checks easier. */
28265 for (i = 0; i < nelt; ++i)
28267 rtx er = XVECEXP (par, 0, i);
28268 unsigned HOST_WIDE_INT ei;
28270 if (!CONST_INT_P (er))
28281 /* In the 256-bit DFmode case, we can only move elements within a 128-bit lane. */
28283 for (i = 0; i < 2; ++i)
28287 mask |= ipar[i] << i;
28289 for (i = 2; i < 4; ++i)
28293 mask |= (ipar[i] - 2) << i;
28298 /* In the 256-bit SFmode case, we have full freedom of movement
28299 within the low 128-bit lane, but the high 128-bit lane must
28300 mirror the exact same pattern. */
28301 for (i = 0; i < 4; ++i)
28302 if (ipar[i] + 4 != ipar[i + 4])
28309 /* In the 128-bit case, we have full freedom in the placement of
28310 the elements from the source operand. */
28311 for (i = 0; i < nelt; ++i)
28312 mask |= ipar[i] << (i * (nelt / 2));
28316 gcc_unreachable ();
28319 /* Make sure success has a non-zero value by adding one. */
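/* Encoding sketch (illustrative) for the inverse direction: packing a
   V4SF selector into the vpermilps imm8 this helper decodes, two bits
   per element (element I in bits [2*I+1:2*I]):

     static unsigned char
     vpermilps_imm8 (const unsigned char sel[4])
     {
       unsigned char imm = 0;
       int i;
       for (i = 0; i < 4; i++)
         imm |= (unsigned char) ((sel[i] & 3) << (2 * i));
       return imm;
     }
 */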
28323 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
28324 the expansion functions to turn the parallel back into a mask.
28325 The return value is 0 for no match and the imm8+1 for a match. */
28328 avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
28330 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
28332 unsigned char ipar[8];
28334 if (XVECLEN (par, 0) != (int) nelt)
28337 /* Validate that all of the elements are constants, and not totally
28338 out of range. Copy the data into an integral array to make the
28339 subsequent checks easier. */
28340 for (i = 0; i < nelt; ++i)
28342 rtx er = XVECEXP (par, 0, i);
28343 unsigned HOST_WIDE_INT ei;
28345 if (!CONST_INT_P (er))
28348 if (ei >= 2 * nelt)
28353 /* Validate that each half of the permute selects consecutive elements. */
28354 for (i = 0; i < nelt2 - 1; ++i)
28355 if (ipar[i] + 1 != ipar[i + 1])
28357 for (i = nelt2; i < nelt - 1; ++i)
28358 if (ipar[i] + 1 != ipar[i + 1])
28361 /* Reconstruct the mask. */
28362 for (i = 0; i < 2; ++i)
28364 unsigned e = ipar[i * nelt2];
28368 mask |= e << (i * 4);
28371 /* Make sure success has a non-zero value by adding one. */
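/* Encoding sketch (illustrative): the vperm2f128 imm8 this helper
   reconstructs selects a 128-bit source lane per result lane, the low
   lane in bits 0-1 and the high lane in bits 4-5 (0/1 = lanes of the
   first source, 2/3 = lanes of the second):

     static unsigned char
     vperm2f128_imm8 (unsigned int low_sel, unsigned int high_sel)
     {
       return (unsigned char) ((low_sel & 3) | ((high_sel & 3) << 4));
     }
 */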
28376 /* Store OPERAND to memory after reload has completed. This means
28377 that we can't easily use assign_stack_local. */
28379 ix86_force_to_memory (enum machine_mode mode, rtx operand)
28383 gcc_assert (reload_completed);
28384 if (ix86_using_red_zone ())
28386 result = gen_rtx_MEM (mode,
28387 gen_rtx_PLUS (Pmode,
28389 GEN_INT (-RED_ZONE_SIZE)));
28390 emit_move_insn (result, operand);
28392 else if (TARGET_64BIT)
28398 operand = gen_lowpart (DImode, operand);
28402 gen_rtx_SET (VOIDmode,
28403 gen_rtx_MEM (DImode,
28404 gen_rtx_PRE_DEC (DImode,
28405 stack_pointer_rtx)),
28409 gcc_unreachable ();
28411 result = gen_rtx_MEM (mode, stack_pointer_rtx);
28420 split_double_mode (mode, &operand, 1, operands, operands + 1);
28422 gen_rtx_SET (VOIDmode,
28423 gen_rtx_MEM (SImode,
28424 gen_rtx_PRE_DEC (Pmode,
28425 stack_pointer_rtx)),
28428 gen_rtx_SET (VOIDmode,
28429 gen_rtx_MEM (SImode,
28430 gen_rtx_PRE_DEC (Pmode,
28431 stack_pointer_rtx)),
28436 /* Store HImodes as SImodes. */
28437 operand = gen_lowpart (SImode, operand);
28441 gen_rtx_SET (VOIDmode,
28442 gen_rtx_MEM (GET_MODE (operand),
28443 gen_rtx_PRE_DEC (SImode,
28444 stack_pointer_rtx)),
28448 gcc_unreachable ();
28450 result = gen_rtx_MEM (mode, stack_pointer_rtx);
28455 /* Free the operand from memory. */
28457 ix86_free_from_memory (enum machine_mode mode)
28459 if (!ix86_using_red_zone ())
28463 if (mode == DImode || TARGET_64BIT)
28467 /* Use LEA to deallocate stack space. In peephole2 it will be converted
28468 to a pop or add instruction if registers are available. */
28469 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
28470 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
28475 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
28477 Put float CONST_DOUBLE in the constant pool instead of fp regs.
28478 QImode must go into class Q_REGS.
28479 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
28480 movdf to do mem-to-mem moves through integer regs. */
28483 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
28485 enum machine_mode mode = GET_MODE (x);
28487 /* We're only allowed to return a subclass of CLASS. Many of the
28488 following checks fail for NO_REGS, so eliminate that early. */
28489 if (regclass == NO_REGS)
28492 /* All classes can load zeros. */
28493 if (x == CONST0_RTX (mode))
28496 /* Force constants into memory if we are loading a (nonzero) constant into
28497 an MMX or SSE register. This is because there are no MMX/SSE instructions
28498 to load from a constant. */
28500 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
28503 /* Prefer SSE regs only, if we can use them for math. */
28504 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
28505 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
28507 /* Floating-point constants need more complex checks. */
28508 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
28510 /* General regs can load everything. */
28511 if (reg_class_subset_p (regclass, GENERAL_REGS))
28514 /* Floats can load 0 and 1 plus some others. Note that we eliminated
28515 zero above. We only want to wind up preferring 80387 registers if
28516 we plan on doing computation with them. */
28518 && standard_80387_constant_p (x) > 0)
28520 /* Limit class to non-sse. */
28521 if (regclass == FLOAT_SSE_REGS)
28523 if (regclass == FP_TOP_SSE_REGS)
28525 if (regclass == FP_SECOND_SSE_REGS)
28526 return FP_SECOND_REG;
28527 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
28534 /* Generally when we see PLUS here, it's the function invariant
28535 (plus soft-fp const_int), which can only be computed into general
regs. */
28537 if (GET_CODE (x) == PLUS)
28538 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
28540 /* QImode constants are easy to load, but non-constant QImode data
28541 must go into Q_REGS. */
28542 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
28544 if (reg_class_subset_p (regclass, Q_REGS))
28546 if (reg_class_subset_p (Q_REGS, regclass))
28554 /* Discourage putting floating-point values in SSE registers unless
28555 SSE math is being used, and likewise for the 387 registers. */
28557 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
28559 enum machine_mode mode = GET_MODE (x);
28561 /* Restrict the output reload class to the register bank that we are doing
28562 math on. If we would like not to return a subset of CLASS, reject this
28563 alternative: if reload cannot do this, it will still use its choice. */
28564 mode = GET_MODE (x);
28565 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
28566 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
28568 if (X87_FLOAT_MODE_P (mode))
28570 if (regclass == FP_TOP_SSE_REGS)
28572 else if (regclass == FP_SECOND_SSE_REGS)
28573 return FP_SECOND_REG;
28575 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
28582 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
28583 enum machine_mode mode,
28584 secondary_reload_info *sri ATTRIBUTE_UNUSED)
28586 /* QImode spills from non-QI registers require an
28587 intermediate register on 32-bit targets. */
28589 && !in_p && mode == QImode
28590 && (rclass == GENERAL_REGS
28591 || rclass == LEGACY_REGS
28592 || rclass == INDEX_REGS))
28601 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
28602 regno = true_regnum (x);
28604 /* Return Q_REGS if the operand is in memory. */
28609 /* This condition handles the corner case where an expression involving
28610 pointers gets vectorized. We're trying to use the address of a
28611 stack slot as a vector initializer.
28613 (set (reg:V2DI 74 [ vect_cst_.2 ])
28614 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
28616 Eventually frame gets turned into sp+offset like this:
28618 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
28619 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
28620 (const_int 392 [0x188]))))
28622 That later gets turned into:
28624 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
28625 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
28626 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
28628 We'll have the following reload recorded:
28630 Reload 0: reload_in (DI) =
28631 (plus:DI (reg/f:DI 7 sp)
28632 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
28633 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
28634 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
28635 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
28636 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
28637 reload_reg_rtx: (reg:V2DI 22 xmm1)
28639 That isn't going to work, since SSE instructions can't handle scalar
28640 additions. Returning GENERAL_REGS forces the addition into an integer
28641 register, and reload can then handle subsequent reloads without problems. */
28643 if (in_p && GET_CODE (x) == PLUS
28644 && SSE_CLASS_P (rclass)
28645 && SCALAR_INT_MODE_P (mode))
28646 return GENERAL_REGS;
28651 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
28654 ix86_class_likely_spilled_p (reg_class_t rclass)
28665 case SSE_FIRST_REG:
28667 case FP_SECOND_REG:
28677 /* If we are copying between general and FP registers, we need a memory
28678 location. The same is true for SSE and MMX registers.
28680 To optimize register_move_cost performance, allow inline variant.
28682 The macro can't work reliably when one of the CLASSES is a class containing
28683 registers from multiple units (SSE, MMX, integer). We avoid this by never
28684 combining those units in a single alternative in the machine description.
28685 Ensure that this constraint holds to avoid unexpected surprises.
28687 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
28688 enforce these sanity checks. */
28691 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
28692 enum machine_mode mode, int strict)
28694 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
28695 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
28696 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
28697 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
28698 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
28699 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
28701 gcc_assert (!strict);
28705 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
28708 /* ??? This is a lie. We do have moves between mmx/general, and for
28709 mmx/sse2. But by saying we need secondary memory we discourage the
28710 register allocator from using the mmx registers unless needed. */
28711 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
28714 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
28716 /* SSE1 doesn't have any direct moves from other classes. */
28720 /* If the target says that inter-unit moves are more expensive
28721 than moving through memory, then don't generate them. */
28722 if (!TARGET_INTER_UNIT_MOVES)
28725 /* Between SSE and general, we have moves no larger than word size. */
28726 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
28734 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
28735 enum machine_mode mode, int strict)
28737 return inline_secondary_memory_needed (class1, class2, mode, strict);
28740 /* Return true if the registers in CLASS cannot represent the change from
28741 modes FROM to TO. */
28744 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
28745 enum reg_class regclass)
28750 /* x87 registers can't do subreg at all, as all values are reformatted
28751 to extended precision. */
28752 if (MAYBE_FLOAT_CLASS_P (regclass))
28755 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
28757 /* Vector registers do not support QI or HImode loads. If we don't
28758 disallow a change to these modes, reload will assume it's ok to
28759 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
28760 the vec_dupv4hi pattern. */
28761 if (GET_MODE_SIZE (from) < 4)
28764 /* Vector registers do not support subreg with nonzero offsets, which
28765 are otherwise valid for integer registers. Since we can't see
28766 whether we have a nonzero offset from here, prohibit all
28767 nonparadoxical subregs changing size. */
28768 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
28775 /* Return the cost of moving data of mode M between a
28776 register and memory. A value of 2 is the default; this cost is
28777 relative to those in `REGISTER_MOVE_COST'.
28779 This function is used extensively by register_move_cost, which is used to
28780 build tables at startup. Make it inline in this case.
28781 When IN is 2, return the maximum of the in and out move costs.
28783 If moving between registers and memory is more expensive than
28784 between two registers, you should define this macro to express the
relative cost.
28787 Model also increased moving costs of QImode registers in non
Q_REGS classes. */
28791 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
28795 if (FLOAT_CLASS_P (regclass))
if (in == 2)
28813 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
28814 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
28816 if (SSE_CLASS_P (regclass))
28819 switch (GET_MODE_SIZE (mode))
if (in == 2)
28834 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
28835 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
28837 if (MMX_CLASS_P (regclass))
28840 switch (GET_MODE_SIZE (mode))
if (in == 2)
28852 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
28853 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
28855 switch (GET_MODE_SIZE (mode))
28858 if (Q_CLASS_P (regclass) || TARGET_64BIT)
{
if (!in)
28861 return ix86_cost->int_store[0];
28862 if (TARGET_PARTIAL_REG_DEPENDENCY
28863 && optimize_function_for_speed_p (cfun))
28864 cost = ix86_cost->movzbl_load;
28866 cost = ix86_cost->int_load[0];
if (in == 2)
28868 return MAX (cost, ix86_cost->int_store[0]);
return cost;
}
else
{
if (in == 2)
28874 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
if (in)
28876 return ix86_cost->movzbl_load;
else
28878 return ix86_cost->int_store[0] + 4;
if (in == 2)
28883 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
28884 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
28886 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
28887 if (mode == TFmode)
mode = XFmode;
if (in == 2)
28890 cost = MAX (ix86_cost->int_load[2], ix86_cost->int_store[2]);
else if (in)
28892 cost = ix86_cost->int_load[2];
else
28894 cost = ix86_cost->int_store[2];
28895 return (cost * (((int) GET_MODE_SIZE (mode)
28896 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
28901 ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
28904 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
28908 /* Return the cost of moving data from a register in class CLASS1 to
28909 one in class CLASS2.
28911 It is not required that the cost always equal 2 when FROM is the same as TO;
28912 on some machines it is expensive to move between registers if they are not
28913 general registers. */
28916 ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
28917 reg_class_t class2_i)
28919 enum reg_class class1 = (enum reg_class) class1_i;
28920 enum reg_class class2 = (enum reg_class) class2_i;
28922 /* In case we require secondary memory, compute the cost of the store
28923 followed by the load. In order to avoid bad register allocation choices,
28924 we need this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
28926 if (inline_secondary_memory_needed (class1, class2, mode, 0))
28930 cost += inline_memory_move_cost (mode, class1, 2);
28931 cost += inline_memory_move_cost (mode, class2, 2);
28933 /* In the case of copying from a general purpose register we may emit
28934 multiple stores followed by a single load, causing a memory size
28935 mismatch stall. Count this as an arbitrarily high cost of 20. */
28936 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
28939 /* In the case of FP/MMX moves, the registers actually overlap, and we
28940 have to switch modes in order to treat them differently. */
28941 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
28942 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
28948 /* Moves between SSE/MMX and integer unit are expensive. */
28949 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
28950 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
28952 /* ??? By keeping the returned value relatively high, we limit the number
28953 of moves between integer and MMX/SSE registers for all targets.
28954 Additionally, the high value prevents a problem with x86_modes_tieable_p(),
28955 where integer modes in MMX/SSE registers are not tieable
28956 because of missing QImode and HImode moves to, from or between
28957 MMX/SSE registers. */
28958 return MAX (8, ix86_cost->mmxsse_to_integer);
28960 if (MAYBE_FLOAT_CLASS_P (class1))
28961 return ix86_cost->fp_move;
28962 if (MAYBE_SSE_CLASS_P (class1))
28963 return ix86_cost->sse_move;
28964 if (MAYBE_MMX_CLASS_P (class1))
28965 return ix86_cost->mmx_move;
28969 /* Return TRUE if hard register REGNO can hold a value of machine-mode MODE. */
28973 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
28975 /* The flags register, and only the flags register, can hold CCmode values. */
28976 if (CC_REGNO_P (regno))
28977 return GET_MODE_CLASS (mode) == MODE_CC;
28978 if (GET_MODE_CLASS (mode) == MODE_CC
28979 || GET_MODE_CLASS (mode) == MODE_RANDOM
28980 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
28982 if (FP_REGNO_P (regno))
28983 return VALID_FP_MODE_P (mode);
28984 if (SSE_REGNO_P (regno))
28986 /* We implement the move patterns for all vector modes into and
28987 out of SSE registers, even when no operation instructions
28988 are available. OImode move is available only when AVX is enabled. */
28990 return ((TARGET_AVX && mode == OImode)
28991 || VALID_AVX256_REG_MODE (mode)
28992 || VALID_SSE_REG_MODE (mode)
28993 || VALID_SSE2_REG_MODE (mode)
28994 || VALID_MMX_REG_MODE (mode)
28995 || VALID_MMX_REG_MODE_3DNOW (mode));
28997 if (MMX_REGNO_P (regno))
28999 /* We implement the move patterns for 3DNOW modes even in MMX mode,
29000 so if the register is available at all, then we can move data of
29001 the given mode into or out of it. */
29002 return (VALID_MMX_REG_MODE (mode)
29003 || VALID_MMX_REG_MODE_3DNOW (mode));
29006 if (mode == QImode)
29008 /* Take care with QImode values: they can be in non-QI regs,
29009 but then they do cause partial register stalls. */
29010 if (regno <= BX_REG || TARGET_64BIT)
29012 if (!TARGET_PARTIAL_REG_STALL)
29014 return !can_create_pseudo_p ();
29016 /* We handle both integers and floats in the general purpose registers. */
29017 else if (VALID_INT_MODE_P (mode))
29019 else if (VALID_FP_MODE_P (mode))
29021 else if (VALID_DFP_MODE_P (mode))
29023 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
29024 on to use that value in smaller contexts, this can easily force a
29025 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
29026 supporting DImode, allow it. */
29027 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
29033 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
29034 tieable integer mode. */
29037 ix86_tieable_integer_mode_p (enum machine_mode mode)
29046 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
29049 return TARGET_64BIT;
29056 /* Return true if MODE1 is accessible in a register that can hold MODE2
29057 without copying. That is, all register classes that can hold MODE2
29058 can also hold MODE1. */
29061 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
29063 if (mode1 == mode2)
29066 if (ix86_tieable_integer_mode_p (mode1)
29067 && ix86_tieable_integer_mode_p (mode2))
29070 /* MODE2 being XFmode implies fp stack or general regs, which means we
29071 can tie any smaller floating point modes to it. Note that we do not
29072 tie this with TFmode. */
29073 if (mode2 == XFmode)
29074 return mode1 == SFmode || mode1 == DFmode;
29076 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
29077 that we can tie it with SFmode. */
29078 if (mode2 == DFmode)
29079 return mode1 == SFmode;
29081 /* If MODE2 is only appropriate for an SSE register, then tie with
29082 any other mode acceptable to SSE registers. */
29083 if (GET_MODE_SIZE (mode2) == 16
29084 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
29085 return (GET_MODE_SIZE (mode1) == 16
29086 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
29088 /* If MODE2 is appropriate for an MMX register, then tie
29089 with any other mode acceptable to MMX registers. */
29090 if (GET_MODE_SIZE (mode2) == 8
29091 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
29092 return (GET_MODE_SIZE (mode1) == 8
29093 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
29098 /* Compute a (partial) cost for rtx X. Return true if the complete
29099 cost has been computed, and false if subexpressions should be
29100 scanned. In either case, *TOTAL contains the cost result. */
29103 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed)
29105 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
29106 enum machine_mode mode = GET_MODE (x);
29107 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
29115 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
29117 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
29119 else if (flag_pic && SYMBOLIC_CONST (x)
29121 || (GET_CODE (x) != LABEL_REF
29122 && (GET_CODE (x) != SYMBOL_REF
29123 || !SYMBOL_REF_LOCAL_P (x)))))
29130 if (mode == VOIDmode)
29133 switch (standard_80387_constant_p (x))
29138 default: /* Other constants */
29143 /* Start with (MEM (SYMBOL_REF)), since that's where
29144 it'll probably end up. Add a penalty for size. */
29145 *total = (COSTS_N_INSNS (1)
29146 + (flag_pic != 0 && !TARGET_64BIT)
29147 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
29153 /* The zero extension is often completely free on x86_64, so make
29154 it as cheap as possible. */
29155 if (TARGET_64BIT && mode == DImode
29156 && GET_MODE (XEXP (x, 0)) == SImode)
29158 else if (TARGET_ZERO_EXTEND_WITH_AND)
29159 *total = cost->add;
29161 *total = cost->movzx;
29165 *total = cost->movsx;
29169 if (CONST_INT_P (XEXP (x, 1))
29170 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
29172 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
29175 *total = cost->add;
29178 if ((value == 2 || value == 3)
29179 && cost->lea <= cost->shift_const)
29181 *total = cost->lea;
29191 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
29193 if (CONST_INT_P (XEXP (x, 1)))
29195 if (INTVAL (XEXP (x, 1)) > 32)
29196 *total = cost->shift_const + COSTS_N_INSNS (2);
29198 *total = cost->shift_const * 2;
29202 if (GET_CODE (XEXP (x, 1)) == AND)
29203 *total = cost->shift_var * 2;
29205 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
29210 if (CONST_INT_P (XEXP (x, 1)))
29211 *total = cost->shift_const;
29213 *total = cost->shift_var;
29221 gcc_assert (FLOAT_MODE_P (mode));
29222 gcc_assert (TARGET_FMA || TARGET_FMA4);
29224 /* ??? SSE scalar/vector cost should be used here. */
29225 /* ??? Bald assumption that fma has the same cost as fmul. */
29226 *total = cost->fmul;
29227 *total += rtx_cost (XEXP (x, 1), FMA, speed);
29229 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
29231 if (GET_CODE (sub) == NEG)
29233 *total += rtx_cost (sub, FMA, speed);
29236 if (GET_CODE (sub) == NEG)
29238 *total += rtx_cost (sub, FMA, speed);
29243 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
29245 /* ??? SSE scalar cost should be used here. */
29246 *total = cost->fmul;
29249 else if (X87_FLOAT_MODE_P (mode))
29251 *total = cost->fmul;
29254 else if (FLOAT_MODE_P (mode))
29256 /* ??? SSE vector cost should be used here. */
29257 *total = cost->fmul;
29262 rtx op0 = XEXP (x, 0);
29263 rtx op1 = XEXP (x, 1);
29265 if (CONST_INT_P (XEXP (x, 1)))
29267 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
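/* (value &= value - 1) clears the lowest set bit on each iteration
   (Kernighan's trick), so NBITS below ends up as the population
   count of the constant multiplier. */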
29268 for (nbits = 0; value != 0; value &= value - 1)
29272 /* This is arbitrary. */
29275 /* Compute costs correctly for widening multiplication. */
29276 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
29277 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
29278 == GET_MODE_SIZE (mode))
29280 int is_mulwiden = 0;
29281 enum machine_mode inner_mode = GET_MODE (op0);
29283 if (GET_CODE (op0) == GET_CODE (op1))
29284 is_mulwiden = 1, op1 = XEXP (op1, 0);
29285 else if (CONST_INT_P (op1))
29287 if (GET_CODE (op0) == SIGN_EXTEND)
29288 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
29291 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
29295 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
29298 *total = (cost->mult_init[MODE_INDEX (mode)]
29299 + nbits * cost->mult_bit
29300 + rtx_cost (op0, outer_code, speed) + rtx_cost (op1, outer_code, speed));
29309 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
29310 /* ??? SSE cost should be used here. */
29311 *total = cost->fdiv;
29312 else if (X87_FLOAT_MODE_P (mode))
29313 *total = cost->fdiv;
29314 else if (FLOAT_MODE_P (mode))
29315 /* ??? SSE vector cost should be used here. */
29316 *total = cost->fdiv;
29318 *total = cost->divide[MODE_INDEX (mode)];
29322 if (GET_MODE_CLASS (mode) == MODE_INT
29323 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
29325 if (GET_CODE (XEXP (x, 0)) == PLUS
29326 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
29327 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
29328 && CONSTANT_P (XEXP (x, 1)))
29330 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
29331 if (val == 2 || val == 4 || val == 8)
29333 *total = cost->lea;
29334 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
29335 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
29336 outer_code, speed);
29337 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
29341 else if (GET_CODE (XEXP (x, 0)) == MULT
29342 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
29344 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
29345 if (val == 2 || val == 4 || val == 8)
29347 *total = cost->lea;
29348 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
29349 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
29353 else if (GET_CODE (XEXP (x, 0)) == PLUS)
29355 *total = cost->lea;
29356 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
29357 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
29358 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
29365 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
29367 /* ??? SSE cost should be used here. */
29368 *total = cost->fadd;
29371 else if (X87_FLOAT_MODE_P (mode))
29373 *total = cost->fadd;
29376 else if (FLOAT_MODE_P (mode))
29378 /* ??? SSE vector cost should be used here. */
29379 *total = cost->fadd;
29387 if (!TARGET_64BIT && mode == DImode)
29389 *total = (cost->add * 2
29390 + (rtx_cost (XEXP (x, 0), outer_code, speed)
29391 << (GET_MODE (XEXP (x, 0)) != DImode))
29392 + (rtx_cost (XEXP (x, 1), outer_code, speed)
29393 << (GET_MODE (XEXP (x, 1)) != DImode)));
29399 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
29401 /* ??? SSE cost should be used here. */
29402 *total = cost->fchs;
29405 else if (X87_FLOAT_MODE_P (mode))
29407 *total = cost->fchs;
29410 else if (FLOAT_MODE_P (mode))
29412 /* ??? SSE vector cost should be used here. */
29413 *total = cost->fchs;
29419 if (!TARGET_64BIT && mode == DImode)
29420 *total = cost->add * 2;
29422 *total = cost->add;
29426 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
29427 && XEXP (XEXP (x, 0), 1) == const1_rtx
29428 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
29429 && XEXP (x, 1) == const0_rtx)
29431 /* This kind of construct is implemented using test[bwl].
29432 Treat it as if we had an AND. */
29433 *total = (cost->add
29434 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed)
29435 + rtx_cost (const1_rtx, outer_code, speed));
29441 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
29446 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
29447 /* ??? SSE cost should be used here. */
29448 *total = cost->fabs;
29449 else if (X87_FLOAT_MODE_P (mode))
29450 *total = cost->fabs;
29451 else if (FLOAT_MODE_P (mode))
29452 /* ??? SSE vector cost should be used here. */
29453 *total = cost->fabs;
29457 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
29458 /* ??? SSE cost should be used here. */
29459 *total = cost->fsqrt;
29460 else if (X87_FLOAT_MODE_P (mode))
29461 *total = cost->fsqrt;
29462 else if (FLOAT_MODE_P (mode))
29463 /* ??? SSE vector cost should be used here. */
29464 *total = cost->fsqrt;
29468 if (XINT (x, 1) == UNSPEC_TP)
29475 case VEC_DUPLICATE:
29476 /* ??? Assume all of these vector manipulation patterns are
29477 recognizable, in which case they all pretty much have the
same cost. */
29479 *total = COSTS_N_INSNS (1);
29489 static int current_machopic_label_num;
29491 /* Given a symbol name and its associated stub, write out the
29492 definition of the stub. */
29495 machopic_output_stub (FILE *file, const char *symb, const char *stub)
29497 unsigned int length;
29498 char *binder_name, *symbol_name, lazy_ptr_name[32];
29499 int label = ++current_machopic_label_num;
29501 /* For 64-bit we shouldn't get here. */
29502 gcc_assert (!TARGET_64BIT);
29504 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
29505 symb = targetm.strip_name_encoding (symb);
29507 length = strlen (stub);
29508 binder_name = XALLOCAVEC (char, length + 32);
29509 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
29511 length = strlen (symb);
29512 symbol_name = XALLOCAVEC (char, length + 32);
29513 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
29515 sprintf (lazy_ptr_name, "L%d$lz", label);
29517 if (MACHOPIC_ATT_STUB)
29518 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
29519 else if (MACHOPIC_PURE)
29521 if (TARGET_DEEP_BRANCH_PREDICTION)
29522 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
29524 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
29527 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
29529 fprintf (file, "%s:\n", stub);
29530 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
29532 if (MACHOPIC_ATT_STUB)
29534 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
29536 else if (MACHOPIC_PURE)
29539 if (TARGET_DEEP_BRANCH_PREDICTION)
29541 /* 25-byte PIC stub using "CALL get_pc_thunk". */
29542 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
29543 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
29544 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n", label, lazy_ptr_name, label);
29548 /* 26-byte PIC stub using inline picbase: "CALL L42 ! L42: pop %eax". */
29549 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%ecx\n", label, label);
29550 fprintf (file, "\tmovl %s-LPC$%d(%%ecx),%%ecx\n", lazy_ptr_name, label);
29552 fprintf (file, "\tjmp\t*%%ecx\n");
29555 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
29557 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
29558 it needs no stub-binding-helper. */
29559 if (MACHOPIC_ATT_STUB)
29562 fprintf (file, "%s:\n", binder_name);
29566 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
29567 fprintf (file, "\tpushl\t%%ecx\n");
29570 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
29572 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
29574 /* N.B. Keep the correspondence of these
29575 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
29576 old-pic/new-pic/non-pic stubs; altering this will break
29577 compatibility with existing dylibs. */
29581 if (TARGET_DEEP_BRANCH_PREDICTION)
29582 /* 25-byte PIC stub using "CALL get_pc_thunk". */
29583 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
29585 /* 26-byte PIC stub using inline picbase: "CALL L42 ! L42: pop %ebx". */
29586 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
29589 /* 16-byte -mdynamic-no-pic stub. */
29590 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
29592 fprintf (file, "%s:\n", lazy_ptr_name);
29593 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
29594 fprintf (file, ASM_LONG "%s\n", binder_name);
29596 #endif /* TARGET_MACHO */
29598 /* Order the registers for the register allocator. */
29601 x86_order_regs_for_local_alloc (void)
29606 /* First allocate the local general purpose registers. */
29607 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
29608 if (GENERAL_REGNO_P (i) && call_used_regs[i])
29609 reg_alloc_order [pos++] = i;
29611 /* Global general purpose registers. */
29612 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
29613 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
29614 reg_alloc_order [pos++] = i;
29616 /* x87 registers come first in case we are doing FP math using them. */
29618 if (!TARGET_SSE_MATH)
29619 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
29620 reg_alloc_order [pos++] = i;
29622 /* SSE registers. */
29623 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
29624 reg_alloc_order [pos++] = i;
29625 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
29626 reg_alloc_order [pos++] = i;
29628 /* x87 registers. */
29629 if (TARGET_SSE_MATH)
29630 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
29631 reg_alloc_order [pos++] = i;
29633 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
29634 reg_alloc_order [pos++] = i;
29636 /* Initialize the rest of the array, as we do not allocate some registers at all. */
29638 while (pos < FIRST_PSEUDO_REGISTER)
29639 reg_alloc_order [pos++] = 0;
29642 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
29643 in struct attribute_spec.handler. */
29645 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
29647 int flags ATTRIBUTE_UNUSED,
29648 bool *no_add_attrs)
29650 if (TREE_CODE (*node) != FUNCTION_TYPE
29651 && TREE_CODE (*node) != METHOD_TYPE
29652 && TREE_CODE (*node) != FIELD_DECL
29653 && TREE_CODE (*node) != TYPE_DECL)
29655 warning (OPT_Wattributes, "%qE attribute only applies to functions",
29657 *no_add_attrs = true;
29662 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
29664 *no_add_attrs = true;
29667 if (is_attribute_p ("callee_pop_aggregate_return", name))
29671 cst = TREE_VALUE (args);
29672 if (TREE_CODE (cst) != INTEGER_CST)
29674 warning (OPT_Wattributes,
29675 "%qE attribute requires an integer constant argument",
29677 *no_add_attrs = true;
29679 else if (compare_tree_int (cst, 0) != 0
29680 && compare_tree_int (cst, 1) != 0)
29682 warning (OPT_Wattributes,
29683 "argument to %qE attribute is neither zero, nor one",
29685 *no_add_attrs = true;
29694 /* Handle a "ms_abi" or "sysv_abi" attribute; arguments as in
29695 struct attribute_spec.handler. */
29697 ix86_handle_abi_attribute (tree *node, tree name,
29698 tree args ATTRIBUTE_UNUSED,
29699 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
29701 if (TREE_CODE (*node) != FUNCTION_TYPE
29702 && TREE_CODE (*node) != METHOD_TYPE
29703 && TREE_CODE (*node) != FIELD_DECL
29704 && TREE_CODE (*node) != TYPE_DECL)
29706 warning (OPT_Wattributes, "%qE attribute only applies to functions",
29708 *no_add_attrs = true;
29713 warning (OPT_Wattributes, "%qE attribute only available for 64-bit",
29715 *no_add_attrs = true;
29719 /* The ms_abi and sysv_abi attributes are mutually exclusive.  */
29720 if (is_attribute_p ("ms_abi", name))
29722 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
29724 error ("ms_abi and sysv_abi attributes are not compatible");
29729 else if (is_attribute_p ("sysv_abi", name))
29731 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
29733 error ("ms_abi and sysv_abi attributes are not compatible");
29742 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
29743 struct attribute_spec.handler. */
29745 ix86_handle_struct_attribute (tree *node, tree name,
29746 tree args ATTRIBUTE_UNUSED,
29747 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
29750 if (DECL_P (*node))
29752 if (TREE_CODE (*node) == TYPE_DECL)
29753 type = &TREE_TYPE (*node);
29758 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
29759 || TREE_CODE (*type) == UNION_TYPE)))
29761 warning (OPT_Wattributes, "%qE attribute ignored",
29763 *no_add_attrs = true;
29766 else if ((is_attribute_p ("ms_struct", name)
29767 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
29768 || ((is_attribute_p ("gcc_struct", name)
29769 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
29771 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
29773 *no_add_attrs = true;
29780 ix86_handle_fndecl_attribute (tree *node, tree name,
29781 tree args ATTRIBUTE_UNUSED,
29782 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
29784 if (TREE_CODE (*node) != FUNCTION_DECL)
29786 warning (OPT_Wattributes, "%qE attribute only applies to functions",
29788 *no_add_attrs = true;
29794 ix86_ms_bitfield_layout_p (const_tree record_type)
29796 return ((TARGET_MS_BITFIELD_LAYOUT
29797 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
29798 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
29801 /* Returns an expression indicating where the this parameter is
29802 located on entry to the FUNCTION. */
29805 x86_this_parameter (tree function)
29807 tree type = TREE_TYPE (function);
29808 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
29813 const int *parm_regs;
29815 if (ix86_function_type_abi (type) == MS_ABI)
29816 parm_regs = x86_64_ms_abi_int_parameter_registers;
29818 parm_regs = x86_64_int_parameter_registers;
29819 return gen_rtx_REG (DImode, parm_regs[aggr]);
29822 nregs = ix86_function_regparm (type, function);
29824 if (nregs > 0 && !stdarg_p (type))
29827 unsigned int ccvt = ix86_get_callcvt (type);
29829 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
29830 regno = aggr ? DX_REG : CX_REG;
29831 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
29835 return gen_rtx_MEM (SImode,
29836 plus_constant (stack_pointer_rtx, 4));
29845 return gen_rtx_MEM (SImode,
29846 plus_constant (stack_pointer_rtx, 4));
29849 return gen_rtx_REG (SImode, regno);
29852 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
29855 /* Determine whether x86_output_mi_thunk can succeed. */
29858 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
29859 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
29860 HOST_WIDE_INT vcall_offset, const_tree function)
29862 /* 64-bit can handle anything. */
29866 /* For 32-bit, everything's fine if we have one free register. */
29867 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
29870 /* Need a free register for vcall_offset. */
29874 /* Need a free register for GOT references. */
29875 if (flag_pic && !targetm.binds_local_p (function))
29878 /* Otherwise ok. */
29882 /* Output the assembler code for a thunk function. THUNK_DECL is the
29883 declaration for the thunk function itself, FUNCTION is the decl for
29884 the target function. DELTA is an immediate constant offset to be
29885 added to THIS. If VCALL_OFFSET is nonzero, the word at
29886 *(*this + vcall_offset) should be added to THIS. */
29889 x86_output_mi_thunk (FILE *file,
29890 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
29891 HOST_WIDE_INT vcall_offset, tree function)
29894 rtx this_param = x86_this_parameter (function);
29897 /* Make sure unwind info is emitted for the thunk if needed. */
29898 final_start_function (emit_barrier (), file, 1);
29900 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
29901 pull it in now and let DELTA benefit. */
29902 if (REG_P (this_param))
29903 this_reg = this_param;
29904 else if (vcall_offset)
29906 /* Put the this parameter into %eax. */
29907 xops[0] = this_param;
29908 xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
29909 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
29912 this_reg = NULL_RTX;
29914 /* Adjust the this parameter by a fixed constant. */
29917 xops[0] = GEN_INT (delta);
29918 xops[1] = this_reg ? this_reg : this_param;
29921 if (!x86_64_general_operand (xops[0], DImode))
29923 tmp = gen_rtx_REG (DImode, R10_REG);
29925 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
29927 xops[1] = this_param;
29929 if (x86_maybe_negate_const_int (&xops[0], DImode))
29930 output_asm_insn ("sub{q}\t{%0, %1|%1, %0}", xops);
29932 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
29934 else if (x86_maybe_negate_const_int (&xops[0], SImode))
29935 output_asm_insn ("sub{l}\t{%0, %1|%1, %0}", xops);
29937 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
29940 /* Adjust the this parameter by a value stored in the vtable. */
29944 tmp = gen_rtx_REG (DImode, R10_REG);
29947 int tmp_regno = CX_REG;
29948 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
29949 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) != 0)
29950 tmp_regno = AX_REG;
29951 tmp = gen_rtx_REG (SImode, tmp_regno);
29954 xops[0] = gen_rtx_MEM (Pmode, this_reg);
29956 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
29958 /* Adjust the this parameter. */
29959 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
29960 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
29962 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
29963 xops[0] = GEN_INT (vcall_offset);
29965 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
29966 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
29968 xops[1] = this_reg;
29969 output_asm_insn ("add%z1\t{%0, %1|%1, %0}", xops);
29972 /* If necessary, drop THIS back to its stack slot. */
29973 if (this_reg && this_reg != this_param)
29975 xops[0] = this_reg;
29976 xops[1] = this_param;
29977 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
29980 xops[0] = XEXP (DECL_RTL (function), 0);
29983 if (!flag_pic || targetm.binds_local_p (function)
29984 || DEFAULT_ABI == MS_ABI)
29985 output_asm_insn ("jmp\t%P0", xops);
29986 /* All thunks should be in the same object as their target,
29987 and thus binds_local_p should be true. */
29988 else if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
29989 gcc_unreachable ();
29992 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
29993 tmp = gen_rtx_CONST (Pmode, tmp);
29994 tmp = gen_rtx_MEM (QImode, tmp);
29996 output_asm_insn ("jmp\t%A0", xops);
30001 if (!flag_pic || targetm.binds_local_p (function))
30002 output_asm_insn ("jmp\t%P0", xops);
30007 rtx sym_ref = XEXP (DECL_RTL (function), 0);
30008 if (TARGET_MACHO_BRANCH_ISLANDS)
30009 sym_ref = (gen_rtx_SYMBOL_REF
30011 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
30012 tmp = gen_rtx_MEM (QImode, sym_ref);
30014 output_asm_insn ("jmp\t%0", xops);
30017 #endif /* TARGET_MACHO */
30019 tmp = gen_rtx_REG (SImode, CX_REG);
30020 output_set_got (tmp, NULL_RTX);
30023 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
30024 output_asm_insn ("jmp\t{*}%1", xops);
30027 final_end_function ();
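/* Output sketch (illustrative): for a 32-bit thunk with DELTA == -4,
   no vcall offset and a locally bound target, the code printed above
   amounts to no more than

	subl	$4, 4(%esp)
	jmp	target

   i.e. adjust `this' in its stack slot (the negated constant turns
   the add into a sub) and tail-call the real function.  */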
30031 x86_file_start (void)
30033 default_file_start ();
30035 darwin_file_start ();
30037 if (X86_FILE_START_VERSION_DIRECTIVE)
30038 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
30039 if (X86_FILE_START_FLTUSED)
30040 fputs ("\t.global\t__fltused\n", asm_out_file);
30041 if (ix86_asm_dialect == ASM_INTEL)
30042 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
30046 x86_field_alignment (tree field, int computed)
30048 enum machine_mode mode;
30049 tree type = TREE_TYPE (field);
30051 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
30053 mode = TYPE_MODE (strip_array_types (type));
30054 if (mode == DFmode || mode == DCmode
30055 || GET_MODE_CLASS (mode) == MODE_INT
30056 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
30057 return MIN (32, computed);
30061 /* Output assembler code to FILE to increment profiler label # LABELNO
30062 for profiling a function entry. */
30064 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
30066 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
30071 #ifndef NO_PROFILE_COUNTERS
30072 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
30075 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
30076 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
30078 fprintf (file, "\tcall\t%s\n", mcount_name);
30082 #ifndef NO_PROFILE_COUNTERS
30083 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
30086 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
30090 #ifndef NO_PROFILE_COUNTERS
30091 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
30094 fprintf (file, "\tcall\t%s\n", mcount_name);
30098 /* We don't have exact information about the insn sizes, but we may assume
30099 quite safely that we are informed about all 1 byte insns and memory
30100 address sizes.  This is enough to eliminate unnecessary padding in 99% of cases.  */
30104 min_insn_size (rtx insn)
30108 if (!INSN_P (insn) || !active_insn_p (insn))
30111 /* Discard alignments we've emitted, and jump table data.  */
30112 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
30113 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
30115 if (JUMP_TABLE_DATA_P (insn))
30118 /* Important case - calls are always 5 bytes.
30119 It is common to have many calls in a row.  */
30121 && symbolic_reference_mentioned_p (PATTERN (insn))
30122 && !SIBLING_CALL_P (insn))
30124 len = get_attr_length (insn);
30128 /* For normal instructions we rely on get_attr_length being exact,
30129 with a few exceptions. */
30130 if (!JUMP_P (insn))
30132 enum attr_type type = get_attr_type (insn);
30137 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
30138 || asm_noperands (PATTERN (insn)) >= 0)
30145 /* Otherwise trust get_attr_length. */
30149 l = get_attr_length_address (insn);
30150 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
30159 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
30161 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in a 16-byte window.  */
30165 ix86_avoid_jump_mispredicts (void)
30167 rtx insn, start = get_insns ();
30168 int nbytes = 0, njumps = 0;
30171 /* Look for all minimal intervals of instructions containing 4 jumps.
30172 The intervals are bounded by START and INSN. NBYTES is the total
30173 size of instructions in the interval including INSN and not including
30174 START.  When NBYTES is smaller than 16, it is possible that INSN
30175 and the end of START end up in the same 16-byte page.
30177 The smallest page offset at which INSN can start is the case where
30178 START ends at offset 0.  The offset of INSN is then NBYTES - sizeof (INSN).
30179 We add a p2align to the 16-byte window with maxskip 15 - NBYTES + sizeof (INSN).
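/* Worked example (illustrative): with NBYTES == 12 and
   min_insn_size (INSN) == 2, INSN can start no earlier than offset
   12 - 2 == 10 within the page, so a pad of 15 - 12 + 2 == 5 bytes
   guarantees the interval no longer fits in one 16-byte window.  */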
30181 for (insn = start; insn; insn = NEXT_INSN (insn))
30185 if (LABEL_P (insn))
30187 int align = label_to_alignment (insn);
30188 int max_skip = label_to_max_skip (insn);
30192 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
30193 already in the current 16 byte page, because otherwise
30194 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
30195 bytes to reach 16 byte boundary. */
30197 || (align <= 3 && max_skip != (1 << align) - 1))
30200 fprintf (dump_file, "Label %i with max_skip %i\n",
30201 INSN_UID (insn), max_skip);
30204 while (nbytes + max_skip >= 16)
30206 start = NEXT_INSN (start);
30207 if ((JUMP_P (start)
30208 && GET_CODE (PATTERN (start)) != ADDR_VEC
30209 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
30211 njumps--, isjump = 1;
30214 nbytes -= min_insn_size (start);
30220 min_size = min_insn_size (insn);
30221 nbytes += min_size;
30223 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
30224 INSN_UID (insn), min_size);
30226 && GET_CODE (PATTERN (insn)) != ADDR_VEC
30227 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
30235 start = NEXT_INSN (start);
30236 if ((JUMP_P (start)
30237 && GET_CODE (PATTERN (start)) != ADDR_VEC
30238 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
30240 njumps--, isjump = 1;
30243 nbytes -= min_insn_size (start);
30245 gcc_assert (njumps >= 0);
30247 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
30248 INSN_UID (start), INSN_UID (insn), nbytes);
30250 if (njumps == 3 && isjump && nbytes < 16)
30252 int padsize = 15 - nbytes + min_insn_size (insn);
30255 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
30256 INSN_UID (insn), padsize);
30257 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
30263 /* AMD Athlon works faster
30264 when RET is not the destination of a conditional jump and is not directly
30265 preceded by another jump instruction.  We avoid the penalty by inserting a
30266 NOP just before the RET instructions in such cases.  */
30268 ix86_pad_returns (void)
30273 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
30275 basic_block bb = e->src;
30276 rtx ret = BB_END (bb);
30278 bool replace = false;
30280 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
30281 || optimize_bb_for_size_p (bb))
30283 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
30284 if (active_insn_p (prev) || LABEL_P (prev))
30286 if (prev && LABEL_P (prev))
30291 FOR_EACH_EDGE (e, ei, bb->preds)
30292 if (EDGE_FREQUENCY (e) && e->src->index >= 0
30293 && !(e->flags & EDGE_FALLTHRU))
30298 prev = prev_active_insn (ret);
30300 && ((JUMP_P (prev) && any_condjump_p (prev))
30303 /* Empty functions get branch mispredict even when
30304 the jump destination is not visible to us. */
30305 if (!prev && !optimize_function_for_size_p (cfun))
30310 emit_jump_insn_before (gen_return_internal_long (), ret);
30316 /* Count the minimum number of instructions in BB. Return 4 if the
30317 number of instructions >= 4. */
30320 ix86_count_insn_bb (basic_block bb)
30323 int insn_count = 0;
30325 /* Count number of instructions in this block. Return 4 if the number
30326 of instructions >= 4. */
30327 FOR_BB_INSNS (bb, insn)
30329 /* This only happens in exit blocks.  */
30331 && GET_CODE (PATTERN (insn)) == RETURN)
30334 if (NONDEBUG_INSN_P (insn)
30335 && GET_CODE (PATTERN (insn)) != USE
30336 && GET_CODE (PATTERN (insn)) != CLOBBER)
30339 if (insn_count >= 4)
30348 /* Count the minimum number of instructions in code path in BB.
30349 Return 4 if the number of instructions >= 4. */
30352 ix86_count_insn (basic_block bb)
30356 int min_prev_count;
30358 /* Only bother counting instructions along paths with no
30359 more than 2 basic blocks between entry and exit. Given
30360 that BB has an edge to exit, determine if a predecessor
30361 of BB has an edge from entry. If so, compute the number
30362 of instructions in the predecessor block. If there
30363 happen to be multiple such blocks, compute the minimum. */
30364 min_prev_count = 4;
30365 FOR_EACH_EDGE (e, ei, bb->preds)
30368 edge_iterator prev_ei;
30370 if (e->src == ENTRY_BLOCK_PTR)
30372 min_prev_count = 0;
30375 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
30377 if (prev_e->src == ENTRY_BLOCK_PTR)
30379 int count = ix86_count_insn_bb (e->src);
30380 if (count < min_prev_count)
30381 min_prev_count = count;
30387 if (min_prev_count < 4)
30388 min_prev_count += ix86_count_insn_bb (bb);
30390 return min_prev_count;
30393 /* Pad short functions to 4 instructions.  */
30396 ix86_pad_short_function (void)
30401 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
30403 rtx ret = BB_END (e->src);
30404 if (JUMP_P (ret) && GET_CODE (PATTERN (ret)) == RETURN)
30406 int insn_count = ix86_count_insn (e->src);
30408 /* Pad short function. */
30409 if (insn_count < 4)
30413 /* Find epilogue. */
30416 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
30417 insn = PREV_INSN (insn);
30422 /* Two NOPs count as one instruction. */
30423 insn_count = 2 * (4 - insn_count);
30424 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
30430 /* Implement machine specific optimizations.  We implement padding of returns
30431 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window.  */
30435 /* We are freeing block_for_insn in the toplev to keep compatibility
30436 with old MDEP_REORGS that are not CFG based. Recompute it now. */
30437 compute_bb_for_insn ();
30439 /* Run the vzeroupper optimization if needed. */
30440 if (TARGET_VZEROUPPER)
30441 move_or_delete_vzeroupper ();
30443 if (optimize && optimize_function_for_speed_p (cfun))
30445 if (TARGET_PAD_SHORT_FUNCTION)
30446 ix86_pad_short_function ();
30447 else if (TARGET_PAD_RETURNS)
30448 ix86_pad_returns ();
30449 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
30450 if (TARGET_FOUR_JUMP_LIMIT)
30451 ix86_avoid_jump_mispredicts ();
30456 /* Return nonzero when a QImode register that must be represented via a REX prefix is used.  */
30459 x86_extended_QIreg_mentioned_p (rtx insn)
30462 extract_insn_cached (insn);
30463 for (i = 0; i < recog_data.n_operands; i++)
30464 if (REG_P (recog_data.operand[i])
30465 && REGNO (recog_data.operand[i]) > BX_REG)
30470 /* Return nonzero when P points to a register encoded via a REX prefix.
30471 Called via for_each_rtx.  */
30473 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
30475 unsigned int regno;
30478 regno = REGNO (*p);
30479 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
30482 /* Return true when INSN mentions a register that must be encoded using a REX prefix.  */
30485 x86_extended_reg_mentioned_p (rtx insn)
30487 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
30488 extended_reg_mentioned_1, NULL);
30491 /* If profitable, negate (without causing overflow) integer constant
30492 of mode MODE at location LOC. Return true in this case. */
30494 x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
30498 if (!CONST_INT_P (*loc))
30504 /* DImode x86_64 constants must fit in 32 bits. */
30505 gcc_assert (x86_64_immediate_operand (*loc, mode));
30516 gcc_unreachable ();
30519 /* Avoid overflows. */
30520 if (mode_signbit_p (mode, *loc))
30523 val = INTVAL (*loc);
30525 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
30526 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
30527 if ((val < 0 && val != -128)
30530 *loc = GEN_INT (-val);
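/* Encoding sketch (illustrative): `subl $4, %eax' and `addl $-4, %eax'
   both fit a sign-extended 8-bit immediate, so that rewrite is purely
   cosmetic; 128 is the exception, since `add $-128' fits in 8 bits
   while `sub $128' needs a 32-bit immediate.  Hence -128 is never
   negated into 128, and 128 is worth negating into -128.  */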
30537 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
30538 optabs would emit if we didn't have TFmode patterns. */
30541 x86_emit_floatuns (rtx operands[2])
30543 rtx neglab, donelab, i0, i1, f0, in, out;
30544 enum machine_mode mode, inmode;
30546 inmode = GET_MODE (operands[1]);
30547 gcc_assert (inmode == SImode || inmode == DImode);
30550 in = force_reg (inmode, operands[1]);
30551 mode = GET_MODE (out);
30552 neglab = gen_label_rtx ();
30553 donelab = gen_label_rtx ();
30554 f0 = gen_reg_rtx (mode);
30556 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
30558 expand_float (out, in, 0);
30560 emit_jump_insn (gen_jump (donelab));
30563 emit_label (neglab);
30565 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
30567 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
30569 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
30571 expand_float (f0, i0, 0);
30573 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
30575 emit_label (donelab);
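/* C sketch of the sequence emitted above (illustrative):

     double floatuns (uint64_t u)
     {
       if ((int64_t) u >= 0)
	 return (double) (int64_t) u;
       uint64_t half = (u >> 1) | (u & 1);   // keep the low bit for rounding
       double f = (double) (int64_t) half;
       return f + f;                         // undo the halving
     }  */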
30578 /* AVX does not support 32-byte integer vector operations,
30579 thus the longest vector we are faced with is V16QImode. */
30580 #define MAX_VECT_LEN 16
30582 struct expand_vec_perm_d
30584 rtx target, op0, op1;
30585 unsigned char perm[MAX_VECT_LEN];
30586 enum machine_mode vmode;
30587 unsigned char nelt;
30591 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
30592 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
30594 /* Get a vector mode of the same size as the original but with elements
30595 twice as wide. This is only guaranteed to apply to integral vectors. */
30597 static inline enum machine_mode
30598 get_mode_wider_vector (enum machine_mode o)
30600 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
30601 enum machine_mode n = GET_MODE_WIDER_MODE (o);
30602 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
30603 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
30607 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
30608 with all elements equal to VAR. Return true if successful. */
30611 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
30612 rtx target, rtx val)
30635 /* First attempt to recognize VAL as-is. */
30636 dup = gen_rtx_VEC_DUPLICATE (mode, val);
30637 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
30638 if (recog_memoized (insn) < 0)
30641 /* If that fails, force VAL into a register. */
30644 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
30645 seq = get_insns ();
30648 emit_insn_before (seq, insn);
30650 ok = recog_memoized (insn) >= 0;
30659 if (TARGET_SSE || TARGET_3DNOW_A)
30663 val = gen_lowpart (SImode, val);
30664 x = gen_rtx_TRUNCATE (HImode, val);
30665 x = gen_rtx_VEC_DUPLICATE (mode, x);
30666 emit_insn (gen_rtx_SET (VOIDmode, target, x));
30679 struct expand_vec_perm_d dperm;
30683 memset (&dperm, 0, sizeof (dperm));
30684 dperm.target = target;
30685 dperm.vmode = mode;
30686 dperm.nelt = GET_MODE_NUNITS (mode);
30687 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
30689 /* Extend to SImode using a paradoxical SUBREG. */
30690 tmp1 = gen_reg_rtx (SImode);
30691 emit_move_insn (tmp1, gen_lowpart (SImode, val));
30693 /* Insert the SImode value as low element of a V4SImode vector. */
30694 tmp2 = gen_lowpart (V4SImode, dperm.op0);
30695 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
30697 ok = (expand_vec_perm_1 (&dperm)
30698 || expand_vec_perm_broadcast_1 (&dperm));
30710 /* Replicate the value once into the next wider mode and recurse. */
30712 enum machine_mode smode, wsmode, wvmode;
30715 smode = GET_MODE_INNER (mode);
30716 wvmode = get_mode_wider_vector (mode);
30717 wsmode = GET_MODE_INNER (wvmode);
30719 val = convert_modes (wsmode, smode, val, true);
30720 x = expand_simple_binop (wsmode, ASHIFT, val,
30721 GEN_INT (GET_MODE_BITSIZE (smode)),
30722 NULL_RTX, 1, OPTAB_LIB_WIDEN);
30723 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
30725 x = gen_lowpart (wvmode, target);
30726 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
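/* E.g. (illustrative): to broadcast the QImode value V into V16QImode,
   first form the HImode value (V << 8) | V and broadcast that into
   V8HImode instead.  */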
30734 enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
30735 rtx x = gen_reg_rtx (hvmode);
30737 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
30740 x = gen_rtx_VEC_CONCAT (mode, x, x);
30741 emit_insn (gen_rtx_SET (VOIDmode, target, x));
30750 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
30751 whose ONE_VAR element is VAR, and other elements are zero.  Return true if successful.  */
30755 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
30756 rtx target, rtx var, int one_var)
30758 enum machine_mode vsimode;
30761 bool use_vector_set = false;
30766 /* For SSE4.1, we normally use vector set. But if the second
30767 element is zero and inter-unit moves are OK, we use movq instead.  */
30769 use_vector_set = (TARGET_64BIT
30771 && !(TARGET_INTER_UNIT_MOVES
30777 use_vector_set = TARGET_SSE4_1;
30780 use_vector_set = TARGET_SSE2;
30783 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
30790 use_vector_set = TARGET_AVX;
30793 /* Use ix86_expand_vector_set in 64bit mode only. */
30794 use_vector_set = TARGET_AVX && TARGET_64BIT;
30800 if (use_vector_set)
30802 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
30803 var = force_reg (GET_MODE_INNER (mode), var);
30804 ix86_expand_vector_set (mmx_ok, target, var, one_var);
30820 var = force_reg (GET_MODE_INNER (mode), var);
30821 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
30822 emit_insn (gen_rtx_SET (VOIDmode, target, x));
30827 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
30828 new_target = gen_reg_rtx (mode);
30830 new_target = target;
30831 var = force_reg (GET_MODE_INNER (mode), var);
30832 x = gen_rtx_VEC_DUPLICATE (mode, var);
30833 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
30834 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
30837 /* We need to shuffle the value to the correct position, so
30838 create a new pseudo to store the intermediate result. */
30840 /* With SSE2, we can use the integer shuffle insns. */
30841 if (mode != V4SFmode && TARGET_SSE2)
30843 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
30845 GEN_INT (one_var == 1 ? 0 : 1),
30846 GEN_INT (one_var == 2 ? 0 : 1),
30847 GEN_INT (one_var == 3 ? 0 : 1)));
30848 if (target != new_target)
30849 emit_move_insn (target, new_target);
30853 /* Otherwise convert the intermediate result to V4SFmode and
30854 use the SSE1 shuffle instructions. */
30855 if (mode != V4SFmode)
30857 tmp = gen_reg_rtx (V4SFmode);
30858 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
30863 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
30865 GEN_INT (one_var == 1 ? 0 : 1),
30866 GEN_INT (one_var == 2 ? 0+4 : 1+4),
30867 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
30869 if (mode != V4SFmode)
30870 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
30871 else if (tmp != target)
30872 emit_move_insn (target, tmp);
30874 else if (target != new_target)
30875 emit_move_insn (target, new_target);
30880 vsimode = V4SImode;
30886 vsimode = V2SImode;
30892 /* Zero extend the variable element to SImode and recurse. */
30893 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
30895 x = gen_reg_rtx (vsimode);
30896 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
30898 gcc_unreachable ();
30900 emit_move_insn (target, gen_lowpart (mode, x));
30908 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
30909 consisting of the values in VALS. It is known that all elements
30910 except ONE_VAR are constants. Return true if successful. */
30913 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
30914 rtx target, rtx vals, int one_var)
30916 rtx var = XVECEXP (vals, 0, one_var);
30917 enum machine_mode wmode;
30920 const_vec = copy_rtx (vals);
30921 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
30922 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
30930 /* For the two element vectors, it's just as easy to use
30931 the general case. */
30935 /* Use ix86_expand_vector_set in 64bit mode only. */
30958 /* There's no way to set one QImode entry easily. Combine
30959 the variable value with its adjacent constant value, and
30960 promote to an HImode set. */
30961 x = XVECEXP (vals, 0, one_var ^ 1);
30964 var = convert_modes (HImode, QImode, var, true);
30965 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
30966 NULL_RTX, 1, OPTAB_LIB_WIDEN);
30967 x = GEN_INT (INTVAL (x) & 0xff);
30971 var = convert_modes (HImode, QImode, var, true);
30972 x = gen_int_mode (INTVAL (x) << 8, HImode);
30974 if (x != const0_rtx)
30975 var = expand_simple_binop (HImode, IOR, var, x, var,
30976 1, OPTAB_LIB_WIDEN);
30978 x = gen_reg_rtx (wmode);
30979 emit_move_insn (x, gen_lowpart (wmode, const_vec));
30980 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
30982 emit_move_insn (target, gen_lowpart (mode, x));
30989 emit_move_insn (target, const_vec);
30990 ix86_expand_vector_set (mmx_ok, target, var, one_var);
30994 /* A subroutine of ix86_expand_vector_init_general. Use vector
30995 concatenate to handle the most general case: all values variable,
30996 and none identical. */
30999 ix86_expand_vector_init_concat (enum machine_mode mode,
31000 rtx target, rtx *ops, int n)
31002 enum machine_mode cmode, hmode = VOIDmode;
31003 rtx first[8], second[4];
31043 gcc_unreachable ();
31046 if (!register_operand (ops[1], cmode))
31047 ops[1] = force_reg (cmode, ops[1]);
31048 if (!register_operand (ops[0], cmode))
31049 ops[0] = force_reg (cmode, ops[0]);
31050 emit_insn (gen_rtx_SET (VOIDmode, target,
31051 gen_rtx_VEC_CONCAT (mode, ops[0],
31071 gcc_unreachable ();
31087 gcc_unreachable ();
31092 /* FIXME: We process inputs backward to help RA. PR 36222. */
31095 for (; i > 0; i -= 2, j--)
31097 first[j] = gen_reg_rtx (cmode);
31098 v = gen_rtvec (2, ops[i - 1], ops[i]);
31099 ix86_expand_vector_init (false, first[j],
31100 gen_rtx_PARALLEL (cmode, v));
31106 gcc_assert (hmode != VOIDmode);
31107 for (i = j = 0; i < n; i += 2, j++)
31109 second[j] = gen_reg_rtx (hmode);
31110 ix86_expand_vector_init_concat (hmode, second [j],
31114 ix86_expand_vector_init_concat (mode, target, second, n);
31117 ix86_expand_vector_init_concat (mode, target, first, n);
31121 gcc_unreachable ();
31125 /* A subroutine of ix86_expand_vector_init_general. Use vector
31126 interleave to handle the most general case: all values variable,
31127 and none identical. */
31130 ix86_expand_vector_init_interleave (enum machine_mode mode,
31131 rtx target, rtx *ops, int n)
31133 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
31136 rtx (*gen_load_even) (rtx, rtx, rtx);
31137 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
31138 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
31143 gen_load_even = gen_vec_setv8hi;
31144 gen_interleave_first_low = gen_vec_interleave_lowv4si;
31145 gen_interleave_second_low = gen_vec_interleave_lowv2di;
31146 inner_mode = HImode;
31147 first_imode = V4SImode;
31148 second_imode = V2DImode;
31149 third_imode = VOIDmode;
31152 gen_load_even = gen_vec_setv16qi;
31153 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
31154 gen_interleave_second_low = gen_vec_interleave_lowv4si;
31155 inner_mode = QImode;
31156 first_imode = V8HImode;
31157 second_imode = V4SImode;
31158 third_imode = V2DImode;
31161 gcc_unreachable ();
31164 for (i = 0; i < n; i++)
31166 /* Extend the odd element to SImode using a paradoxical SUBREG.  */
31167 op0 = gen_reg_rtx (SImode);
31168 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
31170 /* Insert the SImode value as low element of V4SImode vector. */
31171 op1 = gen_reg_rtx (V4SImode);
31172 op0 = gen_rtx_VEC_MERGE (V4SImode,
31173 gen_rtx_VEC_DUPLICATE (V4SImode,
31175 CONST0_RTX (V4SImode),
31177 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
31179 /* Cast the V4SImode vector back to a vector in the original mode.  */
31180 op0 = gen_reg_rtx (mode);
31181 emit_move_insn (op0, gen_lowpart (mode, op1));
31183 /* Load even elements into the second position.  */
31184 emit_insn (gen_load_even (op0,
31185 force_reg (inner_mode,
31189 /* Cast vector to FIRST_IMODE vector. */
31190 ops[i] = gen_reg_rtx (first_imode);
31191 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
31194 /* Interleave low FIRST_IMODE vectors. */
31195 for (i = j = 0; i < n; i += 2, j++)
31197 op0 = gen_reg_rtx (first_imode);
31198 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
31200 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
31201 ops[j] = gen_reg_rtx (second_imode);
31202 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
31205 /* Interleave low SECOND_IMODE vectors. */
31206 switch (second_imode)
31209 for (i = j = 0; i < n / 2; i += 2, j++)
31211 op0 = gen_reg_rtx (second_imode);
31212 emit_insn (gen_interleave_second_low (op0, ops[i],
31215 /* Cast the SECOND_IMODE vector to the THIRD_IMODE vector.  */
31217 ops[j] = gen_reg_rtx (third_imode);
31218 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
31220 second_imode = V2DImode;
31221 gen_interleave_second_low = gen_vec_interleave_lowv2di;
31225 op0 = gen_reg_rtx (second_imode);
31226 emit_insn (gen_interleave_second_low (op0, ops[0],
31229 /* Cast the SECOND_IMODE vector back to a vector in the original mode.  */
31231 emit_insn (gen_rtx_SET (VOIDmode, target,
31232 gen_lowpart (mode, op0)));
31236 gcc_unreachable ();
31240 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
31241 all values variable, and none identical. */
31244 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
31245 rtx target, rtx vals)
31247 rtx ops[32], op0, op1;
31248 enum machine_mode half_mode = VOIDmode;
31255 if (!mmx_ok && !TARGET_SSE)
31267 n = GET_MODE_NUNITS (mode);
31268 for (i = 0; i < n; i++)
31269 ops[i] = XVECEXP (vals, 0, i);
31270 ix86_expand_vector_init_concat (mode, target, ops, n);
31274 half_mode = V16QImode;
31278 half_mode = V8HImode;
31282 n = GET_MODE_NUNITS (mode);
31283 for (i = 0; i < n; i++)
31284 ops[i] = XVECEXP (vals, 0, i);
31285 op0 = gen_reg_rtx (half_mode);
31286 op1 = gen_reg_rtx (half_mode);
31287 ix86_expand_vector_init_interleave (half_mode, op0, ops,
31289 ix86_expand_vector_init_interleave (half_mode, op1,
31290 &ops [n >> 1], n >> 2);
31291 emit_insn (gen_rtx_SET (VOIDmode, target,
31292 gen_rtx_VEC_CONCAT (mode, op0, op1)));
31296 if (!TARGET_SSE4_1)
31304 /* Don't use ix86_expand_vector_init_interleave if we can't
31305 move from GPR to SSE register directly. */
31306 if (!TARGET_INTER_UNIT_MOVES)
31309 n = GET_MODE_NUNITS (mode);
31310 for (i = 0; i < n; i++)
31311 ops[i] = XVECEXP (vals, 0, i);
31312 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
31320 gcc_unreachable ();
31324 int i, j, n_elts, n_words, n_elt_per_word;
31325 enum machine_mode inner_mode;
31326 rtx words[4], shift;
31328 inner_mode = GET_MODE_INNER (mode);
31329 n_elts = GET_MODE_NUNITS (mode);
31330 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
31331 n_elt_per_word = n_elts / n_words;
31332 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
31334 for (i = 0; i < n_words; ++i)
31336 rtx word = NULL_RTX;
31338 for (j = 0; j < n_elt_per_word; ++j)
31340 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
31341 elt = convert_modes (word_mode, inner_mode, elt, true);
31347 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
31348 word, 1, OPTAB_LIB_WIDEN);
31349 word = expand_simple_binop (word_mode, IOR, word, elt,
31350 word, 1, OPTAB_LIB_WIDEN);
31358 emit_move_insn (target, gen_lowpart (mode, words[0]));
31359 else if (n_words == 2)
31361 rtx tmp = gen_reg_rtx (mode);
31362 emit_clobber (tmp);
31363 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
31364 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
31365 emit_move_insn (target, tmp);
31367 else if (n_words == 4)
31369 rtx tmp = gen_reg_rtx (V4SImode);
31370 gcc_assert (word_mode == SImode);
31371 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
31372 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
31373 emit_move_insn (target, gen_lowpart (mode, tmp));
31376 gcc_unreachable ();
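/* C sketch of the word-building loop above (illustrative), for
   elements of bit-width W packed N_ELT_PER_WORD to a word:

     word = 0;
     for (j = 0; j < n_elt_per_word; ++j)
       word = (word << w) | elt[(i + 1) * n_elt_per_word - j - 1];

   i.e. a word gathers its elements from last to first, leaving the
   earlier elements in the less significant bits, as little-endian
   element ordering requires.  */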
31380 /* Initialize vector TARGET via VALS. Suppress the use of MMX
31381 instructions unless MMX_OK is true. */
31384 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
31386 enum machine_mode mode = GET_MODE (target);
31387 enum machine_mode inner_mode = GET_MODE_INNER (mode);
31388 int n_elts = GET_MODE_NUNITS (mode);
31389 int n_var = 0, one_var = -1;
31390 bool all_same = true, all_const_zero = true;
31394 for (i = 0; i < n_elts; ++i)
31396 x = XVECEXP (vals, 0, i);
31397 if (!(CONST_INT_P (x)
31398 || GET_CODE (x) == CONST_DOUBLE
31399 || GET_CODE (x) == CONST_FIXED))
31400 n_var++, one_var = i;
31401 else if (x != CONST0_RTX (inner_mode))
31402 all_const_zero = false;
31403 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
31407 /* Constants are best loaded from the constant pool. */
31410 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
31414 /* If all values are identical, broadcast the value. */
31416 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
31417 XVECEXP (vals, 0, 0)))
31420 /* Values where only one field is non-constant are best loaded from
31421 the pool and overwritten via move later. */
31425 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
31426 XVECEXP (vals, 0, one_var),
31430 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
31434 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
31438 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
31440 enum machine_mode mode = GET_MODE (target);
31441 enum machine_mode inner_mode = GET_MODE_INNER (mode);
31442 enum machine_mode half_mode;
31443 bool use_vec_merge = false;
31445 static rtx (*gen_extract[6][2]) (rtx, rtx)
31447 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
31448 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
31449 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
31450 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
31451 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
31452 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
31454 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
31456 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
31457 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
31458 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
31459 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
31460 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
31461 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
31471 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
31472 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
31474 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
31476 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
31477 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
31483 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
31487 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
31488 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
31490 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
31492 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
31493 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
31500 /* For the two element vectors, we implement a VEC_CONCAT with
31501 the extraction of the other element. */
31503 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
31504 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
31507 op0 = val, op1 = tmp;
31509 op0 = tmp, op1 = val;
31511 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
31512 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
31517 use_vec_merge = TARGET_SSE4_1;
31524 use_vec_merge = true;
31528 /* tmp = target = A B C D */
31529 tmp = copy_to_reg (target);
31530 /* target = A A B B */
31531 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
31532 /* target = X A B B */
31533 ix86_expand_vector_set (false, target, val, 0);
31534 /* target = A X C D */
31535 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
31536 const1_rtx, const0_rtx,
31537 GEN_INT (2+4), GEN_INT (3+4)));
31541 /* tmp = target = A B C D */
31542 tmp = copy_to_reg (target);
31543 /* tmp = X B C D */
31544 ix86_expand_vector_set (false, tmp, val, 0);
31545 /* target = A B X D */
31546 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
31547 const0_rtx, const1_rtx,
31548 GEN_INT (0+4), GEN_INT (3+4)));
31552 /* tmp = target = A B C D */
31553 tmp = copy_to_reg (target);
31554 /* tmp = X B C D */
31555 ix86_expand_vector_set (false, tmp, val, 0);
31556 /* target = A B C X */
31557 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
31558 const0_rtx, const1_rtx,
31559 GEN_INT (2+4), GEN_INT (0+4)));
31563 gcc_unreachable ();
31568 use_vec_merge = TARGET_SSE4_1;
31572 /* Element 0 handled by vec_merge below. */
31575 use_vec_merge = true;
31581 /* With SSE2, use integer shuffles to swap element 0 and ELT,
31582 store into element 0, then shuffle them back. */
31586 order[0] = GEN_INT (elt);
31587 order[1] = const1_rtx;
31588 order[2] = const2_rtx;
31589 order[3] = GEN_INT (3);
31590 order[elt] = const0_rtx;
31592 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
31593 order[1], order[2], order[3]));
31595 ix86_expand_vector_set (false, target, val, 0);
31597 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
31598 order[1], order[2], order[3]));
31602 /* For SSE1, we have to reuse the V4SF code. */
31603 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
31604 gen_lowpart (SFmode, val), elt);
31609 use_vec_merge = TARGET_SSE2;
31612 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
31616 use_vec_merge = TARGET_SSE4_1;
31623 half_mode = V16QImode;
31629 half_mode = V8HImode;
31635 half_mode = V4SImode;
31641 half_mode = V2DImode;
31647 half_mode = V4SFmode;
31653 half_mode = V2DFmode;
31659 /* Compute offset. */
31663 gcc_assert (i <= 1);
31665 /* Extract the half. */
31666 tmp = gen_reg_rtx (half_mode);
31667 emit_insn (gen_extract[j][i] (tmp, target));
31669 /* Put val in tmp at elt. */
31670 ix86_expand_vector_set (false, tmp, val, elt);
31673 emit_insn (gen_insert[j][i] (target, target, tmp));
31682 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
31683 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
31684 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
31688 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
31690 emit_move_insn (mem, target);
31692 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
31693 emit_move_insn (tmp, val);
31695 emit_move_insn (target, mem);
31700 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
31702 enum machine_mode mode = GET_MODE (vec);
31703 enum machine_mode inner_mode = GET_MODE_INNER (mode);
31704 bool use_vec_extr = false;
31717 use_vec_extr = true;
31721 use_vec_extr = TARGET_SSE4_1;
31733 tmp = gen_reg_rtx (mode);
31734 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
31735 GEN_INT (elt), GEN_INT (elt),
31736 GEN_INT (elt+4), GEN_INT (elt+4)));
31740 tmp = gen_reg_rtx (mode);
31741 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
31745 gcc_unreachable ();
31748 use_vec_extr = true;
31753 use_vec_extr = TARGET_SSE4_1;
31767 tmp = gen_reg_rtx (mode);
31768 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
31769 GEN_INT (elt), GEN_INT (elt),
31770 GEN_INT (elt), GEN_INT (elt)));
31774 tmp = gen_reg_rtx (mode);
31775 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
31779 gcc_unreachable ();
31782 use_vec_extr = true;
31787 /* For SSE1, we have to reuse the V4SF code. */
31788 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
31789 gen_lowpart (V4SFmode, vec), elt);
31795 use_vec_extr = TARGET_SSE2;
31798 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
31802 use_vec_extr = TARGET_SSE4_1;
31806 /* ??? Could extract the appropriate HImode element and shift. */
31813 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
31814 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
31816 /* Let the rtl optimizers know about the zero extension performed. */
31817 if (inner_mode == QImode || inner_mode == HImode)
31819 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
31820 target = gen_lowpart (SImode, target);
31823 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
31827 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
31829 emit_move_insn (mem, vec);
31831 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
31832 emit_move_insn (target, tmp);
31836 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
31837 pattern to reduce; DEST is the destination; IN is the input vector. */
31840 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
31842 rtx tmp1, tmp2, tmp3;
31844 tmp1 = gen_reg_rtx (V4SFmode);
31845 tmp2 = gen_reg_rtx (V4SFmode);
31846 tmp3 = gen_reg_rtx (V4SFmode);
31848 emit_insn (gen_sse_movhlps (tmp1, in, in));
31849 emit_insn (fn (tmp2, tmp1, in));
31851 emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
31852 const1_rtx, const1_rtx,
31853 GEN_INT (1+4), GEN_INT (1+4)));
31854 emit_insn (fn (dest, tmp2, tmp3));
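/* C sketch (illustrative), assuming FN is a commutative operation
   such as min, max or plus:

     float reduce_v4sf (float (*fn) (float, float), const float v[4])
     {
       float t02 = fn (v[0], v[2]);   // pairs formed by movhlps
       float t13 = fn (v[1], v[3]);   // second pair picked by shufps
       return fn (t02, t13);          // result lands in element 0
     }  */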
31857 /* Target hook for scalar_mode_supported_p. */
31859 ix86_scalar_mode_supported_p (enum machine_mode mode)
31861 if (DECIMAL_FLOAT_MODE_P (mode))
31862 return default_decimal_float_supported_p ();
31863 else if (mode == TFmode)
31866 return default_scalar_mode_supported_p (mode);
31869 /* Implements target hook vector_mode_supported_p. */
31871 ix86_vector_mode_supported_p (enum machine_mode mode)
31873 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
31875 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
31877 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
31879 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
31881 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
31886 /* Target hook for c_mode_for_suffix. */
31887 static enum machine_mode
31888 ix86_c_mode_for_suffix (char suffix)
31898 /* Worker function for TARGET_MD_ASM_CLOBBERS.
31900 We do this in the new i386 backend to maintain source compatibility
31901 with the old cc0-based compiler. */
31904 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
31905 tree inputs ATTRIBUTE_UNUSED,
31908 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
31910 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
31915 /* Implements target vector targetm.asm.encode_section_info. This
31916 is not used by NetWare.  */
31918 static void ATTRIBUTE_UNUSED
31919 ix86_encode_section_info (tree decl, rtx rtl, int first)
31921 default_encode_section_info (decl, rtl, first);
31923 if (TREE_CODE (decl) == VAR_DECL
31924 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
31925 && ix86_in_large_data_p (decl))
31926 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
31929 /* Worker function for REVERSE_CONDITION. */
31932 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
31934 return (mode != CCFPmode && mode != CCFPUmode
31935 ? reverse_condition (code)
31936 : reverse_condition_maybe_unordered (code));
31939 /* Output code to perform an x87 FP register move, from OPERANDS[1] to OPERANDS[0].  */
31943 output_387_reg_move (rtx insn, rtx *operands)
31945 if (REG_P (operands[0]))
31947 if (REG_P (operands[1])
31948 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
31950 if (REGNO (operands[0]) == FIRST_STACK_REG)
31951 return output_387_ffreep (operands, 0);
31952 return "fstp\t%y0";
31954 if (STACK_TOP_P (operands[0]))
31955 return "fld%Z1\t%y1";
31958 else if (MEM_P (operands[0]))
31960 gcc_assert (REG_P (operands[1]));
31961 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
31962 return "fstp%Z0\t%y0";
31965 /* There is no non-popping store to memory for XFmode.
31966 So if we need one, follow the store with a load. */
31967 if (GET_MODE (operands[0]) == XFmode)
31968 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
31970 return "fst%Z0\t%y0";
31977 /* Output code to perform a conditional jump to LABEL, if C2 flag in
31978 FP status register is set. */
31981 ix86_emit_fp_unordered_jump (rtx label)
31983 rtx reg = gen_reg_rtx (HImode);
31986 emit_insn (gen_x86_fnstsw_1 (reg));
31988 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
31990 emit_insn (gen_x86_sahf_1 (reg));
31992 temp = gen_rtx_REG (CCmode, FLAGS_REG);
31993 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
31997 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
31999 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
32000 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
32003 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
32004 gen_rtx_LABEL_REF (VOIDmode, label),
32006 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
32008 emit_jump_insn (temp);
32009 predict_jump (REG_BR_PROB_BASE * 10 / 100);
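/* Both branches test the same thing: C2 is bit 10 of the FP status
   word, i.e. bit 2 of %ah after fnstsw, hence the 0x04 test mask;
   sahf copies that bit into PF, which is what the UNORDERED
   comparison on the flags register inspects.  */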
32012 /* Output code to perform a log1p XFmode calculation. */
32014 void ix86_emit_i387_log1p (rtx op0, rtx op1)
32016 rtx label1 = gen_label_rtx ();
32017 rtx label2 = gen_label_rtx ();
32019 rtx tmp = gen_reg_rtx (XFmode);
32020 rtx tmp2 = gen_reg_rtx (XFmode);
32023 emit_insn (gen_absxf2 (tmp, op1));
32024 test = gen_rtx_GE (VOIDmode, tmp,
32025 CONST_DOUBLE_FROM_REAL_VALUE (
32026 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
32028 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
32030 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
32031 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
32032 emit_jump (label2);
32034 emit_label (label1);
32035 emit_move_insn (tmp, CONST1_RTX (XFmode));
32036 emit_insn (gen_addxf3 (tmp, op1, tmp));
32037 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
32038 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
32040 emit_label (label2);
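/* C-level sketch (illustrative; fyl2xp1 and fyl2x stand in for the
   x87 instructions computing y * log2 (x + 1) and y * log2 (x)):

     log1p (x):
       if (fabs (x) < 1 - sqrt (2) / 2)        // ~0.29289321881...
	 return fyl2xp1 (ln2, x);              // accurate near zero
       else
	 return fyl2x (ln2, 1.0 + x);

   where ln2 is the fldln2 constant, so y * log2 (...) == ln (...).  */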
32043 /* Output code to perform a Newton-Raphson approximation of a single precision
32044 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm].  */
32046 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
32048 rtx x0, x1, e0, e1;
32050 x0 = gen_reg_rtx (mode);
32051 e0 = gen_reg_rtx (mode);
32052 e1 = gen_reg_rtx (mode);
32053 x1 = gen_reg_rtx (mode);
32055 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
32057 /* x0 = rcp(b) estimate */
32058 emit_insn (gen_rtx_SET (VOIDmode, x0,
32059 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
32062 emit_insn (gen_rtx_SET (VOIDmode, e0,
32063 gen_rtx_MULT (mode, x0, b)));
32066 emit_insn (gen_rtx_SET (VOIDmode, e0,
32067 gen_rtx_MULT (mode, x0, e0)));
32070 emit_insn (gen_rtx_SET (VOIDmode, e1,
32071 gen_rtx_PLUS (mode, x0, x0)));
32074 emit_insn (gen_rtx_SET (VOIDmode, x1,
32075 gen_rtx_MINUS (mode, e1, e0)));
32078 emit_insn (gen_rtx_SET (VOIDmode, res,
32079 gen_rtx_MULT (mode, a, x1)));
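/* C sketch of the Newton-Raphson step above (illustrative;
   rcp_estimate stands in for the ~12-bit rcpss/rcpps hardware
   estimate):

     float swdiv (float a, float b)
     {
       float x0 = rcp_estimate (b);
       float x1 = (x0 + x0) - (b * x0) * x0;   // x0 * (2 - b*x0)
       return a * x1;       // one step roughly doubles the precision
     }  */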
32082 /* Output code to perform a Newton-Raphson approximation of a
32083 single precision floating point [reciprocal] square root.  */
32085 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
32088 rtx x0, e0, e1, e2, e3, mthree, mhalf;
32091 x0 = gen_reg_rtx (mode);
32092 e0 = gen_reg_rtx (mode);
32093 e1 = gen_reg_rtx (mode);
32094 e2 = gen_reg_rtx (mode);
32095 e3 = gen_reg_rtx (mode);
32097 real_from_integer (&r, VOIDmode, -3, -1, 0);
32098 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
32100 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
32101 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
32103 if (VECTOR_MODE_P (mode))
32105 mthree = ix86_build_const_vector (mode, true, mthree);
32106 mhalf = ix86_build_const_vector (mode, true, mhalf);
32109 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
32110 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
32112 /* x0 = rsqrt(a) estimate */
32113 emit_insn (gen_rtx_SET (VOIDmode, x0,
32114 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
32117 /* If a == 0.0, mask out the infinite estimate to prevent NaN for sqrt (0.0).  */
32122 zero = gen_reg_rtx (mode);
32123 mask = gen_reg_rtx (mode);
32125 zero = force_reg (mode, CONST0_RTX(mode));
32126 emit_insn (gen_rtx_SET (VOIDmode, mask,
32127 gen_rtx_NE (mode, zero, a)));
32129 emit_insn (gen_rtx_SET (VOIDmode, x0,
32130 gen_rtx_AND (mode, x0, mask)));
32134 emit_insn (gen_rtx_SET (VOIDmode, e0,
32135 gen_rtx_MULT (mode, x0, a)));
32137 emit_insn (gen_rtx_SET (VOIDmode, e1,
32138 gen_rtx_MULT (mode, e0, x0)));
32141 mthree = force_reg (mode, mthree);
32142 emit_insn (gen_rtx_SET (VOIDmode, e2,
32143 gen_rtx_PLUS (mode, e1, mthree)));
32145 mhalf = force_reg (mode, mhalf);
32147 /* e3 = -.5 * x0 */
32148 emit_insn (gen_rtx_SET (VOIDmode, e3,
32149 gen_rtx_MULT (mode, x0, mhalf)));
32151 /* e3 = -.5 * e0 */
32152 emit_insn (gen_rtx_SET (VOIDmode, e3,
32153 gen_rtx_MULT (mode, e0, mhalf)));
32154 /* ret = e2 * e3 */
32155 emit_insn (gen_rtx_SET (VOIDmode, res,
32156 gen_rtx_MULT (mode, e2, e3)));
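/* C sketch (illustrative; rsqrt_estimate stands in for the
   rsqrtss/rsqrtps hardware estimate):

     x0 = rsqrt_estimate (a);
     e0 = x0 * a;
     e1 = e0 * x0;                  // a * x0 * x0
     e2 = e1 - 3.0;
     e3 = (recip ? x0 : e0) * -0.5;
     res = e2 * e3;                 // one Newton-Raphson step

   which matches the formulas quoted in the comment above.  */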
32159 #ifdef TARGET_SOLARIS
32160 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
32163 i386_solaris_elf_named_section (const char *name, unsigned int flags,
32166 /* With Binutils 2.15, the "@unwind" marker must be specified on
32167 every occurrence of the ".eh_frame" section, not just the first one.  */
32170 && strcmp (name, ".eh_frame") == 0)
32172 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
32173 flags & SECTION_WRITE ? "aw" : "a");
32178 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
32180 solaris_elf_asm_comdat_section (name, flags, decl);
32185 default_elf_asm_named_section (name, flags, decl);
32187 #endif /* TARGET_SOLARIS */
32189 /* Return the mangling of TYPE if it is an extended fundamental type. */
32191 static const char *
32192 ix86_mangle_type (const_tree type)
32194 type = TYPE_MAIN_VARIANT (type);
32196 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
32197 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
32200 switch (TYPE_MODE (type))
32203 /* __float128 is "g". */
32206 /* "long double" or __float80 is "e". */
32213 /* For 32-bit code we can save PIC register setup by using
32214 __stack_chk_fail_local hidden function instead of calling
32215 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
32216 register, so it is better to call __stack_chk_fail directly. */
32219 ix86_stack_protect_fail (void)
32221 return TARGET_64BIT
32222 ? default_external_stack_protect_fail ()
32223 : default_hidden_stack_protect_fail ();
32226 /* Select a format to encode pointers in exception handling data. CODE
32227 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
32228 true if the symbol may be affected by dynamic relocations.
32230 ??? All x86 object file formats are capable of representing this.
32231 After all, the relocation needed is the same as for the call insn.
32232 Whether or not a particular assembler allows us to enter such, I
32233 guess we'll have to see. */
32235 asm_preferred_eh_data_format (int code, int global)
32239 int type = DW_EH_PE_sdata8;
32241 || ix86_cmodel == CM_SMALL_PIC
32242 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
32243 type = DW_EH_PE_sdata4;
32244 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
32246 if (ix86_cmodel == CM_SMALL
32247 || (ix86_cmodel == CM_MEDIUM && code))
32248 return DW_EH_PE_udata4;
32249 return DW_EH_PE_absptr;
32252 /* Expand copysign from SIGN to the positive value ABS_VALUE
32253 storing in RESULT.  If MASK is non-null, it shall be a mask to mask out the sign bit.  */
32256 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
32258 enum machine_mode mode = GET_MODE (sign);
32259 rtx sgn = gen_reg_rtx (mode);
32260 if (mask == NULL_RTX)
32262 enum machine_mode vmode;
32264 if (mode == SFmode)
32266 else if (mode == DFmode)
32271 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
32272 if (!VECTOR_MODE_P (mode))
32274 /* We need to generate a scalar mode mask in this case. */
32275 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
32276 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
32277 mask = gen_reg_rtx (mode);
32278 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
32282 mask = gen_rtx_NOT (mode, mask);
32283 emit_insn (gen_rtx_SET (VOIDmode, sgn,
32284 gen_rtx_AND (mode, mask, sign)));
32285 emit_insn (gen_rtx_SET (VOIDmode, result,
32286 gen_rtx_IOR (mode, abs_value, sgn)));
32289 /* Expand fabs (OP0) and return a new rtx that holds the result. The
32290 mask for masking out the sign-bit is stored in *SMASK, if that is non-null.  */
32293 ix86_expand_sse_fabs (rtx op0, rtx *smask)
32295 enum machine_mode vmode, mode = GET_MODE (op0);
32298 xa = gen_reg_rtx (mode);
32299 if (mode == SFmode)
32301 else if (mode == DFmode)
32305 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
32306 if (!VECTOR_MODE_P (mode))
32308 /* We need to generate a scalar mode mask in this case. */
32309 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
32310 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
32311 mask = gen_reg_rtx (mode);
32312 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
32314 emit_insn (gen_rtx_SET (VOIDmode, xa,
32315 gen_rtx_AND (mode, op0, mask)));
32323 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
32324 swapping the operands if SWAP_OPERANDS is true. The expanded
32325 code is a forward jump to a newly created label in case the
32326 comparison is true. The generated label rtx is returned. */
32328 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
32329 bool swap_operands)
32340 label = gen_label_rtx ();
32341 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
32342 emit_insn (gen_rtx_SET (VOIDmode, tmp,
32343 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
32344 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
32345 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
32346 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
32347 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
32348 JUMP_LABEL (tmp) = label;
32353 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
32354 using comparison code CODE. Operands are swapped for the comparison if
32355 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
32357 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
32358 bool swap_operands)
32360 rtx (*insn)(rtx, rtx, rtx, rtx);
32361 enum machine_mode mode = GET_MODE (op0);
32362 rtx mask = gen_reg_rtx (mode);
32371 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
32373 emit_insn (insn (mask, op0, op1,
32374 gen_rtx_fmt_ee (code, mode, op0, op1)));
32378 /* Generate and return a rtx of mode MODE for 2**n where n is the number
32379 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
32381 ix86_gen_TWO52 (enum machine_mode mode)
32383 REAL_VALUE_TYPE TWO52r;
32386 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
32387 TWO52 = const_double_from_real_value (TWO52r, mode);
32388 TWO52 = force_reg (mode, TWO52);
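/* Worked example of the 2**52 trick used by the expanders below
   (illustrative, not from the original sources): a double carries a
   52-bit mantissa, so for 0 <= x < 2**52 the sum x + 2**52 leaves no
   mantissa bits for the fraction; the FPU rounds it in the current
   (round-to-nearest-even) mode and subtracting 2**52 again yields
   rint (x), e.g. 2.5 + 2**52 - 2**52 == 2.0.  */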
32393 /* Expand SSE sequence for computing lround from OP1 storing
   into OP0.  */
32396 ix86_expand_lround (rtx op0, rtx op1)
32398 /* C code for the stuff we're doing below:
32399	tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
	return (long)tmp;
   */
32402 enum machine_mode mode = GET_MODE (op1);
32403 const struct real_format *fmt;
32404 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
32407 /* load nextafter (0.5, 0.0) */
32408 fmt = REAL_MODE_FORMAT (mode);
32409 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
32410 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
32412 /* adj = copysign (0.5, op1) */
32413 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
32414 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
32416 /* adj = op1 + adj */
32417 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
32419 /* op0 = (imode)adj */
32420 expand_fix (op0, adj, 0);
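/* Why nextafter (0.5, 0.0) rather than 0.5 (hand-checked example, not
   from the original sources): for op1 == 0.49999999999999994, the
   largest double below 0.5, adding plain 0.5 would give 1.0 - 2**-54,
   which rounds up to 1.0 under round-to-nearest-even, so lround would
   wrongly return 1.  Adding pred_half == 0.5 - 2**-54 instead gives
   the exactly representable 1.0 - 2**-53, and expand_fix truncates it
   to the correct 0.  */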
32423 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1
   storing into OPERAND0.  */
32426 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
32428 /* C code for the stuff we're doing below (for do_floor):
	xi = (long)op1;
32430	xi -= (double)xi > op1 ? 1 : 0;
	return xi;
   */
32433 enum machine_mode fmode = GET_MODE (op1);
32434 enum machine_mode imode = GET_MODE (op0);
32435 rtx ireg, freg, label, tmp;
32437 /* reg = (long)op1 */
32438 ireg = gen_reg_rtx (imode);
32439 expand_fix (ireg, op1, 0);
32441 /* freg = (double)reg */
32442 freg = gen_reg_rtx (fmode);
32443 expand_float (freg, ireg, 0);
32445 /* ireg = (freg > op1) ? ireg - 1 : ireg */
32446 label = ix86_expand_sse_compare_and_jump (UNLE,
32447 freg, op1, !do_floor);
32448 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
32449 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
32450 emit_move_insn (ireg, tmp);
32452 emit_label (label);
32453 LABEL_NUSES (label) = 1;
32455 emit_move_insn (op0, ireg);
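/* Worked example for do_floor (illustrative only): op1 == -1.5
   truncates to ireg == -1, and since (double) -1 > -1.5 the UNLE jump
   is not taken, so one is subtracted giving the correct floor, -2.
   For op1 == 1.5, (double) 1 > 1.5 is false, the jump is taken, and
   ireg == 1 is already the answer.  */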
32458 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
32459 result in OPERAND0. */
32461 ix86_expand_rint (rtx operand0, rtx operand1)
32463 /* C code for the stuff we're doing below:
32464 xa = fabs (operand1);
32465	if (!isless (xa, 2**52))
	  return operand1;
32467	xa = xa + 2**52 - 2**52;
32468	return copysign (xa, operand1);
   */
32470 enum machine_mode mode = GET_MODE (operand0);
32471 rtx res, xa, label, TWO52, mask;
32473 res = gen_reg_rtx (mode);
32474 emit_move_insn (res, operand1);
32476 /* xa = abs (operand1) */
32477 xa = ix86_expand_sse_fabs (res, &mask);
32479 /* if (!isless (xa, TWO52)) goto label; */
32480 TWO52 = ix86_gen_TWO52 (mode);
32481 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32483 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
32484 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
32486 ix86_sse_copysign_to_positive (res, xa, res, mask);
32488 emit_label (label);
32489 LABEL_NUSES (label) = 1;
32491 emit_move_insn (operand0, res);
32494 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
   into OPERAND0.  Sequence that works without relying on DImode truncation
   via cvttsd2siq, which is only available on 64-bit targets.  */
32497 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
32499 /* C code for the stuff we expand below.
32500 double xa = fabs (x), x2;
32501	if (!isless (xa, TWO52))
	  return x;
32503	xa = xa + TWO52 - TWO52;
32504	x2 = copysign (xa, x);
     Compensate.  Floor:
	if (x2 > x)
	  x2 -= 1;
     Compensate.  Ceil:
	if (x2 < x)
	  x2 += 1;
	return x2;
   */
32513 enum machine_mode mode = GET_MODE (operand0);
32514 rtx xa, TWO52, tmp, label, one, res, mask;
32516 TWO52 = ix86_gen_TWO52 (mode);
32518 /* Temporary for holding the result, initialized to the input
32519 operand to ease control flow. */
32520 res = gen_reg_rtx (mode);
32521 emit_move_insn (res, operand1);
32523 /* xa = abs (operand1) */
32524 xa = ix86_expand_sse_fabs (res, &mask);
32526 /* if (!isless (xa, TWO52)) goto label; */
32527 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32529 /* xa = xa + TWO52 - TWO52; */
32530 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
32531 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
32533 /* xa = copysign (xa, operand1) */
32534 ix86_sse_copysign_to_positive (xa, xa, res, mask);
32536 /* generate 1.0 or -1.0 */
32537 one = force_reg (mode,
32538 const_double_from_real_value (do_floor
32539 ? dconst1 : dconstm1, mode));
32541 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
32542 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
32543 emit_insn (gen_rtx_SET (VOIDmode, tmp,
32544 gen_rtx_AND (mode, one, tmp)));
32545 /* We always need to subtract here to preserve signed zero. */
32546 tmp = expand_simple_binop (mode, MINUS,
32547 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
32548 emit_move_insn (res, tmp);
32550 emit_label (label);
32551 LABEL_NUSES (label) = 1;
32553 emit_move_insn (operand0, res);
32556 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
   into OPERAND0.  */
32559 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
32561 /* C code for the stuff we expand below.
32562 double xa = fabs (x), x2;
32563	if (!isless (xa, TWO52))
	  return x;
32565	x2 = (double)(long)x;
     Compensate.  Floor:
	if (x2 > x)
	  x2 -= 1;
     Compensate.  Ceil:
	if (x2 < x)
	  x2 += 1;
32572	if (HONOR_SIGNED_ZEROS (mode))
32573	  return copysign (x2, x);
	return x2;
   */
32576 enum machine_mode mode = GET_MODE (operand0);
32577 rtx xa, xi, TWO52, tmp, label, one, res, mask;
32579 TWO52 = ix86_gen_TWO52 (mode);
32581 /* Temporary for holding the result, initialized to the input
32582 operand to ease control flow. */
32583 res = gen_reg_rtx (mode);
32584 emit_move_insn (res, operand1);
32586 /* xa = abs (operand1) */
32587 xa = ix86_expand_sse_fabs (res, &mask);
32589 /* if (!isless (xa, TWO52)) goto label; */
32590 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32592 /* xa = (double)(long)x */
32593 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
32594 expand_fix (xi, res, 0);
32595 expand_float (xa, xi, 0);
32598 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
32600 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
32601 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
32602 emit_insn (gen_rtx_SET (VOIDmode, tmp,
32603 gen_rtx_AND (mode, one, tmp)));
32604 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
32605 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
32606 emit_move_insn (res, tmp);
32608 if (HONOR_SIGNED_ZEROS (mode))
32609 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
32611 emit_label (label);
32612 LABEL_NUSES (label) = 1;
32614 emit_move_insn (operand0, res);
32617 /* Expand SSE sequence for computing round from OPERAND1 storing
32618 into OPERAND0. Sequence that works without relying on DImode truncation
32619 via cvttsd2siq, which is only available on 64-bit targets.  */
32621 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
32623 /* C code for the stuff we expand below.
32624 double xa = fabs (x), xa2, x2;
32625	if (!isless (xa, TWO52))
	  return x;
32627 Using the absolute value and copying back sign makes
32628 -0.0 -> -0.0 correct.
32629	xa2 = xa + TWO52 - TWO52;
     Compensate.
	dxa = xa2 - xa;
	if (dxa <= -0.5)
	  xa2 += 1;
32634	else if (dxa > 0.5)
	  xa2 -= 1;
32636	x2 = copysign (xa2, x);
	return x2;
   */
32639 enum machine_mode mode = GET_MODE (operand0);
32640 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
32642 TWO52 = ix86_gen_TWO52 (mode);
32644 /* Temporary for holding the result, initialized to the input
32645 operand to ease control flow. */
32646 res = gen_reg_rtx (mode);
32647 emit_move_insn (res, operand1);
32649 /* xa = abs (operand1) */
32650 xa = ix86_expand_sse_fabs (res, &mask);
32652 /* if (!isless (xa, TWO52)) goto label; */
32653 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32655 /* xa2 = xa + TWO52 - TWO52; */
32656 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
32657 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
32659 /* dxa = xa2 - xa; */
32660 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
32662 /* generate 0.5, 1.0 and -0.5 */
32663 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
32664 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
32665 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
32669 tmp = gen_reg_rtx (mode);
32670 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
32671 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
32672 emit_insn (gen_rtx_SET (VOIDmode, tmp,
32673 gen_rtx_AND (mode, one, tmp)));
32674 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
32675 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
32676 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
32677 emit_insn (gen_rtx_SET (VOIDmode, tmp,
32678 gen_rtx_AND (mode, one, tmp)));
32679 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
32681 /* res = copysign (xa2, operand1) */
32682 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
32684 emit_label (label);
32685 LABEL_NUSES (label) = 1;
32687 emit_move_insn (operand0, res);
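/* Hand-checked example of the compensation above (illustrative only):
   for x == 2.5 the TWO52 trick rounds to even, so xa2 == 2.0 and
   dxa == -0.5; the UNGE test against mhalf fires and adds one, giving
   3.0, which matches round's half-away-from-zero semantics.  For
   x == 3.5, xa2 == 4.0 and dxa == 0.5, neither test fires, and the
   already correct 4.0 is kept.  */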
32690 /* Expand SSE sequence for computing trunc from OPERAND1 storing
   into OPERAND0.  */
32693 ix86_expand_trunc (rtx operand0, rtx operand1)
32695 /* C code for SSE variant we expand below.
32696 double xa = fabs (x), x2;
32697	if (!isless (xa, TWO52))
	  return x;
32699	x2 = (double)(long)x;
32700	if (HONOR_SIGNED_ZEROS (mode))
32701	  return copysign (x2, x);
	return x2;
   */
32704 enum machine_mode mode = GET_MODE (operand0);
32705 rtx xa, xi, TWO52, label, res, mask;
32707 TWO52 = ix86_gen_TWO52 (mode);
32709 /* Temporary for holding the result, initialized to the input
32710 operand to ease control flow. */
32711 res = gen_reg_rtx (mode);
32712 emit_move_insn (res, operand1);
32714 /* xa = abs (operand1) */
32715 xa = ix86_expand_sse_fabs (res, &mask);
32717 /* if (!isless (xa, TWO52)) goto label; */
32718 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32720 /* x = (double)(long)x */
32721 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
32722 expand_fix (xi, res, 0);
32723 expand_float (res, xi, 0);
32725 if (HONOR_SIGNED_ZEROS (mode))
32726 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
32728 emit_label (label);
32729 LABEL_NUSES (label) = 1;
32731 emit_move_insn (operand0, res);
32734 /* Expand SSE sequence for computing trunc from OPERAND1 storing
   into OPERAND0.  Sequence that works without relying on DImode truncation
   via cvttsd2siq, which is only available on 64-bit targets.  */
32737 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
32739 enum machine_mode mode = GET_MODE (operand0);
32740 rtx xa, mask, TWO52, label, one, res, smask, tmp;
32742 /* C code for SSE variant we expand below.
32743 double xa = fabs (x), x2;
32744	if (!isless (xa, TWO52))
	  return x;
32746	xa2 = xa + TWO52 - TWO52;
     Compensate:
	if (xa2 > xa)
	  xa2 -= 1.0;
32750	x2 = copysign (xa2, x);
	return x2;
   */
32754 TWO52 = ix86_gen_TWO52 (mode);
32756 /* Temporary for holding the result, initialized to the input
32757 operand to ease control flow. */
32758 res = gen_reg_rtx (mode);
32759 emit_move_insn (res, operand1);
32761 /* xa = abs (operand1) */
32762 xa = ix86_expand_sse_fabs (res, &smask);
32764 /* if (!isless (xa, TWO52)) goto label; */
32765 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32767 /* res = xa + TWO52 - TWO52; */
32768 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
32769 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
32770 emit_move_insn (res, tmp);
32773 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
32775 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
32776 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
32777 emit_insn (gen_rtx_SET (VOIDmode, mask,
32778 gen_rtx_AND (mode, mask, one)));
32779 tmp = expand_simple_binop (mode, MINUS,
32780 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
32781 emit_move_insn (res, tmp);
32783 /* res = copysign (res, operand1) */
32784 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
32786 emit_label (label);
32787 LABEL_NUSES (label) = 1;
32789 emit_move_insn (operand0, res);
32792 /* Expand SSE sequence for computing round from OPERAND1 storing
   into OPERAND0.  */
32795 ix86_expand_round (rtx operand0, rtx operand1)
32797 /* C code for the stuff we're doing below:
32798 double xa = fabs (x);
32799	if (!isless (xa, TWO52))
	  return x;
32801	xa = (double)(long)(xa + nextafter (0.5, 0.0));
32802	return copysign (xa, x);
   */
32804 enum machine_mode mode = GET_MODE (operand0);
32805 rtx res, TWO52, xa, label, xi, half, mask;
32806 const struct real_format *fmt;
32807 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
32809 /* Temporary for holding the result, initialized to the input
32810 operand to ease control flow. */
32811 res = gen_reg_rtx (mode);
32812 emit_move_insn (res, operand1);
32814 TWO52 = ix86_gen_TWO52 (mode);
32815 xa = ix86_expand_sse_fabs (res, &mask);
32816 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32818 /* load nextafter (0.5, 0.0) */
32819 fmt = REAL_MODE_FORMAT (mode);
32820 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
32821 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
32823 /* xa = xa + 0.5 */
32824 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
32825 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
32827 /* xa = (double)(int64_t)xa */
32828 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
32829 expand_fix (xi, xa, 0);
32830 expand_float (xa, xi, 0);
32832 /* res = copysign (xa, operand1) */
32833 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
32835 emit_label (label);
32836 LABEL_NUSES (label) = 1;
32838 emit_move_insn (operand0, res);
32842 /* Table of valid machine attributes. */
32843 static const struct attribute_spec ix86_attribute_table[] =
32845 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
32846 affects_type_identity } */
32847 /* Stdcall attribute says callee is responsible for popping arguments
32848 if they are not variable. */
32849 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
32851 /* Fastcall attribute says callee is responsible for popping arguments
32852 if they are not variable. */
32853 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
32855 /* Thiscall attribute says callee is responsible for popping arguments
32856 if they are not variable. */
32857 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
32859 /* Cdecl attribute says the callee is a normal C declaration */
32860 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
32862 /* Regparm attribute specifies how many integer arguments are to be
32863 passed in registers. */
32864 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
32866 /* Sseregparm attribute says we are using x86_64 calling conventions
32867 for FP arguments. */
32868 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
32870 /* force_align_arg_pointer says this function realigns the stack at entry. */
32871 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
32872 false, true, true, ix86_handle_cconv_attribute, false },
32873 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
32874 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
32875 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
32876 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
32879 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
32881 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
32883 #ifdef SUBTARGET_ATTRIBUTE_TABLE
32884 SUBTARGET_ATTRIBUTE_TABLE,
32886 /* ms_abi and sysv_abi calling convention function attributes. */
32887 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
32888 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
32889 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
32891 { "callee_pop_aggregate_return", 1, 1, false, true, true,
32892 ix86_handle_callee_pop_aggregate_return, true },
32894 { NULL, 0, 0, false, false, false, NULL, false }
32897 /* Implement targetm.vectorize.builtin_vectorization_cost. */
32899 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
32900 tree vectype ATTRIBUTE_UNUSED,
32901 int misalign ATTRIBUTE_UNUSED)
32903 switch (type_of_cost)
32906 return ix86_cost->scalar_stmt_cost;
32909 return ix86_cost->scalar_load_cost;
32912 return ix86_cost->scalar_store_cost;
32915 return ix86_cost->vec_stmt_cost;
32918 return ix86_cost->vec_align_load_cost;
32921 return ix86_cost->vec_store_cost;
32923 case vec_to_scalar:
32924 return ix86_cost->vec_to_scalar_cost;
32926 case scalar_to_vec:
32927 return ix86_cost->scalar_to_vec_cost;
32929 case unaligned_load:
32930 case unaligned_store:
32931 return ix86_cost->vec_unalign_load_cost;
32933 case cond_branch_taken:
32934 return ix86_cost->cond_taken_branch_cost;
32936 case cond_branch_not_taken:
32937 return ix86_cost->cond_not_taken_branch_cost;
32943 gcc_unreachable ();
32948 /* Implement targetm.vectorize.builtin_vec_perm. */
32951 ix86_vectorize_builtin_vec_perm (tree vec_type, tree *mask_type)
32953 tree itype = TREE_TYPE (vec_type);
32954 bool u = TYPE_UNSIGNED (itype);
32955 enum machine_mode vmode = TYPE_MODE (vec_type);
32956 enum ix86_builtins fcode;
32957 bool ok = TARGET_SSE2;
32963 fcode = IX86_BUILTIN_VEC_PERM_V4DF;
32966 fcode = IX86_BUILTIN_VEC_PERM_V2DF;
32968 itype = ix86_get_builtin_type (IX86_BT_DI);
32973 fcode = IX86_BUILTIN_VEC_PERM_V8SF;
32977 fcode = IX86_BUILTIN_VEC_PERM_V4SF;
32979 itype = ix86_get_builtin_type (IX86_BT_SI);
32983 fcode = u ? IX86_BUILTIN_VEC_PERM_V2DI_U : IX86_BUILTIN_VEC_PERM_V2DI;
32986 fcode = u ? IX86_BUILTIN_VEC_PERM_V4SI_U : IX86_BUILTIN_VEC_PERM_V4SI;
32989 fcode = u ? IX86_BUILTIN_VEC_PERM_V8HI_U : IX86_BUILTIN_VEC_PERM_V8HI;
32992 fcode = u ? IX86_BUILTIN_VEC_PERM_V16QI_U : IX86_BUILTIN_VEC_PERM_V16QI;
33002 *mask_type = itype;
33003 return ix86_builtins[(int) fcode];
33006 /* Return a vector mode with twice as many elements as VMODE. */
33007 /* ??? Consider moving this to a table generated by genmodes.c. */
33009 static enum machine_mode
33010 doublesize_vector_mode (enum machine_mode vmode)
33014 case V2SFmode: return V4SFmode;
33015 case V1DImode: return V2DImode;
33016 case V2SImode: return V4SImode;
33017 case V4HImode: return V8HImode;
33018 case V8QImode: return V16QImode;
33020 case V2DFmode: return V4DFmode;
33021 case V4SFmode: return V8SFmode;
33022 case V2DImode: return V4DImode;
33023 case V4SImode: return V8SImode;
33024 case V8HImode: return V16HImode;
33025 case V16QImode: return V32QImode;
33027 case V4DFmode: return V8DFmode;
33028 case V8SFmode: return V16SFmode;
33029 case V4DImode: return V8DImode;
33030 case V8SImode: return V16SImode;
33031 case V16HImode: return V32HImode;
33032 case V32QImode: return V64QImode;
33035 gcc_unreachable ();
33039 /* Construct (set target (vec_select op0 (parallel perm))) and
33040 return true if that's a valid instruction in the active ISA. */
33043 expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
33045 rtx rperm[MAX_VECT_LEN], x;
33048 for (i = 0; i < nelt; ++i)
33049 rperm[i] = GEN_INT (perm[i]);
33051 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
33052 x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
33053 x = gen_rtx_SET (VOIDmode, target, x);
33056 if (recog_memoized (x) < 0)
33064 /* Similar, but generate a vec_concat from op0 and op1 as well. */
33067 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
33068 const unsigned char *perm, unsigned nelt)
33070 enum machine_mode v2mode;
33073 v2mode = doublesize_vector_mode (GET_MODE (op0));
33074 x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
33075 return expand_vselect (target, x, perm, nelt);
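/* Illustrative example (not from the original sources): an
   interleave-low of two V4SF registers is expressed here as
     (set target (vec_select:V4SF (vec_concat:V8SF op0 op1)
                                  (parallel [0 4 1 5])))
   which recog matches as unpcklps; whether a given permutation can be
   expanded this way thus depends only on which vec_select patterns
   sse.md provides.  */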
33078 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
33079 in terms of blendp[sd] / pblendw / pblendvb. */
33082 expand_vec_perm_blend (struct expand_vec_perm_d *d)
33084 enum machine_mode vmode = d->vmode;
33085 unsigned i, mask, nelt = d->nelt;
33086 rtx target, op0, op1, x;
33088 if (!TARGET_SSE4_1 || d->op0 == d->op1)
33090 if (!(GET_MODE_SIZE (vmode) == 16 || vmode == V4DFmode || vmode == V8SFmode))
33093 /* This is a blend, not a permute. Elements must stay in their
33094 respective lanes. */
33095 for (i = 0; i < nelt; ++i)
33097 unsigned e = d->perm[i];
33098 if (!(e == i || e == i + nelt))
33105 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
33106 decision should be extracted elsewhere, so that we only try that
33107 sequence once all budget==3 options have been tried. */
33109 /* For bytes, see if bytes move in pairs so we can use pblendw with
33110 an immediate argument, rather than pblendvb with a vector argument. */
33111 if (vmode == V16QImode)
33113 bool pblendw_ok = true;
33114 for (i = 0; i < 16 && pblendw_ok; i += 2)
33115 pblendw_ok = (d->perm[i] + 1 == d->perm[i + 1]);
33119 rtx rperm[16], vperm;
33121 for (i = 0; i < nelt; ++i)
33122 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
33124 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
33125 vperm = force_reg (V16QImode, vperm);
33127 emit_insn (gen_sse4_1_pblendvb (d->target, d->op0, d->op1, vperm));
33132 target = d->target;
33144 for (i = 0; i < nelt; ++i)
33145 mask |= (d->perm[i] >= nelt) << i;
33149 for (i = 0; i < 2; ++i)
33150 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
33154 for (i = 0; i < 4; ++i)
33155 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
33159 for (i = 0; i < 8; ++i)
33160 mask |= (d->perm[i * 2] >= 16) << i;
33164 target = gen_lowpart (vmode, target);
33165 op0 = gen_lowpart (vmode, op0);
33166 op1 = gen_lowpart (vmode, op1);
33170 gcc_unreachable ();
33173 /* This matches five different patterns across the different modes.  */
33174 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
33175 x = gen_rtx_SET (VOIDmode, target, x);
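/* Example of the mask computation above (hand-built, illustrative
   only): blending the odd halfwords of op1 into op0 on V8HImode uses
   perm { 0 9 2 11 4 13 6 15 }; each element taken from op1 sets its
   bit, giving mask == 0xaa as the pblendw immediate.  */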
33181 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
33182 in terms of the variable form of vpermilps.
33184 Note that we will have already failed the immediate input vpermilps,
33185 which requires that the high and low part shuffle be identical; the
33186 variable form doesn't require that. */
33189 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
33191 rtx rperm[8], vperm;
33194 if (!TARGET_AVX || d->vmode != V8SFmode || d->op0 != d->op1)
33197 /* We can only permute within the 128-bit lane. */
33198 for (i = 0; i < 8; ++i)
33200 unsigned e = d->perm[i];
33201 if (i < 4 ? e >= 4 : e < 4)
33208 for (i = 0; i < 8; ++i)
33210 unsigned e = d->perm[i];
33212 /* Within each 128-bit lane, the elements of op0 are numbered
33213 from 0 and the elements of op1 are numbered from 4. */
33219 rperm[i] = GEN_INT (e);
33222 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
33223 vperm = force_reg (V8SImode, vperm);
33224 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
33229 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
33230 in terms of pshufb or vpperm. */
33233 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
33235 unsigned i, nelt, eltsz;
33236 rtx rperm[16], vperm, target, op0, op1;
33238 if (!(d->op0 == d->op1 ? TARGET_SSSE3 : TARGET_XOP))
33240 if (GET_MODE_SIZE (d->vmode) != 16)
33247 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
33249 for (i = 0; i < nelt; ++i)
33251 unsigned j, e = d->perm[i];
33252 for (j = 0; j < eltsz; ++j)
33253 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
33256 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
33257 vperm = force_reg (V16QImode, vperm);
33259 target = gen_lowpart (V16QImode, d->target);
33260 op0 = gen_lowpart (V16QImode, d->op0);
33261 if (d->op0 == d->op1)
33262 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
33265 op1 = gen_lowpart (V16QImode, d->op1);
33266 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
33272 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
33273 in a single instruction. */
33276 expand_vec_perm_1 (struct expand_vec_perm_d *d)
33278 unsigned i, nelt = d->nelt;
33279 unsigned char perm2[MAX_VECT_LEN];
33281 /* Check plain VEC_SELECT first, because AVX has instructions that could
33282 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
33283 input where SEL+CONCAT may not. */
33284 if (d->op0 == d->op1)
33286 int mask = nelt - 1;
33288 for (i = 0; i < nelt; i++)
33289 perm2[i] = d->perm[i] & mask;
33291 if (expand_vselect (d->target, d->op0, perm2, nelt))
33294 /* There are plenty of patterns in sse.md that are written for
33295 SEL+CONCAT and are not replicated for a single op. Perhaps
33296 that should be changed, to avoid the nastiness here. */
33298 /* Recognize interleave style patterns, which means incrementing
33299 every other permutation operand. */
33300 for (i = 0; i < nelt; i += 2)
33302 perm2[i] = d->perm[i] & mask;
33303 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
33305 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
33308 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
33311 for (i = 0; i < nelt; i += 4)
33313 perm2[i + 0] = d->perm[i + 0] & mask;
33314 perm2[i + 1] = d->perm[i + 1] & mask;
33315 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
33316 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
33319 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
33324 /* Finally, try the fully general two operand permute. */
33325 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
33328 /* Recognize interleave style patterns with reversed operands. */
33329 if (d->op0 != d->op1)
33331 for (i = 0; i < nelt; ++i)
33333 unsigned e = d->perm[i];
33341 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
33345 /* Try the SSE4.1 blend variable merge instructions. */
33346 if (expand_vec_perm_blend (d))
33349 /* Try one of the AVX vpermil variable permutations. */
33350 if (expand_vec_perm_vpermil (d))
33353 /* Try the SSSE3 pshufb or XOP vpperm variable permutation. */
33354 if (expand_vec_perm_pshufb (d))
33360 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
33361 in terms of a pair of pshuflw + pshufhw instructions. */
33364 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
33366 unsigned char perm2[MAX_VECT_LEN];
33370 if (d->vmode != V8HImode || d->op0 != d->op1)
33373 /* The two permutations only operate in 64-bit lanes. */
33374 for (i = 0; i < 4; ++i)
33375 if (d->perm[i] >= 4)
33377 for (i = 4; i < 8; ++i)
33378 if (d->perm[i] < 4)
33384 /* Emit the pshuflw. */
33385 memcpy (perm2, d->perm, 4);
33386 for (i = 4; i < 8; ++i)
33388 ok = expand_vselect (d->target, d->op0, perm2, 8);
33391 /* Emit the pshufhw. */
33392 memcpy (perm2 + 4, d->perm + 4, 4);
33393 for (i = 0; i < 4; ++i)
33395 ok = expand_vselect (d->target, d->target, perm2, 8);
33401 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
33402 the permutation using the SSSE3 palignr instruction. This succeeds
33403 when all of the elements in PERM fit within one vector and we merely
33404 need to shift them down so that a single vector permutation has a
33405 chance to succeed. */
33408 expand_vec_perm_palignr (struct expand_vec_perm_d *d)
33410 unsigned i, nelt = d->nelt;
33415 /* Even with AVX, palignr only operates on 128-bit vectors. */
33416 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
33419 min = nelt, max = 0;
33420 for (i = 0; i < nelt; ++i)
33422 unsigned e = d->perm[i];
33428 if (min == 0 || max - min >= nelt)
33431 /* Given that we have SSSE3, we know we'll be able to implement the
33432 single operand permutation after the palignr with pshufb. */
33436 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
33437 emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode, d->target),
33438 gen_lowpart (TImode, d->op1),
33439 gen_lowpart (TImode, d->op0), shift));
33441 d->op0 = d->op1 = d->target;
33444 for (i = 0; i < nelt; ++i)
33446 unsigned e = d->perm[i] - min;
33452 /* Test for the degenerate case where the alignment by itself
33453 produces the desired permutation. */
33457 ok = expand_vec_perm_1 (d);
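/* Worked example (illustrative only): extracting elements { 3 4 5 6 }
   from a V4SI operand pair has min == 3 and max - min == 3 < nelt, so
   the palignr above shifts the concatenated pair down by 3 * 32 bits
   == 12 bytes; the residual permutation becomes the identity
   { 0 1 2 3 } and is handled by the degenerate case.  */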
33463 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
33464 a two vector permutation into a single vector permutation by using
33465 an interleave operation to merge the vectors. */
33468 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
33470 struct expand_vec_perm_d dremap, dfinal;
33471 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
33472 unsigned contents, h1, h2, h3, h4;
33473 unsigned char remap[2 * MAX_VECT_LEN];
33477 if (d->op0 == d->op1)
33480 /* The 256-bit unpck[lh]p[sd] instructions only operate within the 128-bit
33481 lanes. We can use similar techniques with the vperm2f128 instruction,
33482 but it requires slightly different logic. */
33483 if (GET_MODE_SIZE (d->vmode) != 16)
33486 /* Examine from whence the elements come. */
33488 for (i = 0; i < nelt; ++i)
33489 contents |= 1u << d->perm[i];
33491 /* Split the two input vectors into 4 halves. */
33492 h1 = (1u << nelt2) - 1;
33497 memset (remap, 0xff, sizeof (remap));
33500 /* If all the elements come from the low halves, use interleave low;
33501 similarly for interleave high.  If the elements come from mismatched
33502 halves, we can use shufps for V4SF/V4SI or do a DImode shuffle.  */
33503 if ((contents & (h1 | h3)) == contents)
33505 for (i = 0; i < nelt2; ++i)
33508 remap[i + nelt] = i * 2 + 1;
33509 dremap.perm[i * 2] = i;
33510 dremap.perm[i * 2 + 1] = i + nelt;
33513 else if ((contents & (h2 | h4)) == contents)
33515 for (i = 0; i < nelt2; ++i)
33517 remap[i + nelt2] = i * 2;
33518 remap[i + nelt + nelt2] = i * 2 + 1;
33519 dremap.perm[i * 2] = i + nelt2;
33520 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
33523 else if ((contents & (h1 | h4)) == contents)
33525 for (i = 0; i < nelt2; ++i)
33528 remap[i + nelt + nelt2] = i + nelt2;
33529 dremap.perm[i] = i;
33530 dremap.perm[i + nelt2] = i + nelt + nelt2;
33534 dremap.vmode = V2DImode;
33536 dremap.perm[0] = 0;
33537 dremap.perm[1] = 3;
33540 else if ((contents & (h2 | h3)) == contents)
33542 for (i = 0; i < nelt2; ++i)
33544 remap[i + nelt2] = i;
33545 remap[i + nelt] = i + nelt2;
33546 dremap.perm[i] = i + nelt2;
33547 dremap.perm[i + nelt2] = i + nelt;
33551 dremap.vmode = V2DImode;
33553 dremap.perm[0] = 1;
33554 dremap.perm[1] = 2;
33560 /* Use the remapping array set up above to move the elements from their
33561 swizzled locations into their final destinations. */
33563 for (i = 0; i < nelt; ++i)
33565 unsigned e = remap[d->perm[i]];
33566 gcc_assert (e < nelt);
33567 dfinal.perm[i] = e;
33569 dfinal.op0 = gen_reg_rtx (dfinal.vmode);
33570 dfinal.op1 = dfinal.op0;
33571 dremap.target = dfinal.op0;
33573 /* Test if the final remap can be done with a single insn. For V4SFmode or
33574 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
33576 ok = expand_vec_perm_1 (&dfinal);
33577 seq = get_insns ();
33583 if (dremap.vmode != dfinal.vmode)
33585 dremap.target = gen_lowpart (dremap.vmode, dremap.target);
33586 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
33587 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
33590 ok = expand_vec_perm_1 (&dremap);
33597 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
33598 permutation with two pshufb insns and an ior.  We should have already
33599 failed all two-instruction sequences.  */
33602 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
33604 rtx rperm[2][16], vperm, l, h, op, m128;
33605 unsigned int i, nelt, eltsz;
33607 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
33609 gcc_assert (d->op0 != d->op1);
33612 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
33614 /* Generate two permutation masks. If the required element is within
33615 the given vector it is shuffled into the proper lane. If the required
33616 element is in the other vector, force a zero into the lane by setting
33617 bit 7 in the permutation mask. */
33618 m128 = GEN_INT (-128);
33619 for (i = 0; i < nelt; ++i)
33621 unsigned j, e = d->perm[i];
33622 unsigned which = (e >= nelt);
33626 for (j = 0; j < eltsz; ++j)
33628 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
33629 rperm[1-which][i*eltsz + j] = m128;
33633 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
33634 vperm = force_reg (V16QImode, vperm);
33636 l = gen_reg_rtx (V16QImode);
33637 op = gen_lowpart (V16QImode, d->op0);
33638 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
33640 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
33641 vperm = force_reg (V16QImode, vperm);
33643 h = gen_reg_rtx (V16QImode);
33644 op = gen_lowpart (V16QImode, d->op1);
33645 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
33647 op = gen_lowpart (V16QImode, d->target);
33648 emit_insn (gen_iorv16qi3 (op, l, h));
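/* Illustrative mask pair (hand-built, not from the original sources):
   interleaving the low bytes of two V16QI operands, i.e. perm
   { 0 16 1 17 ... 7 23 }, produces
     first mask:    0 0x80 1 0x80 ... 7 0x80   (selects from op0)
     second mask:   0x80 0 0x80 1 ... 0x80 7   (selects from op1)
   where 0x80 (bit 7) forces a zero lane, so the final ior merges the
   two shuffles losslessly.  */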
33653 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
33654 and extract-odd permutations. */
33657 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
33664 t1 = gen_reg_rtx (V4DFmode);
33665 t2 = gen_reg_rtx (V4DFmode);
33667 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
33668 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
33669 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
33671 /* Now an unpck[lh]pd will produce the result required. */
33673 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
33675 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
33681 int mask = odd ? 0xdd : 0x88;
33683 t1 = gen_reg_rtx (V8SFmode);
33684 t2 = gen_reg_rtx (V8SFmode);
33685 t3 = gen_reg_rtx (V8SFmode);
33687 /* Shuffle within the 128-bit lanes to produce:
33688 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
33689 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
33692 /* Shuffle the lanes around to produce:
33693 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
33694 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
33697 /* Shuffle within the 128-bit lanes to produce:
33698 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
33699 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
33701 /* Shuffle within the 128-bit lanes to produce:
33702 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
33703 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
33705 /* Shuffle the lanes around to produce:
33706 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
33707 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
33716 /* These are always directly implementable by expand_vec_perm_1. */
33717 gcc_unreachable ();
33721 return expand_vec_perm_pshufb2 (d);
33724 /* We need 2*log2(N)-1 operations to achieve odd/even
33725 with interleave. */
33726 t1 = gen_reg_rtx (V8HImode);
33727 t2 = gen_reg_rtx (V8HImode);
33728 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
33729 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
33730 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
33731 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
33733 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
33735 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
33742 return expand_vec_perm_pshufb2 (d);
33745 t1 = gen_reg_rtx (V16QImode);
33746 t2 = gen_reg_rtx (V16QImode);
33747 t3 = gen_reg_rtx (V16QImode);
33748 emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
33749 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
33750 emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
33751 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
33752 emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
33753 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
33755 t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
33757 t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
33763 gcc_unreachable ();
33769 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
33770 extract-even and extract-odd permutations. */
33773 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
33775 unsigned i, odd, nelt = d->nelt;
33778 if (odd != 0 && odd != 1)
33781 for (i = 1; i < nelt; ++i)
33782 if (d->perm[i] != 2 * i + odd)
33785 return expand_vec_perm_even_odd_1 (d, odd);
33788 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
33789 permutations. We assume that expand_vec_perm_1 has already failed. */
33792 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
33794 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
33795 enum machine_mode vmode = d->vmode;
33796 unsigned char perm2[4];
33804 /* These are special-cased in sse.md so that we can optionally
33805 use the vbroadcast instruction. They expand to two insns
33806 if the input happens to be in a register. */
33807 gcc_unreachable ();
33813 /* These are always implementable using standard shuffle patterns. */
33814 gcc_unreachable ();
33818 /* These can be implemented via interleave. We save one insn by
33819 stopping once we have promoted to V4SImode and then use pshufd. */
33822 optab otab = vec_interleave_low_optab;
33826 otab = vec_interleave_high_optab;
33831 op0 = expand_binop (vmode, otab, op0, op0, NULL, 0, OPTAB_DIRECT);
33832 vmode = get_mode_wider_vector (vmode);
33833 op0 = gen_lowpart (vmode, op0);
33835 while (vmode != V4SImode);
33837 memset (perm2, elt, 4);
33838 ok = expand_vselect (gen_lowpart (V4SImode, d->target), op0, perm2, 4);
33843 gcc_unreachable ();
33847 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
33848 broadcast permutations. */
33851 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
33853 unsigned i, elt, nelt = d->nelt;
33855 if (d->op0 != d->op1)
33859 for (i = 1; i < nelt; ++i)
33860 if (d->perm[i] != elt)
33863 return expand_vec_perm_broadcast_1 (d);
33866 /* The guts of ix86_expand_vec_perm_builtin, also used by the ok hook.
33867 With all of the interface bits taken care of, perform the expansion
33868 in D and return true on success. */
33871 ix86_expand_vec_perm_builtin_1 (struct expand_vec_perm_d *d)
33873 /* Try a single instruction expansion. */
33874 if (expand_vec_perm_1 (d))
33877 /* Try sequences of two instructions. */
33879 if (expand_vec_perm_pshuflw_pshufhw (d))
33882 if (expand_vec_perm_palignr (d))
33885 if (expand_vec_perm_interleave2 (d))
33888 if (expand_vec_perm_broadcast (d))
33891 /* Try sequences of three instructions. */
33893 if (expand_vec_perm_pshufb2 (d))
33896 /* ??? Look for narrow permutations whose element orderings would
33897 allow the promotion to a wider mode. */
33899 /* ??? Look for sequences of interleave or a wider permute that place
33900 the data into the correct lanes for a half-vector shuffle like
33901 pshuf[lh]w or vpermilps. */
33903 /* ??? Look for sequences of interleave that produce the desired results.
33904 The combinatorics of punpck[lh] get pretty ugly... */
33906 if (expand_vec_perm_even_odd (d))
33912 /* Extract the values from the vector CST into the permutation array in D.
33913 Return 0 on error, 1 if all values from the permutation come from the
33914 first vector, 2 if all values from the second vector, and 3 otherwise. */
33917 extract_vec_perm_cst (struct expand_vec_perm_d *d, tree cst)
33919 tree list = TREE_VECTOR_CST_ELTS (cst);
33920 unsigned i, nelt = d->nelt;
33923 for (i = 0; i < nelt; ++i, list = TREE_CHAIN (list))
33925 unsigned HOST_WIDE_INT e;
33927 if (!host_integerp (TREE_VALUE (list), 1))
33929 e = tree_low_cst (TREE_VALUE (list), 1);
33933 ret |= (e < nelt ? 1 : 2);
33936 gcc_assert (list == NULL);
33938 /* If all elements are from the second vector, fold them to the first
   vector by subtracting nelt.  */
33940 for (i = 0; i < nelt; ++i)
33941 d->perm[i] -= nelt;
33947 ix86_expand_vec_perm_builtin (tree exp)
33949 struct expand_vec_perm_d d;
33950 tree arg0, arg1, arg2;
33952 arg0 = CALL_EXPR_ARG (exp, 0);
33953 arg1 = CALL_EXPR_ARG (exp, 1);
33954 arg2 = CALL_EXPR_ARG (exp, 2);
33956 d.vmode = TYPE_MODE (TREE_TYPE (arg0));
33957 d.nelt = GET_MODE_NUNITS (d.vmode);
33958 d.testing_p = false;
33959 gcc_assert (VECTOR_MODE_P (d.vmode));
33961 if (TREE_CODE (arg2) != VECTOR_CST)
33963 error_at (EXPR_LOCATION (exp),
33964 "vector permutation requires vector constant");
33968 switch (extract_vec_perm_cst (&d, arg2))
33974 error_at (EXPR_LOCATION (exp), "invalid vector permutation constant");
33978 if (!operand_equal_p (arg0, arg1, 0))
33980 d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
33981 d.op0 = force_reg (d.vmode, d.op0);
33982 d.op1 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
33983 d.op1 = force_reg (d.vmode, d.op1);
33987 /* The elements of PERM do not suggest that only the first operand
33988 is used, but both operands are identical. Allow easier matching
33989 of the permutation by folding the permutation into the single
   input vector.  */
33992 unsigned i, nelt = d.nelt;
33993 for (i = 0; i < nelt; ++i)
33994 if (d.perm[i] >= nelt)
34000 d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
34001 d.op0 = force_reg (d.vmode, d.op0);
34006 d.op0 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
34007 d.op0 = force_reg (d.vmode, d.op0);
34012 d.target = gen_reg_rtx (d.vmode);
34013 if (ix86_expand_vec_perm_builtin_1 (&d))
34016 /* For compiler generated permutations, we should never get here, because
34017 the compiler should also be checking the ok hook.  But since this is a
34018 builtin the user has access to, don't abort.  */
34022 sorry ("vector permutation (%d %d)", d.perm[0], d.perm[1]);
34025 sorry ("vector permutation (%d %d %d %d)",
34026 d.perm[0], d.perm[1], d.perm[2], d.perm[3]);
34029 sorry ("vector permutation (%d %d %d %d %d %d %d %d)",
34030 d.perm[0], d.perm[1], d.perm[2], d.perm[3],
34031 d.perm[4], d.perm[5], d.perm[6], d.perm[7]);
34034 sorry ("vector permutation "
34035 "(%d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d)",
34036 d.perm[0], d.perm[1], d.perm[2], d.perm[3],
34037 d.perm[4], d.perm[5], d.perm[6], d.perm[7],
34038 d.perm[8], d.perm[9], d.perm[10], d.perm[11],
34039 d.perm[12], d.perm[13], d.perm[14], d.perm[15]);
34042 gcc_unreachable ();
34045 return CONST0_RTX (d.vmode);
34048 /* Implement targetm.vectorize.builtin_vec_perm_ok. */
34051 ix86_vectorize_builtin_vec_perm_ok (tree vec_type, tree mask)
34053 struct expand_vec_perm_d d;
34057 d.vmode = TYPE_MODE (vec_type);
34058 d.nelt = GET_MODE_NUNITS (d.vmode);
34059 d.testing_p = true;
34061 /* Given sufficient ISA support we can just return true here
34062 for selected vector modes. */
34063 if (GET_MODE_SIZE (d.vmode) == 16)
34065 /* All implementable with a single vpperm insn. */
34068 /* All implementable with 2 pshufb + 1 ior. */
34071 /* All implementable with shufpd or unpck[lh]pd. */
34076 vec_mask = extract_vec_perm_cst (&d, mask);
34078 /* This hook cannot be called in response to something that the
34079 user does (unlike the builtin expander), so we shouldn't ever see
34080 an error generated from the extract.  */
34081 gcc_assert (vec_mask > 0 && vec_mask <= 3);
34082 one_vec = (vec_mask != 3);
34084 /* Implementable with shufps or pshufd. */
34085 if (one_vec && (d.vmode == V4SFmode || d.vmode == V4SImode))
34088 /* Otherwise we have to go through the motions and see if we can
34089 figure out how to generate the requested permutation. */
34090 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
34091 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
34093 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
34096 ret = ix86_expand_vec_perm_builtin_1 (&d);
34103 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
34105 struct expand_vec_perm_d d;
34111 d.vmode = GET_MODE (targ);
34112 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
34113 d.testing_p = false;
34115 for (i = 0; i < nelt; ++i)
34116 d.perm[i] = i * 2 + odd;
34118 /* We'll either be able to implement the permutation directly... */
34119 if (expand_vec_perm_1 (&d))
34122 /* ... or we use the special-case patterns. */
34123 expand_vec_perm_even_odd_1 (&d, odd);
34126 /* Expand an insert into a vector register through pinsr insn.
34127 Return true if successful. */
34130 ix86_expand_pinsr (rtx *operands)
34132 rtx dst = operands[0];
34133 rtx src = operands[3];
34135 unsigned int size = INTVAL (operands[1]);
34136 unsigned int pos = INTVAL (operands[2]);
34138 if (GET_CODE (dst) == SUBREG)
34140 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
34141 dst = SUBREG_REG (dst);
34144 if (GET_CODE (src) == SUBREG)
34145 src = SUBREG_REG (src);
34147 switch (GET_MODE (dst))
34154 enum machine_mode srcmode, dstmode;
34155 rtx (*pinsr)(rtx, rtx, rtx, rtx);
34157 srcmode = mode_for_size (size, MODE_INT, 0);
34162 if (!TARGET_SSE4_1)
34164 dstmode = V16QImode;
34165 pinsr = gen_sse4_1_pinsrb;
34171 dstmode = V8HImode;
34172 pinsr = gen_sse2_pinsrw;
34176 if (!TARGET_SSE4_1)
34178 dstmode = V4SImode;
34179 pinsr = gen_sse4_1_pinsrd;
34183 gcc_assert (TARGET_64BIT);
34184 if (!TARGET_SSE4_1)
34186 dstmode = V2DImode;
34187 pinsr = gen_sse4_1_pinsrq;
34194 dst = gen_lowpart (dstmode, dst);
34195 src = gen_lowpart (srcmode, src);
34199 emit_insn (pinsr (dst, dst, src, GEN_INT (1 << pos)));
34208 /* This function returns the calling-ABI-specific va_list type node,
34209 i.e. the va_list type appropriate for FNDECL.  */
34212 ix86_fn_abi_va_list (tree fndecl)
34215 return va_list_type_node;
34216 gcc_assert (fndecl != NULL_TREE);
34218 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
34219 return ms_va_list_type_node;
34221 return sysv_va_list_type_node;
34224 /* Returns the canonical va_list type specified by TYPE. If there
34225 is no valid TYPE provided, it returns NULL_TREE.  */
34228 ix86_canonical_va_list_type (tree type)
34232 /* Resolve references and pointers to va_list type. */
34233 if (TREE_CODE (type) == MEM_REF)
34234 type = TREE_TYPE (type);
34235 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
34236 type = TREE_TYPE (type);
34237 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
34238 type = TREE_TYPE (type);
34240 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
34242 wtype = va_list_type_node;
34243 gcc_assert (wtype != NULL_TREE);
34245 if (TREE_CODE (wtype) == ARRAY_TYPE)
34247 /* If va_list is an array type, the argument may have decayed
34248 to a pointer type, e.g. by being passed to another function.
34249 In that case, unwrap both types so that we can compare the
34250 underlying records. */
34251 if (TREE_CODE (htype) == ARRAY_TYPE
34252 || POINTER_TYPE_P (htype))
34254 wtype = TREE_TYPE (wtype);
34255 htype = TREE_TYPE (htype);
34258 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
34259 return va_list_type_node;
34260 wtype = sysv_va_list_type_node;
34261 gcc_assert (wtype != NULL_TREE);
34263 if (TREE_CODE (wtype) == ARRAY_TYPE)
34265 /* If va_list is an array type, the argument may have decayed
34266 to a pointer type, e.g. by being passed to another function.
34267 In that case, unwrap both types so that we can compare the
34268 underlying records. */
34269 if (TREE_CODE (htype) == ARRAY_TYPE
34270 || POINTER_TYPE_P (htype))
34272 wtype = TREE_TYPE (wtype);
34273 htype = TREE_TYPE (htype);
34276 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
34277 return sysv_va_list_type_node;
34278 wtype = ms_va_list_type_node;
34279 gcc_assert (wtype != NULL_TREE);
34281 if (TREE_CODE (wtype) == ARRAY_TYPE)
34283 /* If va_list is an array type, the argument may have decayed
34284 to a pointer type, e.g. by being passed to another function.
34285 In that case, unwrap both types so that we can compare the
34286 underlying records. */
34287 if (TREE_CODE (htype) == ARRAY_TYPE
34288 || POINTER_TYPE_P (htype))
34290 wtype = TREE_TYPE (wtype);
34291 htype = TREE_TYPE (htype);
34294 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
34295 return ms_va_list_type_node;
34298 return std_canonical_va_list_type (type);
34301 /* Iterate through the target-specific builtin types for va_list.
34302 IDX denotes the iterator, *PTREE is set to the result type of
34303 the va_list builtin, and *PNAME to its internal type.
34304 Returns zero if there is no element for this index, otherwise
34305 IDX should be increased upon the next call.
34306 Note, do not iterate a base builtin's name like __builtin_va_list.
34307 Used from c_common_nodes_and_builtins. */
34310 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
34320 *ptree = ms_va_list_type_node;
34321 *pname = "__builtin_ms_va_list";
34325 *ptree = sysv_va_list_type_node;
34326 *pname = "__builtin_sysv_va_list";
34334 #undef TARGET_SCHED_DISPATCH
34335 #define TARGET_SCHED_DISPATCH has_dispatch
34336 #undef TARGET_SCHED_DISPATCH_DO
34337 #define TARGET_SCHED_DISPATCH_DO do_dispatch
34339 /* The size of the dispatch window is the total number of bytes of
34340 object code allowed in a window. */
34341 #define DISPATCH_WINDOW_SIZE 16
34343 /* Number of dispatch windows considered for scheduling. */
34344 #define MAX_DISPATCH_WINDOWS 3
34346 /* Maximum number of instructions in a window. */
34349 /* Maximum number of immediate operands in a window. */
34352 /* Maximum number of immediate bits allowed in a window. */
34353 #define MAX_IMM_SIZE 128
34355 /* Maximum number of 32 bit immediates allowed in a window. */
34356 #define MAX_IMM_32 4
34358 /* Maximum number of 64 bit immediates allowed in a window. */
34359 #define MAX_IMM_64 2
34361 /* Maximum total of loads or prefetches allowed in a window. */
34364 /* Maximum total of stores allowed in a window. */
34365 #define MAX_STORE 1
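/* Sanity of the immediate limits above (illustrative): the 128 bits of
   MAX_IMM_SIZE hold either MAX_IMM_32 32 bit immediates (4 * 32 == 128)
   or MAX_IMM_64 64 bit immediates (2 * 64 == 128) per window, which is
   why count_num_restricted below also charges each 64 bit immediate as
   two 32 bit slots.  */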
34371 /* Dispatch groups.  Instructions that affect the mix in a dispatch window.  */
34372 enum dispatch_group {
34387 /* Number of allowable groups in a dispatch window. It is an array
34388 indexed by dispatch_group enum. 100 is used as a big number,
34389 because the number of these kinds of operations does not have any
34390 effect in a dispatch window, but we need them for other reasons in
   the table.  */
34392 static unsigned int num_allowable_groups[disp_last] = {
34393 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
34396 char group_name[disp_last + 1][16] = {
34397 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
34398 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
34399 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
34402 /* Instruction path. */
34405 path_single, /* Single micro op. */
34406 path_double, /* Double micro op. */
34407 path_multi, /* Instructions with more than 2 micro ops.  */
34411 /* sched_insn_info defines a window into the instructions scheduled in
34412 the basic block.  It contains a pointer to the insn_info table and
34413 the instruction scheduled.
34415 Windows are allocated for each basic block and are linked with each
   other.  */
34417 typedef struct sched_insn_info_s {
34419 enum dispatch_group group;
34420 enum insn_path path;
34425 /* Linked list of dispatch windows. This is a two way list of
34426 dispatch windows of a basic block. It contains information about
34427 the number of uops in the window and the total number of
34428 instructions and of bytes in the object code for this dispatch
   window.  */
34430 typedef struct dispatch_windows_s {
34431 int num_insn; /* Number of insns in the window. */
34432 int num_uops; /* Number of uops in the window. */
34433 int window_size; /* Number of bytes in the window. */
34434 int window_num; /* Window number, 0 or 1. */
34435 int num_imm; /* Number of immediates in an insn. */
34436 int num_imm_32; /* Number of 32 bit immediates in an insn. */
34437 int num_imm_64; /* Number of 64 bit immediates in an insn. */
34438 int imm_size; /* Total immediates in the window. */
34439 int num_loads; /* Total memory loads in the window. */
34440 int num_stores; /* Total memory stores in the window. */
34441 int violation; /* Violation exists in window. */
34442 sched_insn_info *window; /* Pointer to the window. */
34443 struct dispatch_windows_s *next;
34444 struct dispatch_windows_s *prev;
34445 } dispatch_windows;
34447 /* Immediate values used in an insn. */
34448 typedef struct imm_info_s
34455 static dispatch_windows *dispatch_window_list;
34456 static dispatch_windows *dispatch_window_list1;
34458 /* Get dispatch group of insn. */
34460 static enum dispatch_group
34461 get_mem_group (rtx insn)
34463 enum attr_memory memory;
34465 if (INSN_CODE (insn) < 0)
34466 return disp_no_group;
34467 memory = get_attr_memory (insn);
34468 if (memory == MEMORY_STORE)
34471 if (memory == MEMORY_LOAD)
34474 if (memory == MEMORY_BOTH)
34475 return disp_load_store;
34477 return disp_no_group;
34480 /* Return true if insn is a compare instruction. */
34485 enum attr_type type;
34487 type = get_attr_type (insn);
34488 return (type == TYPE_TEST
34489 || type == TYPE_ICMP
34490 || type == TYPE_FCMP
34491 || GET_CODE (PATTERN (insn)) == COMPARE);
34494 /* Return true if a dispatch violation was encountered. */
34497 dispatch_violation (void)
34499 if (dispatch_window_list->next)
34500 return dispatch_window_list->next->violation;
34501 return dispatch_window_list->violation;
34504 /* Return true if insn is a branch instruction. */
34507 is_branch (rtx insn)
34509 return (CALL_P (insn) || JUMP_P (insn));
34512 /* Return true if insn is a prefetch instruction. */
34515 is_prefetch (rtx insn)
34517 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
34520 /* This function initializes a dispatch window and the list container holding a
34521 pointer to the window. */
34524 init_window (int window_num)
34527 dispatch_windows *new_list;
34529 if (window_num == 0)
34530 new_list = dispatch_window_list;
34532 new_list = dispatch_window_list1;
34534 new_list->num_insn = 0;
34535 new_list->num_uops = 0;
34536 new_list->window_size = 0;
34537 new_list->next = NULL;
34538 new_list->prev = NULL;
34539 new_list->window_num = window_num;
34540 new_list->num_imm = 0;
34541 new_list->num_imm_32 = 0;
34542 new_list->num_imm_64 = 0;
34543 new_list->imm_size = 0;
34544 new_list->num_loads = 0;
34545 new_list->num_stores = 0;
34546 new_list->violation = false;
34548 for (i = 0; i < MAX_INSN; i++)
34550 new_list->window[i].insn = NULL;
34551 new_list->window[i].group = disp_no_group;
34552 new_list->window[i].path = no_path;
34553 new_list->window[i].byte_len = 0;
34554 new_list->window[i].imm_bytes = 0;
34559 /* This function allocates and initializes a dispatch window and the
34560 list container holding a pointer to the window. */
34562 static dispatch_windows *
34563 allocate_window (void)
34565 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
34566 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
34571 /* This routine initializes the dispatch scheduling information. It
34572 initiates building dispatch scheduler tables and constructs the
34573 first dispatch window. */
34576 init_dispatch_sched (void)
34578 /* Allocate a dispatch list and a window. */
34579 dispatch_window_list = allocate_window ();
34580 dispatch_window_list1 = allocate_window ();
34585 /* This function returns true if a branch is detected. End of a basic block
34586 does not have to be a branch, but here we assume only branches end a
   window.  */
34590 is_end_basic_block (enum dispatch_group group)
34592 return group == disp_branch;
34595 /* This function is called when the end of a window processing is reached. */
34598 process_end_window (void)
34600 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
34601 if (dispatch_window_list->next)
34603 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
34604 gcc_assert (dispatch_window_list->window_size
34605 + dispatch_window_list1->window_size <= 48);
34611 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
34612 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
34613 for 48 bytes of instructions.  Note that these windows are not dispatch
34614 windows, whose sizes are DISPATCH_WINDOW_SIZE.  */
34616 static dispatch_windows *
34617 allocate_next_window (int window_num)
34619 if (window_num == 0)
34621 if (dispatch_window_list->next)
34624 return dispatch_window_list;
34627 dispatch_window_list->next = dispatch_window_list1;
34628 dispatch_window_list1->prev = dispatch_window_list;
34630 return dispatch_window_list1;
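/* Worked example (illustrative, not part of the original source): the
   scheduler keeps at most two windows chained together.  While window 0
   is filling, dispatch_window_list->next is NULL; once window 0 cannot
   accept an insn, window 1 is linked in after it, and both are flushed
   together by process_end_window once their combined size reaches the
   48-byte budget asserted above.  */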
/* Increment the number of immediate operands of an instruction.  */

static int
find_constant_1 (rtx *in_rtx, imm_info *imm_values)
{
  if (*in_rtx == 0)
    return 0;

  switch ( GET_CODE (*in_rtx))
    {
    case CONST:
    case SYMBOL_REF:
    case CONST_INT:
      (imm_values->imm)++;
      if (x86_64_immediate_operand (*in_rtx, SImode))
        (imm_values->imm32)++;
      else
        (imm_values->imm64)++;
      break;

    case CONST_DOUBLE:
      (imm_values->imm)++;
      (imm_values->imm64)++;
      break;

    case CODE_LABEL:
      if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
        {
          (imm_values->imm)++;
          (imm_values->imm32)++;
        }
      break;

    default:
      break;
    }

  return 0;
}
/* Compute number of immediate operands of an instruction.  */

static void
find_constant (rtx in_rtx, imm_info *imm_values)
{
  for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
                (rtx_function) find_constant_1, (void *) imm_values);
}
/* Return total size of immediate operands of an instruction along with number
   of corresponding immediate-operands.  It initializes its parameters to zero
   before calling FIND_CONSTANT.
   INSN is the input instruction.  IMM is the total of immediates.
   IMM32 is the number of 32 bit immediates.  IMM64 is the number of 64
   bit immediates.  */

static int
get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
{
  imm_info imm_values = {0, 0, 0};

  find_constant (insn, &imm_values);
  *imm = imm_values.imm;
  *imm32 = imm_values.imm32;
  *imm64 = imm_values.imm64;
  return imm_values.imm32 * 4 + imm_values.imm64 * 8;
}
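/* Worked example (illustrative, not part of the original source): for an
   insn carrying one 32-bit and one 64-bit immediate, get_num_immediates
   sets *imm = 2, *imm32 = 1 and *imm64 = 1, and returns
   1 * 4 + 1 * 8 = 12, the number of immediate bytes the insn contributes
   to a window's imm_size budget.  */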
/* This function indicates if an operand of an instruction is an
   immediate.  */

static bool
has_immediate (rtx insn)
{
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (insn)
    return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
                               &num_imm64_operand);
  return false;
}
/* Return single or double path for instructions.  */

static enum insn_path
get_insn_path (rtx insn)
{
  enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);

  if ((int) path == 0)
    return path_single;

  if ((int) path == 1)
    return path_double;

  return path_multi;
}
/* Return insn dispatch group.  */

static enum dispatch_group
get_insn_group (rtx insn)
{
  enum dispatch_group group = get_mem_group (insn);
  if (group)
    return group;

  if (is_branch (insn))
    return disp_branch;

  if (is_cmp (insn))
    return disp_cmp;

  if (has_immediate (insn))
    return disp_imm;

  if (is_prefetch (insn))
    return disp_prefetch;

  return disp_no_group;
}
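/* Illustrative note (not part of the original source): classification is
   ordered, so an insn that both touches memory and carries an immediate
   is grouped by its memory behavior first; only insns with no memory
   group fall through to the branch, compare, immediate and prefetch
   checks.  */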
/* Count number of GROUP restricted instructions in a dispatch
   window WINDOW_LIST.  */

static int
count_num_restricted (rtx insn, dispatch_windows *window_list)
{
  enum dispatch_group group = get_insn_group (insn);
  int imm_size;
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (group == disp_no_group)
    return 0;

  if (group == disp_imm)
    {
      imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
                                     &num_imm64_operand);
      if (window_list->imm_size + imm_size > MAX_IMM_SIZE
          || num_imm_operand + window_list->num_imm > MAX_IMM
          || (num_imm32_operand > 0
              && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
                  || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
          || (num_imm64_operand > 0
              && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
                  || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
          || (window_list->imm_size + imm_size == MAX_IMM_SIZE
              && num_imm64_operand > 0
              && ((window_list->num_imm_64 > 0
                   && window_list->num_insn >= 2)
                  || window_list->num_insn >= 3)))
        return BIG;

      return 1;
    }

  if ((group == disp_load_store
       && (window_list->num_loads >= MAX_LOAD
           || window_list->num_stores >= MAX_STORE))
      || ((group == disp_load
           || group == disp_prefetch)
          && window_list->num_loads >= MAX_LOAD)
      || (group == disp_store
          && window_list->num_stores >= MAX_STORE))
    return BIG;

  return 1;
}
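/* Illustrative note (not part of the original source): the mixed 32/64-bit
   terms above encode that a 64-bit immediate consumes two 32-bit immediate
   slots.  For example, with one 64-bit immediate already in the window,
   window_list->num_imm_64 * 2 + num_imm32_operand must still fit within
   MAX_IMM_32 before another 32-bit immediate can be accepted.  */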
/* This function returns true if insn satisfies dispatch rules on the
   last window scheduled.  */

static bool
fits_dispatch_window (rtx insn)
{
  dispatch_windows *window_list = dispatch_window_list;
  dispatch_windows *window_list_next = dispatch_window_list->next;
  unsigned int num_restrict;
  enum dispatch_group group = get_insn_group (insn);
  enum insn_path path = get_insn_path (insn);
  int sum;

  /* Make disp_cmp and disp_jcc get scheduled at the latest.  These
     instructions should be given the lowest priority in the
     scheduling process in Haifa scheduler to make sure they will be
     scheduled in the same dispatch window as the reference to them.  */
  if (group == disp_jcc || group == disp_cmp)
    return false;

  /* Check nonrestricted.  */
  if (group == disp_no_group || group == disp_branch)
    return true;

  /* Get last dispatch window.  */
  if (window_list_next)
    window_list = window_list_next;

  if (window_list->window_num == 1)
    {
      sum = window_list->prev->window_size + window_list->window_size;

      if (sum == 32
          || (min_insn_size (insn) + sum) >= 48)
        /* Window 1 is full.  Go for next window.  */
        return true;
    }

  num_restrict = count_num_restricted (insn, window_list);

  if (num_restrict > num_allowable_groups[group])
    return false;

  /* See if it fits in the first window.  */
  if (window_list->window_num == 0)
    {
      /* The first window should have only single and double path
         uops.  */
      if (path == path_double
          && (window_list->num_uops + 2) > MAX_INSN)
        return false;
      else if (path != path_single)
        return false;
    }
  return true;
}
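/* Worked example (illustrative, not part of the original source): if
   window 0 already holds 20 bytes and window 1 holds 12, sum is 32 and
   window 1 is treated as full, so the insn is reported as fitting the
   *next* window pair; the same happens when adding min_insn_size (insn)
   would push the pair past the 48-byte dispatch budget.  */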
/* Add an instruction INSN with NUM_UOPS micro-operations to the
   dispatch window WINDOW_LIST.  */

static void
add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
{
  int byte_len = min_insn_size (insn);
  int num_insn = window_list->num_insn;
  int imm_size;
  sched_insn_info *window = window_list->window;
  enum dispatch_group group = get_insn_group (insn);
  enum insn_path path = get_insn_path (insn);
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (!window_list->violation && group != disp_cmp
      && !fits_dispatch_window (insn))
    window_list->violation = true;

  imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
                                 &num_imm64_operand);

  /* Initialize window with new instruction.  */
  window[num_insn].insn = insn;
  window[num_insn].byte_len = byte_len;
  window[num_insn].group = group;
  window[num_insn].path = path;
  window[num_insn].imm_bytes = imm_size;

  window_list->window_size += byte_len;
  window_list->num_insn = num_insn + 1;
  window_list->num_uops = window_list->num_uops + num_uops;
  window_list->imm_size += imm_size;
  window_list->num_imm += num_imm_operand;
  window_list->num_imm_32 += num_imm32_operand;
  window_list->num_imm_64 += num_imm64_operand;

  if (group == disp_store)
    window_list->num_stores += 1;
  else if (group == disp_load
           || group == disp_prefetch)
    window_list->num_loads += 1;
  else if (group == disp_load_store)
    {
      window_list->num_stores += 1;
      window_list->num_loads += 1;
    }
}
/* Adds a scheduled instruction, INSN, to the current dispatch window.
   If the total bytes of instructions or the number of instructions in
   the window exceed allowable, it allocates a new window.  */

static void
add_to_dispatch_window (rtx insn)
{
  int byte_len;
  dispatch_windows *window_list;
  dispatch_windows *next_list;
  dispatch_windows *window0_list;
  enum insn_path path;
  enum dispatch_group insn_group;
  bool insn_fits;
  int num_insn;
  int num_uops;
  int window_num;
  int insn_num_uops;
  int sum;

  if (INSN_CODE (insn) < 0)
    return;

  byte_len = min_insn_size (insn);
  window_list = dispatch_window_list;
  next_list = window_list->next;
  path = get_insn_path (insn);
  insn_group = get_insn_group (insn);

  /* Get the last dispatch window.  */
  if (next_list)
    window_list = dispatch_window_list->next;

  if (path == path_single)
    insn_num_uops = 1;
  else if (path == path_double)
    insn_num_uops = 2;
  else
    insn_num_uops = (int) path;

  /* If current window is full, get a new window.
     Window number zero is full, if MAX_INSN uops are scheduled in it.
     Window number one is full, if window zero's bytes plus window
     one's bytes is 32, or if the bytes of the new instruction added
     to the total makes it greater than 48, or it has already MAX_INSN
     instructions in it.  */
  num_insn = window_list->num_insn;
  num_uops = window_list->num_uops;
  window_num = window_list->window_num;
  insn_fits = fits_dispatch_window (insn);

  if (num_insn >= MAX_INSN
      || num_uops + insn_num_uops > MAX_INSN
      || !insn_fits)
    {
      window_num = ~window_num & 1;
      window_list = allocate_next_window (window_num);
    }

  if (window_num == 0)
    {
      add_insn_window (insn, window_list, insn_num_uops);
      if (window_list->num_insn >= MAX_INSN
          && insn_group == disp_branch)
        {
          process_end_window ();
          return;
        }
    }
  else if (window_num == 1)
    {
      window0_list = window_list->prev;
      sum = window0_list->window_size + window_list->window_size;
      if (sum == 32
          || (byte_len + sum) >= 48)
        {
          process_end_window ();
          window_list = dispatch_window_list;
        }

      add_insn_window (insn, window_list, insn_num_uops);
    }
  else
    gcc_unreachable ();

  if (is_end_basic_block (insn_group))
    {
      /* End of basic block is reached; do end-basic-block process.  */
      process_end_window ();
      return;
    }
}
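/* Worked example (illustrative, not part of the original source): suppose
   window 0 already holds MAX_INSN uops when a single-path insn arrives.
   Then num_insn >= MAX_INSN flips window_num from 0 to 1 via
   allocate_next_window, the insn is recorded in window 1, and once the
   32/48-byte limits trip, process_end_window resets both windows for the
   next dispatch group.  */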
/* Print the dispatch window, WINDOW_NUM, to FILE.  */

DEBUG_FUNCTION static void
debug_dispatch_window_file (FILE *file, int window_num)
{
  int i;
  dispatch_windows *list;

  if (window_num == 0)
    list = dispatch_window_list;
  else
    list = dispatch_window_list1;

  fprintf (file, "Window #%d:\n", list->window_num);
  fprintf (file, "  num_insn = %d, num_uops = %d, window_size = %d\n",
           list->num_insn, list->num_uops, list->window_size);
  fprintf (file, "  num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
           list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);

  fprintf (file, "  num_loads = %d, num_stores = %d\n", list->num_loads,
           list->num_stores);
  fprintf (file, " insn info:\n");

  for (i = 0; i < MAX_INSN; i++)
    {
      if (!list->window[i].insn)
        break;
      fprintf (file, "    group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
               i, group_name[list->window[i].group],
               i, (void *) list->window[i].insn,
               i, list->window[i].path,
               i, list->window[i].byte_len,
               i, list->window[i].imm_bytes);
    }
}
/* Print to stdout a dispatch window.  */

DEBUG_FUNCTION void
debug_dispatch_window (int window_num)
{
  debug_dispatch_window_file (stdout, window_num);
}
/* Print INSN dispatch information to FILE.  */

DEBUG_FUNCTION static void
debug_insn_dispatch_info_file (FILE *file, rtx insn)
{
  int byte_len;
  enum insn_path path;
  enum dispatch_group group;
  int imm_size;
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (INSN_CODE (insn) < 0)
    return;

  byte_len = min_insn_size (insn);
  path = get_insn_path (insn);
  group = get_insn_group (insn);
  imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
                                 &num_imm64_operand);

  fprintf (file, " insn info:\n");
  fprintf (file, "  group = %s, path = %d, byte_len = %d\n",
           group_name[group], path, byte_len);
  fprintf (file, "  num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
           num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
}
/* Print to stdout the status of the ready list with respect to
   dispatch windows.  */

DEBUG_FUNCTION void
debug_ready_dispatch (void)
{
  int i;
  int no_ready = number_in_ready ();

  fprintf (stdout, "Number of ready: %d\n", no_ready);

  for (i = 0; i < no_ready; i++)
    debug_insn_dispatch_info_file (stdout, get_ready_element (i));
}
/* This routine is the driver of the dispatch scheduler.  */

static void
do_dispatch (rtx insn, int mode)
{
  if (mode == DISPATCH_INIT)
    init_dispatch_sched ();
  else if (mode == ADD_TO_DISPATCH_WINDOW)
    add_to_dispatch_window (insn);
}
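#if 0
/* Usage sketch (illustrative only, not part of the original source): the
   scheduler is expected to drive this pass by first initializing the
   window pair and then accounting for each issued insn.  INSN here stands
   for whatever rtx the scheduler just scheduled; the exact call sites
   live in the generic scheduler, not in this file.  */
do_dispatch (NULL_RTX, DISPATCH_INIT);
do_dispatch (insn, ADD_TO_DISPATCH_WINDOW);
#endif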
/* Return TRUE if Dispatch Scheduling is supported.  */

static bool
has_dispatch (rtx insn, int action)
{
  if (ix86_tune == PROCESSOR_BDVER1 && flag_dispatch_scheduler)
    switch (action)
      {
      default:
        return false;

      case IS_DISPATCH_ON:
        return true;

      case IS_CMP:
        return is_cmp (insn);

      case DISPATCH_VIOLATION:
        return dispatch_violation ();

      case FITS_DISPATCH_WINDOW:
        return fits_dispatch_window (insn);
      }

  return false;
}
/* ??? No autovectorization into MMX or 3DNOW until we can reliably
   place emms and femms instructions.  */

static enum machine_mode
ix86_preferred_simd_mode (enum machine_mode mode)
{
  if (!TARGET_SSE)
    return word_mode;

  switch (mode)
    {
    case QImode:
      return V16QImode;
    case HImode:
      return V8HImode;
    case SImode:
      return V4SImode;
    case DImode:
      return V2DImode;

    case SFmode:
      if (TARGET_AVX && !flag_prefer_avx128)
        return V8SFmode;
      else
        return V4SFmode;

    case DFmode:
      if (!TARGET_VECTORIZE_DOUBLE)
        return word_mode;
      else if (TARGET_AVX && !flag_prefer_avx128)
        return V4DFmode;
      else if (TARGET_SSE2)
        return V2DFmode;
      /* FALLTHRU */

    default:
      return word_mode;
    }
}
/* If AVX is enabled then try vectorizing with both 256bit and 128bit
   vectors.  */

static unsigned int
ix86_autovectorize_vector_sizes (void)
{
  return (TARGET_AVX && !flag_prefer_avx128) ? 32 | 16 : 0;
}
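/* Illustrative note (not part of the original source): the return value is
   a bitmask of vector byte sizes for the vectorizer to try, so 32 | 16
   (i.e. 48) requests both 256-bit and 128-bit attempts, while 0 leaves the
   choice to the preferred SIMD mode alone.  */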
/* Initialize the GCC target structure.  */
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY ix86_return_in_memory

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
# undef TARGET_MERGE_DECL_ATTRIBUTES
# define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif
#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL ix86_builtin_decl
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  ix86_builtin_vectorized_function

#undef TARGET_VECTORIZE_BUILTIN_CONVERSION
#define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion

#undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
#undef TARGET_ENCODE_SECTION_INFO
#ifndef SUBTARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
#endif
#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_BYTE_OP
#define TARGET_ASM_BYTE_OP ASM_BYTE

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif

#undef TARGET_PROFILE_BEFORE_PROLOGUE
#define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND ix86_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra

#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
#endif
#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START x86_file_start

#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS \
  (TARGET_DEFAULT \
   | TARGET_SUBTARGET_DEFAULT \
   | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
#undef TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION ix86_handle_option

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE ix86_option_override
#undef TARGET_OPTION_OPTIMIZATION_TABLE
#define TARGET_OPTION_OPTIMIZATION_TABLE ix86_option_optimization_table
#undef TARGET_OPTION_INIT_STRUCT
#define TARGET_OPTION_INIT_STRUCT ix86_option_init_struct

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
#define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list

#undef TARGET_ENUM_VA_LIST_P
#define TARGET_ENUM_VA_LIST_P ix86_enum_va_list

#undef TARGET_FN_ABI_VA_LIST
#define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list

#undef TARGET_CANONICAL_VA_LIST_TYPE
#define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start

#undef TARGET_MD_ASM_CLOBBERS
#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers

#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG ix86_function_arg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
#undef TARGET_UPDATE_STACK_BOUNDARY
#define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
#undef TARGET_GET_DRAP_RTX
#define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_STATIC_CHAIN
#define TARGET_STATIC_CHAIN ix86_static_chain
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
#undef TARGET_RETURN_POPS_ARGS
#define TARGET_RETURN_POPS_ARGS ix86_return_pops_args

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
#endif

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ix86_mangle_type

#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail

#undef TARGET_SUPPORTS_SPLIT_STACK
#define TARGET_SUPPORTS_SPLIT_STACK ix86_supports_split_stack

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD ix86_secondary_reload

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
#undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
#define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  ix86_builtin_vectorization_cost
#undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
#define TARGET_VECTORIZE_BUILTIN_VEC_PERM \
  ix86_vectorize_builtin_vec_perm
#undef TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK
#define TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK \
  ix86_vectorize_builtin_vec_perm_ok
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  ix86_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  ix86_autovectorize_vector_sizes

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE ix86_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE ix86_function_specific_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT ix86_function_specific_print

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P ix86_can_inline_p

#undef TARGET_EXPAND_TO_RTL_HOOK
#define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ix86_can_eliminate

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry

#undef TARGET_ASM_CODE_END
#define TARGET_ASM_CODE_END ix86_code_end

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
#if TARGET_MACHO
#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS darwin_rename_builtins
#endif

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-i386.h"