/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
31 #include "insn-config.h"
32 #include "conditions.h"
34 #include "insn-codes.h"
35 #include "insn-attr.h"
42 #include "diagnostic-core.h"
44 #include "basic-block.h"
47 #include "target-def.h"
48 #include "common/common-target.h"
49 #include "langhooks.h"
54 #include "tm-constrs.h"
58 #include "sched-int.h"
62 #include "diagnostic.h"
enum upper_128bits_state
{
  unknown = 0,
  unused,
  used
};

typedef struct block_info_def
{
  /* State of the upper 128bits of AVX registers at exit.  */
  enum upper_128bits_state state;
  /* TRUE if state of the upper 128bits of AVX registers is unchanged
     in this block.  */
  bool unchanged;
  /* TRUE if block has been processed.  */
  bool processed;
  /* TRUE if block has been scanned.  */
  bool scanned;
  /* Previous state of the upper 128bits of AVX registers at entry.  */
  enum upper_128bits_state prev;
} *block_info;

#define BLOCK_INFO(B) ((block_info) (B)->aux)
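/* The vzeroupper pass below solves a small forward data-flow problem
   over these three states, kept in each basic block's AUX field via
   BLOCK_INFO: USED dominates UNUSED when predecessor states merge at a
   join point, and an UNKNOWN predecessor leaves the block to be
   rescanned until the predecessor states settle.  */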
enum call_avx256_state
{
  /* Callee returns 256bit AVX register.  */
  callee_return_avx256 = -1,
  /* Callee returns and passes 256bit AVX register.  */
  callee_return_pass_avx256,
  /* Callee passes 256bit AVX register.  */
  callee_pass_avx256,
  /* Callee doesn't return nor pass 256bit AVX register, or no
     256bit AVX register in function return.  */
  call_no_avx256,
  /* vzeroupper intrinsic.  */
  vzeroupper_intrinsic
};
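/* One of these values is recorded as the first operand of the
   UNSPECV_VZEROUPPER pattern when it is emitted;
   move_or_delete_vzeroupper_2 reads it back with
   INTVAL (XVECEXP (pat, 0, 0)) to decide whether a vzeroupper is
   redundant or is required by the calling convention.  */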
/* Check if a 256bit AVX register is referenced in stores.  */

static void
check_avx256_stores (rtx dest, const_rtx set, void *data)
{
  if ((REG_P (dest)
       && VALID_AVX256_REG_MODE (GET_MODE (dest)))
      || (GET_CODE (set) == SET
          && REG_P (SET_SRC (set))
          && VALID_AVX256_REG_MODE (GET_MODE (SET_SRC (set)))))
    {
      enum upper_128bits_state *state
        = (enum upper_128bits_state *) data;
      *state = used;
    }
}
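/* check_avx256_stores is only invoked through note_stores, which calls
   it once for every SET or CLOBBER in an insn pattern with the store
   destination and the enclosing expression; DATA is the address of the
   scanner's running state, which is flipped to USED above.  */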
/* Helper function for move_or_delete_vzeroupper_1.  Look for vzeroupper
   in basic block BB.  Delete it if upper 128bit AVX registers are
   unused.  If it isn't deleted, move it to just before a jump insn.

   STATE is state of the upper 128bits of AVX registers at entry.  */

static void
move_or_delete_vzeroupper_2 (basic_block bb,
                             enum upper_128bits_state state)
{
  rtx insn, bb_end;
  rtx vzeroupper_insn = NULL_RTX;
  rtx pat;
  int avx256;
  bool unchanged;

  if (BLOCK_INFO (bb)->unchanged)
    {
      if (dump_file)
        fprintf (dump_file, " [bb %i] unchanged: upper 128bits: %d\n",
                 bb->index, state);

      BLOCK_INFO (bb)->state = state;
      return;
    }

  if (BLOCK_INFO (bb)->scanned && BLOCK_INFO (bb)->prev == state)
    {
      if (dump_file)
        fprintf (dump_file, " [bb %i] scanned: upper 128bits: %d\n",
                 bb->index, BLOCK_INFO (bb)->state);
      return;
    }

  BLOCK_INFO (bb)->prev = state;

  if (dump_file)
    fprintf (dump_file, " [bb %i] entry: upper 128bits: %d\n",
             bb->index, state);

  unchanged = true;

  /* BB_END changes when it is deleted.  */
  bb_end = BB_END (bb);
  insn = BB_HEAD (bb);
  while (insn != bb_end)
    {
      insn = NEXT_INSN (insn);

      if (!NONDEBUG_INSN_P (insn))
        continue;

      /* Move vzeroupper before jump/call.  */
      if (JUMP_P (insn) || CALL_P (insn))
        {
          if (!vzeroupper_insn)
            continue;

          if (PREV_INSN (insn) != vzeroupper_insn)
            {
              if (dump_file)
                {
                  fprintf (dump_file, "Move vzeroupper after:\n");
                  print_rtl_single (dump_file, PREV_INSN (insn));
                  fprintf (dump_file, "before:\n");
                  print_rtl_single (dump_file, insn);
                }
              reorder_insns_nobb (vzeroupper_insn, vzeroupper_insn,
                                  PREV_INSN (insn));
            }
          vzeroupper_insn = NULL_RTX;
          continue;
        }

      pat = PATTERN (insn);

      /* Check insn for vzeroupper intrinsic.  */
      if (GET_CODE (pat) == UNSPEC_VOLATILE
          && XINT (pat, 1) == UNSPECV_VZEROUPPER)
        {
          if (dump_file)
            {
              /* Found vzeroupper intrinsic.  */
              fprintf (dump_file, "Found vzeroupper:\n");
              print_rtl_single (dump_file, insn);
            }
        }
      else
        {
          /* Check insn for vzeroall intrinsic.  */
          if (GET_CODE (pat) == PARALLEL
              && GET_CODE (XVECEXP (pat, 0, 0)) == UNSPEC_VOLATILE
              && XINT (XVECEXP (pat, 0, 0), 1) == UNSPECV_VZEROALL)
            {
              state = unused;
              unchanged = false;

              /* Delete pending vzeroupper insertion.  */
              if (vzeroupper_insn)
                {
                  delete_insn (vzeroupper_insn);
                  vzeroupper_insn = NULL_RTX;
                }
            }
          else if (state != used)
            {
              note_stores (pat, check_avx256_stores, &state);
              if (state == used)
                unchanged = false;
            }
          continue;
        }

      /* Process vzeroupper intrinsic.  */
      avx256 = INTVAL (XVECEXP (pat, 0, 0));

      if (state == unused)
        {
          /* Since the upper 128bits are cleared, callee must not pass
             256bit AVX register.  We only need to check if callee
             returns 256bit AVX register.  */
          if (avx256 == callee_return_avx256)
            {
              state = used;
              unchanged = false;
            }

          /* Remove unnecessary vzeroupper since upper 128bits are
             cleared.  */
          if (dump_file)
            {
              fprintf (dump_file, "Delete redundant vzeroupper:\n");
              print_rtl_single (dump_file, insn);
            }
          delete_insn (insn);
        }
      else
        {
          /* Set state to UNUSED if callee doesn't return 256bit AVX
             register.  */
          if (avx256 != callee_return_pass_avx256)
            state = unused;

          if (avx256 == callee_return_pass_avx256
              || avx256 == callee_pass_avx256)
            {
              /* Must remove vzeroupper since callee passes in 256bit
                 AVX register.  */
              if (dump_file)
                {
                  fprintf (dump_file, "Delete callee pass vzeroupper:\n");
                  print_rtl_single (dump_file, insn);
                }
              delete_insn (insn);
            }
          else
            {
              vzeroupper_insn = insn;
              unchanged = false;
            }
        }
    }

  BLOCK_INFO (bb)->state = state;
  BLOCK_INFO (bb)->unchanged = unchanged;
  BLOCK_INFO (bb)->scanned = true;

  if (dump_file)
    fprintf (dump_file, " [bb %i] exit: %s: upper 128bits: %d\n",
             bb->index, unchanged ? "unchanged" : "changed",
             state);
}
/* Helper function for move_or_delete_vzeroupper.  Process vzeroupper
   in BLOCK and check its predecessor blocks.  Treat UNKNOWN state
   as UNUSED if UNKNOWN_IS_UNUSED is true.  Return TRUE if the exit
   state is changed.  */

static bool
move_or_delete_vzeroupper_1 (basic_block block, bool unknown_is_unused)
{
  edge e;
  edge_iterator ei;
  enum upper_128bits_state state, old_state, new_state;
  bool seen_unknown;

  if (dump_file)
    fprintf (dump_file, " Process [bb %i]: status: %d\n",
             block->index, BLOCK_INFO (block)->processed);

  if (BLOCK_INFO (block)->processed)
    return false;

  state = unused;

  /* Check all predecessor edges of this block.  */
  seen_unknown = false;
  FOR_EACH_EDGE (e, ei, block->preds)
    {
      if (e->src == block)
        continue;
      switch (BLOCK_INFO (e->src)->state)
        {
        case unknown:
          if (!unknown_is_unused)
            seen_unknown = true;
          /* FALLTHRU */
        case unused:
          break;
        case used:
          state = used;
          goto done;
        }
    }

  if (seen_unknown)
    state = unknown;

 done:
  old_state = BLOCK_INFO (block)->state;
  move_or_delete_vzeroupper_2 (block, state);
  new_state = BLOCK_INFO (block)->state;

  if (state != unknown || new_state == used)
    BLOCK_INFO (block)->processed = true;

  /* Need to rescan if the upper 128bits of AVX registers are changed
     to USED at exit.  */
  if (new_state != old_state)
    {
      if (new_state == used)
        cfun->machine->rescan_vzeroupper_p = 1;
      return true;
    }
  else
    return false;
}
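/* The driver below runs a standard iterative data-flow solver: blocks
   are pulled off WORKLIST in reverse completion order, and any block
   whose exit state changes pushes its unprocessed successors either
   back onto the current round (WORKLIST) or onto the next one
   (PENDING).  Iteration stops once a whole round finishes without
   rescan_vzeroupper_p being set, i.e. no state flipped to USED.  */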
/* Go through the instruction stream looking for vzeroupper.  Delete
   it if upper 128bit AVX registers are unused.  If it isn't deleted,
   move it to just before a jump insn.  */

static void
move_or_delete_vzeroupper (void)
{
  edge e;
  edge_iterator ei;
  basic_block bb;
  fibheap_t worklist, pending, fibheap_swap;
  sbitmap visited, in_worklist, in_pending, sbitmap_swap;
  int *bb_order;
  int *rc_order;
  int i;

  /* Set up block info for each basic block.  */
  alloc_aux_for_blocks (sizeof (struct block_info_def));

  /* Process outgoing edges of entry point.  */
  if (dump_file)
    fprintf (dump_file, "Process outgoing edges of entry point\n");

  FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR->succs)
    {
      move_or_delete_vzeroupper_2 (e->dest,
                                   cfun->machine->caller_pass_avx256_p
                                   ? used : unused);
      BLOCK_INFO (e->dest)->processed = true;
    }

  /* Compute reverse completion order of depth first search of the CFG
     so that the data-flow runs faster.  */
  rc_order = XNEWVEC (int, n_basic_blocks - NUM_FIXED_BLOCKS);
  bb_order = XNEWVEC (int, last_basic_block);
  pre_and_rev_post_order_compute (NULL, rc_order, false);
  for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
    bb_order[rc_order[i]] = i;
  free (rc_order);

  worklist = fibheap_new ();
  pending = fibheap_new ();
  visited = sbitmap_alloc (last_basic_block);
  in_worklist = sbitmap_alloc (last_basic_block);
  in_pending = sbitmap_alloc (last_basic_block);
  sbitmap_zero (in_worklist);

  /* Don't check outgoing edges of entry point.  */
  sbitmap_ones (in_pending);
  FOR_EACH_BB (bb)
    if (BLOCK_INFO (bb)->processed)
      RESET_BIT (in_pending, bb->index);
    else
      {
        move_or_delete_vzeroupper_1 (bb, false);
        fibheap_insert (pending, bb_order[bb->index], bb);
      }

  if (dump_file)
    fprintf (dump_file, "Check remaining basic blocks\n");

  while (!fibheap_empty (pending))
    {
      fibheap_swap = pending;
      pending = worklist;
      worklist = fibheap_swap;
      sbitmap_swap = in_pending;
      in_pending = in_worklist;
      in_worklist = sbitmap_swap;

      sbitmap_zero (visited);

      cfun->machine->rescan_vzeroupper_p = 0;

      while (!fibheap_empty (worklist))
        {
          bb = (basic_block) fibheap_extract_min (worklist);
          RESET_BIT (in_worklist, bb->index);
          gcc_assert (!TEST_BIT (visited, bb->index));
          if (!TEST_BIT (visited, bb->index))
            {
              SET_BIT (visited, bb->index);

              if (move_or_delete_vzeroupper_1 (bb, false))
                FOR_EACH_EDGE (e, ei, bb->succs)
                  {
                    if (e->dest == EXIT_BLOCK_PTR
                        || BLOCK_INFO (e->dest)->processed)
                      continue;

                    if (TEST_BIT (visited, e->dest->index))
                      {
                        if (!TEST_BIT (in_pending, e->dest->index))
                          {
                            /* Send E->DEST to next round.  */
                            SET_BIT (in_pending, e->dest->index);
                            fibheap_insert (pending,
                                            bb_order[e->dest->index],
                                            e->dest);
                          }
                      }
                    else if (!TEST_BIT (in_worklist, e->dest->index))
                      {
                        /* Add E->DEST to current round.  */
                        SET_BIT (in_worklist, e->dest->index);
                        fibheap_insert (worklist, bb_order[e->dest->index],
                                        e->dest);
                      }
                  }
            }
        }

      if (!cfun->machine->rescan_vzeroupper_p)
        break;
    }

  free (bb_order);
  fibheap_delete (worklist);
  fibheap_delete (pending);
  sbitmap_free (visited);
  sbitmap_free (in_worklist);
  sbitmap_free (in_pending);

  if (dump_file)
    fprintf (dump_file, "Process remaining basic blocks\n");

  FOR_EACH_BB (bb)
    move_or_delete_vzeroupper_1 (bb, true);

  free_aux_for_blocks ();
}
static rtx legitimize_dllimport_symbol (rtx, bool);

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
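/* For instance, MODE_INDEX (SImode) is 2, which selects the SI entry
   of the five-element mult and divide arrays below (QI, HI, SI, DI,
   other); anything wider than DImode falls into the final slot.  */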
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)
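/* On this scale a two-byte addition costs COSTS_N_BYTES (2) == 4,
   which lines up with COSTS_N_INSNS (1) == 4 in the time-based
   tables, so the same processor_costs layout can express size.  */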
#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
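/* Each stringop_algs initializer below names the algorithm for a
   variable or unknown block size first, then {max, alg} pairs that
   select an algorithm for known sizes up to MAX, with -1 standing for
   "any larger size".  The first initializer of each pair is used for
   32bit code generation, the second for 64bit.  */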
struct processor_costs ix86_size_cost = {/* costs for tuning for size */
  COSTS_N_BYTES (2),     /* cost of an add instruction */
  COSTS_N_BYTES (3),     /* cost of a lea instruction */
  COSTS_N_BYTES (2),     /* variable shift costs */
  COSTS_N_BYTES (3),     /* constant shift costs */
  {COSTS_N_BYTES (3),    /* cost of starting multiply for QI */
   COSTS_N_BYTES (3),    /* HI */
   COSTS_N_BYTES (3),    /* SI */
   COSTS_N_BYTES (3),    /* DI */
   COSTS_N_BYTES (5)},   /* other */
  0,                     /* cost of multiply per each bit set */
  {COSTS_N_BYTES (3),    /* cost of a divide/mod for QI */
   COSTS_N_BYTES (3),    /* HI */
   COSTS_N_BYTES (3),    /* SI */
   COSTS_N_BYTES (3),    /* DI */
   COSTS_N_BYTES (5)},   /* other */
  COSTS_N_BYTES (3),     /* cost of movsx */
  COSTS_N_BYTES (3),     /* cost of movzx */
  0,                     /* "large" insn */
  2,                     /* cost for loading QImode using movzbl */
  {2, 2, 2},             /* cost of loading integer registers
                            in QImode, HImode and SImode.
                            Relative to reg-reg move (2).  */
  {2, 2, 2},             /* cost of storing integer registers */
  2,                     /* cost of reg,reg fld/fst */
  {2, 2, 2},             /* cost of loading fp registers
                            in SFmode, DFmode and XFmode */
  {2, 2, 2},             /* cost of storing fp registers
                            in SFmode, DFmode and XFmode */
  3,                     /* cost of moving MMX register */
  {3, 3},                /* cost of loading MMX registers
                            in SImode and DImode */
  {3, 3},                /* cost of storing MMX registers
                            in SImode and DImode */
  3,                     /* cost of moving SSE register */
  {3, 3, 3},             /* cost of loading SSE registers
                            in SImode, DImode and TImode */
  {3, 3, 3},             /* cost of storing SSE registers
                            in SImode, DImode and TImode */
  3,                     /* MMX or SSE register to integer */
  0,                     /* size of l1 cache */
  0,                     /* size of l2 cache */
  0,                     /* size of prefetch block */
  0,                     /* number of parallel prefetches */
  COSTS_N_BYTES (2),     /* cost of FADD and FSUB insns.  */
  COSTS_N_BYTES (2),     /* cost of FMUL instruction.  */
  COSTS_N_BYTES (2),     /* cost of FDIV instruction.  */
  COSTS_N_BYTES (2),     /* cost of FABS instruction.  */
  COSTS_N_BYTES (2),     /* cost of FCHS instruction.  */
  COSTS_N_BYTES (2),     /* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  1,                     /* scalar_stmt_cost.  */
  1,                     /* scalar load_cost.  */
  1,                     /* scalar_store_cost.  */
  1,                     /* vec_stmt_cost.  */
  1,                     /* vec_to_scalar_cost.  */
  1,                     /* scalar_to_vec_cost.  */
  1,                     /* vec_align_load_cost.  */
  1,                     /* vec_unalign_load_cost.  */
  1,                     /* vec_store_cost.  */
  1,                     /* cond_taken_branch_cost.  */
  1,                     /* cond_not_taken_branch_cost.  */
};

/* Processor costs (relative to an add) */
struct processor_costs i386_cost = {   /* 386 specific costs */
  COSTS_N_INSNS (1),     /* cost of an add instruction */
  COSTS_N_INSNS (1),     /* cost of a lea instruction */
  COSTS_N_INSNS (3),     /* variable shift costs */
  COSTS_N_INSNS (2),     /* constant shift costs */
  {COSTS_N_INSNS (6),    /* cost of starting multiply for QI */
   COSTS_N_INSNS (6),    /* HI */
   COSTS_N_INSNS (6),    /* SI */
   COSTS_N_INSNS (6),    /* DI */
   COSTS_N_INSNS (6)},   /* other */
  COSTS_N_INSNS (1),     /* cost of multiply per each bit set */
  {COSTS_N_INSNS (23),   /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),   /* HI */
   COSTS_N_INSNS (23),   /* SI */
   COSTS_N_INSNS (23),   /* DI */
   COSTS_N_INSNS (23)},  /* other */
  COSTS_N_INSNS (3),     /* cost of movsx */
  COSTS_N_INSNS (2),     /* cost of movzx */
  15,                    /* "large" insn */
  4,                     /* cost for loading QImode using movzbl */
  {2, 4, 2},             /* cost of loading integer registers
                            in QImode, HImode and SImode.
                            Relative to reg-reg move (2).  */
  {2, 4, 2},             /* cost of storing integer registers */
  2,                     /* cost of reg,reg fld/fst */
  {8, 8, 8},             /* cost of loading fp registers
                            in SFmode, DFmode and XFmode */
  {8, 8, 8},             /* cost of storing fp registers
                            in SFmode, DFmode and XFmode */
  2,                     /* cost of moving MMX register */
  {4, 8},                /* cost of loading MMX registers
                            in SImode and DImode */
  {4, 8},                /* cost of storing MMX registers
                            in SImode and DImode */
  2,                     /* cost of moving SSE register */
  {4, 8, 16},            /* cost of loading SSE registers
                            in SImode, DImode and TImode */
  {4, 8, 16},            /* cost of storing SSE registers
                            in SImode, DImode and TImode */
  3,                     /* MMX or SSE register to integer */
  0,                     /* size of l1 cache */
  0,                     /* size of l2 cache */
  0,                     /* size of prefetch block */
  0,                     /* number of parallel prefetches */
  COSTS_N_INSNS (23),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (27),    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (88),    /* cost of FDIV instruction.  */
  COSTS_N_INSNS (22),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (24),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (122),   /* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                     /* scalar_stmt_cost.  */
  1,                     /* scalar load_cost.  */
  1,                     /* scalar_store_cost.  */
  1,                     /* vec_stmt_cost.  */
  1,                     /* vec_to_scalar_cost.  */
  1,                     /* scalar_to_vec_cost.  */
  1,                     /* vec_align_load_cost.  */
  2,                     /* vec_unalign_load_cost.  */
  1,                     /* vec_store_cost.  */
  3,                     /* cond_taken_branch_cost.  */
  1,                     /* cond_not_taken_branch_cost.  */
};

struct processor_costs i486_cost = {   /* 486 specific costs */
  COSTS_N_INSNS (1),     /* cost of an add instruction */
  COSTS_N_INSNS (1),     /* cost of a lea instruction */
  COSTS_N_INSNS (3),     /* variable shift costs */
  COSTS_N_INSNS (2),     /* constant shift costs */
  {COSTS_N_INSNS (12),   /* cost of starting multiply for QI */
   COSTS_N_INSNS (12),   /* HI */
   COSTS_N_INSNS (12),   /* SI */
   COSTS_N_INSNS (12),   /* DI */
   COSTS_N_INSNS (12)},  /* other */
  1,                     /* cost of multiply per each bit set */
  {COSTS_N_INSNS (40),   /* cost of a divide/mod for QI */
   COSTS_N_INSNS (40),   /* HI */
   COSTS_N_INSNS (40),   /* SI */
   COSTS_N_INSNS (40),   /* DI */
   COSTS_N_INSNS (40)},  /* other */
  COSTS_N_INSNS (3),     /* cost of movsx */
  COSTS_N_INSNS (2),     /* cost of movzx */
  15,                    /* "large" insn */
  4,                     /* cost for loading QImode using movzbl */
  {2, 4, 2},             /* cost of loading integer registers
                            in QImode, HImode and SImode.
                            Relative to reg-reg move (2).  */
  {2, 4, 2},             /* cost of storing integer registers */
  2,                     /* cost of reg,reg fld/fst */
  {8, 8, 8},             /* cost of loading fp registers
                            in SFmode, DFmode and XFmode */
  {8, 8, 8},             /* cost of storing fp registers
                            in SFmode, DFmode and XFmode */
  2,                     /* cost of moving MMX register */
  {4, 8},                /* cost of loading MMX registers
                            in SImode and DImode */
  {4, 8},                /* cost of storing MMX registers
                            in SImode and DImode */
  2,                     /* cost of moving SSE register */
  {4, 8, 16},            /* cost of loading SSE registers
                            in SImode, DImode and TImode */
  {4, 8, 16},            /* cost of storing SSE registers
                            in SImode, DImode and TImode */
  3,                     /* MMX or SSE register to integer */
  4,                     /* size of l1 cache.  486 has 8kB cache
                            shared for code and data, so 4kB is
                            not really precise.  */
  4,                     /* size of l2 cache */
  0,                     /* size of prefetch block */
  0,                     /* number of parallel prefetches */
  COSTS_N_INSNS (8),     /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (16),    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (73),    /* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),     /* cost of FABS instruction.  */
  COSTS_N_INSNS (3),     /* cost of FCHS instruction.  */
  COSTS_N_INSNS (83),    /* cost of FSQRT instruction.  */
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                     /* scalar_stmt_cost.  */
  1,                     /* scalar load_cost.  */
  1,                     /* scalar_store_cost.  */
  1,                     /* vec_stmt_cost.  */
  1,                     /* vec_to_scalar_cost.  */
  1,                     /* scalar_to_vec_cost.  */
  1,                     /* vec_align_load_cost.  */
  2,                     /* vec_unalign_load_cost.  */
  1,                     /* vec_store_cost.  */
  3,                     /* cond_taken_branch_cost.  */
  1,                     /* cond_not_taken_branch_cost.  */
};

struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1),     /* cost of an add instruction */
  COSTS_N_INSNS (1),     /* cost of a lea instruction */
  COSTS_N_INSNS (4),     /* variable shift costs */
  COSTS_N_INSNS (1),     /* constant shift costs */
  {COSTS_N_INSNS (11),   /* cost of starting multiply for QI */
   COSTS_N_INSNS (11),   /* HI */
   COSTS_N_INSNS (11),   /* SI */
   COSTS_N_INSNS (11),   /* DI */
   COSTS_N_INSNS (11)},  /* other */
  0,                     /* cost of multiply per each bit set */
  {COSTS_N_INSNS (25),   /* cost of a divide/mod for QI */
   COSTS_N_INSNS (25),   /* HI */
   COSTS_N_INSNS (25),   /* SI */
   COSTS_N_INSNS (25),   /* DI */
   COSTS_N_INSNS (25)},  /* other */
  COSTS_N_INSNS (3),     /* cost of movsx */
  COSTS_N_INSNS (2),     /* cost of movzx */
  8,                     /* "large" insn */
  6,                     /* cost for loading QImode using movzbl */
  {2, 4, 2},             /* cost of loading integer registers
                            in QImode, HImode and SImode.
                            Relative to reg-reg move (2).  */
  {2, 4, 2},             /* cost of storing integer registers */
  2,                     /* cost of reg,reg fld/fst */
  {2, 2, 6},             /* cost of loading fp registers
                            in SFmode, DFmode and XFmode */
  {4, 4, 6},             /* cost of storing fp registers
                            in SFmode, DFmode and XFmode */
  8,                     /* cost of moving MMX register */
  {8, 8},                /* cost of loading MMX registers
                            in SImode and DImode */
  {8, 8},                /* cost of storing MMX registers
                            in SImode and DImode */
  2,                     /* cost of moving SSE register */
  {4, 8, 16},            /* cost of loading SSE registers
                            in SImode, DImode and TImode */
  {4, 8, 16},            /* cost of storing SSE registers
                            in SImode, DImode and TImode */
  3,                     /* MMX or SSE register to integer */
  8,                     /* size of l1 cache.  */
  8,                     /* size of l2 cache */
  0,                     /* size of prefetch block */
  0,                     /* number of parallel prefetches */
  COSTS_N_INSNS (3),     /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (3),     /* cost of FMUL instruction.  */
  COSTS_N_INSNS (39),    /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),     /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),     /* cost of FCHS instruction.  */
  COSTS_N_INSNS (70),    /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                     /* scalar_stmt_cost.  */
  1,                     /* scalar load_cost.  */
  1,                     /* scalar_store_cost.  */
  1,                     /* vec_stmt_cost.  */
  1,                     /* vec_to_scalar_cost.  */
  1,                     /* scalar_to_vec_cost.  */
  1,                     /* vec_align_load_cost.  */
  2,                     /* vec_unalign_load_cost.  */
  1,                     /* vec_store_cost.  */
  3,                     /* cond_taken_branch_cost.  */
  1,                     /* cond_not_taken_branch_cost.  */
};

struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1),     /* cost of an add instruction */
  COSTS_N_INSNS (1),     /* cost of a lea instruction */
  COSTS_N_INSNS (1),     /* variable shift costs */
  COSTS_N_INSNS (1),     /* constant shift costs */
  {COSTS_N_INSNS (4),    /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),    /* HI */
   COSTS_N_INSNS (4),    /* SI */
   COSTS_N_INSNS (4),    /* DI */
   COSTS_N_INSNS (4)},   /* other */
  0,                     /* cost of multiply per each bit set */
  {COSTS_N_INSNS (17),   /* cost of a divide/mod for QI */
   COSTS_N_INSNS (17),   /* HI */
   COSTS_N_INSNS (17),   /* SI */
   COSTS_N_INSNS (17),   /* DI */
   COSTS_N_INSNS (17)},  /* other */
  COSTS_N_INSNS (1),     /* cost of movsx */
  COSTS_N_INSNS (1),     /* cost of movzx */
  8,                     /* "large" insn */
  2,                     /* cost for loading QImode using movzbl */
  {4, 4, 4},             /* cost of loading integer registers
                            in QImode, HImode and SImode.
                            Relative to reg-reg move (2).  */
  {2, 2, 2},             /* cost of storing integer registers */
  2,                     /* cost of reg,reg fld/fst */
  {2, 2, 6},             /* cost of loading fp registers
                            in SFmode, DFmode and XFmode */
  {4, 4, 6},             /* cost of storing fp registers
                            in SFmode, DFmode and XFmode */
  2,                     /* cost of moving MMX register */
  {2, 2},                /* cost of loading MMX registers
                            in SImode and DImode */
  {2, 2},                /* cost of storing MMX registers
                            in SImode and DImode */
  2,                     /* cost of moving SSE register */
  {2, 2, 8},             /* cost of loading SSE registers
                            in SImode, DImode and TImode */
  {2, 2, 8},             /* cost of storing SSE registers
                            in SImode, DImode and TImode */
  3,                     /* MMX or SSE register to integer */
  8,                     /* size of l1 cache.  */
  256,                   /* size of l2 cache */
  32,                    /* size of prefetch block */
  6,                     /* number of parallel prefetches */
  COSTS_N_INSNS (3),     /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),     /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),    /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),     /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),     /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),    /* cost of FSQRT instruction.  */
  /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
     (we ensure the alignment).  For small blocks the inline loop is still a
     noticeable win; for bigger blocks either rep movsl or rep movsb is the
     way to go.  Rep movsb apparently has a more expensive startup time in the
     CPU, but after 4K the difference is down in the noise.  */
  {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
                        {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{1024, unrolled_loop},
                        {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                     /* scalar_stmt_cost.  */
  1,                     /* scalar load_cost.  */
  1,                     /* scalar_store_cost.  */
  1,                     /* vec_stmt_cost.  */
  1,                     /* vec_to_scalar_cost.  */
  1,                     /* scalar_to_vec_cost.  */
  1,                     /* vec_align_load_cost.  */
  2,                     /* vec_unalign_load_cost.  */
  1,                     /* vec_store_cost.  */
  3,                     /* cond_taken_branch_cost.  */
  1,                     /* cond_not_taken_branch_cost.  */
};

struct processor_costs geode_cost = {
  COSTS_N_INSNS (1),     /* cost of an add instruction */
  COSTS_N_INSNS (1),     /* cost of a lea instruction */
  COSTS_N_INSNS (2),     /* variable shift costs */
  COSTS_N_INSNS (1),     /* constant shift costs */
  {COSTS_N_INSNS (3),    /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),    /* HI */
   COSTS_N_INSNS (7),    /* SI */
   COSTS_N_INSNS (7),    /* DI */
   COSTS_N_INSNS (7)},   /* other */
  0,                     /* cost of multiply per each bit set */
  {COSTS_N_INSNS (15),   /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),   /* HI */
   COSTS_N_INSNS (39),   /* SI */
   COSTS_N_INSNS (39),   /* DI */
   COSTS_N_INSNS (39)},  /* other */
  COSTS_N_INSNS (1),     /* cost of movsx */
  COSTS_N_INSNS (1),     /* cost of movzx */
  8,                     /* "large" insn */
  1,                     /* cost for loading QImode using movzbl */
  {1, 1, 1},             /* cost of loading integer registers
                            in QImode, HImode and SImode.
                            Relative to reg-reg move (2).  */
  {1, 1, 1},             /* cost of storing integer registers */
  1,                     /* cost of reg,reg fld/fst */
  {1, 1, 1},             /* cost of loading fp registers
                            in SFmode, DFmode and XFmode */
  {4, 6, 6},             /* cost of storing fp registers
                            in SFmode, DFmode and XFmode */

  1,                     /* cost of moving MMX register */
  {1, 1},                /* cost of loading MMX registers
                            in SImode and DImode */
  {1, 1},                /* cost of storing MMX registers
                            in SImode and DImode */
  1,                     /* cost of moving SSE register */
  {1, 1, 1},             /* cost of loading SSE registers
                            in SImode, DImode and TImode */
  {1, 1, 1},             /* cost of storing SSE registers
                            in SImode, DImode and TImode */
  1,                     /* MMX or SSE register to integer */
  64,                    /* size of l1 cache.  */
  128,                   /* size of l2 cache.  */
  32,                    /* size of prefetch block */
  1,                     /* number of parallel prefetches */
  COSTS_N_INSNS (6),     /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (11),    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (47),    /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),     /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),     /* cost of FCHS instruction.  */
  COSTS_N_INSNS (54),    /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                     /* scalar_stmt_cost.  */
  1,                     /* scalar load_cost.  */
  1,                     /* scalar_store_cost.  */
  1,                     /* vec_stmt_cost.  */
  1,                     /* vec_to_scalar_cost.  */
  1,                     /* scalar_to_vec_cost.  */
  1,                     /* vec_align_load_cost.  */
  2,                     /* vec_unalign_load_cost.  */
  1,                     /* vec_store_cost.  */
  3,                     /* cond_taken_branch_cost.  */
  1,                     /* cond_not_taken_branch_cost.  */
};

struct processor_costs k6_cost = {
  COSTS_N_INSNS (1),     /* cost of an add instruction */
  COSTS_N_INSNS (2),     /* cost of a lea instruction */
  COSTS_N_INSNS (1),     /* variable shift costs */
  COSTS_N_INSNS (1),     /* constant shift costs */
  {COSTS_N_INSNS (3),    /* cost of starting multiply for QI */
   COSTS_N_INSNS (3),    /* HI */
   COSTS_N_INSNS (3),    /* SI */
   COSTS_N_INSNS (3),    /* DI */
   COSTS_N_INSNS (3)},   /* other */
  0,                     /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),   /* cost of a divide/mod for QI */
   COSTS_N_INSNS (18),   /* HI */
   COSTS_N_INSNS (18),   /* SI */
   COSTS_N_INSNS (18),   /* DI */
   COSTS_N_INSNS (18)},  /* other */
  COSTS_N_INSNS (2),     /* cost of movsx */
  COSTS_N_INSNS (2),     /* cost of movzx */
  8,                     /* "large" insn */
  3,                     /* cost for loading QImode using movzbl */
  {4, 5, 4},             /* cost of loading integer registers
                            in QImode, HImode and SImode.
                            Relative to reg-reg move (2).  */
  {2, 3, 2},             /* cost of storing integer registers */
  4,                     /* cost of reg,reg fld/fst */
  {6, 6, 6},             /* cost of loading fp registers
                            in SFmode, DFmode and XFmode */
  {4, 4, 4},             /* cost of storing fp registers
                            in SFmode, DFmode and XFmode */
  2,                     /* cost of moving MMX register */
  {2, 2},                /* cost of loading MMX registers
                            in SImode and DImode */
  {2, 2},                /* cost of storing MMX registers
                            in SImode and DImode */
  2,                     /* cost of moving SSE register */
  {2, 2, 8},             /* cost of loading SSE registers
                            in SImode, DImode and TImode */
  {2, 2, 8},             /* cost of storing SSE registers
                            in SImode, DImode and TImode */
  6,                     /* MMX or SSE register to integer */
  32,                    /* size of l1 cache.  */
  32,                    /* size of l2 cache.  Some models
                            have integrated l2 cache, but
                            optimizing for k6 is not important
                            enough to worry about that.  */
  32,                    /* size of prefetch block */
  1,                     /* number of parallel prefetches */
  COSTS_N_INSNS (2),     /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (2),     /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),    /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),     /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),     /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),    /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                     /* scalar_stmt_cost.  */
  1,                     /* scalar load_cost.  */
  1,                     /* scalar_store_cost.  */
  1,                     /* vec_stmt_cost.  */
  1,                     /* vec_to_scalar_cost.  */
  1,                     /* scalar_to_vec_cost.  */
  1,                     /* vec_align_load_cost.  */
  2,                     /* vec_unalign_load_cost.  */
  1,                     /* vec_store_cost.  */
  3,                     /* cond_taken_branch_cost.  */
  1,                     /* cond_not_taken_branch_cost.  */
};

struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1),     /* cost of an add instruction */
  COSTS_N_INSNS (2),     /* cost of a lea instruction */
  COSTS_N_INSNS (1),     /* variable shift costs */
  COSTS_N_INSNS (1),     /* constant shift costs */
  {COSTS_N_INSNS (5),    /* cost of starting multiply for QI */
   COSTS_N_INSNS (5),    /* HI */
   COSTS_N_INSNS (5),    /* SI */
   COSTS_N_INSNS (5),    /* DI */
   COSTS_N_INSNS (5)},   /* other */
  0,                     /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),   /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),   /* HI */
   COSTS_N_INSNS (42),   /* SI */
   COSTS_N_INSNS (74),   /* DI */
   COSTS_N_INSNS (74)},  /* other */
  COSTS_N_INSNS (1),     /* cost of movsx */
  COSTS_N_INSNS (1),     /* cost of movzx */
  8,                     /* "large" insn */
  4,                     /* cost for loading QImode using movzbl */
  {3, 4, 3},             /* cost of loading integer registers
                            in QImode, HImode and SImode.
                            Relative to reg-reg move (2).  */
  {3, 4, 3},             /* cost of storing integer registers */
  4,                     /* cost of reg,reg fld/fst */
  {4, 4, 12},            /* cost of loading fp registers
                            in SFmode, DFmode and XFmode */
  {6, 6, 8},             /* cost of storing fp registers
                            in SFmode, DFmode and XFmode */
  2,                     /* cost of moving MMX register */
  {4, 4},                /* cost of loading MMX registers
                            in SImode and DImode */
  {4, 4},                /* cost of storing MMX registers
                            in SImode and DImode */
  2,                     /* cost of moving SSE register */
  {4, 4, 6},             /* cost of loading SSE registers
                            in SImode, DImode and TImode */
  {4, 4, 5},             /* cost of storing SSE registers
                            in SImode, DImode and TImode */
  5,                     /* MMX or SSE register to integer */
  64,                    /* size of l1 cache.  */
  256,                   /* size of l2 cache.  */
  64,                    /* size of prefetch block */
  6,                     /* number of parallel prefetches */
  5,                     /* Branch cost */
  COSTS_N_INSNS (4),     /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),     /* cost of FMUL instruction.  */
  COSTS_N_INSNS (24),    /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),     /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),     /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),    /* cost of FSQRT instruction.  */
  /* For some reason, Athlon deals better with REP prefix (relative to loops)
     compared to K8.  Alignment becomes important after 8 bytes for memcpy and
     128 bytes for memset.  */
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                     /* scalar_stmt_cost.  */
  1,                     /* scalar load_cost.  */
  1,                     /* scalar_store_cost.  */
  1,                     /* vec_stmt_cost.  */
  1,                     /* vec_to_scalar_cost.  */
  1,                     /* scalar_to_vec_cost.  */
  1,                     /* vec_align_load_cost.  */
  2,                     /* vec_unalign_load_cost.  */
  1,                     /* vec_store_cost.  */
  3,                     /* cond_taken_branch_cost.  */
  1,                     /* cond_not_taken_branch_cost.  */
};

struct processor_costs k8_cost = {
  COSTS_N_INSNS (1),     /* cost of an add instruction */
  COSTS_N_INSNS (2),     /* cost of a lea instruction */
  COSTS_N_INSNS (1),     /* variable shift costs */
  COSTS_N_INSNS (1),     /* constant shift costs */
  {COSTS_N_INSNS (3),    /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),    /* HI */
   COSTS_N_INSNS (3),    /* SI */
   COSTS_N_INSNS (4),    /* DI */
   COSTS_N_INSNS (5)},   /* other */
  0,                     /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),   /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),   /* HI */
   COSTS_N_INSNS (42),   /* SI */
   COSTS_N_INSNS (74),   /* DI */
   COSTS_N_INSNS (74)},  /* other */
  COSTS_N_INSNS (1),     /* cost of movsx */
  COSTS_N_INSNS (1),     /* cost of movzx */
  8,                     /* "large" insn */
  4,                     /* cost for loading QImode using movzbl */
  {3, 4, 3},             /* cost of loading integer registers
                            in QImode, HImode and SImode.
                            Relative to reg-reg move (2).  */
  {3, 4, 3},             /* cost of storing integer registers */
  4,                     /* cost of reg,reg fld/fst */
  {4, 4, 12},            /* cost of loading fp registers
                            in SFmode, DFmode and XFmode */
  {6, 6, 8},             /* cost of storing fp registers
                            in SFmode, DFmode and XFmode */
  2,                     /* cost of moving MMX register */
  {3, 3},                /* cost of loading MMX registers
                            in SImode and DImode */
  {4, 4},                /* cost of storing MMX registers
                            in SImode and DImode */
  2,                     /* cost of moving SSE register */
  {4, 3, 6},             /* cost of loading SSE registers
                            in SImode, DImode and TImode */
  {4, 4, 5},             /* cost of storing SSE registers
                            in SImode, DImode and TImode */
  5,                     /* MMX or SSE register to integer */
  64,                    /* size of l1 cache.  */
  512,                   /* size of l2 cache.  */
  64,                    /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                   /* number of parallel prefetches */
  3,                     /* Branch cost */
  COSTS_N_INSNS (4),     /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),     /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),    /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),     /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),     /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),    /* cost of FSQRT instruction.  */
  /* K8 has optimized REP instruction for medium sized blocks, but for very
     small blocks it is better to use loop.  For large blocks, libcall can
     do nontemporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                     /* scalar_stmt_cost.  */
  2,                     /* scalar load_cost.  */
  2,                     /* scalar_store_cost.  */
  5,                     /* vec_stmt_cost.  */
  0,                     /* vec_to_scalar_cost.  */
  2,                     /* scalar_to_vec_cost.  */
  2,                     /* vec_align_load_cost.  */
  3,                     /* vec_unalign_load_cost.  */
  3,                     /* vec_store_cost.  */
  3,                     /* cond_taken_branch_cost.  */
  2,                     /* cond_not_taken_branch_cost.  */
};

struct processor_costs amdfam10_cost = {
  COSTS_N_INSNS (1),     /* cost of an add instruction */
  COSTS_N_INSNS (2),     /* cost of a lea instruction */
  COSTS_N_INSNS (1),     /* variable shift costs */
  COSTS_N_INSNS (1),     /* constant shift costs */
  {COSTS_N_INSNS (3),    /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),    /* HI */
   COSTS_N_INSNS (3),    /* SI */
   COSTS_N_INSNS (4),    /* DI */
   COSTS_N_INSNS (5)},   /* other */
  0,                     /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),   /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),   /* HI */
   COSTS_N_INSNS (51),   /* SI */
   COSTS_N_INSNS (83),   /* DI */
   COSTS_N_INSNS (83)},  /* other */
  COSTS_N_INSNS (1),     /* cost of movsx */
  COSTS_N_INSNS (1),     /* cost of movzx */
  8,                     /* "large" insn */
  4,                     /* cost for loading QImode using movzbl */
  {3, 4, 3},             /* cost of loading integer registers
                            in QImode, HImode and SImode.
                            Relative to reg-reg move (2).  */
  {3, 4, 3},             /* cost of storing integer registers */
  4,                     /* cost of reg,reg fld/fst */
  {4, 4, 12},            /* cost of loading fp registers
                            in SFmode, DFmode and XFmode */
  {6, 6, 8},             /* cost of storing fp registers
                            in SFmode, DFmode and XFmode */
  2,                     /* cost of moving MMX register */
  {3, 3},                /* cost of loading MMX registers
                            in SImode and DImode */
  {4, 4},                /* cost of storing MMX registers
                            in SImode and DImode */
  2,                     /* cost of moving SSE register */
  {4, 4, 3},             /* cost of loading SSE registers
                            in SImode, DImode and TImode */
  {4, 4, 5},             /* cost of storing SSE registers
                            in SImode, DImode and TImode */
  3,                     /* MMX or SSE register to integer */
                         /* On K8:
                             MOVD reg64, xmmreg Double FSTORE 4
                             MOVD reg32, xmmreg Double FSTORE 4
                            On AMDFAM10:
                             MOVD reg64, xmmreg Double FADD 3
                                                1/1  1/1
                             MOVD reg32, xmmreg Double FADD 3
                                                1/1  1/1 */
  64,                    /* size of l1 cache.  */
  512,                   /* size of l2 cache.  */
  64,                    /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                   /* number of parallel prefetches */
  2,                     /* Branch cost */
  COSTS_N_INSNS (4),     /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),     /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),    /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),     /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),     /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),    /* cost of FSQRT instruction.  */

  /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall can
     do nontemporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                     /* scalar_stmt_cost.  */
  2,                     /* scalar load_cost.  */
  2,                     /* scalar_store_cost.  */
  6,                     /* vec_stmt_cost.  */
  0,                     /* vec_to_scalar_cost.  */
  2,                     /* scalar_to_vec_cost.  */
  2,                     /* vec_align_load_cost.  */
  2,                     /* vec_unalign_load_cost.  */
  2,                     /* vec_store_cost.  */
  2,                     /* cond_taken_branch_cost.  */
  1,                     /* cond_not_taken_branch_cost.  */
};

struct processor_costs bdver1_cost = {
  COSTS_N_INSNS (1),     /* cost of an add instruction */
  COSTS_N_INSNS (1),     /* cost of a lea instruction */
  COSTS_N_INSNS (1),     /* variable shift costs */
  COSTS_N_INSNS (1),     /* constant shift costs */
  {COSTS_N_INSNS (4),    /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),    /* HI */
   COSTS_N_INSNS (4),    /* SI */
   COSTS_N_INSNS (6),    /* DI */
   COSTS_N_INSNS (6)},   /* other */
  0,                     /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),   /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),   /* HI */
   COSTS_N_INSNS (51),   /* SI */
   COSTS_N_INSNS (83),   /* DI */
   COSTS_N_INSNS (83)},  /* other */
  COSTS_N_INSNS (1),     /* cost of movsx */
  COSTS_N_INSNS (1),     /* cost of movzx */
  8,                     /* "large" insn */
  4,                     /* cost for loading QImode using movzbl */
  {5, 5, 4},             /* cost of loading integer registers
                            in QImode, HImode and SImode.
                            Relative to reg-reg move (2).  */
  {4, 4, 4},             /* cost of storing integer registers */
  2,                     /* cost of reg,reg fld/fst */
  {5, 5, 12},            /* cost of loading fp registers
                            in SFmode, DFmode and XFmode */
  {4, 4, 8},             /* cost of storing fp registers
                            in SFmode, DFmode and XFmode */
  2,                     /* cost of moving MMX register */
  {4, 4},                /* cost of loading MMX registers
                            in SImode and DImode */
  {4, 4},                /* cost of storing MMX registers
                            in SImode and DImode */
  2,                     /* cost of moving SSE register */
  {4, 4, 4},             /* cost of loading SSE registers
                            in SImode, DImode and TImode */
  {4, 4, 4},             /* cost of storing SSE registers
                            in SImode, DImode and TImode */
  2,                     /* MMX or SSE register to integer */
                         /* On K8:
                             MOVD reg64, xmmreg Double FSTORE 4
                             MOVD reg32, xmmreg Double FSTORE 4
                            On AMDFAM10:
                             MOVD reg64, xmmreg Double FADD 3
                                                1/1  1/1
                             MOVD reg32, xmmreg Double FADD 3
                                                1/1  1/1 */
  16,                    /* size of l1 cache.  */
  2048,                  /* size of l2 cache.  */
  64,                    /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                   /* number of parallel prefetches */
  2,                     /* Branch cost */
  COSTS_N_INSNS (6),     /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),     /* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),    /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),     /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),     /* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),    /* cost of FSQRT instruction.  */

  /* BDVER1 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall
     can do nontemporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  6,                     /* scalar_stmt_cost.  */
  4,                     /* scalar load_cost.  */
  4,                     /* scalar_store_cost.  */
  6,                     /* vec_stmt_cost.  */
  0,                     /* vec_to_scalar_cost.  */
  2,                     /* scalar_to_vec_cost.  */
  4,                     /* vec_align_load_cost.  */
  4,                     /* vec_unalign_load_cost.  */
  4,                     /* vec_store_cost.  */
  2,                     /* cond_taken_branch_cost.  */
  1,                     /* cond_not_taken_branch_cost.  */
};

struct processor_costs btver1_cost = {
  COSTS_N_INSNS (1),     /* cost of an add instruction */
  COSTS_N_INSNS (2),     /* cost of a lea instruction */
  COSTS_N_INSNS (1),     /* variable shift costs */
  COSTS_N_INSNS (1),     /* constant shift costs */
  {COSTS_N_INSNS (3),    /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),    /* HI */
   COSTS_N_INSNS (3),    /* SI */
   COSTS_N_INSNS (4),    /* DI */
   COSTS_N_INSNS (5)},   /* other */
  0,                     /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),   /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),   /* HI */
   COSTS_N_INSNS (51),   /* SI */
   COSTS_N_INSNS (83),   /* DI */
   COSTS_N_INSNS (83)},  /* other */
  COSTS_N_INSNS (1),     /* cost of movsx */
  COSTS_N_INSNS (1),     /* cost of movzx */
  8,                     /* "large" insn */
  4,                     /* cost for loading QImode using movzbl */
  {3, 4, 3},             /* cost of loading integer registers
                            in QImode, HImode and SImode.
                            Relative to reg-reg move (2).  */
  {3, 4, 3},             /* cost of storing integer registers */
  4,                     /* cost of reg,reg fld/fst */
  {4, 4, 12},            /* cost of loading fp registers
                            in SFmode, DFmode and XFmode */
  {6, 6, 8},             /* cost of storing fp registers
                            in SFmode, DFmode and XFmode */
  2,                     /* cost of moving MMX register */
  {3, 3},                /* cost of loading MMX registers
                            in SImode and DImode */
  {4, 4},                /* cost of storing MMX registers
                            in SImode and DImode */
  2,                     /* cost of moving SSE register */
  {4, 4, 3},             /* cost of loading SSE registers
                            in SImode, DImode and TImode */
  {4, 4, 5},             /* cost of storing SSE registers
                            in SImode, DImode and TImode */
  3,                     /* MMX or SSE register to integer */
                         /* On K8:
                             MOVD reg64, xmmreg Double FSTORE 4
                             MOVD reg32, xmmreg Double FSTORE 4
                            On AMDFAM10:
                             MOVD reg64, xmmreg Double FADD 3
                                                1/1  1/1
                             MOVD reg32, xmmreg Double FADD 3
                                                1/1  1/1 */
  32,                    /* size of l1 cache.  */
  512,                   /* size of l2 cache.  */
  64,                    /* size of prefetch block */
  100,                   /* number of parallel prefetches */
  2,                     /* Branch cost */
  COSTS_N_INSNS (4),     /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),     /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),    /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),     /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),     /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),    /* cost of FSQRT instruction.  */

  /* BTVER1 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall can
     do nontemporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                     /* scalar_stmt_cost.  */
  2,                     /* scalar load_cost.  */
  2,                     /* scalar_store_cost.  */
  6,                     /* vec_stmt_cost.  */
  0,                     /* vec_to_scalar_cost.  */
  2,                     /* scalar_to_vec_cost.  */
  2,                     /* vec_align_load_cost.  */
  2,                     /* vec_unalign_load_cost.  */
  2,                     /* vec_store_cost.  */
  2,                     /* cond_taken_branch_cost.  */
  1,                     /* cond_not_taken_branch_cost.  */
};

struct processor_costs pentium4_cost = {
  COSTS_N_INSNS (1),     /* cost of an add instruction */
  COSTS_N_INSNS (3),     /* cost of a lea instruction */
  COSTS_N_INSNS (4),     /* variable shift costs */
  COSTS_N_INSNS (4),     /* constant shift costs */
  {COSTS_N_INSNS (15),   /* cost of starting multiply for QI */
   COSTS_N_INSNS (15),   /* HI */
   COSTS_N_INSNS (15),   /* SI */
   COSTS_N_INSNS (15),   /* DI */
   COSTS_N_INSNS (15)},  /* other */
  0,                     /* cost of multiply per each bit set */
  {COSTS_N_INSNS (56),   /* cost of a divide/mod for QI */
   COSTS_N_INSNS (56),   /* HI */
   COSTS_N_INSNS (56),   /* SI */
   COSTS_N_INSNS (56),   /* DI */
   COSTS_N_INSNS (56)},  /* other */
  COSTS_N_INSNS (1),     /* cost of movsx */
  COSTS_N_INSNS (1),     /* cost of movzx */
  16,                    /* "large" insn */
  2,                     /* cost for loading QImode using movzbl */
  {4, 5, 4},             /* cost of loading integer registers
                            in QImode, HImode and SImode.
                            Relative to reg-reg move (2).  */
  {2, 3, 2},             /* cost of storing integer registers */
  2,                     /* cost of reg,reg fld/fst */
  {2, 2, 6},             /* cost of loading fp registers
                            in SFmode, DFmode and XFmode */
  {4, 4, 6},             /* cost of storing fp registers
                            in SFmode, DFmode and XFmode */
  2,                     /* cost of moving MMX register */
  {2, 2},                /* cost of loading MMX registers
                            in SImode and DImode */
  {2, 2},                /* cost of storing MMX registers
                            in SImode and DImode */
  12,                    /* cost of moving SSE register */
  {12, 12, 12},          /* cost of loading SSE registers
                            in SImode, DImode and TImode */
  {2, 2, 8},             /* cost of storing SSE registers
                            in SImode, DImode and TImode */
  10,                    /* MMX or SSE register to integer */
  8,                     /* size of l1 cache.  */
  256,                   /* size of l2 cache.  */
  64,                    /* size of prefetch block */
  6,                     /* number of parallel prefetches */
  2,                     /* Branch cost */
  COSTS_N_INSNS (5),     /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (7),     /* cost of FMUL instruction.  */
  COSTS_N_INSNS (43),    /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),     /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),     /* cost of FCHS instruction.  */
  COSTS_N_INSNS (43),    /* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
              {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                     /* scalar_stmt_cost.  */
  1,                     /* scalar load_cost.  */
  1,                     /* scalar_store_cost.  */
  1,                     /* vec_stmt_cost.  */
  1,                     /* vec_to_scalar_cost.  */
  1,                     /* scalar_to_vec_cost.  */
  1,                     /* vec_align_load_cost.  */
  2,                     /* vec_unalign_load_cost.  */
  1,                     /* vec_store_cost.  */
  3,                     /* cond_taken_branch_cost.  */
  1,                     /* cond_not_taken_branch_cost.  */
};

struct processor_costs nocona_cost = {
  COSTS_N_INSNS (1),     /* cost of an add instruction */
  COSTS_N_INSNS (1),     /* cost of a lea instruction */
  COSTS_N_INSNS (1),     /* variable shift costs */
  COSTS_N_INSNS (1),     /* constant shift costs */
  {COSTS_N_INSNS (10),   /* cost of starting multiply for QI */
   COSTS_N_INSNS (10),   /* HI */
   COSTS_N_INSNS (10),   /* SI */
   COSTS_N_INSNS (10),   /* DI */
   COSTS_N_INSNS (10)},  /* other */
  0,                     /* cost of multiply per each bit set */
  {COSTS_N_INSNS (66),   /* cost of a divide/mod for QI */
   COSTS_N_INSNS (66),   /* HI */
   COSTS_N_INSNS (66),   /* SI */
   COSTS_N_INSNS (66),   /* DI */
   COSTS_N_INSNS (66)},  /* other */
  COSTS_N_INSNS (1),     /* cost of movsx */
  COSTS_N_INSNS (1),     /* cost of movzx */
  16,                    /* "large" insn */
  17,                    /* MOVE_RATIO */
  4,                     /* cost for loading QImode using movzbl */
  {4, 4, 4},             /* cost of loading integer registers
                            in QImode, HImode and SImode.
                            Relative to reg-reg move (2).  */
  {4, 4, 4},             /* cost of storing integer registers */
  3,                     /* cost of reg,reg fld/fst */
  {12, 12, 12},          /* cost of loading fp registers
                            in SFmode, DFmode and XFmode */
  {4, 4, 4},             /* cost of storing fp registers
                            in SFmode, DFmode and XFmode */
  6,                     /* cost of moving MMX register */
  {12, 12},              /* cost of loading MMX registers
                            in SImode and DImode */
  {12, 12},              /* cost of storing MMX registers
                            in SImode and DImode */
  6,                     /* cost of moving SSE register */
  {12, 12, 12},          /* cost of loading SSE registers
                            in SImode, DImode and TImode */
  {12, 12, 12},          /* cost of storing SSE registers
                            in SImode, DImode and TImode */
  8,                     /* MMX or SSE register to integer */
  8,                     /* size of l1 cache.  */
  1024,                  /* size of l2 cache.  */
  128,                   /* size of prefetch block */
  8,                     /* number of parallel prefetches */
  1,                     /* Branch cost */
  COSTS_N_INSNS (6),     /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),     /* cost of FMUL instruction.  */
  COSTS_N_INSNS (40),    /* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),     /* cost of FABS instruction.  */
  COSTS_N_INSNS (3),     /* cost of FCHS instruction.  */
  COSTS_N_INSNS (44),    /* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
              {100000, unrolled_loop}, {-1, libcall}}}},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
              {-1, libcall}}},
   {libcall, {{24, loop}, {64, unrolled_loop},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,                     /* scalar_stmt_cost.  */
  1,                     /* scalar load_cost.  */
  1,                     /* scalar_store_cost.  */
  1,                     /* vec_stmt_cost.  */
  1,                     /* vec_to_scalar_cost.  */
  1,                     /* scalar_to_vec_cost.  */
  1,                     /* vec_align_load_cost.  */
  2,                     /* vec_unalign_load_cost.  */
  1,                     /* vec_store_cost.  */
  3,                     /* cond_taken_branch_cost.  */
  1,                     /* cond_not_taken_branch_cost.  */
};

struct processor_costs atom_cost = {
  COSTS_N_INSNS (1),     /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
  COSTS_N_INSNS (1),     /* variable shift costs */
  COSTS_N_INSNS (1),     /* constant shift costs */
  {COSTS_N_INSNS (3),    /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),    /* HI */
   COSTS_N_INSNS (3),    /* SI */
   COSTS_N_INSNS (4),    /* DI */
   COSTS_N_INSNS (2)},   /* other */
  0,                     /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),   /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),   /* HI */
   COSTS_N_INSNS (42),   /* SI */
   COSTS_N_INSNS (74),   /* DI */
   COSTS_N_INSNS (74)},  /* other */
  COSTS_N_INSNS (1),     /* cost of movsx */
  COSTS_N_INSNS (1),     /* cost of movzx */
  8,                     /* "large" insn */
  17,                    /* MOVE_RATIO */
  2,                     /* cost for loading QImode using movzbl */
  {4, 4, 4},             /* cost of loading integer registers
                            in QImode, HImode and SImode.
                            Relative to reg-reg move (2).  */
  {4, 4, 4},             /* cost of storing integer registers */
  4,                     /* cost of reg,reg fld/fst */
  {12, 12, 12},          /* cost of loading fp registers
                            in SFmode, DFmode and XFmode */
  {6, 6, 8},             /* cost of storing fp registers
                            in SFmode, DFmode and XFmode */
  2,                     /* cost of moving MMX register */
  {8, 8},                /* cost of loading MMX registers
                            in SImode and DImode */
  {8, 8},                /* cost of storing MMX registers
                            in SImode and DImode */
  2,                     /* cost of moving SSE register */
  {8, 8, 8},             /* cost of loading SSE registers
                            in SImode, DImode and TImode */
  {8, 8, 8},             /* cost of storing SSE registers
                            in SImode, DImode and TImode */
  5,                     /* MMX or SSE register to integer */
  32,                    /* size of l1 cache.  */
  256,                   /* size of l2 cache.  */
  64,                    /* size of prefetch block */
  6,                     /* number of parallel prefetches */
  3,                     /* Branch cost */
  COSTS_N_INSNS (8),     /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),     /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),    /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),     /* cost of FABS instruction.  */
  COSTS_N_INSNS (8),     /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),    /* cost of FSQRT instruction.  */
  {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {64, rep_prefix_4_byte},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {15, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{24, loop}, {32, unrolled_loop},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,                     /* scalar_stmt_cost.  */
  1,                     /* scalar load_cost.  */
  1,                     /* scalar_store_cost.  */
  1,                     /* vec_stmt_cost.  */
  1,                     /* vec_to_scalar_cost.  */
  1,                     /* scalar_to_vec_cost.  */
  1,                     /* vec_align_load_cost.  */
  2,                     /* vec_unalign_load_cost.  */
  1,                     /* vec_store_cost.  */
  3,                     /* cond_taken_branch_cost.  */
  1,                     /* cond_not_taken_branch_cost.  */
};

1640 /* Generic64 should produce code tuned for Nocona and K8. */
1642 struct processor_costs generic64_cost = {
1643 COSTS_N_INSNS (1), /* cost of an add instruction */
1644 /* On all chips taken into consideration lea is 2 cycles and more. With
1645 this cost however our current implementation of synth_mult results in
1646 use of unnecessary temporary registers causing regression on several
1647 SPECfp benchmarks. */
1648 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1649 COSTS_N_INSNS (1), /* variable shift costs */
1650 COSTS_N_INSNS (1), /* constant shift costs */
1651 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1652 COSTS_N_INSNS (4), /* HI */
1653 COSTS_N_INSNS (3), /* SI */
1654 COSTS_N_INSNS (4), /* DI */
1655 COSTS_N_INSNS (2)}, /* other */
1656 0, /* cost of multiply per each bit set */
1657 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1658 COSTS_N_INSNS (26), /* HI */
1659 COSTS_N_INSNS (42), /* SI */
1660 COSTS_N_INSNS (74), /* DI */
1661 COSTS_N_INSNS (74)}, /* other */
1662 COSTS_N_INSNS (1), /* cost of movsx */
1663 COSTS_N_INSNS (1), /* cost of movzx */
1664 8, /* "large" insn */
1665 17, /* MOVE_RATIO */
1666 4, /* cost for loading QImode using movzbl */
1667 {4, 4, 4}, /* cost of loading integer registers
1668 in QImode, HImode and SImode.
1669 Relative to reg-reg move (2). */
1670 {4, 4, 4}, /* cost of storing integer registers */
1671 4, /* cost of reg,reg fld/fst */
1672 {12, 12, 12}, /* cost of loading fp registers
1673 in SFmode, DFmode and XFmode */
1674 {6, 6, 8}, /* cost of storing fp registers
1675 in SFmode, DFmode and XFmode */
1676 2, /* cost of moving MMX register */
1677 {8, 8}, /* cost of loading MMX registers
1678 in SImode and DImode */
1679 {8, 8}, /* cost of storing MMX registers
1680 in SImode and DImode */
1681 2, /* cost of moving SSE register */
1682 {8, 8, 8}, /* cost of loading SSE registers
1683 in SImode, DImode and TImode */
1684 {8, 8, 8}, /* cost of storing SSE registers
1685 in SImode, DImode and TImode */
1686 5, /* MMX or SSE register to integer */
1687 32, /* size of l1 cache. */
1688 512, /* size of l2 cache. */
1689 64, /* size of prefetch block */
1690 6, /* number of parallel prefetches */
1691 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1692 value is increased to the perhaps more appropriate value of 5.  */
1693 3, /* Branch cost */
1694 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1695 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1696 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1697 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1698 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1699 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1700 {DUMMY_STRINGOP_ALGS,
1701 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1702 {DUMMY_STRINGOP_ALGS,
1703 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1704 1, /* scalar_stmt_cost. */
1705 1, /* scalar load_cost. */
1706 1, /* scalar_store_cost. */
1707 1, /* vec_stmt_cost. */
1708 1, /* vec_to_scalar_cost. */
1709 1, /* scalar_to_vec_cost. */
1710 1, /* vec_align_load_cost. */
1711 2, /* vec_unalign_load_cost. */
1712 1, /* vec_store_cost. */
1713 3, /* cond_taken_branch_cost. */
1714 1, /* cond_not_taken_branch_cost. */
1717 /* Generic32 should produce code tuned for PPro, Pentium4, Nocona,
1720 struct processor_costs generic32_cost = {
1721 COSTS_N_INSNS (1), /* cost of an add instruction */
1722 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1723 COSTS_N_INSNS (1), /* variable shift costs */
1724 COSTS_N_INSNS (1), /* constant shift costs */
1725 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1726 COSTS_N_INSNS (4), /* HI */
1727 COSTS_N_INSNS (3), /* SI */
1728 COSTS_N_INSNS (4), /* DI */
1729 COSTS_N_INSNS (2)}, /* other */
1730 0, /* cost of multiply per each bit set */
1731 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1732 COSTS_N_INSNS (26), /* HI */
1733 COSTS_N_INSNS (42), /* SI */
1734 COSTS_N_INSNS (74), /* DI */
1735 COSTS_N_INSNS (74)}, /* other */
1736 COSTS_N_INSNS (1), /* cost of movsx */
1737 COSTS_N_INSNS (1), /* cost of movzx */
1738 8, /* "large" insn */
1739 17, /* MOVE_RATIO */
1740 4, /* cost for loading QImode using movzbl */
1741 {4, 4, 4}, /* cost of loading integer registers
1742 in QImode, HImode and SImode.
1743 Relative to reg-reg move (2). */
1744 {4, 4, 4}, /* cost of storing integer registers */
1745 4, /* cost of reg,reg fld/fst */
1746 {12, 12, 12}, /* cost of loading fp registers
1747 in SFmode, DFmode and XFmode */
1748 {6, 6, 8}, /* cost of storing fp registers
1749 in SFmode, DFmode and XFmode */
1750 2, /* cost of moving MMX register */
1751 {8, 8}, /* cost of loading MMX registers
1752 in SImode and DImode */
1753 {8, 8}, /* cost of storing MMX registers
1754 in SImode and DImode */
1755 2, /* cost of moving SSE register */
1756 {8, 8, 8}, /* cost of loading SSE registers
1757 in SImode, DImode and TImode */
1758 {8, 8, 8}, /* cost of storing SSE registers
1759 in SImode, DImode and TImode */
1760 5, /* MMX or SSE register to integer */
1761 32, /* size of l1 cache. */
1762 256, /* size of l2 cache. */
1763 64, /* size of prefetch block */
1764 6, /* number of parallel prefetches */
1765 3, /* Branch cost */
1766 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1767 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1768 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1769 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1770 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1771 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1772 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1773 DUMMY_STRINGOP_ALGS},
1774 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1775 DUMMY_STRINGOP_ALGS},
1776 1, /* scalar_stmt_cost. */
1777 1, /* scalar load_cost. */
1778 1, /* scalar_store_cost. */
1779 1, /* vec_stmt_cost. */
1780 1, /* vec_to_scalar_cost. */
1781 1, /* scalar_to_vec_cost. */
1782 1, /* vec_align_load_cost. */
1783 2, /* vec_unalign_load_cost. */
1784 1, /* vec_store_cost. */
1785 3, /* cond_taken_branch_cost. */
1786 1, /* cond_not_taken_branch_cost. */
1789 const struct processor_costs *ix86_cost = &pentium_cost;
1791 /* Processor feature/optimization bitmasks. */
1792 #define m_386 (1<<PROCESSOR_I386)
1793 #define m_486 (1<<PROCESSOR_I486)
1794 #define m_PENT (1<<PROCESSOR_PENTIUM)
1795 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1796 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1797 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1798 #define m_CORE2_32 (1<<PROCESSOR_CORE2_32)
1799 #define m_CORE2_64 (1<<PROCESSOR_CORE2_64)
1800 #define m_COREI7_32 (1<<PROCESSOR_COREI7_32)
1801 #define m_COREI7_64 (1<<PROCESSOR_COREI7_64)
1802 #define m_COREI7 (m_COREI7_32 | m_COREI7_64)
1803 #define m_CORE2I7_32 (m_CORE2_32 | m_COREI7_32)
1804 #define m_CORE2I7_64 (m_CORE2_64 | m_COREI7_64)
1805 #define m_CORE2I7 (m_CORE2I7_32 | m_CORE2I7_64)
1806 #define m_ATOM (1<<PROCESSOR_ATOM)
1808 #define m_GEODE (1<<PROCESSOR_GEODE)
1809 #define m_K6 (1<<PROCESSOR_K6)
1810 #define m_K6_GEODE (m_K6 | m_GEODE)
1811 #define m_K8 (1<<PROCESSOR_K8)
1812 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1813 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1814 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1815 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
1816 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
1817 #define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10 | m_BDVER1 | m_BTVER1)
1819 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1820 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1822 /* Generic instruction choice should be a common subset of supported CPUs
1823 (PPro/PENT4/NOCONA/CORE2/Athlon/K8).  */
1824 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
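/* Editorial sketch, not part of the original file: the m_* masks are
   keyed by the processor enum, so testing whether a CPU belongs to a
   tuning set is a single bit test.  */
static inline bool
ix86_cpu_in_set (unsigned int set, enum processor_type cpu)
{
  return (set & (1u << cpu)) != 0;
}
/* For example, ix86_cpu_in_set (m_AMD_MULTIPLE, PROCESSOR_K8) is true,
   since m_AMD_MULTIPLE includes m_K8 above.  */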
1826 /* Feature tests against the various tunings. */
1827 unsigned char ix86_tune_features[X86_TUNE_LAST];
1829 /* Feature tests against the various tunings used to create ix86_tune_features
1830 based on the processor mask. */
1831 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
1832 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1833 negatively, so enabling it for Generic64 seems like a good code-size
1834 tradeoff.  We can't enable it for 32-bit generic because it does not
1835 work well with PPro-based chips.  */
1836 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2I7_64 | m_GENERIC64,
1838 /* X86_TUNE_PUSH_MEMORY */
1839 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1840 | m_NOCONA | m_CORE2I7 | m_GENERIC,
1842 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1845 /* X86_TUNE_UNROLL_STRLEN */
1846 m_486 | m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_K6
1847 | m_CORE2I7 | m_GENERIC,
1849 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in the P4 based
1850 on simulation results.  But after the P4 was made, no performance benefit
1851 was observed with branch hints; they also increase code size.
1852 As a result, icc never generates branch hints.  */
1855 /* X86_TUNE_DOUBLE_WITH_ADD */
1858 /* X86_TUNE_USE_SAHF */
1859 m_ATOM | m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER1 | m_BTVER1
1860 | m_PENT4 | m_NOCONA | m_CORE2I7 | m_GENERIC,
1862 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1863 partial dependencies. */
1864 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA
1865 | m_CORE2I7 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1867 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1868 register stalls on the Generic32 compilation setting as well.  However,
1869 in the current implementation partial register stalls are not eliminated
1870 very well: they can be introduced via subregs synthesized by combine
1871 and can happen in caller/callee saving sequences.  Because this option
1872 pays back little on PPro-based chips and conflicts with the partial
1873 register dependencies used by Athlon/P4-based chips, it is better to
1874 leave it off for generic32 for now.  */
1877 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1878 m_CORE2I7 | m_GENERIC,
1880 /* X86_TUNE_USE_HIMODE_FIOP */
1881 m_386 | m_486 | m_K6_GEODE,
1883 /* X86_TUNE_USE_SIMODE_FIOP */
1884 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_ATOM | m_CORE2I7 | m_GENERIC),
1886 /* X86_TUNE_USE_MOV0 */
1889 /* X86_TUNE_USE_CLTD */
1890 ~(m_PENT | m_ATOM | m_K6 | m_CORE2I7 | m_GENERIC),
1892 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1895 /* X86_TUNE_SPLIT_LONG_MOVES */
1898 /* X86_TUNE_READ_MODIFY_WRITE */
1901 /* X86_TUNE_READ_MODIFY */
1904 /* X86_TUNE_PROMOTE_QIMODE */
1905 m_K6_GEODE | m_PENT | m_ATOM | m_386 | m_486 | m_AMD_MULTIPLE
1906 | m_CORE2I7 | m_GENERIC /* | m_PENT4 ? */,
1908 /* X86_TUNE_FAST_PREFIX */
1909 ~(m_PENT | m_486 | m_386),
1911 /* X86_TUNE_SINGLE_STRINGOP */
1912 m_386 | m_PENT4 | m_NOCONA,
1914 /* X86_TUNE_QIMODE_MATH */
1917 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1918 register stalls.  Just like X86_TUNE_PARTIAL_REG_STALL, this option
1919 might be considered for Generic32 if our scheme for avoiding partial
1920 stalls were more effective.  */
1923 /* X86_TUNE_PROMOTE_QI_REGS */
1926 /* X86_TUNE_PROMOTE_HI_REGS */
1929 /* X86_TUNE_SINGLE_POP: Enable if single pop insn is preferred
1930 over esp addition. */
1931 m_386 | m_486 | m_PENT | m_PPRO,
1933 /* X86_TUNE_DOUBLE_POP: Enable if double pop insn is preferred
1934 over esp addition. */
1937 /* X86_TUNE_SINGLE_PUSH: Enable if single push insn is preferred
1938 over esp subtraction. */
1939 m_386 | m_486 | m_PENT | m_K6_GEODE,
1941 /* X86_TUNE_DOUBLE_PUSH: Enable if double push insn is preferred
1942 over esp subtraction.  */
1943 m_PENT | m_K6_GEODE,
1945 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1946 for DFmode copies */
1947 ~(m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2I7
1948 | m_GENERIC | m_GEODE),
1950 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1951 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2I7 | m_GENERIC,
1953 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1954 conflict here between PPro/Pentium4-based chips that treat 128-bit SSE
1955 registers as single units and K8-based chips that divide SSE registers
1956 into two 64-bit halves.  This knob promotes all store destinations to
1957 128 bits to allow register renaming on 128-bit SSE units, but usually
1958 costs one extra micro-op on 64-bit SSE units.  Experimental results
1959 show that disabling this option on the P4 brings over a 20% SPECfp
1960 regression, while enabling it on the K8 brings a roughly 2.4% regression
1961 that can be partly masked by careful scheduling of moves.  */
1962 m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2I7 | m_GENERIC
1963 | m_AMDFAM10 | m_BDVER1,
1965 /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
1966 m_AMDFAM10 | m_BDVER1 | m_BTVER1 | m_COREI7,
1968 /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
1969 m_BDVER1 | m_COREI7,
1971 /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
1974 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1975 are resolved on SSE register parts instead of whole registers, so we may
1976 maintain just the lower part of scalar values in the proper format,
1977 leaving the upper part undefined.  */
1980 /* X86_TUNE_SSE_TYPELESS_STORES */
1983 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1984 m_PPRO | m_PENT4 | m_NOCONA,
1986 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1987 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2I7 | m_GENERIC,
1989 /* X86_TUNE_PROLOGUE_USING_MOVE */
1990 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2I7 | m_GENERIC,
1992 /* X86_TUNE_EPILOGUE_USING_MOVE */
1993 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2I7 | m_GENERIC,
1995 /* X86_TUNE_SHIFT1 */
1998 /* X86_TUNE_USE_FFREEP */
2001 /* X86_TUNE_INTER_UNIT_MOVES */
2002 ~(m_AMD_MULTIPLE | m_GENERIC),
2004 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
2005 ~(m_AMDFAM10 | m_BDVER1),
2007 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
2008 than 4 branch instructions in a 16-byte window.  */
2009 m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2I7
2012 /* X86_TUNE_SCHEDULE */
2013 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_ATOM | m_CORE2I7
2016 /* X86_TUNE_USE_BT */
2017 m_AMD_MULTIPLE | m_ATOM | m_CORE2I7 | m_GENERIC,
2019 /* X86_TUNE_USE_INCDEC */
2020 ~(m_PENT4 | m_NOCONA | m_CORE2I7 | m_GENERIC | m_ATOM),
2022 /* X86_TUNE_PAD_RETURNS */
2023 m_AMD_MULTIPLE | m_CORE2I7 | m_GENERIC,
2025 /* X86_TUNE_PAD_SHORT_FUNCTION: Pad short functions.  */
2028 /* X86_TUNE_EXT_80387_CONSTANTS */
2029 m_K6_GEODE | m_ATHLON_K8 | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO
2030 | m_CORE2I7 | m_GENERIC,
2032 /* X86_TUNE_SHORTEN_X87_SSE */
2035 /* X86_TUNE_AVOID_VECTOR_DECODE */
2036 m_K8 | m_CORE2I7_64 | m_GENERIC64,
2038 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for HImode
2039 and SImode multiply, but the 386 and 486 do HImode multiply faster.  */
2042 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
2043 vector path on AMD machines. */
2044 m_K8 | m_CORE2I7_64 | m_GENERIC64 | m_AMDFAM10 | m_BDVER1 | m_BTVER1,
2046 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD machines.  */
2048 m_K8 | m_CORE2I7_64 | m_GENERIC64 | m_AMDFAM10 | m_BDVER1 | m_BTVER1,
2050 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR than a MOV.  */
2054 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
2055 but one byte longer. */
2058 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
2059 operand that cannot be represented using a modRM byte. The XOR
2060 replacement is long decoded, so this split helps here as well. */
2063 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion from FP to FP.  */
2065 m_AMDFAM10 | m_CORE2I7 | m_GENERIC,
2067 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
2068 from integer to FP. */
2071 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
2072 with a subsequent conditional jump instruction into a single
2073 compare-and-branch uop. */
2076 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
2077 will impact LEA instruction selection. */
2080 /* X86_TUNE_VECTORIZE_DOUBLE: Enable double precision vector operations.  */
2084 /* X86_TUNE_SOFTWARE_PREFETCHING_BENEFICIAL: Enable software prefetching
2085 at -O3.  For the moment, the prefetching seems badly tuned for Intel chips.  */
2087 m_K6_GEODE | m_AMD_MULTIPLE,
2089 /* X86_TUNE_AVX128_OPTIMAL: Enable 128-bit AVX instruction generation for
2090 the auto-vectorizer. */
2094 /* Feature tests against the various architecture variations. */
2095 unsigned char ix86_arch_features[X86_ARCH_LAST];
2097 /* Feature tests against the various architecture variations, used to create
2098 ix86_arch_features based on the processor mask. */
2099 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2100 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
2101 ~(m_386 | m_486 | m_PENT | m_K6),
2103 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2106 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2109 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2112 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2116 static const unsigned int x86_accumulate_outgoing_args
2117 = m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2I7
2120 static const unsigned int x86_arch_always_fancy_math_387
2121 = m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4
2122 | m_NOCONA | m_CORE2I7 | m_GENERIC;
2124 static const unsigned int x86_avx256_split_unaligned_load
2125 = m_COREI7 | m_GENERIC;
2127 static const unsigned int x86_avx256_split_unaligned_store
2128 = m_COREI7 | m_BDVER1 | m_GENERIC;
2130 /* In case the average insn count for a single function invocation is
2131 lower than this constant, emit a fast (but longer) prologue and epilogue.  */
2133 #define FAST_PROLOGUE_INSN_COUNT 20
2135 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively.  */
2136 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2137 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2138 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2140 /* Array of the smallest class containing reg number REGNO, indexed by
2141 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2143 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2145 /* ax, dx, cx, bx */
2146 AREG, DREG, CREG, BREG,
2147 /* si, di, bp, sp */
2148 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2150 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2151 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2154 /* flags, fpsr, fpcr, frame */
2155 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2157 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2160 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2163 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2164 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2165 /* SSE REX registers */
2166 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
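/* Illustrative reads of this table: REGNO_REG_CLASS (0) is AREG (%eax)
   and REGNO_REG_CLASS (1) is DREG (%edx), following the gcc hard
   register order ax, dx, cx, bx, si, di, bp, sp noted above.  */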
2170 /* The "default" register map used in 32bit mode. */
2172 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2174 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2175 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2176 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2177 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2178 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2179 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2180 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2183 /* The "default" register map used in 64bit mode. */
2185 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2187 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2188 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2189 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2190 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2191 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2192 8,9,10,11,12,13,14,15, /* extended integer registers */
2193 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
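/* Illustrative: in 64-bit mode gcc regno 0 (%rax) is emitted as DWARF
   register 0, while the extended integer registers r8-r15 are emitted
   as DWARF 8-15, exactly as the "extended integer registers" row above
   spells out.  */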
2196 /* Define the register numbers to be used in Dwarf debugging information.
2197 The SVR4 reference port C compiler uses the following register numbers
2198 in its Dwarf output code:
2199 0 for %eax (gcc regno = 0)
2200 1 for %ecx (gcc regno = 2)
2201 2 for %edx (gcc regno = 1)
2202 3 for %ebx (gcc regno = 3)
2203 4 for %esp (gcc regno = 7)
2204 5 for %ebp (gcc regno = 6)
2205 6 for %esi (gcc regno = 4)
2206 7 for %edi (gcc regno = 5)
2207 The following three DWARF register numbers are never generated by
2208 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2209 believes these numbers have these meanings.
2210 8 for %eip (no gcc equivalent)
2211 9 for %eflags (gcc regno = 17)
2212 10 for %trapno (no gcc equivalent)
2213 It is not at all clear how we should number the FP stack registers
2214 for the x86 architecture. If the version of SDB on x86/svr4 were
2215 a bit less brain dead with respect to floating-point then we would
2216 have a precedent to follow with respect to DWARF register numbers
2217 for x86 FP registers, but the SDB on x86/svr4 is so completely
2218 broken with respect to FP registers that it is hardly worth thinking
2219 of it as something to strive for compatibility with.
2220 The version of x86/svr4 SDB I have at the moment does (partially)
2221 seem to believe that DWARF register number 11 is associated with
2222 the x86 register %st(0), but that's about all. Higher DWARF
2223 register numbers don't seem to be associated with anything in
2224 particular, and even for DWARF regno 11, SDB only seems to under-
2225 stand that it should say that a variable lives in %st(0) (when
2226 asked via an `=' command) if we said it was in DWARF regno 11,
2227 but SDB still prints garbage when asked for the value of the
2228 variable in question (via a `/' command).
2229 (Also note that the labels SDB prints for various FP stack regs
2230 when doing an `x' command are all wrong.)
2231 Note that these problems generally don't affect the native SVR4
2232 C compiler because it doesn't allow the use of -O with -g and
2233 because when it is *not* optimizing, it allocates a memory
2234 location for each floating-point variable, and the memory
2235 location is what gets described in the DWARF AT_location
2236 attribute for the variable in question.
2237 Regardless of the severe mental illness of the x86/svr4 SDB, we
2238 do something sensible here and we use the following DWARF
2239 register numbers.  Note that these are all stack-top-relative numbers:
2241 11 for %st(0) (gcc regno = 8)
2242 12 for %st(1) (gcc regno = 9)
2243 13 for %st(2) (gcc regno = 10)
2244 14 for %st(3) (gcc regno = 11)
2245 15 for %st(4) (gcc regno = 12)
2246 16 for %st(5) (gcc regno = 13)
2247 17 for %st(6) (gcc regno = 14)
2248 18 for %st(7) (gcc regno = 15)
2250 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2252 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2253 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2254 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2255 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2256 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2257 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2258 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
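/* Illustrative: gcc regno 4 (%esi) maps to DWARF register 6 here,
   matching the SVR4 numbering documented in the comment above.  */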
2261 /* Define parameter passing and return registers. */
2263 static int const x86_64_int_parameter_registers[6] =
2265 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2268 static int const x86_64_ms_abi_int_parameter_registers[4] =
2270 CX_REG, DX_REG, R8_REG, R9_REG
2273 static int const x86_64_int_return_registers[4] =
2275 AX_REG, DX_REG, DI_REG, SI_REG
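/* Illustrative consequence of these tables (standard SysV x86-64 ABI):
   for long f (long a, long b), A arrives in %rdi (DI_REG) and B in
   %rsi (SI_REG), and the result comes back in %rax (AX_REG).  The MS
   ABI instead starts its argument sequence at %rcx, %rdx as listed
   above.  */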
2278 /* Define the structure for the machine field in struct function. */
2280 struct GTY(()) stack_local_entry {
2281 unsigned short mode;
2284 struct stack_local_entry *next;
2287 /* Structure describing stack frame layout.
2288 Stack grows downward:
2294 saved static chain if ix86_static_chain_on_stack
2296 saved frame pointer if frame_pointer_needed
2297 <- HARD_FRAME_POINTER
2303 <- sse_regs_save_offset
2306 [va_arg registers] |
2310 [padding2] | = to_allocate
2319 int outgoing_arguments_size;
2320 HOST_WIDE_INT frame;
2322 /* The offsets relative to ARG_POINTER. */
2323 HOST_WIDE_INT frame_pointer_offset;
2324 HOST_WIDE_INT hard_frame_pointer_offset;
2325 HOST_WIDE_INT stack_pointer_offset;
2326 HOST_WIDE_INT hfp_save_offset;
2327 HOST_WIDE_INT reg_save_offset;
2328 HOST_WIDE_INT sse_reg_save_offset;
2330 /* When save_regs_using_mov is set, emit prologue using
2331 move instead of push instructions. */
2332 bool save_regs_using_mov;
2335 /* Which CPU we are scheduling for.  */
2336 enum attr_cpu ix86_schedule;
2338 /* Which CPU we are optimizing for.  */
2339 enum processor_type ix86_tune;
2341 /* Which instruction set architecture to use. */
2342 enum processor_type ix86_arch;
2344 /* True if the SSE prefetch instruction is not a NOP.  */
2345 int x86_prefetch_sse;
2347 /* -mstackrealign option */
2348 static const char ix86_force_align_arg_pointer_string[]
2349 = "force_align_arg_pointer";
2351 static rtx (*ix86_gen_leave) (void);
2352 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2353 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2354 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2355 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2356 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2357 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2358 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2359 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2360 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
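/* These function pointers are filled in once the target word size is
   known.  An illustrative sketch (the real assignments live in the
   option override code) is:

     ix86_gen_add3 = TARGET_64BIT ? gen_adddi3 : gen_addsi3;  */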
2362 /* Preferred alignment for stack boundary in bits. */
2363 unsigned int ix86_preferred_stack_boundary;
2365 /* Alignment for incoming stack boundary in bits, as specified on the command line.  */
2367 static unsigned int ix86_user_incoming_stack_boundary;
2369 /* Default alignment for incoming stack boundary in bits. */
2370 static unsigned int ix86_default_incoming_stack_boundary;
2372 /* Alignment for incoming stack boundary in bits. */
2373 unsigned int ix86_incoming_stack_boundary;
2375 /* Calling-ABI-specific va_list type nodes.  */
2376 static GTY(()) tree sysv_va_list_type_node;
2377 static GTY(()) tree ms_va_list_type_node;
2379 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2380 char internal_label_prefix[16];
2381 int internal_label_prefix_len;
2383 /* Fence to use after loop using movnt. */
2386 /* Register class used for passing a given 64-bit part of the argument.
2387 These represent classes as documented by the psABI, with the exception
2388 of the SSESF and SSEDF classes, which are basically the SSE class; gcc
2389 just uses an SFmode or DFmode move instead of DImode to avoid reformatting penalties.
2391 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2392 whenever possible (the upper half does contain padding).  */
2393 enum x86_64_reg_class
2396 X86_64_INTEGER_CLASS,
2397 X86_64_INTEGERSI_CLASS,
2404 X86_64_COMPLEX_X87_CLASS,
2408 #define MAX_CLASSES 4
2410 /* Table of constants used by fldpi, fldln2, etc.  */
2411 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2412 static bool ext_80387_constants_init = 0;
2415 static struct machine_function * ix86_init_machine_status (void);
2416 static rtx ix86_function_value (const_tree, const_tree, bool);
2417 static bool ix86_function_value_regno_p (const unsigned int);
2418 static unsigned int ix86_function_arg_boundary (enum machine_mode,
2420 static rtx ix86_static_chain (const_tree, bool);
2421 static int ix86_function_regparm (const_tree, const_tree);
2422 static void ix86_compute_frame_layout (struct ix86_frame *);
2423 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
2425 static void ix86_add_new_builtins (int);
2426 static rtx ix86_expand_vec_perm_builtin (tree);
2427 static tree ix86_canonical_va_list_type (tree);
2428 static void predict_jump (int);
2429 static unsigned int split_stack_prologue_scratch_regno (void);
2430 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2432 enum ix86_function_specific_strings
2434 IX86_FUNCTION_SPECIFIC_ARCH,
2435 IX86_FUNCTION_SPECIFIC_TUNE,
2436 IX86_FUNCTION_SPECIFIC_MAX
2439 static char *ix86_target_string (int, int, const char *, const char *,
2440 enum fpmath_unit, bool);
2441 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
2442 static void ix86_function_specific_save (struct cl_target_option *);
2443 static void ix86_function_specific_restore (struct cl_target_option *);
2444 static void ix86_function_specific_print (FILE *, int,
2445 struct cl_target_option *);
2446 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2447 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2448 struct gcc_options *);
2449 static bool ix86_can_inline_p (tree, tree);
2450 static void ix86_set_current_function (tree);
2451 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2453 static enum calling_abi ix86_function_abi (const_tree);
2456 #ifndef SUBTARGET32_DEFAULT_CPU
2457 #define SUBTARGET32_DEFAULT_CPU "i386"
2460 /* The svr4 ABI for the i386 says that records and unions are returned in memory.  */
2462 #ifndef DEFAULT_PCC_STRUCT_RETURN
2463 #define DEFAULT_PCC_STRUCT_RETURN 1
2466 /* Whether -mtune= or -march= was specified.  */
2467 static int ix86_tune_defaulted;
2468 static int ix86_arch_specified;
2470 /* Vectorization library interface and handlers. */
2471 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2473 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2474 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2476 /* Processor target table, indexed by processor number */
2479 const struct processor_costs *cost; /* Processor costs */
2480 const int align_loop; /* Default alignments. */
2481 const int align_loop_max_skip;
2482 const int align_jump;
2483 const int align_jump_max_skip;
2484 const int align_func;
2487 static const struct ptt processor_target_table[PROCESSOR_max] =
2489 {&i386_cost, 4, 3, 4, 3, 4},
2490 {&i486_cost, 16, 15, 16, 15, 16},
2491 {&pentium_cost, 16, 7, 16, 7, 16},
2492 {&pentiumpro_cost, 16, 15, 16, 10, 16},
2493 {&geode_cost, 0, 0, 0, 0, 0},
2494 {&k6_cost, 32, 7, 32, 7, 32},
2495 {&athlon_cost, 16, 7, 16, 7, 16},
2496 {&pentium4_cost, 0, 0, 0, 0, 0},
2497 {&k8_cost, 16, 7, 16, 7, 16},
2498 {&nocona_cost, 0, 0, 0, 0, 0},
2499 /* Core 2 32-bit. */
2500 {&generic32_cost, 16, 10, 16, 10, 16},
2501 /* Core 2 64-bit. */
2502 {&generic64_cost, 16, 10, 16, 10, 16},
2503 /* Core i7 32-bit. */
2504 {&generic32_cost, 16, 10, 16, 10, 16},
2505 /* Core i7 64-bit. */
2506 {&generic64_cost, 16, 10, 16, 10, 16},
2507 {&generic32_cost, 16, 7, 16, 7, 16},
2508 {&generic64_cost, 16, 10, 16, 10, 16},
2509 {&amdfam10_cost, 32, 24, 32, 7, 32},
2510 {&bdver1_cost, 32, 24, 32, 7, 32},
2511 {&btver1_cost, 32, 24, 32, 7, 32},
2512 {&atom_cost, 16, 7, 16, 7, 16}
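/* Illustrative: with -mtune=k8 the row {&k8_cost, 16, 7, 16, 7, 16}
   above supplies align_loops = 16 and align_jumps = 16 whenever the
   user leaves those options at 0; see the align_* defaulting in
   ix86_option_override_internal below.  */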
2515 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
2544 /* Return true if a red-zone is in use. */
2547 ix86_using_red_zone (void)
2549 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2552 /* Return a string that documents the current -m options. The caller is
2553 responsible for freeing the string. */
2556 ix86_target_string (int isa, int flags, const char *arch, const char *tune,
2557 enum fpmath_unit fpmath, bool add_nl_p)
2559 struct ix86_target_opts
2561 const char *option; /* option string */
2562 int mask; /* isa mask options */
2565 /* This table is ordered so that options like -msse4.2, which imply
2566 preceding options, match those first.  */
2567 static struct ix86_target_opts isa_opts[] =
2569 { "-m64", OPTION_MASK_ISA_64BIT },
2570 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2571 { "-mfma", OPTION_MASK_ISA_FMA },
2572 { "-mxop", OPTION_MASK_ISA_XOP },
2573 { "-mlwp", OPTION_MASK_ISA_LWP },
2574 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2575 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2576 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2577 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2578 { "-msse3", OPTION_MASK_ISA_SSE3 },
2579 { "-msse2", OPTION_MASK_ISA_SSE2 },
2580 { "-msse", OPTION_MASK_ISA_SSE },
2581 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2582 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2583 { "-mmmx", OPTION_MASK_ISA_MMX },
2584 { "-mabm", OPTION_MASK_ISA_ABM },
2585 { "-mbmi", OPTION_MASK_ISA_BMI },
2586 { "-mtbm", OPTION_MASK_ISA_TBM },
2587 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2588 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2589 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2590 { "-maes", OPTION_MASK_ISA_AES },
2591 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2592 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2593 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2594 { "-mf16c", OPTION_MASK_ISA_F16C },
2598 static struct ix86_target_opts flag_opts[] =
2600 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2601 { "-m80387", MASK_80387 },
2602 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2603 { "-malign-double", MASK_ALIGN_DOUBLE },
2604 { "-mcld", MASK_CLD },
2605 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2606 { "-mieee-fp", MASK_IEEE_FP },
2607 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2608 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2609 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2610 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2611 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2612 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2613 { "-mno-red-zone", MASK_NO_RED_ZONE },
2614 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2615 { "-mrecip", MASK_RECIP },
2616 { "-mrtd", MASK_RTD },
2617 { "-msseregparm", MASK_SSEREGPARM },
2618 { "-mstack-arg-probe", MASK_STACK_PROBE },
2619 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2620 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2621 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2622 { "-mvzeroupper", MASK_VZEROUPPER },
2623 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
2624 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
2625 { "-mprefer-avx128", MASK_PREFER_AVX128},
2628 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2631 char target_other[40];
2640 memset (opts, '\0', sizeof (opts));
2642 /* Add -march= option. */
2645 opts[num][0] = "-march=";
2646 opts[num++][1] = arch;
2649 /* Add -mtune= option. */
2652 opts[num][0] = "-mtune=";
2653 opts[num++][1] = tune;
2656 /* Pick out the options in isa options. */
2657 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2659 if ((isa & isa_opts[i].mask) != 0)
2661 opts[num++][0] = isa_opts[i].option;
2662 isa &= ~ isa_opts[i].mask;
2666 if (isa && add_nl_p)
2668 opts[num++][0] = isa_other;
2669 sprintf (isa_other, "(other isa: %#x)", isa);
2672 /* Add flag options. */
2673 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2675 if ((flags & flag_opts[i].mask) != 0)
2677 opts[num++][0] = flag_opts[i].option;
2678 flags &= ~ flag_opts[i].mask;
2682 if (flags && add_nl_p)
2684 opts[num++][0] = target_other;
2685 sprintf (target_other, "(other flags: %#x)", flags);
2688 /* Add -fpmath= option. */
2691 opts[num][0] = "-mfpmath=";
2692 switch ((int) fpmath)
2695 opts[num++][1] = "387";
2699 opts[num++][1] = "sse";
2702 case FPMATH_387 | FPMATH_SSE:
2703 opts[num++][1] = "sse+387";
2715 gcc_assert (num < ARRAY_SIZE (opts));
2717 /* Size the string. */
2719 sep_len = (add_nl_p) ? 3 : 1;
2720 for (i = 0; i < num; i++)
2723 for (j = 0; j < 2; j++)
2725 len += strlen (opts[i][j]);
2728 /* Build the string. */
2729 ret = ptr = (char *) xmalloc (len);
2732 for (i = 0; i < num; i++)
2736 for (j = 0; j < 2; j++)
2737 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2744 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2752 for (j = 0; j < 2; j++)
2755 memcpy (ptr, opts[i][j], len2[j]);
2757 line_len += len2[j];
2762 gcc_assert (ret + len >= ptr);
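/* Illustrative output (an assumption about a typical invocation, not a
   recorded result): for -march=k8 in 64-bit SSE2 mode the returned
   string would resemble "-march=k8 -mtune=k8 -m64 -msse2 -msse -mmmx",
   since -march/-mtune are added first and ISA options follow in table
   order.  */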
2767 /* Return true if profiling code should be emitted before the
2768 prologue, otherwise false.
2769 Note: for x86 with "hotfix" this is sorried (i.e. not supported).  */
2771 ix86_profile_before_prologue (void)
2773 return flag_fentry != 0;
2776 /* Function that is callable from the debugger to print the current options.  */
2779 ix86_debug_options (void)
2781 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2782 ix86_arch_string, ix86_tune_string,
2787 fprintf (stderr, "%s\n\n", opts);
2791 fputs ("<no options>\n\n", stderr);
2796 /* Override various settings based on options.  If MAIN_ARGS_P, the
2797 options are from the command line, otherwise they are from attribute(target).  */
2801 ix86_option_override_internal (bool main_args_p)
2804 unsigned int ix86_arch_mask, ix86_tune_mask;
2805 const bool ix86_tune_specified = (ix86_tune_string != NULL);
2816 PTA_PREFETCH_SSE = 1 << 4,
2818 PTA_3DNOW_A = 1 << 6,
2822 PTA_POPCNT = 1 << 10,
2824 PTA_SSE4A = 1 << 12,
2825 PTA_NO_SAHF = 1 << 13,
2826 PTA_SSE4_1 = 1 << 14,
2827 PTA_SSE4_2 = 1 << 15,
2829 PTA_PCLMUL = 1 << 17,
2832 PTA_MOVBE = 1 << 20,
2836 PTA_FSGSBASE = 1 << 24,
2837 PTA_RDRND = 1 << 25,
2841 /* if this reaches 32, need to widen struct pta flags below */
2846 const char *const name; /* processor name or nickname. */
2847 const enum processor_type processor;
2848 const enum attr_cpu schedule;
2849 const unsigned /*enum pta_flags*/ flags;
2851 const processor_alias_table[] =
2853 {"i386", PROCESSOR_I386, CPU_NONE, 0},
2854 {"i486", PROCESSOR_I486, CPU_NONE, 0},
2855 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2856 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2857 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
2858 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
2859 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2860 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2861 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
2862 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2863 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2864 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
2865 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2867 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2869 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2870 PTA_MMX | PTA_SSE | PTA_SSE2},
2871 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
2872 PTA_MMX |PTA_SSE | PTA_SSE2},
2873 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
2874 PTA_MMX | PTA_SSE | PTA_SSE2},
2875 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
2876 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2877 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
2878 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2879 | PTA_CX16 | PTA_NO_SAHF},
2880 {"core2", PROCESSOR_CORE2_64, CPU_CORE2,
2881 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2882 | PTA_SSSE3 | PTA_CX16},
2883 {"corei7", PROCESSOR_COREI7_64, CPU_COREI7,
2884 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2885 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_CX16},
2886 {"corei7-avx", PROCESSOR_COREI7_64, CPU_COREI7,
2887 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2888 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX
2889 | PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL},
2890 {"atom", PROCESSOR_ATOM, CPU_ATOM,
2891 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2892 | PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
2893 {"geode", PROCESSOR_GEODE, CPU_GEODE,
2894 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A |PTA_PREFETCH_SSE},
2895 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
2896 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2897 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2898 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
2899 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2900 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
2901 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2902 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
2903 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2904 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
2905 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2906 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
2907 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2908 {"x86-64", PROCESSOR_K8, CPU_K8,
2909 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
2910 {"k8", PROCESSOR_K8, CPU_K8,
2911 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2912 | PTA_SSE2 | PTA_NO_SAHF},
2913 {"k8-sse3", PROCESSOR_K8, CPU_K8,
2914 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2915 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2916 {"opteron", PROCESSOR_K8, CPU_K8,
2917 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2918 | PTA_SSE2 | PTA_NO_SAHF},
2919 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
2920 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2921 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2922 {"athlon64", PROCESSOR_K8, CPU_K8,
2923 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2924 | PTA_SSE2 | PTA_NO_SAHF},
2925 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
2926 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2927 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2928 {"athlon-fx", PROCESSOR_K8, CPU_K8,
2929 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2930 | PTA_SSE2 | PTA_NO_SAHF},
2931 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2932 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2933 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2934 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2935 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2936 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2937 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
2938 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2939 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
2940 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
2941 | PTA_XOP | PTA_LWP},
2942 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC64,
2943 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2944 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16},
2945 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
2946 0 /* flags are only used for -march switch. */ },
2947 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
2948 PTA_64BIT /* flags are only used for -march switch. */ },
2951 int const pta_size = ARRAY_SIZE (processor_alias_table);
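/* Illustrative: -march=core2 matches the PROCESSOR_CORE2_64 row above
   and, via its PTA_* bits, turns on MMX, SSE, SSE2, SSE3, SSSE3 and
   CX16, unless the user explicitly disabled one of them (see the
   ix86_isa_flags_explicit checks below).  */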
2953 /* Set up prefix/suffix so the error messages refer to either the command
2954 line argument, or the attribute(target). */
2963 prefix = "option(\"";
2968 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2969 SUBTARGET_OVERRIDE_OPTIONS;
2972 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2973 SUBSUBTARGET_OVERRIDE_OPTIONS;
2977 ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
2979 /* -fPIC is the default for x86_64. */
2980 if (TARGET_MACHO && TARGET_64BIT)
2983 /* Need to check -mtune=generic first. */
2984 if (ix86_tune_string)
2986 if (!strcmp (ix86_tune_string, "generic")
2987 || !strcmp (ix86_tune_string, "i686")
2988 /* As special support for cross compilers we read -mtune=native
2989 as -mtune=generic.  With native compilers we won't see
2990 -mtune=native, as it will have been rewritten by the driver.  */
2991 || !strcmp (ix86_tune_string, "native"))
2994 ix86_tune_string = "generic64";
2996 ix86_tune_string = "generic32";
2998 /* If this call is for setting the option attribute, allow the
2999 generic32/generic64 that was previously set. */
3000 else if (!main_args_p
3001 && (!strcmp (ix86_tune_string, "generic32")
3002 || !strcmp (ix86_tune_string, "generic64")))
3004 else if (!strncmp (ix86_tune_string, "generic", 7))
3005 error ("bad value (%s) for %stune=%s %s",
3006 ix86_tune_string, prefix, suffix, sw);
3007 else if (!strcmp (ix86_tune_string, "x86-64"))
3008 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3009 "%stune=k8%s or %stune=generic%s instead as appropriate",
3010 prefix, suffix, prefix, suffix, prefix, suffix);
3014 if (ix86_arch_string)
3015 ix86_tune_string = ix86_arch_string;
3016 if (!ix86_tune_string)
3018 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
3019 ix86_tune_defaulted = 1;
3022 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
3023 need to use a sensible tune option. */
3024 if (!strcmp (ix86_tune_string, "generic")
3025 || !strcmp (ix86_tune_string, "x86-64")
3026 || !strcmp (ix86_tune_string, "i686"))
3029 ix86_tune_string = "generic64";
3031 ix86_tune_string = "generic32";
3035 if (ix86_stringop_alg == rep_prefix_8_byte && !TARGET_64BIT)
3037 /* rep; movq isn't available in 32-bit code. */
3038 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3039 ix86_stringop_alg = no_stringop;
3042 if (!ix86_arch_string)
3043 ix86_arch_string = TARGET_64BIT ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3045 ix86_arch_specified = 1;
3047 if (!global_options_set.x_ix86_abi)
3048 ix86_abi = DEFAULT_ABI;
3050 if (global_options_set.x_ix86_cmodel)
3052 switch (ix86_cmodel)
3057 ix86_cmodel = CM_SMALL_PIC;
3059 error ("code model %qs not supported in the %s bit mode",
3066 ix86_cmodel = CM_MEDIUM_PIC;
3068 error ("code model %qs not supported in the %s bit mode",
3070 else if (TARGET_X32)
3071 error ("code model %qs not supported in x32 mode",
3078 ix86_cmodel = CM_LARGE_PIC;
3080 error ("code model %qs not supported in the %s bit mode",
3082 else if (TARGET_X32)
3083 error ("code model %qs not supported in x32 mode",
3089 error ("code model %s does not support PIC mode", "32");
3091 error ("code model %qs not supported in the %s bit mode",
3098 error ("code model %s does not support PIC mode", "kernel");
3099 ix86_cmodel = CM_32;
3102 error ("code model %qs not supported in the %s bit mode",
3112 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3113 use of rip-relative addressing. This eliminates fixups that
3114 would otherwise be needed if this object is to be placed in a
3115 DLL, and is essentially just as efficient as direct addressing. */
3116 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
3117 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
3118 else if (TARGET_64BIT)
3119 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
3121 ix86_cmodel = CM_32;
3123 if (TARGET_MACHO && ix86_asm_dialect == ASM_INTEL)
3125 error ("-masm=intel not supported in this configuration");
3126 ix86_asm_dialect = ASM_ATT;
3128 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3129 sorry ("%i-bit mode not compiled in",
3130 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3132 for (i = 0; i < pta_size; i++)
3133 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
3135 ix86_schedule = processor_alias_table[i].schedule;
3136 ix86_arch = processor_alias_table[i].processor;
3137 /* Default cpu tuning to the architecture. */
3138 ix86_tune = ix86_arch;
3140 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3141 error ("CPU you selected does not support x86-64 "
3144 if (processor_alias_table[i].flags & PTA_MMX
3145 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3146 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3147 if (processor_alias_table[i].flags & PTA_3DNOW
3148 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3149 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3150 if (processor_alias_table[i].flags & PTA_3DNOW_A
3151 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3152 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3153 if (processor_alias_table[i].flags & PTA_SSE
3154 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3155 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3156 if (processor_alias_table[i].flags & PTA_SSE2
3157 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3158 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3159 if (processor_alias_table[i].flags & PTA_SSE3
3160 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3161 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3162 if (processor_alias_table[i].flags & PTA_SSSE3
3163 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3164 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3165 if (processor_alias_table[i].flags & PTA_SSE4_1
3166 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3167 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3168 if (processor_alias_table[i].flags & PTA_SSE4_2
3169 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3170 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3171 if (processor_alias_table[i].flags & PTA_AVX
3172 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3173 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3174 if (processor_alias_table[i].flags & PTA_FMA
3175 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3176 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3177 if (processor_alias_table[i].flags & PTA_SSE4A
3178 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3179 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3180 if (processor_alias_table[i].flags & PTA_FMA4
3181 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3182 ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3183 if (processor_alias_table[i].flags & PTA_XOP
3184 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3185 ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3186 if (processor_alias_table[i].flags & PTA_LWP
3187 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3188 ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3189 if (processor_alias_table[i].flags & PTA_ABM
3190 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3191 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3192 if (processor_alias_table[i].flags & PTA_BMI
3193 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3194 ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3195 if (processor_alias_table[i].flags & PTA_TBM
3196 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3197 ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3198 if (processor_alias_table[i].flags & PTA_CX16
3199 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3200 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3201 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3202 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3203 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3204 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
3205 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3206 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3207 if (processor_alias_table[i].flags & PTA_MOVBE
3208 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3209 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3210 if (processor_alias_table[i].flags & PTA_AES
3211 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3212 ix86_isa_flags |= OPTION_MASK_ISA_AES;
3213 if (processor_alias_table[i].flags & PTA_PCLMUL
3214 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3215 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3216 if (processor_alias_table[i].flags & PTA_FSGSBASE
3217 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3218 ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3219 if (processor_alias_table[i].flags & PTA_RDRND
3220 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3221 ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3222 if (processor_alias_table[i].flags & PTA_F16C
3223 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3224 ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3225 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3226 x86_prefetch_sse = true;
3231 if (!strcmp (ix86_arch_string, "generic"))
3232 error ("generic CPU can be used only for %stune=%s %s",
3233 prefix, suffix, sw);
3234 else if (!strncmp (ix86_arch_string, "generic", 7) || i == pta_size)
3235 error ("bad value (%s) for %sarch=%s %s",
3236 ix86_arch_string, prefix, suffix, sw);
3238 ix86_arch_mask = 1u << ix86_arch;
3239 for (i = 0; i < X86_ARCH_LAST; ++i)
3240 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
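/* Illustrative: after this loop, -march=pentium leaves X86_ARCH_CMOVE
   clear (m_PENT is part of the negated mask above), while -march=i686
   sets it.  */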
3242 for (i = 0; i < pta_size; i++)
3243 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
3245 ix86_schedule = processor_alias_table[i].schedule;
3246 ix86_tune = processor_alias_table[i].processor;
3249 if (!(processor_alias_table[i].flags & PTA_64BIT))
3251 if (ix86_tune_defaulted)
3253 ix86_tune_string = "x86-64";
3254 for (i = 0; i < pta_size; i++)
3255 if (! strcmp (ix86_tune_string,
3256 processor_alias_table[i].name))
3258 ix86_schedule = processor_alias_table[i].schedule;
3259 ix86_tune = processor_alias_table[i].processor;
3262 error ("CPU you selected does not support x86-64 "
3268 /* Adjust tuning when compiling for 32-bit ABI. */
3271 case PROCESSOR_GENERIC64:
3272 ix86_tune = PROCESSOR_GENERIC32;
3273 ix86_schedule = CPU_PENTIUMPRO;
3276 case PROCESSOR_CORE2_64:
3277 ix86_tune = PROCESSOR_CORE2_32;
3280 case PROCESSOR_COREI7_64:
3281 ix86_tune = PROCESSOR_COREI7_32;
3288 /* Intel CPUs have always interpreted SSE prefetch instructions as
3289 NOPs; so, we can enable SSE prefetch instructions even when
3290 -mtune (rather than -march) points us to a processor that has them.
3291 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3292 higher processors. */
3294 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3295 x86_prefetch_sse = true;
3299 if (ix86_tune_specified && i == pta_size)
3300 error ("bad value (%s) for %stune=%s %s",
3301 ix86_tune_string, prefix, suffix, sw);
3303 ix86_tune_mask = 1u << ix86_tune;
3304 for (i = 0; i < X86_TUNE_LAST; ++i)
3305 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3307 #ifndef USE_IX86_FRAME_POINTER
3308 #define USE_IX86_FRAME_POINTER 0
3311 #ifndef USE_X86_64_FRAME_POINTER
3312 #define USE_X86_64_FRAME_POINTER 0
3315 /* Set the default values for switches whose default depends on TARGET_64BIT
3316 in case they weren't overwritten by command line options. */
3319 if (optimize > 1 && !global_options_set.x_flag_zee)
3321 if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
3322 flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3323 if (flag_asynchronous_unwind_tables == 2)
3324 flag_unwind_tables = flag_asynchronous_unwind_tables = 1;
3325 if (flag_pcc_struct_return == 2)
3326 flag_pcc_struct_return = 0;
3330 if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
3331 flag_omit_frame_pointer = !(USE_IX86_FRAME_POINTER || optimize_size);
3332 if (flag_asynchronous_unwind_tables == 2)
3333 flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3334 if (flag_pcc_struct_return == 2)
3335 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3339 ix86_cost = &ix86_size_cost;
3341 ix86_cost = processor_target_table[ix86_tune].cost;
3343 /* Arrange to set up i386_stack_locals for all functions. */
3344 init_machine_status = ix86_init_machine_status;
3346 /* Validate -mregparm= value. */
3347 if (global_options_set.x_ix86_regparm)
3350 warning (0, "-mregparm is ignored in 64-bit mode");
3351 if (ix86_regparm > REGPARM_MAX)
3353 error ("-mregparm=%d is not between 0 and %d",
3354 ix86_regparm, REGPARM_MAX);
3359 ix86_regparm = REGPARM_MAX;
3361 /* Default align_* from the processor table. */
3362 if (align_loops == 0)
3364 align_loops = processor_target_table[ix86_tune].align_loop;
3365 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3367 if (align_jumps == 0)
3369 align_jumps = processor_target_table[ix86_tune].align_jump;
3370 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3372 if (align_functions == 0)
3374 align_functions = processor_target_table[ix86_tune].align_func;
3377 /* Provide default for -mbranch-cost= value. */
3378 if (!global_options_set.x_ix86_branch_cost)
3379 ix86_branch_cost = ix86_cost->branch_cost;
3383 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3385 /* Enable by default the SSE and MMX builtins. Do allow the user to
3386 explicitly disable any of these. In particular, disabling SSE and
3387 MMX for kernel code is extremely useful. */
3388 if (!ix86_arch_specified)
3390 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3391 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3394 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3398 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3400 if (!ix86_arch_specified)
3402 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3404 /* The i386 ABI does not specify a red zone.  It still makes sense to use
3405 one when the programmer takes care to keep the stack from being destroyed.  */
3406 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3407 target_flags |= MASK_NO_RED_ZONE;
3410 /* Keep nonleaf frame pointers. */
3411 if (flag_omit_frame_pointer)
3412 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3413 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3414 flag_omit_frame_pointer = 1;
3416 /* If we're doing fast math, we don't care about comparison order
3417 wrt NaNs. This lets us use a shorter comparison sequence. */
3418 if (flag_finite_math_only)
3419 target_flags &= ~MASK_IEEE_FP;
3421 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3422 since the insns won't need emulation. */
3423 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3424 target_flags &= ~MASK_NO_FANCY_MATH_387;
3426 /* Likewise, if the target doesn't have a 387, or we've specified
3427 software floating point, don't use 387 inline intrinsics. */
3429 target_flags |= MASK_NO_FANCY_MATH_387;
3431 /* Turn on MMX builtins for -msse. */
3434 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3435 x86_prefetch_sse = true;
3438 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3439 if (TARGET_SSE4_2 || TARGET_ABM)
3440 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3442 /* Validate -mpreferred-stack-boundary= value or default it to
3443 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3444 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3445 if (global_options_set.x_ix86_preferred_stack_boundary_arg)
3447 int min = (TARGET_64BIT ? 4 : 2);
3448 int max = (TARGET_SEH ? 4 : 12);
3450 if (ix86_preferred_stack_boundary_arg < min
3451 || ix86_preferred_stack_boundary_arg > max)
3454 error ("-mpreferred-stack-boundary is not supported "
3457 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
3458 ix86_preferred_stack_boundary_arg, min, max);
3461 ix86_preferred_stack_boundary
3462 = (1 << ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
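/* Worked example: -mpreferred-stack-boundary=4 yields
   (1 << 4) * BITS_PER_UNIT = 16 * 8 = 128 bits, i.e. the usual
   16-byte-aligned stack.  */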
3465 /* Set the default value for -mstackrealign. */
3466 if (ix86_force_align_arg_pointer == -1)
3467 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3469 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3471 /* Validate -mincoming-stack-boundary= value or default it to
3472 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3473 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3474 if (global_options_set.x_ix86_incoming_stack_boundary_arg)
3476 if (ix86_incoming_stack_boundary_arg < (TARGET_64BIT ? 4 : 2)
3477 || ix86_incoming_stack_boundary_arg > 12)
3478 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3479 ix86_incoming_stack_boundary_arg, TARGET_64BIT ? 4 : 2);
3482 ix86_user_incoming_stack_boundary
3483 = (1 << ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
3484 ix86_incoming_stack_boundary
3485 = ix86_user_incoming_stack_boundary;
3489 /* Accept -msseregparm only if at least SSE support is enabled. */
3490 if (TARGET_SSEREGPARM
3492 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3494 if (global_options_set.x_ix86_fpmath)
3496 if (ix86_fpmath & FPMATH_SSE)
3500 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3501 ix86_fpmath = FPMATH_387;
3503 else if ((ix86_fpmath & FPMATH_387) && !TARGET_80387)
3505 warning (0, "387 instruction set disabled, using SSE arithmetics");
3506 ix86_fpmath = FPMATH_SSE;
3511 ix86_fpmath = TARGET_FPMATH_DEFAULT;
3513 /* If the i387 is disabled, then do not return values in it. */
3515 target_flags &= ~MASK_FLOAT_RETURNS;
/* Use an external vectorized library when vectorizing intrinsics.  */
3518 if (global_options_set.x_ix86_veclibabi_type)
3519 switch (ix86_veclibabi_type)
3521 case ix86_veclibabi_type_svml:
3522 ix86_veclib_handler = ix86_veclibabi_svml;
3525 case ix86_veclibabi_type_acml:
3526 ix86_veclib_handler = ix86_veclibabi_acml;
3533 if ((!USE_IX86_FRAME_POINTER
3534 || (x86_accumulate_outgoing_args & ix86_tune_mask))
3535 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3537 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
/* ??? Unwind info is not correct around the CFG unless either a frame
   pointer is present or M_A_O_A is set.  Fixing this requires rewriting
   unwind info generation to be aware of the CFG and propagating states
   around edges.  */
3543 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
3544 || flag_exceptions || flag_non_call_exceptions)
3545 && flag_omit_frame_pointer
3546 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3548 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3549 warning (0, "unwind tables currently require either a frame pointer "
3550 "or %saccumulate-outgoing-args%s for correctness",
3552 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3555 /* If stack probes are required, the space used for large function
3556 arguments on the stack must also be probed, so enable
3557 -maccumulate-outgoing-args so this happens in the prologue. */
3558 if (TARGET_STACK_PROBE
3559 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3561 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3562 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3563 "for correctness", prefix, suffix);
3564 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
/* For sane SSE instruction set generation we need the fcomi instruction.
3568 It is safe to enable all CMOVE instructions. Also, RDRAND intrinsic
3569 expands to a sequence that includes conditional move. */
3570 if (TARGET_SSE || TARGET_RDRND)
3573 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3576 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
3577 p = strchr (internal_label_prefix, 'X');
3578 internal_label_prefix_len = p - internal_label_prefix;
/* When no scheduling description is available, disable the scheduler pass
3583 so it won't slow down the compilation and make x87 code slower. */
3584 if (!TARGET_SCHEDULE)
3585 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
3587 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3588 ix86_cost->simultaneous_prefetches,
3589 global_options.x_param_values,
3590 global_options_set.x_param_values);
3591 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, ix86_cost->prefetch_block,
3592 global_options.x_param_values,
3593 global_options_set.x_param_values);
3594 maybe_set_param_value (PARAM_L1_CACHE_SIZE, ix86_cost->l1_cache_size,
3595 global_options.x_param_values,
3596 global_options_set.x_param_values);
3597 maybe_set_param_value (PARAM_L2_CACHE_SIZE, ix86_cost->l2_cache_size,
3598 global_options.x_param_values,
3599 global_options_set.x_param_values);
/* Enable software prefetching at -O3 for CPUs where prefetching is helpful.  */
3602 if (flag_prefetch_loop_arrays < 0
3605 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
3606 flag_prefetch_loop_arrays = 1;
3608 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3609 can be optimized to ap = __builtin_next_arg (0). */
3610 if (!TARGET_64BIT && !flag_split_stack)
3611 targetm.expand_builtin_va_start = NULL;
3615 ix86_gen_leave = gen_leave_rex64;
3616 ix86_gen_add3 = gen_adddi3;
3617 ix86_gen_sub3 = gen_subdi3;
3618 ix86_gen_sub3_carry = gen_subdi3_carry;
3619 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
3620 ix86_gen_monitor = gen_sse3_monitor64;
3621 ix86_gen_andsp = gen_anddi3;
3622 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
3623 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
3624 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
3628 ix86_gen_leave = gen_leave;
3629 ix86_gen_add3 = gen_addsi3;
3630 ix86_gen_sub3 = gen_subsi3;
3631 ix86_gen_sub3_carry = gen_subsi3_carry;
3632 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
3633 ix86_gen_monitor = gen_sse3_monitor;
3634 ix86_gen_andsp = gen_andsi3;
3635 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
3636 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
3637 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
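/* These generator hooks let the rest of the backend emit word-size-agnostic
   RTL.  A sketch of a typical (hypothetical) use site:

     emit_insn (ix86_gen_add3 (dest, src1, src2));

   which expands to adddi3 in 64-bit mode and addsi3 in 32-bit mode without
   the caller having to test TARGET_64BIT itself.  */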
3641 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
3643 target_flags |= MASK_CLD & ~target_flags_explicit;
3646 if (!TARGET_64BIT && flag_pic)
3648 if (flag_fentry > 0)
3649 sorry ("-mfentry isn%'t supported for 32-bit in combination "
3653 else if (TARGET_SEH)
3655 if (flag_fentry == 0)
3656 sorry ("-mno-fentry isn%'t compatible with SEH");
3659 else if (flag_fentry < 0)
3661 #if defined(PROFILE_BEFORE_PROLOGUE)
/* When not optimizing for size, enable the vzeroupper optimization for
   TARGET_AVX together with -fexpensive-optimizations, and split 32-byte
   AVX unaligned loads/stores.  */
3675 if (flag_expensive_optimizations
3676 && !(target_flags_explicit & MASK_VZEROUPPER))
3677 target_flags |= MASK_VZEROUPPER;
3678 if ((x86_avx256_split_unaligned_load & ix86_tune_mask)
3679 && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
3680 target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
3681 if ((x86_avx256_split_unaligned_store & ix86_tune_mask)
3682 && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_STORE))
3683 target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
3684 /* Enable 128-bit AVX instruction generation for the auto-vectorizer. */
3685 if (TARGET_AVX128_OPTIMAL && !(target_flags_explicit & MASK_PREFER_AVX128))
3686 target_flags |= MASK_PREFER_AVX128;
3691 /* Disable vzeroupper pass if TARGET_AVX is disabled. */
3692 target_flags &= ~MASK_VZEROUPPER;
/* Save the initial options in case the user uses function specific
   options.  */
3698 target_option_default_node = target_option_current_node
3699 = build_target_option_node ();
/* Return TRUE if VAL is passed in a register with a 256-bit AVX mode.  */
3705 function_pass_avx256_p (const_rtx val)
3710 if (REG_P (val) && VALID_AVX256_REG_MODE (GET_MODE (val)))
3713 if (GET_CODE (val) == PARALLEL)
3718 for (i = XVECLEN (val, 0) - 1; i >= 0; i--)
3720 r = XVECEXP (val, 0, i);
3721 if (GET_CODE (r) == EXPR_LIST
3723 && REG_P (XEXP (r, 0))
3724 && (GET_MODE (XEXP (r, 0)) == OImode
3725 || VALID_AVX256_REG_MODE (GET_MODE (XEXP (r, 0)))))
3733 /* Implement the TARGET_OPTION_OVERRIDE hook. */
3736 ix86_option_override (void)
3738 ix86_option_override_internal (true);
3741 /* Update register usage after having seen the compiler flags. */
3744 ix86_conditional_register_usage (void)
3749 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3751 if (fixed_regs[i] > 1)
3752 fixed_regs[i] = (fixed_regs[i] == (TARGET_64BIT ? 3 : 2));
3753 if (call_used_regs[i] > 1)
3754 call_used_regs[i] = (call_used_regs[i] == (TARGET_64BIT ? 3 : 2));
3757 /* The PIC register, if it exists, is fixed. */
3758 j = PIC_OFFSET_TABLE_REGNUM;
3759 if (j != INVALID_REGNUM)
3760 fixed_regs[j] = call_used_regs[j] = 1;
3762 /* The 64-bit MS_ABI changes the set of call-used registers. */
3763 if (TARGET_64BIT_MS_ABI)
3765 call_used_regs[SI_REG] = 0;
3766 call_used_regs[DI_REG] = 0;
3767 call_used_regs[XMM6_REG] = 0;
3768 call_used_regs[XMM7_REG] = 0;
3769 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
3770 call_used_regs[i] = 0;
3773 /* The default setting of CLOBBERED_REGS is for 32-bit; add in the
3774 other call-clobbered regs for 64-bit. */
3777 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
3779 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3780 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
3781 && call_used_regs[i])
3782 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
3785 /* If MMX is disabled, squash the registers. */
3787 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3788 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
3789 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3791 /* If SSE is disabled, squash the registers. */
3793 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3794 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
3795 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3797 /* If the FPU is disabled, squash the registers. */
3798 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
3799 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3800 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
3801 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3803 /* If 32-bit, squash the 64-bit registers. */
3806 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
3808 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
3814 /* Save the current options */
3817 ix86_function_specific_save (struct cl_target_option *ptr)
3819 ptr->arch = ix86_arch;
3820 ptr->schedule = ix86_schedule;
3821 ptr->tune = ix86_tune;
3822 ptr->branch_cost = ix86_branch_cost;
3823 ptr->tune_defaulted = ix86_tune_defaulted;
3824 ptr->arch_specified = ix86_arch_specified;
3825 ptr->x_ix86_isa_flags_explicit = ix86_isa_flags_explicit;
3826 ptr->ix86_target_flags_explicit = target_flags_explicit;
3828 /* The fields are char but the variables are not; make sure the
3829 values fit in the fields. */
3830 gcc_assert (ptr->arch == ix86_arch);
3831 gcc_assert (ptr->schedule == ix86_schedule);
3832 gcc_assert (ptr->tune == ix86_tune);
3833 gcc_assert (ptr->branch_cost == ix86_branch_cost);
3836 /* Restore the current options */
3839 ix86_function_specific_restore (struct cl_target_option *ptr)
3841 enum processor_type old_tune = ix86_tune;
3842 enum processor_type old_arch = ix86_arch;
3843 unsigned int ix86_arch_mask, ix86_tune_mask;
3846 ix86_arch = (enum processor_type) ptr->arch;
3847 ix86_schedule = (enum attr_cpu) ptr->schedule;
3848 ix86_tune = (enum processor_type) ptr->tune;
3849 ix86_branch_cost = ptr->branch_cost;
3850 ix86_tune_defaulted = ptr->tune_defaulted;
3851 ix86_arch_specified = ptr->arch_specified;
3852 ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
3853 target_flags_explicit = ptr->ix86_target_flags_explicit;
3855 /* Recreate the arch feature tests if the arch changed */
3856 if (old_arch != ix86_arch)
3858 ix86_arch_mask = 1u << ix86_arch;
3859 for (i = 0; i < X86_ARCH_LAST; ++i)
3860 ix86_arch_features[i]
3861 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3864 /* Recreate the tune optimization tests */
3865 if (old_tune != ix86_tune)
3867 ix86_tune_mask = 1u << ix86_tune;
3868 for (i = 0; i < X86_TUNE_LAST; ++i)
3869 ix86_tune_features[i]
3870 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3874 /* Print the current options */
3877 ix86_function_specific_print (FILE *file, int indent,
3878 struct cl_target_option *ptr)
3881 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
3882 NULL, NULL, ptr->x_ix86_fpmath, false);
3884 fprintf (file, "%*sarch = %d (%s)\n",
3887 ((ptr->arch < TARGET_CPU_DEFAULT_max)
3888 ? cpu_names[ptr->arch]
3891 fprintf (file, "%*stune = %d (%s)\n",
3894 ((ptr->tune < TARGET_CPU_DEFAULT_max)
3895 ? cpu_names[ptr->tune]
3898 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
3902 fprintf (file, "%*s%s\n", indent, "", target_string);
3903 free (target_string);
/* Inner function to process the attribute((target(...))): take an argument
   and set the current options from it.  If we have a list, recursively go
   over the list.  */
3913 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
3914 struct gcc_options *enum_opts_set)
3919 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
3920 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
3921 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
3922 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
3923 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
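/* A sketch of what these macros expand to, using one entry as an example:
   IX86_ATTR_ISA ("avx", OPT_mavx) becomes
     { "avx", 3, ix86_opt_isa, OPT_mavx, 0 }
   i.e. the attribute string, its length (sizeof - 1), the option type,
   the option enumerator, and a mask (unused for ISA entries).  */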
3939 enum ix86_opt_type type;
3944 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
3945 IX86_ATTR_ISA ("abm", OPT_mabm),
3946 IX86_ATTR_ISA ("bmi", OPT_mbmi),
3947 IX86_ATTR_ISA ("tbm", OPT_mtbm),
3948 IX86_ATTR_ISA ("aes", OPT_maes),
3949 IX86_ATTR_ISA ("avx", OPT_mavx),
3950 IX86_ATTR_ISA ("mmx", OPT_mmmx),
3951 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
3952 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
3953 IX86_ATTR_ISA ("sse", OPT_msse),
3954 IX86_ATTR_ISA ("sse2", OPT_msse2),
3955 IX86_ATTR_ISA ("sse3", OPT_msse3),
3956 IX86_ATTR_ISA ("sse4", OPT_msse4),
3957 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
3958 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
3959 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
3960 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
3961 IX86_ATTR_ISA ("fma4", OPT_mfma4),
3962 IX86_ATTR_ISA ("xop", OPT_mxop),
3963 IX86_ATTR_ISA ("lwp", OPT_mlwp),
3964 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
3965 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
3966 IX86_ATTR_ISA ("f16c", OPT_mf16c),
3969 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
3971 /* string options */
3972 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
3973 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
3976 IX86_ATTR_YES ("cld",
3980 IX86_ATTR_NO ("fancy-math-387",
3981 OPT_mfancy_math_387,
3982 MASK_NO_FANCY_MATH_387),
3984 IX86_ATTR_YES ("ieee-fp",
3988 IX86_ATTR_YES ("inline-all-stringops",
3989 OPT_minline_all_stringops,
3990 MASK_INLINE_ALL_STRINGOPS),
3992 IX86_ATTR_YES ("inline-stringops-dynamically",
3993 OPT_minline_stringops_dynamically,
3994 MASK_INLINE_STRINGOPS_DYNAMICALLY),
3996 IX86_ATTR_NO ("align-stringops",
3997 OPT_mno_align_stringops,
3998 MASK_NO_ALIGN_STRINGOPS),
4000 IX86_ATTR_YES ("recip",
4006 /* If this is a list, recurse to get the options. */
4007 if (TREE_CODE (args) == TREE_LIST)
4011 for (; args; args = TREE_CHAIN (args))
4012 if (TREE_VALUE (args)
4013 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
4014 p_strings, enum_opts_set))
4020 else if (TREE_CODE (args) != STRING_CST)
4023 /* Handle multiple arguments separated by commas. */
4024 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4026 while (next_optstr && *next_optstr != '\0')
4028 char *p = next_optstr;
4030 char *comma = strchr (next_optstr, ',');
4031 const char *opt_string;
4032 size_t len, opt_len;
4037 enum ix86_opt_type type = ix86_opt_unknown;
4043 len = comma - next_optstr;
4044 next_optstr = comma + 1;
4052 /* Recognize no-xxx. */
4053 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4062 /* Find the option. */
4065 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4067 type = attrs[i].type;
4068 opt_len = attrs[i].len;
4069 if (ch == attrs[i].string[0]
4070 && ((type != ix86_opt_str && type != ix86_opt_enum)
4073 && memcmp (p, attrs[i].string, opt_len) == 0)
4076 mask = attrs[i].mask;
4077 opt_string = attrs[i].string;
4082 /* Process the option. */
4085 error ("attribute(target(\"%s\")) is unknown", orig_p);
4089 else if (type == ix86_opt_isa)
4091 struct cl_decoded_option decoded;
4093 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
4094 ix86_handle_option (&global_options, &global_options_set,
4095 &decoded, input_location);
4098 else if (type == ix86_opt_yes || type == ix86_opt_no)
4100 if (type == ix86_opt_no)
4101 opt_set_p = !opt_set_p;
4104 target_flags |= mask;
4106 target_flags &= ~mask;
4109 else if (type == ix86_opt_str)
4113 error ("option(\"%s\") was already specified", opt_string);
4117 p_strings[opt] = xstrdup (p + opt_len);
4120 else if (type == ix86_opt_enum)
4125 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
4127 set_option (&global_options, enum_opts_set, opt, value,
4128 p + opt_len, DK_UNSPECIFIED, input_location,
4132 error ("attribute(target(\"%s\")) is unknown", orig_p);
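/* An end-to-end sketch of the input the loop above parses (user-level
   syntax; the attribute string is ours, for illustration):

     __attribute__((target ("sse4.2,no-avx,arch=core2,fpmath=sse")))
     int foo (void);

   "sse4.2" sets an ISA flag, "no-avx" clears one via the no- prefix
   handling, "arch=" is a string option and "fpmath=" an enum option.  */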
4144 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4147 ix86_valid_target_attribute_tree (tree args)
4149 const char *orig_arch_string = ix86_arch_string;
4150 const char *orig_tune_string = ix86_tune_string;
4151 enum fpmath_unit orig_fpmath_set = global_options_set.x_ix86_fpmath;
4152 int orig_tune_defaulted = ix86_tune_defaulted;
4153 int orig_arch_specified = ix86_arch_specified;
4154 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
4157 struct cl_target_option *def
4158 = TREE_TARGET_OPTION (target_option_default_node);
4159 struct gcc_options enum_opts_set;
4161 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
4163 /* Process each of the options on the chain. */
4164 if (! ix86_valid_target_attribute_inner_p (args, option_strings,
4168 /* If the changed options are different from the default, rerun
4169 ix86_option_override_internal, and then save the options away.
The string options are attribute options, and will be undone
when we copy the save structure.  */
4172 if (ix86_isa_flags != def->x_ix86_isa_flags
4173 || target_flags != def->x_target_flags
4174 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4175 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4176 || enum_opts_set.x_ix86_fpmath)
4178 /* If we are using the default tune= or arch=, undo the string assigned,
4179 and use the default. */
4180 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4181 ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4182 else if (!orig_arch_specified)
4183 ix86_arch_string = NULL;
4185 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4186 ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4187 else if (orig_tune_defaulted)
4188 ix86_tune_string = NULL;
4190 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4191 if (enum_opts_set.x_ix86_fpmath)
4192 global_options_set.x_ix86_fpmath = (enum fpmath_unit) 1;
4193 else if (!TARGET_64BIT && TARGET_SSE)
4195 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
4196 global_options_set.x_ix86_fpmath = (enum fpmath_unit) 1;
4199 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4200 ix86_option_override_internal (false);
4202 /* Add any builtin functions with the new isa if any. */
4203 ix86_add_new_builtins (ix86_isa_flags);
4205 /* Save the current options unless we are validating options for
4207 t = build_target_option_node ();
4209 ix86_arch_string = orig_arch_string;
4210 ix86_tune_string = orig_tune_string;
4211 global_options_set.x_ix86_fpmath = orig_fpmath_set;
4213 /* Free up memory allocated to hold the strings */
4214 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4215 free (option_strings[i]);
4221 /* Hook to validate attribute((target("string"))). */
4224 ix86_valid_target_attribute_p (tree fndecl,
4225 tree ARG_UNUSED (name),
4227 int ARG_UNUSED (flags))
4229 struct cl_target_option cur_target;
4231 tree old_optimize = build_optimization_node ();
4232 tree new_target, new_optimize;
4233 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4235 /* If the function changed the optimization levels as well as setting target
4236 options, start with the optimizations specified. */
4237 if (func_optimize && func_optimize != old_optimize)
4238 cl_optimization_restore (&global_options,
4239 TREE_OPTIMIZATION (func_optimize));
4241 /* The target attributes may also change some optimization flags, so update
4242 the optimization options if necessary. */
4243 cl_target_option_save (&cur_target, &global_options);
4244 new_target = ix86_valid_target_attribute_tree (args);
4245 new_optimize = build_optimization_node ();
4252 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4254 if (old_optimize != new_optimize)
4255 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4258 cl_target_option_restore (&global_options, &cur_target);
4260 if (old_optimize != new_optimize)
4261 cl_optimization_restore (&global_options,
4262 TREE_OPTIMIZATION (old_optimize));
4268 /* Hook to determine if one function can safely inline another. */
4271 ix86_can_inline_p (tree caller, tree callee)
4274 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
4275 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
4277 /* If callee has no option attributes, then it is ok to inline. */
/* If the caller has no option attributes but the callee does, then it is
   not ok to inline.  */
4283 else if (!caller_tree)
4288 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
4289 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
/* The callee's isa options should be a subset of the caller's, i.e. an SSE4
   function can inline an SSE2 function, but an SSE2 function can't inline
   an SSE4 function.  */
4294 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
4295 != callee_opts->x_ix86_isa_flags)
4298 /* See if we have the same non-isa options. */
4299 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
4302 /* See if arch, tune, etc. are the same. */
4303 else if (caller_opts->arch != callee_opts->arch)
4306 else if (caller_opts->tune != callee_opts->tune)
4309 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
4312 else if (caller_opts->branch_cost != callee_opts->branch_cost)
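/* A concrete illustration of the subset rule above (ours): a caller built
   with -mavx has AVX, SSE4.2, ... in its isa flags and may inline an
   SSE2-only callee, since ANDing the two flag sets gives back the callee's
   flags; the reverse direction fails the test and inlining is refused.  */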
4323 /* Remember the last target of ix86_set_current_function. */
4324 static GTY(()) tree ix86_previous_fndecl;
4326 /* Establish appropriate back-end context for processing the function
4327 FNDECL. The argument might be NULL to indicate processing at top
4328 level, outside of any function scope. */
4330 ix86_set_current_function (tree fndecl)
4332 /* Only change the context if the function changes. This hook is called
4333 several times in the course of compiling a function, and we don't want to
4334 slow things down too much or call target_reinit when it isn't safe. */
4335 if (fndecl && fndecl != ix86_previous_fndecl)
4337 tree old_tree = (ix86_previous_fndecl
4338 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
4341 tree new_tree = (fndecl
4342 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
4345 ix86_previous_fndecl = fndecl;
4346 if (old_tree == new_tree)
4351 cl_target_option_restore (&global_options,
4352 TREE_TARGET_OPTION (new_tree));
4358 struct cl_target_option *def
4359 = TREE_TARGET_OPTION (target_option_current_node);
4361 cl_target_option_restore (&global_options, def);
4368 /* Return true if this goes in large data/bss. */
4371 ix86_in_large_data_p (tree exp)
4373 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
4376 /* Functions are never large data. */
4377 if (TREE_CODE (exp) == FUNCTION_DECL)
4380 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
4382 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
4383 if (strcmp (section, ".ldata") == 0
4384 || strcmp (section, ".lbss") == 0)
4390 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
4392 /* If this is an incomplete type with size 0, then we can't put it
4393 in data because it might be too big when completed. */
4394 if (!size || size > ix86_section_threshold)
4401 /* Switch to the appropriate section for output of DECL.
4402 DECL is either a `VAR_DECL' node or a constant of some sort.
4403 RELOC indicates whether forming the initial value of DECL requires
4404 link-time relocations. */
4406 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
4410 x86_64_elf_select_section (tree decl, int reloc,
4411 unsigned HOST_WIDE_INT align)
4413 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4414 && ix86_in_large_data_p (decl))
4416 const char *sname = NULL;
4417 unsigned int flags = SECTION_WRITE;
4418 switch (categorize_decl_for_section (decl, reloc))
4423 case SECCAT_DATA_REL:
4424 sname = ".ldata.rel";
4426 case SECCAT_DATA_REL_LOCAL:
4427 sname = ".ldata.rel.local";
4429 case SECCAT_DATA_REL_RO:
4430 sname = ".ldata.rel.ro";
4432 case SECCAT_DATA_REL_RO_LOCAL:
4433 sname = ".ldata.rel.ro.local";
4437 flags |= SECTION_BSS;
4440 case SECCAT_RODATA_MERGE_STR:
4441 case SECCAT_RODATA_MERGE_STR_INIT:
4442 case SECCAT_RODATA_MERGE_CONST:
4446 case SECCAT_SRODATA:
/* We don't split these for the medium model.  Place them into
   default sections and hope for the best.  */
4459 /* We might get called with string constants, but get_named_section
4460 doesn't like them as they are not DECLs. Also, we need to set
4461 flags in that case. */
4463 return get_section (sname, flags, NULL);
4464 return get_named_section (decl, sname, reloc);
4467 return default_elf_select_section (decl, reloc, align);
4470 /* Build up a unique section name, expressed as a
4471 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
4472 RELOC indicates whether the initial value of EXP requires
4473 link-time relocations. */
4475 static void ATTRIBUTE_UNUSED
4476 x86_64_elf_unique_section (tree decl, int reloc)
4478 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4479 && ix86_in_large_data_p (decl))
4481 const char *prefix = NULL;
4482 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
4483 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
4485 switch (categorize_decl_for_section (decl, reloc))
4488 case SECCAT_DATA_REL:
4489 case SECCAT_DATA_REL_LOCAL:
4490 case SECCAT_DATA_REL_RO:
4491 case SECCAT_DATA_REL_RO_LOCAL:
4492 prefix = one_only ? ".ld" : ".ldata";
4495 prefix = one_only ? ".lb" : ".lbss";
4498 case SECCAT_RODATA_MERGE_STR:
4499 case SECCAT_RODATA_MERGE_STR_INIT:
4500 case SECCAT_RODATA_MERGE_CONST:
4501 prefix = one_only ? ".lr" : ".lrodata";
4503 case SECCAT_SRODATA:
/* We don't split these for the medium model.  Place them into
   default sections and hope for the best.  */
4516 const char *name, *linkonce;
4519 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
4520 name = targetm.strip_name_encoding (name);
4522 /* If we're using one_only, then there needs to be a .gnu.linkonce
4523 prefix to the section name. */
4524 linkonce = one_only ? ".gnu.linkonce" : "";
4526 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
4528 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
4532 default_unique_section (decl, reloc);
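/* Example of the names the code above builds for a (hypothetical)
   medium-model variable "big" classified as writable large data:
     .ldata.big                (ordinary case)
     .gnu.linkonce.ld.big      (one_only without COMDAT groups).  */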
4535 #ifdef COMMON_ASM_OP
4536 /* This says how to output assembler code to declare an
4537 uninitialized external linkage data object.
For medium model x86-64 we need to use the .largecomm opcode for
large objects.  */
4542 x86_elf_aligned_common (FILE *file,
4543 const char *name, unsigned HOST_WIDE_INT size,
4546 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4547 && size > (unsigned int)ix86_section_threshold)
4548 fputs (".largecomm\t", file);
4550 fputs (COMMON_ASM_OP, file);
4551 assemble_name (file, name);
4552 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
4553 size, align / BITS_PER_UNIT);
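/* Sketch of the assembly this emits for a 1 MiB common symbol "buf"
   aligned to 256 bits in the medium code model (name and sizes are ours):

     .largecomm buf,1048576,32

   Objects at or below ix86_section_threshold use COMMON_ASM_OP instead.  */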
4557 /* Utility function for targets to use in implementing
4558 ASM_OUTPUT_ALIGNED_BSS. */
4561 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
4562 const char *name, unsigned HOST_WIDE_INT size,
4565 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4566 && size > (unsigned int)ix86_section_threshold)
4567 switch_to_section (get_named_section (decl, ".lbss", 0));
4569 switch_to_section (bss_section);
4570 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
4571 #ifdef ASM_DECLARE_OBJECT_NAME
4572 last_assemble_variable_decl = decl;
4573 ASM_DECLARE_OBJECT_NAME (file, name, decl);
/* The standard thing is to just output a label for the object.  */
4576 ASM_OUTPUT_LABEL (file, name);
4577 #endif /* ASM_DECLARE_OBJECT_NAME */
4578 ASM_OUTPUT_SKIP (file, size ? size : 1);
4581 /* Decide whether we must probe the stack before any space allocation
4582 on this target. It's essentially TARGET_STACK_PROBE except when
4583 -fstack-check causes the stack to be already probed differently. */
4586 ix86_target_stack_probe (void)
4588 /* Do not probe the stack twice if static stack checking is enabled. */
4589 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
4592 return TARGET_STACK_PROBE;
4595 /* Decide whether we can make a sibling call to a function. DECL is the
4596 declaration of the function being targeted by the call and EXP is the
4597 CALL_EXPR representing the call. */
4600 ix86_function_ok_for_sibcall (tree decl, tree exp)
4602 tree type, decl_or_type;
4605 /* If we are generating position-independent code, we cannot sibcall
4606 optimize any indirect call, or a direct call to a global function,
4607 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
4611 && (!decl || !targetm.binds_local_p (decl)))
4614 /* If we need to align the outgoing stack, then sibcalling would
4615 unalign the stack, which may break the called function. */
4616 if (ix86_minimum_incoming_stack_boundary (true)
4617 < PREFERRED_STACK_BOUNDARY)
4622 decl_or_type = decl;
4623 type = TREE_TYPE (decl);
4627 /* We're looking at the CALL_EXPR, we need the type of the function. */
4628 type = CALL_EXPR_FN (exp); /* pointer expression */
4629 type = TREE_TYPE (type); /* pointer type */
4630 type = TREE_TYPE (type); /* function type */
4631 decl_or_type = type;
/* Check that the return value locations are the same.  For example,
   if we are returning floats on the 80387 register stack, we cannot
4636 make a sibcall from a function that doesn't return a float to a
4637 function that does or, conversely, from a function that does return
4638 a float to a function that doesn't; the necessary stack adjustment
4639 would not be executed. This is also the place we notice
4640 differences in the return value ABI. Note that it is ok for one
4641 of the functions to have void return type as long as the return
4642 value of the other is passed in a register. */
4643 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
4644 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4646 if (STACK_REG_P (a) || STACK_REG_P (b))
4648 if (!rtx_equal_p (a, b))
4651 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4653 /* Disable sibcall if we need to generate vzeroupper after
4655 if (TARGET_VZEROUPPER
4656 && cfun->machine->callee_return_avx256_p
4657 && !cfun->machine->caller_return_avx256_p)
4660 else if (!rtx_equal_p (a, b))
4665 /* The SYSV ABI has more call-clobbered registers;
4666 disallow sibcalls from MS to SYSV. */
4667 if (cfun->machine->call_abi == MS_ABI
4668 && ix86_function_type_abi (type) == SYSV_ABI)
4673 /* If this call is indirect, we'll need to be able to use a
4674 call-clobbered register for the address of the target function.
4675 Make sure that all such registers are not used for passing
4676 parameters. Note that DLLIMPORT functions are indirect. */
4678 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
4680 if (ix86_function_regparm (type, NULL) >= 3)
4682 /* ??? Need to count the actual number of registers to be used,
4683 not the possible number of registers. Fix later. */
4689 /* Otherwise okay. That also includes certain types of indirect calls. */
4693 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
4694 and "sseregparm" calling convention attributes;
4695 arguments as in struct attribute_spec.handler. */
4698 ix86_handle_cconv_attribute (tree *node, tree name,
4700 int flags ATTRIBUTE_UNUSED,
4703 if (TREE_CODE (*node) != FUNCTION_TYPE
4704 && TREE_CODE (*node) != METHOD_TYPE
4705 && TREE_CODE (*node) != FIELD_DECL
4706 && TREE_CODE (*node) != TYPE_DECL)
4708 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4710 *no_add_attrs = true;
/* Can combine regparm with all attributes but fastcall and thiscall.  */
4715 if (is_attribute_p ("regparm", name))
4719 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4721 error ("fastcall and regparm attributes are not compatible");
4724 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4726 error ("regparam and thiscall attributes are not compatible");
4729 cst = TREE_VALUE (args);
4730 if (TREE_CODE (cst) != INTEGER_CST)
4732 warning (OPT_Wattributes,
4733 "%qE attribute requires an integer constant argument",
4735 *no_add_attrs = true;
4737 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
4739 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
4741 *no_add_attrs = true;
4749 /* Do not warn when emulating the MS ABI. */
4750 if ((TREE_CODE (*node) != FUNCTION_TYPE
4751 && TREE_CODE (*node) != METHOD_TYPE)
4752 || ix86_function_type_abi (*node) != MS_ABI)
4753 warning (OPT_Wattributes, "%qE attribute ignored",
4755 *no_add_attrs = true;
4759 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
4760 if (is_attribute_p ("fastcall", name))
4762 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4764 error ("fastcall and cdecl attributes are not compatible");
4766 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4768 error ("fastcall and stdcall attributes are not compatible");
4770 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
4772 error ("fastcall and regparm attributes are not compatible");
4774 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4776 error ("fastcall and thiscall attributes are not compatible");
/* Can combine stdcall with fastcall (redundant), regparm and
   sseregparm.  */
4782 else if (is_attribute_p ("stdcall", name))
4784 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4786 error ("stdcall and cdecl attributes are not compatible");
4788 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4790 error ("stdcall and fastcall attributes are not compatible");
4792 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4794 error ("stdcall and thiscall attributes are not compatible");
4798 /* Can combine cdecl with regparm and sseregparm. */
4799 else if (is_attribute_p ("cdecl", name))
4801 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4803 error ("stdcall and cdecl attributes are not compatible");
4805 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4807 error ("fastcall and cdecl attributes are not compatible");
4809 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4811 error ("cdecl and thiscall attributes are not compatible");
4814 else if (is_attribute_p ("thiscall", name))
4816 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
warning (OPT_Wattributes, "%qE attribute is used for non-class method",
4819 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4821 error ("stdcall and thiscall attributes are not compatible");
4823 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4825 error ("fastcall and thiscall attributes are not compatible");
4827 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4829 error ("cdecl and thiscall attributes are not compatible");
4833 /* Can combine sseregparm with all attributes. */
4838 /* This function determines from TYPE the calling-convention. */
4841 ix86_get_callcvt (const_tree type)
4843 unsigned int ret = 0;
4848 return IX86_CALLCVT_CDECL;
4850 attrs = TYPE_ATTRIBUTES (type);
4851 if (attrs != NULL_TREE)
4853 if (lookup_attribute ("cdecl", attrs))
4854 ret |= IX86_CALLCVT_CDECL;
4855 else if (lookup_attribute ("stdcall", attrs))
4856 ret |= IX86_CALLCVT_STDCALL;
4857 else if (lookup_attribute ("fastcall", attrs))
4858 ret |= IX86_CALLCVT_FASTCALL;
4859 else if (lookup_attribute ("thiscall", attrs))
4860 ret |= IX86_CALLCVT_THISCALL;
/* Regparm isn't allowed for thiscall and fastcall.  */
4863 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
4865 if (lookup_attribute ("regparm", attrs))
4866 ret |= IX86_CALLCVT_REGPARM;
4867 if (lookup_attribute ("sseregparm", attrs))
4868 ret |= IX86_CALLCVT_SSEREGPARM;
4871 if (IX86_BASE_CALLCVT(ret) != 0)
4875 is_stdarg = stdarg_p (type);
4876 if (TARGET_RTD && !is_stdarg)
4877 return IX86_CALLCVT_STDCALL | ret;
4881 || TREE_CODE (type) != METHOD_TYPE
4882 || ix86_function_type_abi (type) != MS_ABI)
4883 return IX86_CALLCVT_CDECL | ret;
4885 return IX86_CALLCVT_THISCALL;
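/* Illustrative mapping (ours) for 32-bit code without -mrtd:

     void f (int);                               -> IX86_CALLCVT_CDECL
     __attribute__((stdcall)) void g (int);      -> IX86_CALLCVT_STDCALL
     __attribute__((regparm (3))) void h (int);  -> IX86_CALLCVT_CDECL
                                                    | IX86_CALLCVT_REGPARM

   regparm alone carries no base convention, so the final test falls back
   to cdecl while keeping the REGPARM bit.  */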
4888 /* Return 0 if the attributes for two types are incompatible, 1 if they
4889 are compatible, and 2 if they are nearly compatible (which causes a
4890 warning to be generated). */
4893 ix86_comp_type_attributes (const_tree type1, const_tree type2)
4895 unsigned int ccvt1, ccvt2;
4897 if (TREE_CODE (type1) != FUNCTION_TYPE
4898 && TREE_CODE (type1) != METHOD_TYPE)
4901 ccvt1 = ix86_get_callcvt (type1);
4902 ccvt2 = ix86_get_callcvt (type2);
4905 if (ix86_function_regparm (type1, NULL)
4906 != ix86_function_regparm (type2, NULL))
4912 /* Return the regparm value for a function with the indicated TYPE and DECL.
4913 DECL may be NULL when calling function indirectly
4914 or considering a libcall. */
4917 ix86_function_regparm (const_tree type, const_tree decl)
4924 return (ix86_function_type_abi (type) == SYSV_ABI
4925 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
4926 ccvt = ix86_get_callcvt (type);
4927 regparm = ix86_regparm;
4929 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
4931 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
4934 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
4938 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
4940 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
4943 /* Use register calling convention for local functions when possible. */
4945 && TREE_CODE (decl) == FUNCTION_DECL
4947 && !(profile_flag && !flag_fentry))
4949 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4950 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
4951 if (i && i->local && i->can_change_signature)
4953 int local_regparm, globals = 0, regno;
4955 /* Make sure no regparm register is taken by a
4956 fixed register variable. */
4957 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
4958 if (fixed_regs[local_regparm])
4961 /* We don't want to use regparm(3) for nested functions as
4962 these use a static chain pointer in the third argument. */
4963 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
4966 /* In 32-bit mode save a register for the split stack. */
4967 if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
/* Each fixed register usage increases register pressure,
   so fewer registers should be used for argument passing.
   This functionality can be overridden by an explicit
   regparm value.  */
4974 for (regno = 0; regno <= DI_REG; regno++)
4975 if (fixed_regs[regno])
4979 = globals < local_regparm ? local_regparm - globals : 0;
4981 if (local_regparm > regparm)
4982 regparm = local_regparm;
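/* Worked example for the local-function path above (hypothetical
   translation unit): with no fixed registers among the regparm set and
   none in eax..edi, globals stays 0 and local_regparm reaches 3 (reduced
   with -fsplit-stack or a static chain in 32-bit mode), so a local
   function can receive up to that many integer arguments in registers
   even without an explicit regparm attribute.  */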
4989 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
4990 DFmode (2) arguments in SSE registers for a function with the
4991 indicated TYPE and DECL. DECL may be NULL when calling function
4992 indirectly or considering a libcall. Otherwise return 0. */
4995 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
4997 gcc_assert (!TARGET_64BIT);
4999 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5000 by the sseregparm attribute. */
5001 if (TARGET_SSEREGPARM
5002 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
5009 error ("calling %qD with attribute sseregparm without "
5010 "SSE/SSE2 enabled", decl);
5012 error ("calling %qT with attribute sseregparm without "
5013 "SSE/SSE2 enabled", type);
5021 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5022 (and DFmode for SSE2) arguments in SSE registers. */
5023 if (decl && TARGET_SSE_MATH && optimize
5024 && !(profile_flag && !flag_fentry))
5026 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5027 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
5028 if (i && i->local && i->can_change_signature)
5029 return TARGET_SSE2 ? 2 : 1;
5035 /* Return true if EAX is live at the start of the function. Used by
5036 ix86_expand_prologue to determine if we need special help before
5037 calling allocate_stack_worker. */
5040 ix86_eax_live_at_start_p (void)
5042 /* Cheat. Don't bother working forward from ix86_function_regparm
5043 to the function type to whether an actual argument is located in
5044 eax. Instead just look at cfg info, which is still close enough
5045 to correct at this point. This gives false positives for broken
5046 functions that might use uninitialized data that happens to be
5047 allocated in eax, but who cares? */
5048 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
5052 ix86_keep_aggregate_return_pointer (tree fntype)
5058 attr = lookup_attribute ("callee_pop_aggregate_return",
5059 TYPE_ATTRIBUTES (fntype));
5061 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
/* For the 32-bit MS ABI the default is to keep the aggregate
   return pointer.  */
5065 if (ix86_function_type_abi (fntype) == MS_ABI)
5068 return KEEP_AGGREGATE_RETURN_POINTER != 0;
5071 /* Value is the number of bytes of arguments automatically
5072 popped when returning from a subroutine call.
5073 FUNDECL is the declaration node of the function (as a tree),
5074 FUNTYPE is the data type of the function (as a tree),
5075 or for a library call it is an identifier node for the subroutine name.
5076 SIZE is the number of bytes of arguments passed on the stack.
5078 On the 80386, the RTD insn may be used to pop them if the number
5079 of args is fixed, but if the number is variable then the caller
5080 must pop them all. RTD can't be used for library calls now
5081 because the library is compiled with the Unix compiler.
5082 Use of RTD is a selectable option, since it is incompatible with
5083 standard Unix calling sequences. If the option is not selected,
5084 the caller must always pop the args.
5086 The attribute stdcall is equivalent to RTD on a per module basis. */
5089 ix86_return_pops_args (tree fundecl, tree funtype, int size)
5093 /* None of the 64-bit ABIs pop arguments. */
5097 ccvt = ix86_get_callcvt (funtype);
5099 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
5100 | IX86_CALLCVT_THISCALL)) != 0
5101 && ! stdarg_p (funtype))
5104 /* Lose any fake structure return argument if it is passed on the stack. */
5105 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
5106 && !ix86_keep_aggregate_return_pointer (funtype))
5108 int nregs = ix86_function_regparm (funtype, fundecl);
5110 return GET_MODE_SIZE (Pmode);
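/* A concrete illustration (ours) of the popping rules: for

     __attribute__((stdcall)) int f (int a, int b);

   SIZE is 8 on ia32 and the callee returns with "ret $8", whereas a
   stdarg function returns with a plain "ret" and its caller pops the
   arguments.  */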
5116 /* Argument support functions. */
5118 /* Return true when register may be used to pass function parameters. */
5120 ix86_function_arg_regno_p (int regno)
5123 const int *parm_regs;
5128 return (regno < REGPARM_MAX
5129 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
5131 return (regno < REGPARM_MAX
5132 || (TARGET_MMX && MMX_REGNO_P (regno)
5133 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
5134 || (TARGET_SSE && SSE_REGNO_P (regno)
5135 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
5140 if (SSE_REGNO_P (regno) && TARGET_SSE)
5145 if (TARGET_SSE && SSE_REGNO_P (regno)
5146 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
/* TODO: The function should depend on the current function's ABI, but
   builtins.c would need updating then.  Therefore we use the default ABI.  */
5154 /* RAX is used as hidden argument to va_arg functions. */
5155 if (ix86_abi == SYSV_ABI && regno == AX_REG)
5158 if (ix86_abi == MS_ABI)
5159 parm_regs = x86_64_ms_abi_int_parameter_registers;
5161 parm_regs = x86_64_int_parameter_registers;
5162 for (i = 0; i < (ix86_abi == MS_ABI
5163 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
5164 if (regno == parm_regs[i])
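/* Under the SysV branch above, parm_regs enumerates rdi, rsi, rdx, rcx,
   r8 and r9, so e.g. regno == DI_REG succeeds; the MS ABI table holds
   only rcx, rdx, r8 and r9 (summary ours, per the respective ABIs).  */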
/* Return true if we do not know how to pass TYPE solely in registers.  */
5172 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
5174 if (must_pass_in_stack_var_size_or_pad (mode, type))
5177 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5178 The layout_type routine is crafty and tries to trick us into passing
5179 currently unsupported vector types on the stack by using TImode. */
5180 return (!TARGET_64BIT && mode == TImode
5181 && type && TREE_CODE (type) != VECTOR_TYPE);
/* Return the size, in bytes, of the area reserved for arguments passed
   in registers for the function represented by FNDECL, depending on the
   ABI used.  */
5188 ix86_reg_parm_stack_space (const_tree fndecl)
5190 enum calling_abi call_abi = SYSV_ABI;
5191 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
5192 call_abi = ix86_function_abi (fndecl);
5194 call_abi = ix86_function_type_abi (fndecl);
5195 if (TARGET_64BIT && call_abi == MS_ABI)
/* Return SYSV_ABI or MS_ABI, depending on FNTYPE, specifying the
   call ABI used.  */
5203 ix86_function_type_abi (const_tree fntype)
5205 if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
5207 enum calling_abi abi = ix86_abi;
5208 if (abi == SYSV_ABI)
5210 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
5213 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
5221 ix86_function_ms_hook_prologue (const_tree fn)
5223 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
5225 if (decl_function_context (fn) != NULL_TREE)
5226 error_at (DECL_SOURCE_LOCATION (fn),
5227 "ms_hook_prologue is not compatible with nested function");
5234 static enum calling_abi
5235 ix86_function_abi (const_tree fndecl)
5239 return ix86_function_type_abi (TREE_TYPE (fndecl));
/* Return SYSV_ABI or MS_ABI, depending on CFUN, specifying the
   call ABI used.  */
5245 ix86_cfun_abi (void)
5249 return cfun->machine->call_abi;
5252 /* Write the extra assembler code needed to declare a function properly. */
5255 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
5258 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
5262 int i, filler_count = (TARGET_64BIT ? 32 : 16);
5263 unsigned int filler_cc = 0xcccccccc;
5265 for (i = 0; i < filler_count; i += 4)
5266 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
5269 #ifdef SUBTARGET_ASM_UNWIND_INIT
5270 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
5273 ASM_OUTPUT_LABEL (asm_out_file, fname);
5275 /* Output magic byte marker, if hot-patch attribute is set. */
5280 /* leaq [%rsp + 0], %rsp */
5281 asm_fprintf (asm_out_file, ASM_BYTE
5282 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
5286 /* movl.s %edi, %edi
5288 movl.s %esp, %ebp */
5289 asm_fprintf (asm_out_file, ASM_BYTE
5290 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
5296 extern void init_regs (void);
/* Implementation of the call ABI switching target hook.  The call
   register sets specific to FNDECL are selected here.  See also
   ix86_conditional_register_usage for more details.  */
5302 ix86_call_abi_override (const_tree fndecl)
5304 if (fndecl == NULL_TREE)
5305 cfun->machine->call_abi = ix86_abi;
5307 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
/* The 64-bit MS and SYSV ABIs have different sets of call-used registers.
   Avoid expensive re-initialization of init_regs each time we switch
   function context, since this is needed only during RTL expansion.  */
5314 ix86_maybe_switch_abi (void)
5317 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
5321 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5322 for a call to a function whose data type is FNTYPE.
5323 For a library call, FNTYPE is 0. */
5326 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
5327 tree fntype, /* tree ptr for function decl */
5328 rtx libname, /* SYMBOL_REF of library name or 0 */
5332 struct cgraph_local_info *i;
5335 memset (cum, 0, sizeof (*cum));
5337 /* Initialize for the current callee. */
5340 cfun->machine->callee_pass_avx256_p = false;
5341 cfun->machine->callee_return_avx256_p = false;
5346 i = cgraph_local_info (fndecl);
5347 cum->call_abi = ix86_function_abi (fndecl);
5348 fnret_type = TREE_TYPE (TREE_TYPE (fndecl));
5353 cum->call_abi = ix86_function_type_abi (fntype);
5355 fnret_type = TREE_TYPE (fntype);
5360 if (TARGET_VZEROUPPER && fnret_type)
5362 rtx fnret_value = ix86_function_value (fnret_type, fntype,
5364 if (function_pass_avx256_p (fnret_value))
5366 /* The return value of this function uses 256bit AVX modes. */
5368 cfun->machine->callee_return_avx256_p = true;
5370 cfun->machine->caller_return_avx256_p = true;
5374 cum->caller = caller;
5376 /* Set up the number of registers to use for passing arguments. */
5378 if (TARGET_64BIT && cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
5379 sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
5380 "or subtarget optimization implying it");
5381 cum->nregs = ix86_regparm;
5384 cum->nregs = (cum->call_abi == SYSV_ABI
5385 ? X86_64_REGPARM_MAX
5386 : X86_64_MS_REGPARM_MAX);
5390 cum->sse_nregs = SSE_REGPARM_MAX;
5393 cum->sse_nregs = (cum->call_abi == SYSV_ABI
5394 ? X86_64_SSE_REGPARM_MAX
5395 : X86_64_MS_SSE_REGPARM_MAX);
5399 cum->mmx_nregs = MMX_REGPARM_MAX;
5400 cum->warn_avx = true;
5401 cum->warn_sse = true;
5402 cum->warn_mmx = true;
/* Because the type might not match between caller and callee, we need to
   use the actual type of the function for local calls.
   FIXME: cgraph_analyze can be told to actually record if a function uses
   va_start, so for local functions maybe_vaarg can be made more aggressive.
   FIXME: once the type system is fixed, we won't need this code anymore.  */
5410 if (i && i->local && i->can_change_signature)
5411 fntype = TREE_TYPE (fndecl);
5412 cum->maybe_vaarg = (fntype
5413 ? (!prototype_p (fntype) || stdarg_p (fntype))
5418 /* If there are variable arguments, then we won't pass anything
5419 in registers in 32-bit mode. */
5420 if (stdarg_p (fntype))
5431 /* Use ecx and edx registers if function has fastcall attribute,
5432 else look for regparm information. */
5435 unsigned int ccvt = ix86_get_callcvt (fntype);
5436 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5439 cum->fastcall = 1; /* Same first register as in fastcall. */
5441 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5447 cum->nregs = ix86_function_regparm (fntype, fndecl);
5450 /* Set up the number of SSE registers used for passing SFmode
5451 and DFmode arguments. Warn for mismatching ABI. */
5452 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
5456 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
5457 But in the case of vector types, it is some vector mode.
5459 When we have only some of our vector isa extensions enabled, then there
5460 are some modes for which vector_mode_supported_p is false. For these
5461 modes, the generic vector support in gcc will choose some non-vector mode
5462 in order to implement the type. By computing the natural mode, we'll
5463 select the proper ABI location for the operand and not depend on whatever
5464 the middle-end decides to do with these vector types.
The middle-end can't deal with vector types > 16 bytes.  In this
case, we return the original mode and warn about the ABI change if CUM
isn't NULL.  */
5470 static enum machine_mode
5471 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum)
5473 enum machine_mode mode = TYPE_MODE (type);
5475 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
5477 HOST_WIDE_INT size = int_size_in_bytes (type);
5478 if ((size == 8 || size == 16 || size == 32)
5479 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
5480 && TYPE_VECTOR_SUBPARTS (type) > 1)
5482 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
5484 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
5485 mode = MIN_MODE_VECTOR_FLOAT;
5487 mode = MIN_MODE_VECTOR_INT;
5489 /* Get the mode which has this inner mode and number of units. */
5490 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
5491 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
5492 && GET_MODE_INNER (mode) == innermode)
5494 if (size == 32 && !TARGET_AVX)
5496 static bool warnedavx;
5503 warning (0, "AVX vector argument without AVX "
5504 "enabled changes the ABI");
5506 return TYPE_MODE (type);
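/* For instance (our sketch), given

     typedef float v8sf __attribute__((vector_size (32)));

   the type has 8 SFmode subparts, so the loop above finds V8SFmode when
   AVX is enabled; without AVX the 32-byte case warns once about the ABI
   change and falls back to TYPE_MODE.  */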
5519 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
5520 this may not agree with the mode that the type system has chosen for the
5521 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
5522 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
5525 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
5530 if (orig_mode != BLKmode)
5531 tmp = gen_rtx_REG (orig_mode, regno);
5534 tmp = gen_rtx_REG (mode, regno);
5535 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
5536 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
/* x86-64 register passing implementation.  See the x86-64 ABI for details.
   The goal of this code is to classify each 8 bytes of the incoming argument
   by register class and assign registers accordingly.  */
5546 /* Return the union class of CLASS1 and CLASS2.
5547 See the x86-64 PS ABI for details. */
5549 static enum x86_64_reg_class
5550 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
5552 /* Rule #1: If both classes are equal, this is the resulting class. */
5553 if (class1 == class2)
5556 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
5558 if (class1 == X86_64_NO_CLASS)
5560 if (class2 == X86_64_NO_CLASS)
5563 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
5564 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
5565 return X86_64_MEMORY_CLASS;
5567 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
5568 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
5569 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
5570 return X86_64_INTEGERSI_CLASS;
5571 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
5572 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
5573 return X86_64_INTEGER_CLASS;
5575 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
5577 if (class1 == X86_64_X87_CLASS
5578 || class1 == X86_64_X87UP_CLASS
5579 || class1 == X86_64_COMPLEX_X87_CLASS
5580 || class2 == X86_64_X87_CLASS
5581 || class2 == X86_64_X87UP_CLASS
5582 || class2 == X86_64_COMPLEX_X87_CLASS)
5583 return X86_64_MEMORY_CLASS;
5585 /* Rule #6: Otherwise class SSE is used. */
5586 return X86_64_SSE_CLASS;
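/* Two worked examples of the rules above (ours):
   merge_classes (X86_64_INTEGERSI_CLASS, X86_64_SSESF_CLASS)
     -> X86_64_INTEGERSI_CLASS by rule #4, and
   merge_classes (X86_64_SSE_CLASS, X86_64_SSEUP_CLASS)
     -> X86_64_SSE_CLASS by rule #6, since no earlier rule matches.  */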
5589 /* Classify the argument of type TYPE and mode MODE.
5590 CLASSES will be filled by the register class used to pass each word
5591 of the operand. The number of words is returned. In case the parameter
5592 should be passed in memory, 0 is returned. As a special case for zero
5593 sized containers, classes[0] will be NO_CLASS and 1 is returned.
BIT_OFFSET is used internally for handling records and specifies the
offset in bits modulo 256, to avoid overflow cases.
5598 See the x86-64 PS ABI for details.
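   A worked example (ours, not from the ABI text):

     struct { double d; long l; }

   spans two eightbytes and classifies as { X86_64_SSE_CLASS,
   X86_64_INTEGER_CLASS }, so it is passed in one SSE register and one
   integer register.  */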
5602 classify_argument (enum machine_mode mode, const_tree type,
5603 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
5605 HOST_WIDE_INT bytes =
5606 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5607 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5609 /* Variable sized entities are always passed/returned in memory. */
5613 if (mode != VOIDmode
5614 && targetm.calls.must_pass_in_stack (mode, type))
5617 if (type && AGGREGATE_TYPE_P (type))
5621 enum x86_64_reg_class subclasses[MAX_CLASSES];
5623 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
5627 for (i = 0; i < words; i++)
5628 classes[i] = X86_64_NO_CLASS;
/* Zero sized arrays or structures are NO_CLASS.  We return 0 to
   signal the memory class, so handle it as a special case.  */
5634 classes[0] = X86_64_NO_CLASS;
5638 /* Classify each field of record and merge classes. */
5639 switch (TREE_CODE (type))
5642 /* And now merge the fields of structure. */
5643 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5645 if (TREE_CODE (field) == FIELD_DECL)
5649 if (TREE_TYPE (field) == error_mark_node)
5652 /* Bitfields are always classified as integer. Handle them
5653 early, since later code would consider them to be
5654 misaligned integers. */
5655 if (DECL_BIT_FIELD (field))
5657 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5658 i < ((int_bit_position (field) + (bit_offset % 64))
5659 + tree_low_cst (DECL_SIZE (field), 0)
5660      + 63) / 8 / 8; i++)
5661   classes[i] =
5662     merge_classes (X86_64_INTEGER_CLASS,
5663                    classes[i]);
5669 type = TREE_TYPE (field);
5671 /* A flexible array member is ignored.  */
5672 if (TYPE_MODE (type) == BLKmode
5673 && TREE_CODE (type) == ARRAY_TYPE
5674 && TYPE_SIZE (type) == NULL_TREE
5675 && TYPE_DOMAIN (type) != NULL_TREE
5676 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
5681 if (!warned && warn_psabi)
5684 inform (input_location,
5685 "the ABI of passing struct with"
5686 " a flexible array member has"
5687 " changed in GCC 4.4");
5691 num = classify_argument (TYPE_MODE (type), type,
5692                          subclasses,
5693 (int_bit_position (field)
5694 + bit_offset) % 256);
5697 pos = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5698 for (i = 0; i < num && (i + pos) < words; i++)
5699 classes[i + pos] =
5700   merge_classes (subclasses[i], classes[i + pos]);
5707 /* Arrays are handled as small records. */
5710 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
5711                          TREE_TYPE (type), subclasses, bit_offset);
5712 if (!num)
5713   return 0;
5715 /* The partial classes are now full classes. */
5716 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
5717 subclasses[0] = X86_64_SSE_CLASS;
5718 if (subclasses[0] == X86_64_INTEGERSI_CLASS
5719 && !((bit_offset % 64) == 0 && bytes == 4))
5720 subclasses[0] = X86_64_INTEGER_CLASS;
5722 for (i = 0; i < words; i++)
5723 classes[i] = subclasses[i % num];
5725 break;
5727 case UNION_TYPE:
5728 case QUAL_UNION_TYPE:
5729 /* Unions are similar to RECORD_TYPE but the offset is always 0.  */
5731 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5733 if (TREE_CODE (field) == FIELD_DECL)
5737 if (TREE_TYPE (field) == error_mark_node)
5740 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
5741                          TREE_TYPE (field), subclasses,
5742                          bit_offset);
5743 if (!num)
5744   return 0;
5745 for (i = 0; i < num; i++)
5746 classes[i] = merge_classes (subclasses[i], classes[i]);
5757 /* When size > 16 bytes, if the first one isn't
5758    X86_64_SSE_CLASS or any other ones aren't
5759    X86_64_SSEUP_CLASS, everything should be passed in
5760    memory.  */
5761 if (classes[0] != X86_64_SSE_CLASS)
5762   return 0;
5764 for (i = 1; i < words; i++)
5765 if (classes[i] != X86_64_SSEUP_CLASS)
5766   return 0;
5769 /* Final merger cleanup. */
5770 for (i = 0; i < words; i++)
5772 /* If one class is MEMORY, everything should be passed in
5773    memory.  */
5774 if (classes[i] == X86_64_MEMORY_CLASS)
5775   return 0;
5777 /* The X86_64_SSEUP_CLASS should always be preceded by
5778    X86_64_SSE_CLASS or X86_64_SSEUP_CLASS.  */
5779 if (classes[i] == X86_64_SSEUP_CLASS
5780 && classes[i - 1] != X86_64_SSE_CLASS
5781 && classes[i - 1] != X86_64_SSEUP_CLASS)
5783 /* The first one should never be X86_64_SSEUP_CLASS. */
5784 gcc_assert (i != 0);
5785 classes[i] = X86_64_SSE_CLASS;
5788 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
5789 everything should be passed in memory. */
5790 if (classes[i] == X86_64_X87UP_CLASS
5791 && (classes[i - 1] != X86_64_X87_CLASS))
5795 /* The first one should never be X86_64_X87UP_CLASS. */
5796 gcc_assert (i != 0);
5797 if (!warned && warn_psabi)
5800 inform (input_location,
5801 "the ABI of passing union with long double"
5802 " has changed in GCC 4.4");
5810 /* Compute the alignment needed.  We align all types to their natural
5811    boundaries, with the exception of XFmode, which is aligned to 64 bits.  */
5812 if (mode != VOIDmode && mode != BLKmode)
5814 int mode_alignment = GET_MODE_BITSIZE (mode);
5817 mode_alignment = 128;
5818 else if (mode == XCmode)
5819 mode_alignment = 256;
5820 if (COMPLEX_MODE_P (mode))
5821 mode_alignment /= 2;
5822 /* Misaligned fields are always returned in memory. */
5823 if (bit_offset % mode_alignment)
5827 /* For V1xx modes, just use the base mode.  */
5828 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
5829 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
5830 mode = GET_MODE_INNER (mode);
5832 /* Classification of atomic types. */
5837 classes[0] = X86_64_SSE_CLASS;
5840 classes[0] = X86_64_SSE_CLASS;
5841 classes[1] = X86_64_SSEUP_CLASS;
5851 int size = (bit_offset % 64) + (int) GET_MODE_BITSIZE (mode);
5855 classes[0] = X86_64_INTEGERSI_CLASS;
5858 else if (size <= 64)
5860 classes[0] = X86_64_INTEGER_CLASS;
5863 else if (size <= 64+32)
5865 classes[0] = X86_64_INTEGER_CLASS;
5866 classes[1] = X86_64_INTEGERSI_CLASS;
5869 else if (size <= 64+64)
5871 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5879 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5883 /* OImode shouldn't be used directly. */
5888 if (!(bit_offset % 64))
5889 classes[0] = X86_64_SSESF_CLASS;
5891 classes[0] = X86_64_SSE_CLASS;
5894 classes[0] = X86_64_SSEDF_CLASS;
5897 classes[0] = X86_64_X87_CLASS;
5898 classes[1] = X86_64_X87UP_CLASS;
5901 classes[0] = X86_64_SSE_CLASS;
5902 classes[1] = X86_64_SSEUP_CLASS;
5905 classes[0] = X86_64_SSE_CLASS;
5906 if (!(bit_offset % 64))
5912 if (!warned && warn_psabi)
5915 inform (input_location,
5916 "the ABI of passing structure with complex float"
5917 " member has changed in GCC 4.4");
5919 classes[1] = X86_64_SSESF_CLASS;
5923 classes[0] = X86_64_SSEDF_CLASS;
5924 classes[1] = X86_64_SSEDF_CLASS;
5927 classes[0] = X86_64_COMPLEX_X87_CLASS;
5930 /* This mode is larger than 16 bytes.  */
5938 classes[0] = X86_64_SSE_CLASS;
5939 classes[1] = X86_64_SSEUP_CLASS;
5940 classes[2] = X86_64_SSEUP_CLASS;
5941 classes[3] = X86_64_SSEUP_CLASS;
5949 classes[0] = X86_64_SSE_CLASS;
5950 classes[1] = X86_64_SSEUP_CLASS;
5958 classes[0] = X86_64_SSE_CLASS;
5964 gcc_assert (VECTOR_MODE_P (mode));
5969 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
5971 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
5972 classes[0] = X86_64_INTEGERSI_CLASS;
5974 classes[0] = X86_64_INTEGER_CLASS;
5975 classes[1] = X86_64_INTEGER_CLASS;
5976 return 1 + (bytes > 8);
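/* Typical results of classify_argument (illustrative sketch, assuming the
   standard SysV x86-64 psABI rules implemented above):

       long x;                       -> 1 word:  { INTEGER }
       struct { double d; };         -> 1 word:  { SSEDF }
       struct { double d; int i; };  -> 2 words: { SSEDF, INTEGERSI }
       struct { long double ld; };   -> 2 words: { X87, X87UP }
       char big[64];                 -> 0 (passed in memory)

   The caller then maps the INTEGER* classes onto general-purpose argument
   registers and the SSE* classes onto XMM registers.  */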
5980 /* Examine the argument and set the number of registers required in each
5981    class.  Return 0 iff the parameter should be passed in memory.  */
5982 static int
5983 examine_argument (enum machine_mode mode, const_tree type, int in_return,
5984 int *int_nregs, int *sse_nregs)
5986 enum x86_64_reg_class regclass[MAX_CLASSES];
5987 int n = classify_argument (mode, type, regclass, 0);
5989 *int_nregs = 0;
5990 *sse_nregs = 0;
5991 if (!n)
5992   return 0;
5993 for (n--; n >= 0; n--)
5994 switch (regclass[n])
5996 case X86_64_INTEGER_CLASS:
5997 case X86_64_INTEGERSI_CLASS:
5998   (*int_nregs)++;
5999   break;
6000 case X86_64_SSE_CLASS:
6001 case X86_64_SSESF_CLASS:
6002 case X86_64_SSEDF_CLASS:
6003   (*sse_nregs)++;
6004   break;
6005 case X86_64_NO_CLASS:
6006 case X86_64_SSEUP_CLASS:
6007   break;
6008 case X86_64_X87_CLASS:
6009 case X86_64_X87UP_CLASS:
6010   if (!in_return)
6011     return 0;
6012   break;
6013 case X86_64_COMPLEX_X87_CLASS:
6014 return in_return ? 2 : 0;
6015 case X86_64_MEMORY_CLASS:
6016   gcc_unreachable ();
6017 }
6018 return 1;
6019 }
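/* Examples of what examine_argument computes (illustrative sketch under
   the SysV x86-64 ABI):

       struct { long a; double b; }   -> int_nregs = 1, sse_nregs = 1
       __int128                       -> int_nregs = 2, sse_nregs = 0
       long double (in_return == 0)   -> returns 0: passed in memory

   A zero return means no register assignment is possible and the
   argument goes on the stack.  */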
6021 /* Construct container for the argument used by GCC interface. See
6022    FUNCTION_ARG for the detailed description.  */
6024 static rtx
6025 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
6026 const_tree type, int in_return, int nintregs, int nsseregs,
6027 const int *intreg, int sse_regno)
6029 /* The following variables hold the static issued_error state. */
6030 static bool issued_sse_arg_error;
6031 static bool issued_sse_ret_error;
6032 static bool issued_x87_ret_error;
6034 enum machine_mode tmpmode;
6035 HOST_WIDE_INT bytes =
6036   (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6037 enum x86_64_reg_class regclass[MAX_CLASSES];
6041 int needed_sseregs, needed_intregs;
6042 rtx exp[MAX_CLASSES];
6045 n = classify_argument (mode, type, regclass, 0);
6048 if (!examine_argument (mode, type, in_return, &needed_intregs,
6051 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
6054 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
6055 some less clueful developer tries to use floating-point anyway. */
6056 if (needed_sseregs && !TARGET_SSE)
6060 if (!issued_sse_ret_error)
6062 error ("SSE register return with SSE disabled");
6063 issued_sse_ret_error = true;
6066 else if (!issued_sse_arg_error)
6068 error ("SSE register argument with SSE disabled");
6069 issued_sse_arg_error = true;
6074 /* Likewise, error if the ABI requires us to return values in the
6075 x87 registers and the user specified -mno-80387. */
6076 if (!TARGET_80387 && in_return)
6077 for (i = 0; i < n; i++)
6078 if (regclass[i] == X86_64_X87_CLASS
6079 || regclass[i] == X86_64_X87UP_CLASS
6080 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
6082 if (!issued_x87_ret_error)
6084 error ("x87 register return with x87 disabled");
6085 issued_x87_ret_error = true;
6090 /* First construct simple cases.  Avoid SCmode, since we want to use
6091    a single register to pass this type.  */
6092 if (n == 1 && mode != SCmode)
6093 switch (regclass[0])
6095 case X86_64_INTEGER_CLASS:
6096 case X86_64_INTEGERSI_CLASS:
6097 return gen_rtx_REG (mode, intreg[0]);
6098 case X86_64_SSE_CLASS:
6099 case X86_64_SSESF_CLASS:
6100 case X86_64_SSEDF_CLASS:
6101 if (mode != BLKmode)
6102 return gen_reg_or_parallel (mode, orig_mode,
6103 SSE_REGNO (sse_regno));
6105 case X86_64_X87_CLASS:
6106 case X86_64_COMPLEX_X87_CLASS:
6107 return gen_rtx_REG (mode, FIRST_STACK_REG);
6108 case X86_64_NO_CLASS:
6109 /* Zero sized array, struct or class. */
6114 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
6115 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
6116 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
6118 && regclass[0] == X86_64_SSE_CLASS
6119 && regclass[1] == X86_64_SSEUP_CLASS
6120 && regclass[2] == X86_64_SSEUP_CLASS
6121 && regclass[3] == X86_64_SSEUP_CLASS
6123 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
6126 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
6127 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
6128 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
6129 && regclass[1] == X86_64_INTEGER_CLASS
6130 && (mode == CDImode || mode == TImode || mode == TFmode)
6131 && intreg[0] + 1 == intreg[1])
6132 return gen_rtx_REG (mode, intreg[0]);
6134 /* Otherwise figure out the entries of the PARALLEL. */
6135 for (i = 0; i < n; i++)
6139 switch (regclass[i])
6141 case X86_64_NO_CLASS:
6143 case X86_64_INTEGER_CLASS:
6144 case X86_64_INTEGERSI_CLASS:
6145 /* Merge TImodes on aligned occasions here too. */
6146 if (i * 8 + 8 > bytes)
6147 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
6148 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
6152 /* We've requested 24 bytes for which we have no mode.  Use DImode.  */
6153 if (tmpmode == BLKmode)
6155 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6156 gen_rtx_REG (tmpmode, *intreg),
6160 case X86_64_SSESF_CLASS:
6161 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6162 gen_rtx_REG (SFmode,
6163 SSE_REGNO (sse_regno)),
6167 case X86_64_SSEDF_CLASS:
6168 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6169 gen_rtx_REG (DFmode,
6170 SSE_REGNO (sse_regno)),
6174 case X86_64_SSE_CLASS:
6182 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
6192 && regclass[1] == X86_64_SSEUP_CLASS
6193 && regclass[2] == X86_64_SSEUP_CLASS
6194 && regclass[3] == X86_64_SSEUP_CLASS);
6201 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6202 gen_rtx_REG (tmpmode,
6203 SSE_REGNO (sse_regno)),
6212 /* Empty aligned struct, union or class. */
6216 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
6217 for (i = 0; i < nexps; i++)
6218 XVECEXP (ret, 0, i) = exp [i];
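/* For illustration (a sketch, assuming the SysV classes shown above): for
   struct { double d; long l; } the PARALLEL built here looks roughly like

       (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                  (expr_list (reg:DI di)   (const_int 8))])

   i.e. the first eightbyte is fetched from XMM0 and the second from RDI,
   with each const_int giving that piece's byte offset in the object.  */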
6222 /* Update the data in CUM to advance over an argument of mode MODE
6223 and data type TYPE. (TYPE is null for libcalls where that information
6224 may not be available.) */
6227 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6228 const_tree type, HOST_WIDE_INT bytes,
6229 HOST_WIDE_INT words)
6245 cum->words += words;
6246 cum->nregs -= words;
6247 cum->regno += words;
6249 if (cum->nregs <= 0)
6257 /* OImode shouldn't be used directly. */
6261 if (cum->float_in_sse < 2)
6264 if (cum->float_in_sse < 1)
6281 if (!type || !AGGREGATE_TYPE_P (type))
6283 cum->sse_words += words;
6284 cum->sse_nregs -= 1;
6285 cum->sse_regno += 1;
6286 if (cum->sse_nregs <= 0)
6300 if (!type || !AGGREGATE_TYPE_P (type))
6302 cum->mmx_words += words;
6303 cum->mmx_nregs -= 1;
6304 cum->mmx_regno += 1;
6305 if (cum->mmx_nregs <= 0)
6316 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6317 const_tree type, HOST_WIDE_INT words, bool named)
6319 int int_nregs, sse_nregs;
6321 /* Unnamed 256bit vector mode parameters are passed on the stack.  */
6322 if (!named && VALID_AVX256_REG_MODE (mode))
6325 if (examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
6326 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
6328 cum->nregs -= int_nregs;
6329 cum->sse_nregs -= sse_nregs;
6330 cum->regno += int_nregs;
6331 cum->sse_regno += sse_nregs;
6335 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
6336 cum->words = (cum->words + align - 1) & ~(align - 1);
6337 cum->words += words;
6342 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
6343 HOST_WIDE_INT words)
6345 /* Otherwise, this should be passed indirectly.  */
6346 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
6348 cum->words += words;
6356 /* Update the data in CUM to advance over an argument of mode MODE and
6357 data type TYPE. (TYPE is null for libcalls where that information
6358 may not be available.) */
6361 ix86_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
6362 const_tree type, bool named)
6364 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6365 HOST_WIDE_INT bytes, words;
6367 if (mode == BLKmode)
6368 bytes = int_size_in_bytes (type);
6370 bytes = GET_MODE_SIZE (mode);
6371 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6374 mode = type_natural_mode (type, NULL);
6376 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6377 function_arg_advance_ms_64 (cum, bytes, words);
6378 else if (TARGET_64BIT)
6379 function_arg_advance_64 (cum, mode, type, words, named);
6381 function_arg_advance_32 (cum, mode, type, bytes, words);
6384 /* Define where to put the arguments to a function.
6385 Value is zero to push the argument on the stack,
6386 or a hard register in which to store the argument.
6388 MODE is the argument's machine mode.
6389 TYPE is the data type of the argument (as a tree).
6390 This is null for libcalls where that information may
6392 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6393 the preceding args and about the function being called.
6394 NAMED is nonzero if this argument is a named parameter
6395 (otherwise it is an extra parameter matching an ellipsis). */
6398 function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6399 enum machine_mode orig_mode, const_tree type,
6400 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
6402 static bool warnedsse, warnedmmx;
6404 /* Avoid the AL settings for the Unix64 ABI. */
6405 if (mode == VOIDmode)
6421 if (words <= cum->nregs)
6423 int regno = cum->regno;
6425 /* Fastcall allocates the first two DWORD (SImode) or
6426    smaller arguments to ECX and EDX if it isn't an
6427    aggregate type.  */
6432 || (type && AGGREGATE_TYPE_P (type)))
6435 /* ECX, not EAX, is the first allocated register.  */
6436 if (regno == AX_REG)
6439 return gen_rtx_REG (mode, regno);
6444 if (cum->float_in_sse < 2)
6447 if (cum->float_in_sse < 1)
6451 /* In 32bit, we pass TImode in xmm registers. */
6458 if (!type || !AGGREGATE_TYPE_P (type))
6460 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
6463 warning (0, "SSE vector argument without SSE enabled "
6467 return gen_reg_or_parallel (mode, orig_mode,
6468 cum->sse_regno + FIRST_SSE_REG);
6473 /* OImode shouldn't be used directly. */
6482 if (!type || !AGGREGATE_TYPE_P (type))
6485 return gen_reg_or_parallel (mode, orig_mode,
6486 cum->sse_regno + FIRST_SSE_REG);
6496 if (!type || !AGGREGATE_TYPE_P (type))
6498 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
6501 warning (0, "MMX vector argument without MMX enabled "
6505 return gen_reg_or_parallel (mode, orig_mode,
6506 cum->mmx_regno + FIRST_MMX_REG);
6515 function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6516 enum machine_mode orig_mode, const_tree type, bool named)
6518 /* Handle a hidden AL argument containing number of registers
6519 for varargs x86-64 functions. */
6520 if (mode == VOIDmode)
6521 return GEN_INT (cum->maybe_vaarg
6522 ? (cum->sse_nregs < 0
6523 ? X86_64_SSE_REGPARM_MAX
6538 /* Unnamed 256bit vector mode parameters are passed on the stack.  */
6544 return construct_container (mode, orig_mode, type, 0, cum->nregs,
6546 &x86_64_int_parameter_registers [cum->regno],
6551 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6552 enum machine_mode orig_mode, bool named,
6553 HOST_WIDE_INT bytes)
6557 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
6558    We use the value -2 to specify that the current function call is MSABI.  */
6559 if (mode == VOIDmode)
6560 return GEN_INT (-2);
6562 /* If we've run out of registers, it goes on the stack. */
6563 if (cum->nregs == 0)
6566 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
6568 /* Only floating point modes are passed in anything but integer regs. */
6569 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
6572 regno = cum->regno + FIRST_SSE_REG;
6577 /* Unnamed floating parameters are passed in both the
6578 SSE and integer registers. */
6579 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
6580 t2 = gen_rtx_REG (mode, regno);
6581 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
6582 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
6583 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
6586 /* Handle aggregate types passed in registers.  */
6587 if (orig_mode == BLKmode)
6589 if (bytes > 0 && bytes <= 8)
6590 mode = (bytes > 4 ? DImode : SImode);
6591 if (mode == BLKmode)
6595 return gen_reg_or_parallel (mode, orig_mode, regno);
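/* For illustration (sketch of the Microsoft x64 convention handled
   above): each of the first four arguments owns one positional slot,
   shared between the integer and SSE register files:

       slot 0: RCX or XMM0      slot 2: R8 or XMM2
       slot 1: RDX or XMM1      slot 3: R9 or XMM3

   so f (int a, double b) passes A in RCX and B in XMM1, never XMM0.  */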
6598 /* Return where to put the arguments to a function.
6599    Return zero to push the argument on the stack, or a hard register
6600    in which to store the argument.
6601 MODE is the argument's machine mode. TYPE is the data type of the
6602 argument. It is null for libcalls where that information may not be
6603 available. CUM gives information about the preceding args and about
6604 the function being called. NAMED is nonzero if this argument is a
6605 named parameter (otherwise it is an extra parameter matching an
6609 ix86_function_arg (cumulative_args_t cum_v, enum machine_mode omode,
6610 const_tree type, bool named)
6612 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6613 enum machine_mode mode = omode;
6614 HOST_WIDE_INT bytes, words;
6617 if (mode == BLKmode)
6618 bytes = int_size_in_bytes (type);
6620 bytes = GET_MODE_SIZE (mode);
6621 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6623 /* To simplify the code below, represent vector types with a vector mode
6624 even if MMX/SSE are not active. */
6625 if (type && TREE_CODE (type) == VECTOR_TYPE)
6626 mode = type_natural_mode (type, cum);
6628 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6629 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
6630 else if (TARGET_64BIT)
6631 arg = function_arg_64 (cum, mode, omode, type, named);
6633 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
6635 if (TARGET_VZEROUPPER && function_pass_avx256_p (arg))
6637 /* This argument uses 256bit AVX modes. */
6639 cfun->machine->callee_pass_avx256_p = true;
6641 cfun->machine->caller_pass_avx256_p = true;
6647 /* A C expression that indicates when an argument must be passed by
6648 reference. If nonzero for an argument, a copy of that argument is
6649 made in memory and a pointer to the argument is passed instead of
6650 the argument itself. The pointer is passed in whatever way is
6651 appropriate for passing a pointer to that type. */
6654 ix86_pass_by_reference (cumulative_args_t cum_v ATTRIBUTE_UNUSED,
6655 enum machine_mode mode ATTRIBUTE_UNUSED,
6656 const_tree type, bool named ATTRIBUTE_UNUSED)
6658 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6660 /* See Windows x64 Software Convention. */
6661 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6663 int msize = (int) GET_MODE_SIZE (mode);
6666 /* Arrays are passed by reference. */
6667 if (TREE_CODE (type) == ARRAY_TYPE)
6670 if (AGGREGATE_TYPE_P (type))
6672 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
6673 are passed by reference. */
6674 msize = int_size_in_bytes (type);
6678 /* __m128 is passed by reference. */
6680 case 1: case 2: case 4: case 8:
6686 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
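/* Consequences of the Win64 rule above (illustrative sketch): an
   aggregate is passed by value only when its size is exactly 1, 2, 4
   or 8 bytes:

       struct { int a; }           ->  4 bytes, by value (in a GPR)
       struct { int a; char b; }   ->  8 bytes after padding, by value
       struct { int a[3]; }        -> 12 bytes, by hidden reference

   Arrays and variable-sized objects are likewise passed by reference.  */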
6692 /* Return true when TYPE should be 128bit aligned for 32bit argument
6693 passing ABI. XXX: This function is obsolete and is only used for
6694 checking psABI compatibility with previous versions of GCC. */
6697 ix86_compat_aligned_value_p (const_tree type)
6699 enum machine_mode mode = TYPE_MODE (type);
6700 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
6704 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
6706 if (TYPE_ALIGN (type) < 128)
6709 if (AGGREGATE_TYPE_P (type))
6711 /* Walk the aggregates recursively. */
6712 switch (TREE_CODE (type))
6716 case QUAL_UNION_TYPE:
6720 /* Walk all the structure fields. */
6721 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6723 if (TREE_CODE (field) == FIELD_DECL
6724 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
6731 /* Just for use if some language passes arrays by value.  */
6732 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
6743 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
6744 XXX: This function is obsolete and is only used for checking psABI
6745 compatibility with previous versions of GCC. */
6748 ix86_compat_function_arg_boundary (enum machine_mode mode,
6749 const_tree type, unsigned int align)
6751 /* In 32bit, only _Decimal128 and __float128 are aligned to their
6752 natural boundaries. */
6753 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
6755 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
6756    make an exception for SSE modes since these require 128bit
6757    alignment.
6759 The handling here differs from field_alignment. ICC aligns MMX
6760 arguments to 4 byte boundaries, while structure fields are aligned
6761 to 8 byte boundaries. */
6764 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
6765 align = PARM_BOUNDARY;
6769 if (!ix86_compat_aligned_value_p (type))
6770 align = PARM_BOUNDARY;
6773 if (align > BIGGEST_ALIGNMENT)
6774 align = BIGGEST_ALIGNMENT;
6778 /* Return true when TYPE should be 128bit aligned for 32bit argument
6779    passing ABI.  */
6781 static bool
6782 ix86_contains_aligned_value_p (const_tree type)
6784 enum machine_mode mode = TYPE_MODE (type);
6786 if (mode == XFmode || mode == XCmode)
6789 if (TYPE_ALIGN (type) < 128)
6792 if (AGGREGATE_TYPE_P (type))
6794 /* Walk the aggregates recursively. */
6795 switch (TREE_CODE (type))
6799 case QUAL_UNION_TYPE:
6803 /* Walk all the structure fields. */
6804 for (field = TYPE_FIELDS (type);
6806 field = DECL_CHAIN (field))
6808 if (TREE_CODE (field) == FIELD_DECL
6809 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
6816 /* Just for use if some language passes arrays by value.  */
6817 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
6826 return TYPE_ALIGN (type) >= 128;
6831 /* Gives the alignment boundary, in bits, of an argument with the
6832 specified mode and type. */
6835 ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
6840 /* Since the main variant type is used for the call, convert the
6841    type to its main variant.  */
6842 type = TYPE_MAIN_VARIANT (type);
6843 align = TYPE_ALIGN (type);
6846 align = GET_MODE_ALIGNMENT (mode);
6847 if (align < PARM_BOUNDARY)
6848 align = PARM_BOUNDARY;
6852 unsigned int saved_align = align;
6856 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
6859 if (mode == XFmode || mode == XCmode)
6860 align = PARM_BOUNDARY;
6862 else if (!ix86_contains_aligned_value_p (type))
6863 align = PARM_BOUNDARY;
6866 align = PARM_BOUNDARY;
6871 && align != ix86_compat_function_arg_boundary (mode, type,
6875 inform (input_location,
6876 "The ABI for passing parameters with %d-byte"
6877 " alignment has changed in GCC 4.6",
6878 align / BITS_PER_UNIT);
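/* For illustration (a sketch of the logic above, not exhaustive): on the
   32-bit ABI ordinary scalars get PARM_BOUNDARY (32 bits), while an SSE
   vector such as __m128 keeps its natural 128-bit boundary; on the 64-bit
   ABI natural alignment is used, clamped below at PARM_BOUNDARY and above
   at BIGGEST_ALIGNMENT.  */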
6885 /* Return true if N is a possible register number of function value. */
6888 ix86_function_value_regno_p (const unsigned int regno)
6895 case FIRST_FLOAT_REG:
6896 /* TODO: The function should depend on current function ABI but
6897    builtins.c would need updating then.  Therefore we use the
6898    default ABI.  */
6899 if (TARGET_64BIT && ix86_abi == MS_ABI)
6901 return TARGET_FLOAT_RETURNS_IN_80387;
6907 if (TARGET_MACHO || TARGET_64BIT)
6915 /* Define how to find the value returned by a function.
6916 VALTYPE is the data type of the value (as a tree).
6917 If the precise function being called is known, FUNC is its FUNCTION_DECL;
6918 otherwise, FUNC is 0. */
6921 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
6922 const_tree fntype, const_tree fn)
6926 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
6927 we normally prevent this case when mmx is not available. However
6928 some ABIs may require the result to be returned like DImode. */
6929 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6930 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
6932 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
6933 we prevent this case when sse is not available. However some ABIs
6934 may require the result to be returned like integer TImode. */
6935 else if (mode == TImode
6936 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6937 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
6939 /* 32-byte vector modes in %ymm0. */
6940 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
6941 regno = TARGET_AVX ? FIRST_SSE_REG : 0;
6943 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
6944 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
6945 regno = FIRST_FLOAT_REG;
6947 /* Most things go in %eax. */
6950 /* Override FP return register with %xmm0 for local functions when
6951 SSE math is enabled or for functions with sseregparm attribute. */
6952 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
6954 int sse_level = ix86_function_sseregparm (fntype, fn, false);
6955 if ((sse_level >= 1 && mode == SFmode)
6956 || (sse_level == 2 && mode == DFmode))
6957 regno = FIRST_SSE_REG;
6960 /* OImode shouldn't be used directly. */
6961 gcc_assert (mode != OImode);
6963 return gen_rtx_REG (orig_mode, regno);
6967 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
6972 /* Handle libcalls, which don't provide a type node. */
6973 if (valtype == NULL)
6985 return gen_rtx_REG (mode, FIRST_SSE_REG);
6988 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
6992 return gen_rtx_REG (mode, AX_REG);
6996 ret = construct_container (mode, orig_mode, valtype, 1,
6997 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
6998 x86_64_int_return_registers, 0);
7000 /* For zero-sized structures, construct_container returns NULL, but we
7001    need to keep the rest of the compiler happy by returning a meaningful value.  */
7003 ret = gen_rtx_REG (orig_mode, AX_REG);
7009 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
7011 unsigned int regno = AX_REG;
7015 switch (GET_MODE_SIZE (mode))
7018 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7019 && !COMPLEX_MODE_P (mode))
7020 regno = FIRST_SSE_REG;
7024 if (mode == SFmode || mode == DFmode)
7025 regno = FIRST_SSE_REG;
7031 return gen_rtx_REG (orig_mode, regno);
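/* Return registers chosen above for the Microsoft x64 convention
   (illustrative sketch):

       int, long long, structs of size 1/2/4/8  -> RAX
       float, double                            -> XMM0
       __m128 (16-byte vector)                  -> XMM0

   Anything else has already been forced into memory by
   return_in_memory_ms_64 further below.  */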
7035 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
7036 enum machine_mode orig_mode, enum machine_mode mode)
7038 const_tree fn, fntype;
7041 if (fntype_or_decl && DECL_P (fntype_or_decl))
7042 fn = fntype_or_decl;
7043 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
7045 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
7046 return function_value_ms_64 (orig_mode, mode);
7047 else if (TARGET_64BIT)
7048 return function_value_64 (orig_mode, mode, valtype);
7050 return function_value_32 (orig_mode, mode, fntype, fn);
7054 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
7055 bool outgoing ATTRIBUTE_UNUSED)
7057 enum machine_mode mode, orig_mode;
7059 orig_mode = TYPE_MODE (valtype);
7060 mode = type_natural_mode (valtype, NULL);
7061 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
7065 ix86_libcall_value (enum machine_mode mode)
7067 return ix86_function_value_1 (NULL, NULL, mode, mode);
7070 /* Return true iff type is returned in memory. */
7072 static bool ATTRIBUTE_UNUSED
7073 return_in_memory_32 (const_tree type, enum machine_mode mode)
7077 if (mode == BLKmode)
7080 size = int_size_in_bytes (type);
7082 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
7085 if (VECTOR_MODE_P (mode) || mode == TImode)
7087 /* User-created vectors small enough to fit in EAX. */
7091 /* MMX/3dNow values are returned in MM0,
7092    except when it doesn't exist or the ABI prescribes otherwise.  */
7094 return !TARGET_MMX || TARGET_VECT8_RETURNS;
7096 /* SSE values are returned in XMM0, except when it doesn't exist. */
7100 /* AVX values are returned in YMM0, except when it doesn't exist. */
7111 /* OImode shouldn't be used directly. */
7112 gcc_assert (mode != OImode);
7117 static bool ATTRIBUTE_UNUSED
7118 return_in_memory_64 (const_tree type, enum machine_mode mode)
7120 int needed_intregs, needed_sseregs;
7121 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
7124 static bool ATTRIBUTE_UNUSED
7125 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
7127 HOST_WIDE_INT size = int_size_in_bytes (type);
7129 /* __m128 is returned in xmm0. */
7130 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7131 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
7134 /* Otherwise, the size must be exactly in [1248]. */
7135 return size != 1 && size != 2 && size != 4 && size != 8;
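/* So, for illustration, under the MS ABI a struct of size 3, 5 or 12
   bytes is returned via a hidden pointer, while sizes 1, 2, 4 and 8
   come back in RAX and a 16-byte vector comes back in XMM0.  */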
7139 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7141 #ifdef SUBTARGET_RETURN_IN_MEMORY
7142 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
7144 const enum machine_mode mode = type_natural_mode (type, NULL);
7148 if (ix86_function_type_abi (fntype) == MS_ABI)
7149 return return_in_memory_ms_64 (type, mode);
7151 return return_in_memory_64 (type, mode);
7154 return return_in_memory_32 (type, mode);
7158 /* When returning SSE vector types, we have a choice of either
7159 (1) being abi incompatible with a -march switch, or
7160 (2) generating an error.
7161 Given no good solution, I think the safest thing is one warning.
7162 The user won't be able to use -Werror, but....
7164 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
7165 called in response to actually generating a caller or callee that
7166 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
7167 via aggregate_value_p for general type probing from tree-ssa. */
7170 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
7172 static bool warnedsse, warnedmmx;
7174 if (!TARGET_64BIT && type)
7176 /* Look at the return type of the function, not the function type. */
7177 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
7179 if (!TARGET_SSE && !warnedsse)
7182 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7185 warning (0, "SSE vector return without SSE enabled "
7190 if (!TARGET_MMX && !warnedmmx)
7192 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7195 warning (0, "MMX vector return without MMX enabled "
7205 /* Create the va_list data type. */
7207 /* Returns the calling-convention-specific va_list data type.
7208    The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI.  */
7211 ix86_build_builtin_va_list_abi (enum calling_abi abi)
7213 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
7215 /* For i386 we use a plain pointer to the argument area.  */
7216 if (!TARGET_64BIT || abi == MS_ABI)
7217 return build_pointer_type (char_type_node);
7219 record = lang_hooks.types.make_type (RECORD_TYPE);
7220 type_decl = build_decl (BUILTINS_LOCATION,
7221 TYPE_DECL, get_identifier ("__va_list_tag"), record);
7223 f_gpr = build_decl (BUILTINS_LOCATION,
7224 FIELD_DECL, get_identifier ("gp_offset"),
7225 unsigned_type_node);
7226 f_fpr = build_decl (BUILTINS_LOCATION,
7227 FIELD_DECL, get_identifier ("fp_offset"),
7228 unsigned_type_node);
7229 f_ovf = build_decl (BUILTINS_LOCATION,
7230 FIELD_DECL, get_identifier ("overflow_arg_area"),
7232 f_sav = build_decl (BUILTINS_LOCATION,
7233 FIELD_DECL, get_identifier ("reg_save_area"),
7236 va_list_gpr_counter_field = f_gpr;
7237 va_list_fpr_counter_field = f_fpr;
7239 DECL_FIELD_CONTEXT (f_gpr) = record;
7240 DECL_FIELD_CONTEXT (f_fpr) = record;
7241 DECL_FIELD_CONTEXT (f_ovf) = record;
7242 DECL_FIELD_CONTEXT (f_sav) = record;
7244 TYPE_STUB_DECL (record) = type_decl;
7245 TYPE_NAME (record) = type_decl;
7246 TYPE_FIELDS (record) = f_gpr;
7247 DECL_CHAIN (f_gpr) = f_fpr;
7248 DECL_CHAIN (f_fpr) = f_ovf;
7249 DECL_CHAIN (f_ovf) = f_sav;
7251 layout_type (record);
7253 /* The correct type is an array type of one element. */
7254 return build_array_type (record, build_index_type (size_zero_node));
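/* The record built above matches the familiar SysV x86-64 va_list
   layout; a C-level sketch of the equivalent declaration:

       typedef struct __va_list_tag {
         unsigned int gp_offset;       // offset into reg_save_area for GPRs
         unsigned int fp_offset;       // offset into reg_save_area for XMMs
         void *overflow_arg_area;      // next argument on the stack
         void *reg_save_area;          // where the prologue dumped registers
       } va_list[1];

   The array-of-one trick makes va_list decay to a pointer when passed.  */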
7257 /* Set up the builtin va_list data type and, for 64-bit, the additional
7258    calling-convention-specific va_list data types.  */
7261 ix86_build_builtin_va_list (void)
7263 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
7265 /* Initialize the ABI-specific va_list builtin types.  */
7269 if (ix86_abi == MS_ABI)
7271 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
7272 if (TREE_CODE (t) != RECORD_TYPE)
7273 t = build_variant_type_copy (t);
7274 sysv_va_list_type_node = t;
7279 if (TREE_CODE (t) != RECORD_TYPE)
7280 t = build_variant_type_copy (t);
7281 sysv_va_list_type_node = t;
7283 if (ix86_abi != MS_ABI)
7285 t = ix86_build_builtin_va_list_abi (MS_ABI);
7286 if (TREE_CODE (t) != RECORD_TYPE)
7287 t = build_variant_type_copy (t);
7288 ms_va_list_type_node = t;
7293 if (TREE_CODE (t) != RECORD_TYPE)
7294 t = build_variant_type_copy (t);
7295 ms_va_list_type_node = t;
7302 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
7305 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
7311 /* GPR size of varargs save area. */
7312 if (cfun->va_list_gpr_size)
7313 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
7315 ix86_varargs_gpr_size = 0;
7317 /* FPR size of varargs save area. We don't need it if we don't pass
7318 anything in SSE registers. */
7319 if (TARGET_SSE && cfun->va_list_fpr_size)
7320 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
7322 ix86_varargs_fpr_size = 0;
7324 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
7327 save_area = frame_pointer_rtx;
7328 set = get_varargs_alias_set ();
7330 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
7331 if (max > X86_64_REGPARM_MAX)
7332 max = X86_64_REGPARM_MAX;
7334 for (i = cum->regno; i < max; i++)
7336 mem = gen_rtx_MEM (Pmode,
7337 plus_constant (save_area, i * UNITS_PER_WORD));
7338 MEM_NOTRAP_P (mem) = 1;
7339 set_mem_alias_set (mem, set);
7340 emit_move_insn (mem, gen_rtx_REG (Pmode,
7341 x86_64_int_parameter_registers[i]));
7344 if (ix86_varargs_fpr_size)
7346 enum machine_mode smode;
7349 /* Now emit code to save SSE registers.  The AX parameter contains the
7350    number of SSE parameter registers used to call this function, though
7351    all we actually check here is the zero/non-zero status.  */
7353 label = gen_label_rtx ();
7354 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
7355 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
7358 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
7359 we used movdqa (i.e. TImode) instead? Perhaps even better would
7360 be if we could determine the real mode of the data, via a hook
7361 into pass_stdarg. Ignore all that for now. */
7363 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
7364 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
7366 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
7367 if (max > X86_64_SSE_REGPARM_MAX)
7368 max = X86_64_SSE_REGPARM_MAX;
7370 for (i = cum->sse_regno; i < max; ++i)
7372 mem = plus_constant (save_area, i * 16 + ix86_varargs_gpr_size);
7373 mem = gen_rtx_MEM (smode, mem);
7374 MEM_NOTRAP_P (mem) = 1;
7375 set_mem_alias_set (mem, set);
7376 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
7378 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
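/* Layout of the register save area built above, relative to SAVE_AREA
   (the frame pointer), when both parts are live (illustrative sketch,
   with X86_64_REGPARM_MAX = 6 and X86_64_SSE_REGPARM_MAX = 8):

       bytes   0 ..  47   six GPR argument registers (8 bytes each)
       bytes  48 .. 175   eight SSE argument registers (16 bytes each)

   gp_offset and fp_offset in the va_list index straight into this block.  */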
7386 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
7388 alias_set_type set = get_varargs_alias_set ();
7391 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
7395 mem = gen_rtx_MEM (Pmode,
7396 plus_constant (virtual_incoming_args_rtx,
7397 i * UNITS_PER_WORD));
7398 MEM_NOTRAP_P (mem) = 1;
7399 set_mem_alias_set (mem, set);
7401 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
7402 emit_move_insn (mem, reg);
7407 ix86_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
7408 tree type, int *pretend_size ATTRIBUTE_UNUSED,
7411 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7412 CUMULATIVE_ARGS next_cum;
7415 /* This argument doesn't appear to be used anymore. Which is good,
7416 because the old code here didn't suppress rtl generation. */
7417 gcc_assert (!no_rtl);
7422 fntype = TREE_TYPE (current_function_decl);
7424 /* For varargs, we do not want to skip the dummy va_dcl argument.
7425 For stdargs, we do want to skip the last named argument. */
7427 if (stdarg_p (fntype))
7428 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
7431 if (cum->call_abi == MS_ABI)
7432 setup_incoming_varargs_ms_64 (&next_cum);
7434 setup_incoming_varargs_64 (&next_cum);
7437 /* Checks if TYPE is of kind va_list char *. */
7440 is_va_list_char_pointer (tree type)
7444 /* For 32-bit it is always true. */
7447 canonic = ix86_canonical_va_list_type (type);
7448 return (canonic == ms_va_list_type_node
7449 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
7452 /* Implement va_start. */
7455 ix86_va_start (tree valist, rtx nextarg)
7457 HOST_WIDE_INT words, n_gpr, n_fpr;
7458 tree f_gpr, f_fpr, f_ovf, f_sav;
7459 tree gpr, fpr, ovf, sav, t;
7463 if (flag_split_stack
7464 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7466 unsigned int scratch_regno;
7468 /* When we are splitting the stack, we can't refer to the stack
7469 arguments using internal_arg_pointer, because they may be on
7470 the old stack. The split stack prologue will arrange to
7471 leave a pointer to the old stack arguments in a scratch
7472 register, which we here copy to a pseudo-register. The split
7473 stack prologue can't set the pseudo-register directly because
7474 it (the prologue) runs before any registers have been saved. */
7476 scratch_regno = split_stack_prologue_scratch_regno ();
7477 if (scratch_regno != INVALID_REGNUM)
7481 reg = gen_reg_rtx (Pmode);
7482 cfun->machine->split_stack_varargs_pointer = reg;
7485 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
7489 push_topmost_sequence ();
7490 emit_insn_after (seq, entry_of_function ());
7491 pop_topmost_sequence ();
7495 /* Only the 64bit target needs something special.  */
7496 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7498 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7499 std_expand_builtin_va_start (valist, nextarg);
7504 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
7505 next = expand_binop (ptr_mode, add_optab,
7506 cfun->machine->split_stack_varargs_pointer,
7507 crtl->args.arg_offset_rtx,
7508 NULL_RTX, 0, OPTAB_LIB_WIDEN);
7509 convert_move (va_r, next, 0);
7514 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7515 f_fpr = DECL_CHAIN (f_gpr);
7516 f_ovf = DECL_CHAIN (f_fpr);
7517 f_sav = DECL_CHAIN (f_ovf);
7519 valist = build_simple_mem_ref (valist);
7520 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
7521 /* The following should be folded into the MEM_REF offset. */
7522 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
7524 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
7526 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
7528 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
7531 /* Count number of gp and fp argument registers used. */
7532 words = crtl->args.info.words;
7533 n_gpr = crtl->args.info.regno;
7534 n_fpr = crtl->args.info.sse_regno;
7536 if (cfun->va_list_gpr_size)
7538 type = TREE_TYPE (gpr);
7539 t = build2 (MODIFY_EXPR, type,
7540 gpr, build_int_cst (type, n_gpr * 8));
7541 TREE_SIDE_EFFECTS (t) = 1;
7542 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7545 if (TARGET_SSE && cfun->va_list_fpr_size)
7547 type = TREE_TYPE (fpr);
7548 t = build2 (MODIFY_EXPR, type, fpr,
7549 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
7550 TREE_SIDE_EFFECTS (t) = 1;
7551 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7554 /* Find the overflow area. */
7555 type = TREE_TYPE (ovf);
7556 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7557 ovf_rtx = crtl->args.internal_arg_pointer;
7559 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
7560 t = make_tree (type, ovf_rtx);
7562 t = build2 (POINTER_PLUS_EXPR, type, t,
7563 size_int (words * UNITS_PER_WORD));
7564 t = build2 (MODIFY_EXPR, type, ovf, t);
7565 TREE_SIDE_EFFECTS (t) = 1;
7566 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7568 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
7570 /* Find the register save area.  The function prologue saves it right
7571    above the stack frame.  */
7572 type = TREE_TYPE (sav);
7573 t = make_tree (type, frame_pointer_rtx);
7574 if (!ix86_varargs_gpr_size)
7575 t = build2 (POINTER_PLUS_EXPR, type, t,
7576 size_int (-8 * X86_64_REGPARM_MAX));
7577 t = build2 (MODIFY_EXPR, type, sav, t);
7578 TREE_SIDE_EFFECTS (t) = 1;
7579 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
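/* For illustration (sketch): after the expansion above, for a function
   declared f (int a, double b, ...) the va_list starts out as

       gp_offset         = 1 * 8          (one GPR consumed by A)
       fp_offset         = 48 + 1 * 16    (one XMM consumed by B)
       overflow_arg_area = incoming args + words * UNITS_PER_WORD
       reg_save_area     = the block saved by the prologue

   matching the n_gpr/n_fpr bookkeeping read from crtl->args.info.  */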
7583 /* Implement va_arg. */
7586 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7589 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
7590 tree f_gpr, f_fpr, f_ovf, f_sav;
7591 tree gpr, fpr, ovf, sav, t;
7593 tree lab_false, lab_over = NULL_TREE;
7598 enum machine_mode nat_mode;
7599 unsigned int arg_boundary;
7601 /* Only the 64bit target needs something special.  */
7602 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7603 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
7605 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7606 f_fpr = DECL_CHAIN (f_gpr);
7607 f_ovf = DECL_CHAIN (f_fpr);
7608 f_sav = DECL_CHAIN (f_ovf);
7610 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
7611 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
7612 valist = build_va_arg_indirect_ref (valist);
7613 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
7614 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
7615 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
7617 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
7619 type = build_pointer_type (type);
7620 size = int_size_in_bytes (type);
7621 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7623 nat_mode = type_natural_mode (type, NULL);
7632 /* Unnamed 256bit vector mode parameters are passed on the stack.  */
7633 if (!TARGET_64BIT_MS_ABI)
7640 container = construct_container (nat_mode, TYPE_MODE (type),
7641 type, 0, X86_64_REGPARM_MAX,
7642 X86_64_SSE_REGPARM_MAX, intreg,
7647 /* Pull the value out of the saved registers. */
7649 addr = create_tmp_var (ptr_type_node, "addr");
7653 int needed_intregs, needed_sseregs;
7655 tree int_addr, sse_addr;
7657 lab_false = create_artificial_label (UNKNOWN_LOCATION);
7658 lab_over = create_artificial_label (UNKNOWN_LOCATION);
7660 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
7662 need_temp = (!REG_P (container)
7663 && ((needed_intregs && TYPE_ALIGN (type) > 64)
7664 || TYPE_ALIGN (type) > 128));
7666 /* If we are passing a structure, verify that it is a consecutive block
7667    on the register save area.  If not, we need to do moves.  */
7668 if (!need_temp && !REG_P (container))
7670 /* Verify that all registers are strictly consecutive.  */
7671 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
7675 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7677 rtx slot = XVECEXP (container, 0, i);
7678 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
7679 || INTVAL (XEXP (slot, 1)) != i * 16)
7687 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7689 rtx slot = XVECEXP (container, 0, i);
7690 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
7691 || INTVAL (XEXP (slot, 1)) != i * 8)
7703 int_addr = create_tmp_var (ptr_type_node, "int_addr");
7704 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
7707 /* First ensure that we fit completely in registers. */
7710 t = build_int_cst (TREE_TYPE (gpr),
7711 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
7712 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
7713 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7714 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7715 gimplify_and_add (t, pre_p);
7719 t = build_int_cst (TREE_TYPE (fpr),
7720 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
7721 + X86_64_REGPARM_MAX * 8);
7722 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
7723 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7724 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7725 gimplify_and_add (t, pre_p);
7728 /* Compute index to start of area used for integer regs. */
7731 /* int_addr = gpr + sav; */
7732 t = fold_convert (sizetype, gpr);
7733 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
7734 gimplify_assign (int_addr, t, pre_p);
7738 /* sse_addr = fpr + sav; */
7739 t = fold_convert (sizetype, fpr);
7740 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
7741 gimplify_assign (sse_addr, t, pre_p);
7745 int i, prev_size = 0;
7746 tree temp = create_tmp_var (type, "va_arg_tmp");
7749 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
7750 gimplify_assign (addr, t, pre_p);
7752 for (i = 0; i < XVECLEN (container, 0); i++)
7754 rtx slot = XVECEXP (container, 0, i);
7755 rtx reg = XEXP (slot, 0);
7756 enum machine_mode mode = GET_MODE (reg);
7762 tree dest_addr, dest;
7763 int cur_size = GET_MODE_SIZE (mode);
7765 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
7766 prev_size = INTVAL (XEXP (slot, 1));
7767 if (prev_size + cur_size > size)
7769 cur_size = size - prev_size;
7770 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
7771 if (mode == BLKmode)
7774 piece_type = lang_hooks.types.type_for_mode (mode, 1);
7775 if (mode == GET_MODE (reg))
7776 addr_type = build_pointer_type (piece_type);
7778 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
7780 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
7783 if (SSE_REGNO_P (REGNO (reg)))
7785 src_addr = sse_addr;
7786 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
7790 src_addr = int_addr;
7791 src_offset = REGNO (reg) * 8;
7793 src_addr = fold_convert (addr_type, src_addr);
7794 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
7795 size_int (src_offset));
7797 dest_addr = fold_convert (daddr_type, addr);
7798 dest_addr = fold_build2 (POINTER_PLUS_EXPR, daddr_type, dest_addr,
7799 size_int (prev_size));
7800 if (cur_size == GET_MODE_SIZE (mode))
7802 src = build_va_arg_indirect_ref (src_addr);
7803 dest = build_va_arg_indirect_ref (dest_addr);
7805 gimplify_assign (dest, src, pre_p);
7810 = build_call_expr (implicit_built_in_decls[BUILT_IN_MEMCPY],
7811 3, dest_addr, src_addr,
7812 size_int (cur_size));
7813 gimplify_and_add (copy, pre_p);
7815 prev_size += cur_size;
7821 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
7822 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
7823 gimplify_assign (gpr, t, pre_p);
7828 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
7829 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
7830 gimplify_assign (fpr, t, pre_p);
7833 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
7835 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
7838 /* ... otherwise out of the overflow area. */
7840 /* When we align a parameter on the stack for the caller, if its
7841    alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
7842    aligned at MAX_SUPPORTED_STACK_ALIGNMENT.  We match the callee
7843    here with the caller.  */
7844 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
7845 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
7846 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
7848 /* Care for on-stack alignment if needed. */
7849 if (arg_boundary <= 64 || size == 0)
7853 HOST_WIDE_INT align = arg_boundary / 8;
7854 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
7855 size_int (align - 1));
7856 t = fold_convert (sizetype, t);
7857 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
7859 t = fold_convert (TREE_TYPE (ovf), t);
7862 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
7863 gimplify_assign (addr, t, pre_p);
7865 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
7866 size_int (rsize * UNITS_PER_WORD));
7867 gimplify_assign (unshare_expr (ovf), t, pre_p);
7870 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
7872 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
7873 addr = fold_convert (ptrtype, addr);
7876 addr = build_va_arg_indirect_ref (addr);
7877 return build_va_arg_indirect_ref (addr);
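/* The gimple emitted above is morally equivalent to this C sketch for
   va_arg (ap, int) (illustrative, assuming the SysV layout and six GPR
   argument slots):

       if (ap->gp_offset >= 6 * 8)            // all six GPR slots used?
         {
           addr = ap->overflow_arg_area;      // fetch from the stack
           ap->overflow_arg_area += 8;
         }
       else
         {
           addr = ap->reg_save_area + ap->gp_offset;
           ap->gp_offset += 8;
         }
       result = *(int *) addr;

   SSE-classed arguments follow the same shape using fp_offset and
   16-byte steps.  */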
7880 /* Return true if OPNUM's MEM should be matched
7881 in movabs* patterns. */
7884 ix86_check_movabs (rtx insn, int opnum)
7888 set = PATTERN (insn);
7889 if (GET_CODE (set) == PARALLEL)
7890 set = XVECEXP (set, 0, 0);
7891 gcc_assert (GET_CODE (set) == SET);
7892 mem = XEXP (set, opnum);
7893 while (GET_CODE (mem) == SUBREG)
7894 mem = SUBREG_REG (mem);
7895 gcc_assert (MEM_P (mem));
7896 return volatile_ok || !MEM_VOLATILE_P (mem);
7899 /* Initialize the table of extra 80387 mathematical constants. */
7902 init_ext_80387_constants (void)
7904 static const char * cst[5] =
7906 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
7907 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
7908 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
7909 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
7910 "3.1415926535897932385128089594061862044", /* 4: fldpi */
7914 for (i = 0; i < 5; i++)
7916 real_from_string (&ext_80387_constants_table[i], cst[i]);
7917 /* Ensure each constant is rounded to XFmode precision. */
7918 real_convert (&ext_80387_constants_table[i],
7919 XFmode, &ext_80387_constants_table[i]);
7922 ext_80387_constants_init = 1;
7925 /* Return non-zero if the constant is something that
7926 can be loaded with a special instruction. */
7929 standard_80387_constant_p (rtx x)
7931 enum machine_mode mode = GET_MODE (x);
7935 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
7938 if (x == CONST0_RTX (mode))
7940 if (x == CONST1_RTX (mode))
7943 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7945 /* For XFmode constants, try to find a special 80387 instruction when
7946 optimizing for size or on those CPUs that benefit from them. */
7948 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
7952 if (! ext_80387_constants_init)
7953 init_ext_80387_constants ();
7955 for (i = 0; i < 5; i++)
7956 if (real_identical (&r, &ext_80387_constants_table[i]))
7960 /* A load of the constant -0.0 or -1.0 will be split into an
7961    fldz;fchs or fld1;fchs sequence.  */
7962 if (real_isnegzero (&r))
7964 if (real_identical (&r, &dconstm1))
7970 /* Return the opcode of the special instruction to be used to load
7971    the constant X.  */
7973 const char *
7974 standard_80387_constant_opcode (rtx x)
7976 switch (standard_80387_constant_p (x))
8000 /* Return the CONST_DOUBLE representing the 80387 constant that is
8001 loaded by the specified special instruction. The argument IDX
8002 matches the return value from standard_80387_constant_p. */
8005 standard_80387_constant_rtx (int idx)
8009 if (! ext_80387_constants_init)
8010 init_ext_80387_constants ();
8026 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
8030 /* Return 1 if X is all 0s and 2 if X is all 1s
8031    in a supported SSE vector mode.  */
8034 standard_sse_constant_p (rtx x)
8036 enum machine_mode mode = GET_MODE (x);
8038 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
8040 if (vector_all_ones_operand (x, mode))
8056 /* Return the opcode of the special instruction to be used to load
8057    the constant X.  */
8059 const char *
8060 standard_sse_constant_opcode (rtx insn, rtx x)
8062 switch (standard_sse_constant_p (x))
8065 switch (get_attr_mode (insn))
8068 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8069 return "%vpxor\t%0, %d0";
8071 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8072 return "%vxorpd\t%0, %d0";
8074 return "%vxorps\t%0, %d0";
8077 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8078 return "vpxor\t%x0, %x0, %x0";
8080 if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8081 return "vxorpd\t%x0, %x0, %x0";
8083 return "vxorps\t%x0, %x0, %x0";
8090 return "%vpcmpeqd\t%0, %d0";
8097 /* Returns true if OP contains a symbol reference.  */
8100 symbolic_reference_mentioned_p (rtx op)
8105 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
8108 fmt = GET_RTX_FORMAT (GET_CODE (op));
8109 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
8115 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
8116 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
8120 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
8127 /* Return true if it is appropriate to emit `ret' instructions in the
8128 body of a function. Do this only if the epilogue is simple, needing a
8129 couple of insns. Prior to reloading, we can't tell how many registers
8130 must be saved, so return false then. Return false if there is no frame
8131 marker to de-allocate. */
8134 ix86_can_use_return_insn_p (void)
8136 struct ix86_frame frame;
8138 if (! reload_completed || frame_pointer_needed)
8141 /* Don't allow more than 32k pop, since that's all we can do
8142 with one instruction. */
8143 if (crtl->args.pops_args && crtl->args.size >= 32768)
8146 ix86_compute_frame_layout (&frame);
8147 return (frame.stack_pointer_offset == UNITS_PER_WORD
8148 && (frame.nregs + frame.nsseregs) == 0);
8151 /* Value should be nonzero if functions must have frame pointers.
8152 Zero means the frame pointer need not be set up (and parms may
8153 be accessed via the stack pointer) in functions that seem suitable. */
8156 ix86_frame_pointer_required (void)
8158 /* If we accessed previous frames, then the generated code expects
8159 to be able to access the saved ebp value in our frame. */
8160 if (cfun->machine->accesses_prev_frame)
8163 /* Several x86 OSes need a frame pointer for other reasons,
8164    usually pertaining to setjmp.  */
8165 if (SUBTARGET_FRAME_POINTER_REQUIRED)
8168 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
8169 turns off the frame pointer by default. Turn it back on now if
8170 we've not got a leaf function. */
8171 if (TARGET_OMIT_LEAF_FRAME_POINTER
8172 && (!current_function_is_leaf
8173 || ix86_current_function_calls_tls_descriptor))
8176 if (crtl->profile && !flag_fentry)
8182 /* Record that the current function accesses previous call frames. */
8185 ix86_setup_frame_addresses (void)
8187 cfun->machine->accesses_prev_frame = 1;
8190 #ifndef USE_HIDDEN_LINKONCE
8191 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
8192 # define USE_HIDDEN_LINKONCE 1
8194 # define USE_HIDDEN_LINKONCE 0
8198 static int pic_labels_used;
8200 /* Fills in the label name that should be used for a pc thunk for
8201 the given register. */
8204 get_pc_thunk_name (char name[32], unsigned int regno)
8206 gcc_assert (!TARGET_64BIT);
8208 if (USE_HIDDEN_LINKONCE)
8209 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
8211 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
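/* For illustration: with USE_HIDDEN_LINKONCE the thunk for %ebx is named
   "__i686.get_pc_thunk.bx"; otherwise an internal, regno-based label
   such as "LPR3" is generated instead (the exact spelling is
   target-dependent).  */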
8215 /* This function generates code for -fpic that loads %ebx with
8216 the return address of the caller and then returns. */
8219 ix86_code_end (void)
8224 #ifdef TARGET_SOLARIS
8225 solaris_code_end ();
8228 for (regno = AX_REG; regno <= SP_REG; regno++)
8233 if (!(pic_labels_used & (1 << regno)))
8236 get_pc_thunk_name (name, regno);
8238 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
8239 get_identifier (name),
8240 build_function_type_list (void_type_node, NULL_TREE));
8241 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
8242 NULL_TREE, void_type_node);
8243 TREE_PUBLIC (decl) = 1;
8244 TREE_STATIC (decl) = 1;
8249 switch_to_section (darwin_sections[text_coal_section]);
8250 fputs ("\t.weak_definition\t", asm_out_file);
8251 assemble_name (asm_out_file, name);
8252 fputs ("\n\t.private_extern\t", asm_out_file);
8253 assemble_name (asm_out_file, name);
8254 putc ('\n', asm_out_file);
8255 ASM_OUTPUT_LABEL (asm_out_file, name);
8256 DECL_WEAK (decl) = 1;
8260 if (USE_HIDDEN_LINKONCE)
8262 DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
8264 targetm.asm_out.unique_section (decl, 0);
8265 switch_to_section (get_named_section (decl, NULL, 0));
8267 targetm.asm_out.globalize_label (asm_out_file, name);
8268 fputs ("\t.hidden\t", asm_out_file);
8269 assemble_name (asm_out_file, name);
8270 putc ('\n', asm_out_file);
8271 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
8275 switch_to_section (text_section);
8276 ASM_OUTPUT_LABEL (asm_out_file, name);
8279 DECL_INITIAL (decl) = make_node (BLOCK);
8280 current_function_decl = decl;
8281 init_function_start (decl);
8282 first_function_block_is_cold = false;
8283 /* Make sure unwind info is emitted for the thunk if needed. */
8284 final_start_function (emit_barrier (), asm_out_file, 1);
8286 /* Pad the stack-to-IP move out to 4 instructions (two NOPs count
8287 as one instruction).  */
8288 if (TARGET_PAD_SHORT_FUNCTION)
8293 fputs ("\tnop\n", asm_out_file);
8296 xops[0] = gen_rtx_REG (Pmode, regno);
8297 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
8298 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
8299 fputs ("\tret\n", asm_out_file);
8300 final_end_function ();
8301 init_insn_lengths ();
8302 free_after_compilation (cfun);
8304 current_function_decl = NULL;
8307 if (flag_split_stack)
8308 file_end_indicate_split_stack ();
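/* For illustration, the thunk emitted above for %ebx boils down to the
   following assembly (a sketch of the expected output, not a verbatim
   dump; the __i686.get_pc_thunk.* name assumes USE_HIDDEN_LINKONCE):

	__i686.get_pc_thunk.bx:
		movl	(%esp), %ebx
		ret

   i.e. it copies its own return address -- the address of the
   instruction following the call -- into the destination register.  */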
8311 /* Emit code for the SET_GOT patterns. */
8314 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
8320 if (TARGET_VXWORKS_RTP && flag_pic)
8322 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
8323 xops[2] = gen_rtx_MEM (Pmode,
8324 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
8325 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
8327 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
8328 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
8329 an unadorned address. */
8330 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
8331 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
8332 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
8336 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
8340 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
8342 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
8345 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8346 is what will be referenced by the Mach-O PIC subsystem. */
8348 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8351 targetm.asm_out.internal_label (asm_out_file, "L",
8352 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
8357 get_pc_thunk_name (name, REGNO (dest));
8358 pic_labels_used |= 1 << REGNO (dest);
8360 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
8361 xops[2] = gen_rtx_MEM (QImode, xops[2]);
8362 output_asm_insn ("call\t%X2", xops);
8363 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8364 is what will be referenced by the Mach-O PIC subsystem. */
8367 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8369 targetm.asm_out.internal_label (asm_out_file, "L",
8370 CODE_LABEL_NUMBER (label));
8375 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
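/* For illustration, in the common 32-bit ELF case the sequence emitted
   above is roughly (a sketch, assuming GOT_SYMBOL_NAME expands to the
   usual _GLOBAL_OFFSET_TABLE_ and %ebx is the PIC register):

	call	__i686.get_pc_thunk.bx
	addl	$_GLOBAL_OFFSET_TABLE_, %ebx

   after which %ebx points at the GOT, thanks to the special relocation
   the assembler applies to _GLOBAL_OFFSET_TABLE_.  */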
8380 /* Generate a "push" pattern for input ARG.  */
8385 struct machine_function *m = cfun->machine;
8387 if (m->fs.cfa_reg == stack_pointer_rtx)
8388 m->fs.cfa_offset += UNITS_PER_WORD;
8389 m->fs.sp_offset += UNITS_PER_WORD;
8391 return gen_rtx_SET (VOIDmode,
8393 gen_rtx_PRE_DEC (Pmode,
8394 stack_pointer_rtx)),
8398 /* Generate a "pop" pattern for input ARG.  */
8403 return gen_rtx_SET (VOIDmode,
8406 gen_rtx_POST_INC (Pmode,
8407 stack_pointer_rtx)));
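/* A sketch of the RTL the two generators above produce on ia32
   (Pmode == SImode):

     push:  (set (mem:SI (pre_dec:SI (reg:SI sp))) (reg:SI arg))
     pop:   (set (reg:SI arg) (mem:SI (post_inc:SI (reg:SI sp))))

   Note that gen_push also updates the cfun->machine->fs bookkeeping
   to reflect the stack adjustment.  */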
8410 /* Return the number of an unused call-clobbered register that is available
8411 for the entire function, or INVALID_REGNUM if there is none.  */
8414 ix86_select_alt_pic_regnum (void)
8416 if (current_function_is_leaf
8418 && !ix86_current_function_calls_tls_descriptor)
8421 /* Can't use the same register for both PIC and DRAP. */
8423 drap = REGNO (crtl->drap_reg);
8426 for (i = 2; i >= 0; --i)
8427 if (i != drap && !df_regs_ever_live_p (i))
8431 return INVALID_REGNUM;
8434 /* Return TRUE if we need to save REGNO. */
8437 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
8439 if (pic_offset_table_rtx
8440 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
8441 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
8443 || crtl->calls_eh_return
8444 || crtl->uses_const_pool))
8445 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
8447 if (crtl->calls_eh_return && maybe_eh_return)
8452 unsigned test = EH_RETURN_DATA_REGNO (i);
8453 if (test == INVALID_REGNUM)
8460 if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
8463 return (df_regs_ever_live_p (regno)
8464 && !call_used_regs[regno]
8465 && !fixed_regs[regno]
8466 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
8469 /* Return number of saved general purpose registers.  */
8472 ix86_nsaved_regs (void)
8477 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8478 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8483 /* Return number of saved SSE registers.  */
8486 ix86_nsaved_sseregs (void)
8491 if (!TARGET_64BIT_MS_ABI)
8493 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8494 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8499 /* Given FROM and TO register numbers, say whether this elimination is
8500 allowed. If stack alignment is needed, we can only replace argument
8501 pointer with hard frame pointer, or replace frame pointer with stack
8502 pointer. Otherwise, frame pointer elimination is automatically
8503 handled and all other eliminations are valid. */
8506 ix86_can_eliminate (const int from, const int to)
8508 if (stack_realign_fp)
8509 return ((from == ARG_POINTER_REGNUM
8510 && to == HARD_FRAME_POINTER_REGNUM)
8511 || (from == FRAME_POINTER_REGNUM
8512 && to == STACK_POINTER_REGNUM));
8514 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
8517 /* Return the offset between two registers, one to be eliminated, and the other
8518 its replacement, at the start of a routine. */
8521 ix86_initial_elimination_offset (int from, int to)
8523 struct ix86_frame frame;
8524 ix86_compute_frame_layout (&frame);
8526 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8527 return frame.hard_frame_pointer_offset;
8528 else if (from == FRAME_POINTER_REGNUM
8529 && to == HARD_FRAME_POINTER_REGNUM)
8530 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
8533 gcc_assert (to == STACK_POINTER_REGNUM);
8535 if (from == ARG_POINTER_REGNUM)
8536 return frame.stack_pointer_offset;
8538 gcc_assert (from == FRAME_POINTER_REGNUM);
8539 return frame.stack_pointer_offset - frame.frame_pointer_offset;
8543 /* In a dynamically-aligned function, we can't know the offset from
8544 stack pointer to frame pointer, so we must ensure that setjmp
8545 eliminates fp against the hard fp (%ebp) rather than trying to
8546 index from %esp up to the top of the frame across a gap that is
8547 of unknown (at compile-time) size. */
8549 ix86_builtin_setjmp_frame_value (void)
8551 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
8554 /* When using -fsplit-stack, the allocation routines set a field in
8555 the TCB to the bottom of the stack plus this much space, measured from the stack pointer.  */
8558 #define SPLIT_STACK_AVAILABLE 256
8560 /* Fill the ix86_frame structure with information about the frame of the current function.  */
8563 ix86_compute_frame_layout (struct ix86_frame *frame)
8565 unsigned int stack_alignment_needed;
8566 HOST_WIDE_INT offset;
8567 unsigned int preferred_alignment;
8568 HOST_WIDE_INT size = get_frame_size ();
8569 HOST_WIDE_INT to_allocate;
8571 frame->nregs = ix86_nsaved_regs ();
8572 frame->nsseregs = ix86_nsaved_sseregs ();
8574 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
8575 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
8577 /* The 64-bit MS ABI seems to require the stack always to be 16-byte aligned,
8578 except in function prologues and in leaf functions.  */
8579 if ((TARGET_64BIT_MS_ABI && preferred_alignment < 16)
8580 && (!current_function_is_leaf || cfun->calls_alloca != 0
8581 || ix86_current_function_calls_tls_descriptor))
8583 preferred_alignment = 16;
8584 stack_alignment_needed = 16;
8585 crtl->preferred_stack_boundary = 128;
8586 crtl->stack_alignment_needed = 128;
8589 gcc_assert (!size || stack_alignment_needed);
8590 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
8591 gcc_assert (preferred_alignment <= stack_alignment_needed);
8593 /* For SEH we have to limit the amount of code movement into the prologue.
8594 At present we do this via a BLOCKAGE, at which point there's very little
8595 scheduling that can be done, which means that there's very little point
8596 in doing anything except PUSHs. */
8598 cfun->machine->use_fast_prologue_epilogue = false;
8600 /* During a reload iteration the number of registers saved can change.
8601 Recompute the value as needed. Do not recompute when the number of
8602 registers didn't change, as reload makes multiple calls to this function
8603 and does not expect the decision to change within a single iteration.  */
8604 else if (!optimize_function_for_size_p (cfun)
8605 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
8607 int count = frame->nregs;
8608 struct cgraph_node *node = cgraph_get_node (current_function_decl);
8610 cfun->machine->use_fast_prologue_epilogue_nregs = count;
8612 /* The fast prologue uses moves instead of pushes to save registers. This
8613 is significantly longer, but also executes faster, as modern hardware
8614 can execute the moves in parallel but cannot do so for push/pop.
8616 Be careful about choosing which prologue to emit: when a function takes
8617 many instructions to execute, we may as well use the slow version; the
8618 same applies when the function is known to be outside a hot spot (known
8619 with feedback only). Weight the size of the function by the number of
8620 registers to save, as it is cheap to use one or two push instructions
8621 but very slow to use many of them.  */
8623 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
8624 if (node->frequency < NODE_FREQUENCY_NORMAL
8625 || (flag_branch_probabilities
8626 && node->frequency < NODE_FREQUENCY_HOT))
8627 cfun->machine->use_fast_prologue_epilogue = false;
8629 cfun->machine->use_fast_prologue_epilogue
8630 = !expensive_function_p (count);
8632 if (TARGET_PROLOGUE_USING_MOVE
8633 && cfun->machine->use_fast_prologue_epilogue)
8634 frame->save_regs_using_mov = true;
8636 frame->save_regs_using_mov = false;
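/* For illustration, the two save styles compare roughly as follows for
   two call-saved registers (a sketch, not verbatim compiler output):

	push style:			mov style:
		pushl	%ebx			subl	$8, %esp
		pushl	%esi			movl	%ebx, 4(%esp)
						movl	%esi, (%esp)

   The mov form is bigger, but its stores are independent of each other
   and can issue in parallel, whereas pushes serialize on the stack
   pointer update.  */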
8638 /* If static stack checking is enabled and done with probes, the registers
8639 need to be saved before allocating the frame. */
8640 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
8641 frame->save_regs_using_mov = false;
8643 /* Skip return address. */
8644 offset = UNITS_PER_WORD;
8646 /* Skip pushed static chain. */
8647 if (ix86_static_chain_on_stack)
8648 offset += UNITS_PER_WORD;
8650 /* Skip saved base pointer. */
8651 if (frame_pointer_needed)
8652 offset += UNITS_PER_WORD;
8653 frame->hfp_save_offset = offset;
8655 /* The traditional frame pointer location is at the top of the frame. */
8656 frame->hard_frame_pointer_offset = offset;
8658 /* Register save area */
8659 offset += frame->nregs * UNITS_PER_WORD;
8660 frame->reg_save_offset = offset;
8662 /* Align and set SSE register save area. */
8663 if (frame->nsseregs)
8665 /* The only ABI that has saved SSE registers (Win64) also has a
8666 16-byte aligned default stack, and thus we don't need to be
8667 within the re-aligned local stack frame to save them. */
8668 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
8669 offset = (offset + 16 - 1) & -16;
8670 offset += frame->nsseregs * 16;
8672 frame->sse_reg_save_offset = offset;
8674 /* The re-aligned stack starts here. Values before this point are not
8675 directly comparable with values below this point. In order to make
8676 sure that no value happens to be the same before and after, force
8677 the alignment computation below to add a non-zero value. */
8678 if (stack_realign_fp)
8679 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
8682 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
8683 offset += frame->va_arg_size;
8685 /* Align start of frame for local function. */
8686 if (stack_realign_fp
8687 || offset != frame->sse_reg_save_offset
8689 || !current_function_is_leaf
8690 || cfun->calls_alloca
8691 || ix86_current_function_calls_tls_descriptor)
8692 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
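/* A worked example of the rounding idiom used throughout this function:
   with offset == 20 and stack_alignment_needed == 16,
   (20 + 16 - 1) & -16 == 35 & 0xfffffff0 == 32, i.e. OFFSET is rounded
   up to the next multiple of the (power-of-two) alignment.  */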
8694 /* Frame pointer points here. */
8695 frame->frame_pointer_offset = offset;
8699 /* Add outgoing arguments area. Can be skipped if we eliminated
8700 all the function calls as dead code.
8701 Skipping is however impossible when the function calls alloca. The alloca
8702 expander assumes that the last crtl->outgoing_args_size bytes
8703 of the stack frame are unused.  */
8704 if (ACCUMULATE_OUTGOING_ARGS
8705 && (!current_function_is_leaf || cfun->calls_alloca
8706 || ix86_current_function_calls_tls_descriptor))
8708 offset += crtl->outgoing_args_size;
8709 frame->outgoing_arguments_size = crtl->outgoing_args_size;
8712 frame->outgoing_arguments_size = 0;
8714 /* Align stack boundary. Only needed if we're calling another function or using alloca.  */
8716 if (!current_function_is_leaf || cfun->calls_alloca
8717 || ix86_current_function_calls_tls_descriptor)
8718 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
8720 /* We've reached the end of the stack frame.  */
8721 frame->stack_pointer_offset = offset;
8723 /* The size the prologue needs to allocate.  */
8724 to_allocate = offset - frame->sse_reg_save_offset;
8726 if ((!to_allocate && frame->nregs <= 1)
8727 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
8728 frame->save_regs_using_mov = false;
8730 if (ix86_using_red_zone ()
8731 && current_function_sp_is_unchanging
8732 && current_function_is_leaf
8733 && !ix86_current_function_calls_tls_descriptor)
8735 frame->red_zone_size = to_allocate;
8736 if (frame->save_regs_using_mov)
8737 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
8738 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
8739 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
8742 frame->red_zone_size = 0;
8743 frame->stack_pointer_offset -= frame->red_zone_size;
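/* A worked example (assuming the usual x86_64 values of a 128-byte red
   zone with a small reserve): for a leaf function with to_allocate == 40
   and two registers saved via mov, red_zone_size is 40 + 2 * 8 == 56.
   That fits in the red zone, so the subtraction above absorbs the whole
   allocation and the prologue needs no explicit stack adjustment.  */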
8745 /* The SEH frame pointer location is near the bottom of the frame.
8746 This is enforced by the fact that the difference between the
8747 stack pointer and the frame pointer is limited to 240 bytes in
8748 the unwind data structure. */
8753 /* If we can leave the frame pointer where it is, do so. */
8754 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
8755 if (diff > 240 || (diff & 15) != 0)
8757 /* Ideally we'd determine what portion of the local stack frame
8758 (within the constraint of the lowest 240) is most heavily used.
8759 But without that complication, simply bias the frame pointer
8760 by 128 bytes so as to maximize the amount of the local stack
8761 frame that is addressable with 8-bit offsets. */
8762 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
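/* A worked example: a signed 8-bit displacement reaches [-128, +127],
   so placing the frame pointer 128 bytes above the stack pointer makes
   the 256 bytes from the stack pointer upward addressable as
   -128(%fp) .. +127(%fp) using one-byte offsets.  */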
8767 /* This is semi-inlined memory_address_length, but simplified
8768 since we know that we're always dealing with reg+offset, and
8769 to avoid having to create and discard all that rtl. */
8772 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
8778 /* EBP and R13 cannot be encoded without an offset. */
8779 len = (regno == BP_REG || regno == R13_REG);
8781 else if (IN_RANGE (offset, -128, 127))
8784 /* ESP and R12 must be encoded with a SIB byte. */
8785 if (regno == SP_REG || regno == R12_REG)
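/* Worked examples for the length computation above (the count is the
   extra address-encoding bytes beyond the opcode and ModRM):

     0(%eax)    -> 0   no displacement and no SIB byte needed
     0(%ebp)    -> 1   EBP cannot be encoded bare; a disp8 of 0 is used
     16(%eax)   -> 1   disp8
     1024(%eax) -> 4   disp32
     16(%esp)   -> 2   disp8 plus the mandatory SIB byte  */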
8791 /* Return an RTX that points to CFA_OFFSET within the stack frame.
8792 The valid base registers are taken from CFUN->MACHINE->FS. */
8795 choose_baseaddr (HOST_WIDE_INT cfa_offset)
8797 const struct machine_function *m = cfun->machine;
8798 rtx base_reg = NULL;
8799 HOST_WIDE_INT base_offset = 0;
8801 if (m->use_fast_prologue_epilogue)
8803 /* Choose the base register most likely to allow the most scheduling
8804 opportunities. Generally FP is valid throughout the function,
8805 while DRAP must be reloaded within the epilogue. But choose either
8806 over the SP due to increased encoding size. */
8810 base_reg = hard_frame_pointer_rtx;
8811 base_offset = m->fs.fp_offset - cfa_offset;
8813 else if (m->fs.drap_valid)
8815 base_reg = crtl->drap_reg;
8816 base_offset = 0 - cfa_offset;
8818 else if (m->fs.sp_valid)
8820 base_reg = stack_pointer_rtx;
8821 base_offset = m->fs.sp_offset - cfa_offset;
8826 HOST_WIDE_INT toffset;
8829 /* Choose the base register with the smallest address encoding.
8830 With a tie, choose FP > DRAP > SP. */
8833 base_reg = stack_pointer_rtx;
8834 base_offset = m->fs.sp_offset - cfa_offset;
8835 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
8837 if (m->fs.drap_valid)
8839 toffset = 0 - cfa_offset;
8840 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
8843 base_reg = crtl->drap_reg;
8844 base_offset = toffset;
8850 toffset = m->fs.fp_offset - cfa_offset;
8851 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
8854 base_reg = hard_frame_pointer_rtx;
8855 base_offset = toffset;
8860 gcc_assert (base_reg != NULL);
8862 return plus_constant (base_reg, base_offset);
8865 /* Emit code to save registers in the prologue. */
8868 ix86_emit_save_regs (void)
8873 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
8874 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8876 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
8877 RTX_FRAME_RELATED_P (insn) = 1;
8881 /* Emit a single register save at CFA - CFA_OFFSET. */
8884 ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
8885 HOST_WIDE_INT cfa_offset)
8887 struct machine_function *m = cfun->machine;
8888 rtx reg = gen_rtx_REG (mode, regno);
8889 rtx mem, addr, base, insn;
8891 addr = choose_baseaddr (cfa_offset);
8892 mem = gen_frame_mem (mode, addr);
8894 /* For SSE saves, we need to indicate the 128-bit alignment. */
8895 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
8897 insn = emit_move_insn (mem, reg);
8898 RTX_FRAME_RELATED_P (insn) = 1;
8901 if (GET_CODE (base) == PLUS)
8902 base = XEXP (base, 0);
8903 gcc_checking_assert (REG_P (base));
8905 /* When saving registers into a re-aligned local stack frame, avoid
8906 any tricky guessing by dwarf2out. */
8907 if (m->fs.realigned)
8909 gcc_checking_assert (stack_realign_drap);
8911 if (regno == REGNO (crtl->drap_reg))
8913 /* A bit of a hack. We force the DRAP register to be saved in
8914 the re-aligned stack frame, which provides us with a copy
8915 of the CFA that will last past the prologue. Install it. */
8916 gcc_checking_assert (cfun->machine->fs.fp_valid);
8917 addr = plus_constant (hard_frame_pointer_rtx,
8918 cfun->machine->fs.fp_offset - cfa_offset);
8919 mem = gen_rtx_MEM (mode, addr);
8920 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
8924 /* The frame pointer is a stable reference within the
8925 aligned frame. Use it. */
8926 gcc_checking_assert (cfun->machine->fs.fp_valid);
8927 addr = plus_constant (hard_frame_pointer_rtx,
8928 cfun->machine->fs.fp_offset - cfa_offset);
8929 mem = gen_rtx_MEM (mode, addr);
8930 add_reg_note (insn, REG_CFA_EXPRESSION,
8931 gen_rtx_SET (VOIDmode, mem, reg));
8935 /* The memory may not be relative to the current CFA register,
8936 which means that we may need to generate a new pattern for
8937 use by the unwind info. */
8938 else if (base != m->fs.cfa_reg)
8940 addr = plus_constant (m->fs.cfa_reg, m->fs.cfa_offset - cfa_offset);
8941 mem = gen_rtx_MEM (mode, addr);
8942 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
8946 /* Emit code to save registers using MOV insns.
8947 First register is stored at CFA - CFA_OFFSET. */
8949 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
8953 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8954 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8956 ix86_emit_save_reg_using_mov (Pmode, regno, cfa_offset);
8957 cfa_offset -= UNITS_PER_WORD;
8961 /* Emit code to save SSE registers using MOV insns.
8962 First register is stored at CFA - CFA_OFFSET. */
8964 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
8968 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8969 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8971 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
8976 static GTY(()) rtx queued_cfa_restores;
8978 /* Add a REG_CFA_RESTORE REG note to INSN, or queue it until the next stack
8979 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
8980 Don't add the note if the previously saved value will be left untouched
8981 within the stack red zone until return, as unwinders can find the same value
8982 in the register and on the stack. */
8985 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
8987 if (cfa_offset <= cfun->machine->fs.red_zone_offset)
8992 add_reg_note (insn, REG_CFA_RESTORE, reg);
8993 RTX_FRAME_RELATED_P (insn) = 1;
8997 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
9000 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
9003 ix86_add_queued_cfa_restore_notes (rtx insn)
9006 if (!queued_cfa_restores)
9008 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
9010 XEXP (last, 1) = REG_NOTES (insn);
9011 REG_NOTES (insn) = queued_cfa_restores;
9012 queued_cfa_restores = NULL_RTX;
9013 RTX_FRAME_RELATED_P (insn) = 1;
9016 /* Expand prologue or epilogue stack adjustment.
9017 The pattern exists to put a dependency on all ebp-based memory accesses.
9018 STYLE should be negative if instructions should be marked as frame related,
9019 zero if the %r11 register is live and cannot be freely used, and positive otherwise.  */
9023 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
9024 int style, bool set_cfa)
9026 struct machine_function *m = cfun->machine;
9028 bool add_frame_related_expr = false;
9031 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
9032 else if (x86_64_immediate_operand (offset, DImode))
9033 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
9037 /* r11 is used by indirect sibcall return as well, set before the
9038 epilogue and used after the epilogue. */
9040 tmp = gen_rtx_REG (DImode, R11_REG);
9043 gcc_assert (src != hard_frame_pointer_rtx
9044 && dest != hard_frame_pointer_rtx);
9045 tmp = hard_frame_pointer_rtx;
9047 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
9049 add_frame_related_expr = true;
9051 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
9054 insn = emit_insn (insn);
9056 ix86_add_queued_cfa_restore_notes (insn);
9062 gcc_assert (m->fs.cfa_reg == src);
9063 m->fs.cfa_offset += INTVAL (offset);
9064 m->fs.cfa_reg = dest;
9066 r = gen_rtx_PLUS (Pmode, src, offset);
9067 r = gen_rtx_SET (VOIDmode, dest, r);
9068 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
9069 RTX_FRAME_RELATED_P (insn) = 1;
9073 RTX_FRAME_RELATED_P (insn) = 1;
9074 if (add_frame_related_expr)
9076 rtx r = gen_rtx_PLUS (Pmode, src, offset);
9077 r = gen_rtx_SET (VOIDmode, dest, r);
9078 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
9082 if (dest == stack_pointer_rtx)
9084 HOST_WIDE_INT ooffset = m->fs.sp_offset;
9085 bool valid = m->fs.sp_valid;
9087 if (src == hard_frame_pointer_rtx)
9089 valid = m->fs.fp_valid;
9090 ooffset = m->fs.fp_offset;
9092 else if (src == crtl->drap_reg)
9094 valid = m->fs.drap_valid;
9099 /* Else there are two possibilities: SP itself, which we set
9100 up as the default above. Or EH_RETURN_STACKADJ_RTX, which is
9101 taken care of by hand along the eh_return path.  */
9102 gcc_checking_assert (src == stack_pointer_rtx
9103 || offset == const0_rtx);
9106 m->fs.sp_offset = ooffset - INTVAL (offset);
9107 m->fs.sp_valid = valid;
9111 /* Find an available register to be used as dynamic realign argument
9112 pointer register. Such a register will be written in the prologue and
9113 used at the beginning of the body, so it must not be
9114 1. parameter passing register.
9116 We reuse the static-chain register if it is available. Otherwise, we
9117 use DI for i386 and R13 for x86-64. We chose R13 since it has a longer encoding.
9120 Return: the regno of chosen register. */
9123 find_drap_reg (void)
9125 tree decl = cfun->decl;
9129 /* Use R13 for a nested function or a function that needs a static chain.
9130 Since a function with a tail call may use any caller-saved
9131 register in the epilogue, DRAP must not use a caller-saved
9132 register in that case.  */
9133 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
9140 /* Use DI for a nested function or a function that needs a static chain.
9141 Since a function with a tail call may use any caller-saved
9142 register in the epilogue, DRAP must not use a caller-saved
9143 register in that case.  */
9144 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
9147 /* Reuse the static chain register if it isn't used for parameter passing.  */
9149 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
9151 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
9152 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
9159 /* Return minimum incoming stack alignment. */
9162 ix86_minimum_incoming_stack_boundary (bool sibcall)
9164 unsigned int incoming_stack_boundary;
9166 /* Prefer the one specified at command line. */
9167 if (ix86_user_incoming_stack_boundary)
9168 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
9169 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
9170 if -mstackrealign is used, this isn't a sibcall check, and the
9171 estimated stack alignment is 128 bits.  */
9174 && ix86_force_align_arg_pointer
9175 && crtl->stack_alignment_estimated == 128)
9176 incoming_stack_boundary = MIN_STACK_BOUNDARY;
9178 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
9180 /* Incoming stack alignment can be changed on individual functions
9181 via force_align_arg_pointer attribute. We use the smallest
9182 incoming stack boundary. */
9183 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
9184 && lookup_attribute (ix86_force_align_arg_pointer_string,
9185 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
9186 incoming_stack_boundary = MIN_STACK_BOUNDARY;
9188 /* The incoming stack frame has to be aligned at least at
9189 parm_stack_boundary. */
9190 if (incoming_stack_boundary < crtl->parm_stack_boundary)
9191 incoming_stack_boundary = crtl->parm_stack_boundary;
9193 /* The stack at the entrance of main is aligned by the runtime. We use the
9194 smallest incoming stack boundary. */
9195 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
9196 && DECL_NAME (current_function_decl)
9197 && MAIN_NAME_P (DECL_NAME (current_function_decl))
9198 && DECL_FILE_SCOPE_P (current_function_decl))
9199 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
9201 return incoming_stack_boundary;
9204 /* Update incoming stack boundary and estimated stack alignment. */
9207 ix86_update_stack_boundary (void)
9209 ix86_incoming_stack_boundary
9210 = ix86_minimum_incoming_stack_boundary (false);
9212 /* x86_64 varargs need 16-byte stack alignment for the register save area.  */
9216 && crtl->stack_alignment_estimated < 128)
9217 crtl->stack_alignment_estimated = 128;
9220 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
9221 needed or an rtx for DRAP otherwise. */
9224 ix86_get_drap_rtx (void)
9226 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
9227 crtl->need_drap = true;
9229 if (stack_realign_drap)
9231 /* Assign DRAP to vDRAP and return vDRAP.  */
9232 unsigned int regno = find_drap_reg ();
9237 arg_ptr = gen_rtx_REG (Pmode, regno);
9238 crtl->drap_reg = arg_ptr;
9241 drap_vreg = copy_to_reg (arg_ptr);
9245 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
9248 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
9249 RTX_FRAME_RELATED_P (insn) = 1;
9257 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
9260 ix86_internal_arg_pointer (void)
9262 return virtual_incoming_args_rtx;
9265 struct scratch_reg {
9270 /* Return a short-lived scratch register for use on function entry.
9271 In 32-bit mode, it is valid only after the registers are saved
9272 in the prologue. This register must be released by means of
9273 release_scratch_register_on_entry once it is dead. */
9276 get_scratch_register_on_entry (struct scratch_reg *sr)
9284 /* We always use R11 in 64-bit mode. */
9289 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
9291 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
9292 bool static_chain_p = DECL_STATIC_CHAIN (decl);
9293 int regparm = ix86_function_regparm (fntype, decl);
9295 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
9297 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
9298 for the static chain register. */
9299 if ((regparm < 1 || (fastcall_p && !static_chain_p))
9300 && drap_regno != AX_REG)
9302 else if (regparm < 2 && drap_regno != DX_REG)
9304 /* ecx is the static chain register. */
9305 else if (regparm < 3 && !fastcall_p && !static_chain_p
9306 && drap_regno != CX_REG)
9308 else if (ix86_save_reg (BX_REG, true))
9310 /* esi is the static chain register. */
9311 else if (!(regparm == 3 && static_chain_p)
9312 && ix86_save_reg (SI_REG, true))
9314 else if (ix86_save_reg (DI_REG, true))
9318 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
9323 sr->reg = gen_rtx_REG (Pmode, regno);
9326 rtx insn = emit_insn (gen_push (sr->reg));
9327 RTX_FRAME_RELATED_P (insn) = 1;
9331 /* Release a scratch register obtained from the preceding function. */
9334 release_scratch_register_on_entry (struct scratch_reg *sr)
9338 rtx x, insn = emit_insn (gen_pop (sr->reg));
9340 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
9341 RTX_FRAME_RELATED_P (insn) = 1;
9342 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
9343 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
9344 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
9348 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
9350 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
9353 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
9355 /* We skip the probe for the first interval + a small dope of 4 words and
9356 probe that many bytes past the specified size to maintain a protection
9357 area at the bottom of the stack.  */
9358 const int dope = 4 * UNITS_PER_WORD;
9359 rtx size_rtx = GEN_INT (size), last;
9361 /* See if we have a constant small number of probes to generate. If so,
9362 that's the easy case. The run-time loop is made up of 11 insns in the
9363 generic case while the compile-time loop is made up of 3+2*(n-1) insns
9364 for n intervals.  */
9365 if (size <= 5 * PROBE_INTERVAL)
9367 HOST_WIDE_INT i, adjust;
9368 bool first_probe = true;
9370 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
9371 values of N from 1 until it exceeds SIZE. If only one probe is
9372 needed, this will not generate any code. Then adjust and probe
9373 to PROBE_INTERVAL + SIZE. */
9374 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
9378 adjust = 2 * PROBE_INTERVAL + dope;
9379 first_probe = false;
9382 adjust = PROBE_INTERVAL;
9384 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9385 plus_constant (stack_pointer_rtx, -adjust)));
9386 emit_stack_probe (stack_pointer_rtx);
9390 adjust = size + PROBE_INTERVAL + dope;
9392 adjust = size + PROBE_INTERVAL - i;
9394 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9395 plus_constant (stack_pointer_rtx, -adjust)));
9396 emit_stack_probe (stack_pointer_rtx);
9398 /* Adjust back to account for the additional first interval. */
9399 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9400 plus_constant (stack_pointer_rtx,
9401 PROBE_INTERVAL + dope)));
9404 /* Otherwise, do the same as above, but in a loop. Note that we must be
9405 extra careful with variables wrapping around because we might be at
9406 the very top (or the very bottom) of the address space and we have
9407 to be able to handle this case properly; in particular, we use an
9408 equality test for the loop condition. */
9411 HOST_WIDE_INT rounded_size;
9412 struct scratch_reg sr;
9414 get_scratch_register_on_entry (&sr);
9417 /* Step 1: round SIZE to the previous multiple of the interval. */
9419 rounded_size = size & -PROBE_INTERVAL;
9422 /* Step 2: compute initial and final value of the loop counter. */
9424 /* SP = SP_0 + PROBE_INTERVAL. */
9425 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9426 plus_constant (stack_pointer_rtx,
9427 - (PROBE_INTERVAL + dope))));
9429 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
9430 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
9431 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
9432 gen_rtx_PLUS (Pmode, sr.reg,
9433 stack_pointer_rtx)));
9438 while (SP != LAST_ADDR)
9440 SP = SP + PROBE_INTERVAL
9444 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
9445 values of N from 1 until it is equal to ROUNDED_SIZE. */
9447 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
9450 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
9451 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
9453 if (size != rounded_size)
9455 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9456 plus_constant (stack_pointer_rtx,
9457 rounded_size - size)));
9458 emit_stack_probe (stack_pointer_rtx);
9461 /* Adjust back to account for the additional first interval. */
9462 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9463 plus_constant (stack_pointer_rtx,
9464 PROBE_INTERVAL + dope)));
9466 release_scratch_register_on_entry (&sr);
9469 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
9471 /* Even if the stack pointer isn't the CFA register, we need to correctly
9472 describe the adjustments made to it, in particular differentiate the
9473 frame-related ones from the frame-unrelated ones. */
9476 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
9477 XVECEXP (expr, 0, 0)
9478 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9479 plus_constant (stack_pointer_rtx, -size));
9480 XVECEXP (expr, 0, 1)
9481 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9482 plus_constant (stack_pointer_rtx,
9483 PROBE_INTERVAL + dope + size));
9484 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
9485 RTX_FRAME_RELATED_P (last) = 1;
9487 cfun->machine->fs.sp_offset += size;
9490 /* Make sure nothing is scheduled before we are done. */
9491 emit_insn (gen_blockage ());
9494 /* Adjust the stack pointer up to REG while probing it. */
9497 output_adjust_stack_and_probe (rtx reg)
9499 static int labelno = 0;
9500 char loop_lab[32], end_lab[32];
9503 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
9504 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
9506 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
9508 /* Jump to END_LAB if SP == LAST_ADDR. */
9509 xops[0] = stack_pointer_rtx;
9511 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
9512 fputs ("\tje\t", asm_out_file);
9513 assemble_name_raw (asm_out_file, end_lab);
9514 fputc ('\n', asm_out_file);
9516 /* SP = SP + PROBE_INTERVAL. */
9517 xops[1] = GEN_INT (PROBE_INTERVAL);
9518 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
9521 xops[1] = const0_rtx;
9522 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
9524 fprintf (asm_out_file, "\tjmp\t");
9525 assemble_name_raw (asm_out_file, loop_lab);
9526 fputc ('\n', asm_out_file);
9528 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
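/* For illustration, the loop printed above comes out roughly as follows
   in 32-bit AT&T syntax (a sketch, assuming PROBE_INTERVAL == 4096 and
   %eax holding LAST_ADDR):

	.LPSRL0:
		cmpl	%eax, %esp
		je	.LPSRE0
		subl	$4096, %esp
		orl	$0, (%esp)
		jmp	.LPSRL0
	.LPSRE0:

   The or-to-memory instruction is the actual probe: it touches the
   newly exposed page without changing its contents.  */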
9533 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
9534 inclusive. These are offsets from the current stack pointer. */
9537 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
9539 /* See if we have a constant small number of probes to generate. If so,
9540 that's the easy case. The run-time loop is made up of 7 insns in the
9541 generic case while the compile-time loop is made up of n insns for n intervals.  */
9543 if (size <= 7 * PROBE_INTERVAL)
9547 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
9548 it exceeds SIZE. If only one probe is needed, this will not
9549 generate any code. Then probe at FIRST + SIZE. */
9550 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
9551 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + i)));
9553 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + size)));
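/* A worked example: with PROBE_INTERVAL == 4096 (the usual value of
   1 << STACK_CHECK_PROBE_INTERVAL_EXP), FIRST == 0 and SIZE == 12288,
   the loop above emits probes at sp-4096 and sp-8192, and the final
   statement probes at sp-12288.  */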
9556 /* Otherwise, do the same as above, but in a loop. Note that we must be
9557 extra careful with variables wrapping around because we might be at
9558 the very top (or the very bottom) of the address space and we have
9559 to be able to handle this case properly; in particular, we use an
9560 equality test for the loop condition. */
9563 HOST_WIDE_INT rounded_size, last;
9564 struct scratch_reg sr;
9566 get_scratch_register_on_entry (&sr);
9569 /* Step 1: round SIZE to the previous multiple of the interval. */
9571 rounded_size = size & -PROBE_INTERVAL;
9574 /* Step 2: compute initial and final value of the loop counter. */
9576 /* TEST_OFFSET = FIRST. */
9577 emit_move_insn (sr.reg, GEN_INT (-first));
9579 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
9580 last = first + rounded_size;
9585 while (TEST_ADDR != LAST_ADDR)
9587 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
9591 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
9592 until it is equal to ROUNDED_SIZE. */
9594 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
9597 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
9598 that SIZE is equal to ROUNDED_SIZE. */
9600 if (size != rounded_size)
9601 emit_stack_probe (plus_constant (gen_rtx_PLUS (Pmode,
9604 rounded_size - size));
9606 release_scratch_register_on_entry (&sr);
9609 /* Make sure nothing is scheduled before we are done. */
9610 emit_insn (gen_blockage ());
9613 /* Probe a range of stack addresses from REG to END, inclusive. These are
9614 offsets from the current stack pointer. */
9617 output_probe_stack_range (rtx reg, rtx end)
9619 static int labelno = 0;
9620 char loop_lab[32], end_lab[32];
9623 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
9624 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
9626 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
9628 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
9631 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
9632 fputs ("\tje\t", asm_out_file);
9633 assemble_name_raw (asm_out_file, end_lab);
9634 fputc ('\n', asm_out_file);
9636 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
9637 xops[1] = GEN_INT (PROBE_INTERVAL);
9638 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
9640 /* Probe at TEST_ADDR. */
9641 xops[0] = stack_pointer_rtx;
9643 xops[2] = const0_rtx;
9644 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
9646 fprintf (asm_out_file, "\tjmp\t");
9647 assemble_name_raw (asm_out_file, loop_lab);
9648 fputc ('\n', asm_out_file);
9650 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
9655 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
9656 to be generated in the correct form.  */
9658 ix86_finalize_stack_realign_flags (void)
9660 /* Check if stack realign is really needed after reload, and
9661 store the result in cfun.  */
9662 unsigned int incoming_stack_boundary
9663 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
9664 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
9665 unsigned int stack_realign = (incoming_stack_boundary
9666 < (current_function_is_leaf
9667 ? crtl->max_used_stack_slot_alignment
9668 : crtl->stack_alignment_needed));
9670 if (crtl->stack_realign_finalized)
9672 /* After stack_realign_needed is finalized, we can no longer update it.  */
9674 gcc_assert (crtl->stack_realign_needed == stack_realign);
9678 crtl->stack_realign_needed = stack_realign;
9679 crtl->stack_realign_finalized = true;
9683 /* Expand the prologue into a bunch of separate insns. */
9686 ix86_expand_prologue (void)
9688 struct machine_function *m = cfun->machine;
9691 struct ix86_frame frame;
9692 HOST_WIDE_INT allocate;
9693 bool int_registers_saved;
9695 ix86_finalize_stack_realign_flags ();
9697 /* DRAP should not coexist with stack_realign_fp */
9698 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
9700 memset (&m->fs, 0, sizeof (m->fs));
9702 /* Initialize CFA state for before the prologue. */
9703 m->fs.cfa_reg = stack_pointer_rtx;
9704 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
9706 /* Track SP offset to the CFA. We continue tracking this after we've
9707 swapped the CFA register away from SP. In the case of re-alignment
9708 this is fudged; we're interested in offsets within the local frame.  */
9709 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
9710 m->fs.sp_valid = true;
9712 ix86_compute_frame_layout (&frame);
9714 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
9716 /* We should have already generated an error for any use of
9717 ms_hook on a nested function. */
9718 gcc_checking_assert (!ix86_static_chain_on_stack);
9720 /* Check whether profiling is active and we shall use the profiling-before-
9721 prologue variant. If so, issue a sorry.  */
9722 if (crtl->profile && flag_fentry != 0)
9723 sorry ("ms_hook_prologue attribute isn%'t compatible "
9724 "with -mfentry for 32-bit");
9726 /* In ix86_asm_output_function_label we emitted:
9727 8b ff movl.s %edi,%edi
55 push %ebp
9729 8b ec movl.s %esp,%ebp
9731 This matches the hookable function prologue in Win32 API
9732 functions in Microsoft Windows XP Service Pack 2 and newer.
9733 Wine uses this to enable Windows apps to hook the Win32 API
9734 functions provided by Wine.
9736 What that means is that we've already set up the frame pointer. */
9738 if (frame_pointer_needed
9739 && !(crtl->drap_reg && crtl->stack_realign_needed))
9743 /* We've decided to use the frame pointer already set up.
9744 Describe this to the unwinder by pretending that both
9745 push and mov insns happen right here.
9747 Putting the unwind info here at the end of the ms_hook
9748 is done so that we can make absolutely certain we get
9749 the required byte sequence at the start of the function,
9750 rather than relying on an assembler that can produce
9751 the exact encoding required.
9753 However it does mean (in the unpatched case) that we have
9754 a 1 insn window where the asynchronous unwind info is
9755 incorrect. However, if we placed the unwind info at
9756 its correct location we would have incorrect unwind info
9757 in the patched case. Which is probably all moot since
9758 I don't expect Wine generates dwarf2 unwind info for the
9759 system libraries that use this feature. */
9761 insn = emit_insn (gen_blockage ());
9763 push = gen_push (hard_frame_pointer_rtx);
9764 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
9766 RTX_FRAME_RELATED_P (push) = 1;
9767 RTX_FRAME_RELATED_P (mov) = 1;
9769 RTX_FRAME_RELATED_P (insn) = 1;
9770 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
9771 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
9773 /* Note that gen_push incremented m->fs.cfa_offset, even
9774 though we didn't emit the push insn here. */
9775 m->fs.cfa_reg = hard_frame_pointer_rtx;
9776 m->fs.fp_offset = m->fs.cfa_offset;
9777 m->fs.fp_valid = true;
9781 /* The frame pointer is not needed so pop %ebp again.
9782 This leaves us with a pristine state. */
9783 emit_insn (gen_pop (hard_frame_pointer_rtx));
9787 /* The first insn of a function that accepts its static chain on the
9788 stack is to push the register that would be filled in by a direct
9789 call. This insn will be skipped by the trampoline. */
9790 else if (ix86_static_chain_on_stack)
9792 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
9793 emit_insn (gen_blockage ());
9795 /* We don't want to interpret this push insn as a register save,
9796 only as a stack adjustment. The real copy of the register as
9797 a save will be done later, if needed. */
9798 t = plus_constant (stack_pointer_rtx, -UNITS_PER_WORD);
9799 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
9800 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
9801 RTX_FRAME_RELATED_P (insn) = 1;
9804 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
9805 DRAP is needed and stack realignment is really needed after reload.  */
9806 if (stack_realign_drap)
9808 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
9810 /* Only need to push the parameter pointer reg if it is call-saved and must therefore be preserved.  */
9811 if (!call_used_regs[REGNO (crtl->drap_reg)])
9813 /* Push arg pointer reg */
9814 insn = emit_insn (gen_push (crtl->drap_reg));
9815 RTX_FRAME_RELATED_P (insn) = 1;
9818 /* Grab the argument pointer. */
9819 t = plus_constant (stack_pointer_rtx, m->fs.sp_offset);
9820 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
9821 RTX_FRAME_RELATED_P (insn) = 1;
9822 m->fs.cfa_reg = crtl->drap_reg;
9823 m->fs.cfa_offset = 0;
9825 /* Align the stack. */
9826 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
9828 GEN_INT (-align_bytes)));
9829 RTX_FRAME_RELATED_P (insn) = 1;
9831 /* Replicate the return address on the stack so that the return
9832 address can be reached via the (argp - 1) slot. This is needed
9833 to implement macro RETURN_ADDR_RTX and intrinsic function
9834 expand_builtin_return_addr etc. */
9835 t = plus_constant (crtl->drap_reg, -UNITS_PER_WORD);
9836 t = gen_frame_mem (Pmode, t);
9837 insn = emit_insn (gen_push (t));
9838 RTX_FRAME_RELATED_P (insn) = 1;
9840 /* For the purposes of frame and register save area addressing,
9841 we've started over with a new frame. */
9842 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
9843 m->fs.realigned = true;
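/* For illustration, on ia32 with a 16-byte alignment requirement the
   DRAP setup above comes out roughly as (a sketch, with %ecx standing
   in for the DRAP register):

	leal	4(%esp), %ecx		# DRAP = incoming argument pointer
	andl	$-16, %esp		# align the stack
	pushl	-4(%ecx)		# replicate the return address

   after which %ecx addresses the incoming arguments no matter how far
   the realignment moved the stack pointer.  */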
9846 if (frame_pointer_needed && !m->fs.fp_valid)
9848 /* Note: AT&T enter does NOT have reversed args. Enter is probably
9849 slower on all targets. Also sdb doesn't like it. */
9850 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
9851 RTX_FRAME_RELATED_P (insn) = 1;
9853 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
9855 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
9856 RTX_FRAME_RELATED_P (insn) = 1;
9858 if (m->fs.cfa_reg == stack_pointer_rtx)
9859 m->fs.cfa_reg = hard_frame_pointer_rtx;
9860 m->fs.fp_offset = m->fs.sp_offset;
9861 m->fs.fp_valid = true;
9865 int_registers_saved = (frame.nregs == 0);
9867 if (!int_registers_saved)
9869 /* If saving registers via PUSH, do so now. */
9870 if (!frame.save_regs_using_mov)
9872 ix86_emit_save_regs ();
9873 int_registers_saved = true;
9874 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
9877 /* When using the red zone we may start saving registers before allocating
9878 the stack frame, saving one cycle of the prologue. However, avoid
9879 doing this if we have to probe the stack; at least on x86_64 the
9880 stack probe can turn into a call that clobbers a red zone location. */
9881 else if (ix86_using_red_zone ()
9882 && (! TARGET_STACK_PROBE
9883 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
9885 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
9886 int_registers_saved = true;
9890 if (stack_realign_fp)
9892 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
9893 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
9895 /* The computation of the size of the re-aligned stack frame means
9896 that we must allocate the size of the register save area before
9897 performing the actual alignment. Otherwise we cannot guarantee
9898 that there's enough storage above the realignment point. */
9899 if (m->fs.sp_offset != frame.sse_reg_save_offset)
9900 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9901 GEN_INT (m->fs.sp_offset
9902 - frame.sse_reg_save_offset),
9905 /* Align the stack. */
9906 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
9908 GEN_INT (-align_bytes)));
9910 /* For the purposes of register save area addressing, the stack
9911 pointer is no longer valid. As for the value of sp_offset,
9912 see ix86_compute_frame_layout, which we need to match in order
9913 to pass verification of stack_pointer_offset at the end. */
9914 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
9915 m->fs.sp_valid = false;
9918 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
9920 if (flag_stack_usage_info)
9922 /* We start to count from ARG_POINTER. */
9923 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
9925 /* If it was realigned, take into account the fake frame. */
9926 if (stack_realign_drap)
9928 if (ix86_static_chain_on_stack)
9929 stack_size += UNITS_PER_WORD;
9931 if (!call_used_regs[REGNO (crtl->drap_reg)])
9932 stack_size += UNITS_PER_WORD;
9934 /* This over-estimates by 1 minimal-stack-alignment-unit but
9935 mitigates that by counting in the new return address slot. */
9936 current_function_dynamic_stack_size
9937 += crtl->stack_alignment_needed / BITS_PER_UNIT;
9940 current_function_static_stack_size = stack_size;
9943 /* The stack has already been decremented by the instruction calling us
9944 so probe if the size is non-negative to preserve the protection area. */
9945 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
9947 /* We expect the registers to be saved when probes are used. */
9948 gcc_assert (int_registers_saved);
9950 if (STACK_CHECK_MOVING_SP)
9952 ix86_adjust_stack_and_probe (allocate);
9957 HOST_WIDE_INT size = allocate;
9959 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
9960 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
9962 if (TARGET_STACK_PROBE)
9963 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
9965 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
9971 else if (!ix86_target_stack_probe ()
9972 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
9974 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9975 GEN_INT (-allocate), -1,
9976 m->fs.cfa_reg == stack_pointer_rtx);
9980 rtx eax = gen_rtx_REG (Pmode, AX_REG);
9982 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
9984 bool eax_live = false;
9985 bool r10_live = false;
9988 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
9989 if (!TARGET_64BIT_MS_ABI)
9990 eax_live = ix86_eax_live_at_start_p ();
9994 emit_insn (gen_push (eax));
9995 allocate -= UNITS_PER_WORD;
9999 r10 = gen_rtx_REG (Pmode, R10_REG);
10000 emit_insn (gen_push (r10));
10001 allocate -= UNITS_PER_WORD;
10004 emit_move_insn (eax, GEN_INT (allocate));
10005 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
10007 /* Use the fact that AX still contains ALLOCATE. */
10008 adjust_stack_insn = (TARGET_64BIT
10009 ? gen_pro_epilogue_adjust_stack_di_sub
10010 : gen_pro_epilogue_adjust_stack_si_sub);
10012 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
10013 stack_pointer_rtx, eax));
10015 /* Note that SEH directives need to continue tracking the stack
10016 pointer even after the frame pointer has been set up. */
10017 if (m->fs.cfa_reg == stack_pointer_rtx || TARGET_SEH)
10019 if (m->fs.cfa_reg == stack_pointer_rtx)
10020 m->fs.cfa_offset += allocate;
10022 RTX_FRAME_RELATED_P (insn) = 1;
10023 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10024 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10025 plus_constant (stack_pointer_rtx,
10028 m->fs.sp_offset += allocate;
10030 if (r10_live && eax_live)
10032 t = choose_baseaddr (m->fs.sp_offset - allocate);
10033 emit_move_insn (r10, gen_frame_mem (Pmode, t));
10034 t = choose_baseaddr (m->fs.sp_offset - allocate - UNITS_PER_WORD);
10035 emit_move_insn (eax, gen_frame_mem (Pmode, t));
10037 else if (eax_live || r10_live)
10039 t = choose_baseaddr (m->fs.sp_offset - allocate);
10040 emit_move_insn ((eax_live ? eax : r10), gen_frame_mem (Pmode, t));
10043 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
10045 /* If we haven't already set up the frame pointer, do so now.  */
10046 if (frame_pointer_needed && !m->fs.fp_valid)
10048 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
10049 GEN_INT (frame.stack_pointer_offset
10050 - frame.hard_frame_pointer_offset));
10051 insn = emit_insn (insn);
10052 RTX_FRAME_RELATED_P (insn) = 1;
10053 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
10055 if (m->fs.cfa_reg == stack_pointer_rtx)
10056 m->fs.cfa_reg = hard_frame_pointer_rtx;
10057 m->fs.fp_offset = frame.hard_frame_pointer_offset;
10058 m->fs.fp_valid = true;
10061 if (!int_registers_saved)
10062 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10063 if (frame.nsseregs)
10064 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
10066 pic_reg_used = false;
10067 if (pic_offset_table_rtx
10068 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
10071 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
10073 if (alt_pic_reg_used != INVALID_REGNUM)
10074 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
10076 pic_reg_used = true;
10083 if (ix86_cmodel == CM_LARGE_PIC)
10085 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
10086 rtx label = gen_label_rtx ();
10087 emit_label (label);
10088 LABEL_PRESERVE_P (label) = 1;
10089 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
10090 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
10091 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
10092 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
10093 pic_offset_table_rtx, tmp_reg));
10096 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
10100 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
10101 RTX_FRAME_RELATED_P (insn) = 1;
10102 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
10106 /* In the pic_reg_used case, make sure that the GOT load isn't deleted
10107 when mcount needs it. Blockage to avoid call movement across mcount
10108 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
10110 if (crtl->profile && !flag_fentry && pic_reg_used)
10111 emit_insn (gen_prologue_use (pic_offset_table_rtx));
10113 if (crtl->drap_reg && !crtl->stack_realign_needed)
10115 /* vDRAP is set up, but after reload it turns out stack realignment
10116 isn't necessary; here we emit prologue code to set up DRAP
10117 without the stack realignment adjustment.  */
10118 t = choose_baseaddr (0);
10119 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10122 /* Prevent instructions from being scheduled into register save push
10123 sequence when access to the red-zone area is done through the frame pointer.
10124 The offset between the frame pointer and the stack pointer is calculated
10125 relative to the value of the stack pointer at the end of the function
10126 prologue, and moving instructions that access redzone area via frame
10127 pointer inside push sequence violates this assumption. */
10128 if (frame_pointer_needed && frame.red_zone_size)
10129 emit_insn (gen_memory_blockage ());
10131 /* Emit cld instruction if stringops are used in the function. */
10132 if (TARGET_CLD && ix86_current_function_needs_cld)
10133 emit_insn (gen_cld ());
10135 /* SEH requires that the prologue end within 256 bytes of the start of
10136 the function. Prevent instruction schedules that would extend that. */
10138 emit_insn (gen_blockage ());
10141 /* Emit code to restore REG using a POP insn. */
10144 ix86_emit_restore_reg_using_pop (rtx reg)
10146 struct machine_function *m = cfun->machine;
10147 rtx insn = emit_insn (gen_pop (reg));
10149 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
10150 m->fs.sp_offset -= UNITS_PER_WORD;
10152 if (m->fs.cfa_reg == crtl->drap_reg
10153 && REGNO (reg) == REGNO (crtl->drap_reg))
10155 /* Previously we'd represented the CFA as an expression
10156 like *(%ebp - 8). We've just popped that value from
10157 the stack, which means we need to reset the CFA to
10158 the drap register. This will remain until we restore
10159 the stack pointer. */
10160 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
10161 RTX_FRAME_RELATED_P (insn) = 1;
10163 /* This means that the DRAP register is valid for addressing too. */
10164 m->fs.drap_valid = true;
10168 if (m->fs.cfa_reg == stack_pointer_rtx)
10170 rtx x = plus_constant (stack_pointer_rtx, UNITS_PER_WORD);
10171 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10172 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
10173 RTX_FRAME_RELATED_P (insn) = 1;
10175 m->fs.cfa_offset -= UNITS_PER_WORD;
10178 /* When the frame pointer is the CFA, and we pop it, we are
10179 swapping back to the stack pointer as the CFA. This happens
10180 for stack frames that don't allocate other data, so we assume
10181 the stack pointer is now pointing at the return address, i.e.
10182 the function entry state, which makes the offset one word.  */
10183 if (reg == hard_frame_pointer_rtx)
10185 m->fs.fp_valid = false;
10186 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
10188 m->fs.cfa_reg = stack_pointer_rtx;
10189 m->fs.cfa_offset -= UNITS_PER_WORD;
10191 add_reg_note (insn, REG_CFA_DEF_CFA,
10192 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10193 GEN_INT (m->fs.cfa_offset)));
10194 RTX_FRAME_RELATED_P (insn) = 1;
10199 /* Emit code to restore saved registers using POP insns. */
10202 ix86_emit_restore_regs_using_pop (void)
10204 unsigned int regno;
10206 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10207 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
10208 ix86_emit_restore_reg_using_pop (gen_rtx_REG (Pmode, regno));
10211 /* Emit code and notes for the LEAVE instruction. */
10214 ix86_emit_leave (void)
10216 struct machine_function *m = cfun->machine;
10217 rtx insn = emit_insn (ix86_gen_leave ());
10219 ix86_add_queued_cfa_restore_notes (insn);
10221 gcc_assert (m->fs.fp_valid);
10222 m->fs.sp_valid = true;
10223 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
10224 m->fs.fp_valid = false;
10226 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
10228 m->fs.cfa_reg = stack_pointer_rtx;
10229 m->fs.cfa_offset = m->fs.sp_offset;
10231 add_reg_note (insn, REG_CFA_DEF_CFA,
10232 plus_constant (stack_pointer_rtx, m->fs.sp_offset));
10233 RTX_FRAME_RELATED_P (insn) = 1;
10234 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
m->fs.fp_offset);
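/* Editor's note: "leave" is equivalent to the two-insn sequence

       movl %ebp, %esp      (movq %rbp, %rsp in 64-bit mode)
       popl %ebp

   which is why the bookkeeping above revalidates the stack pointer at
   fp_offset - UNITS_PER_WORD and invalidates the frame pointer in a
   single step. */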
10239 /* Emit code to restore saved registers using MOV insns.
10240 First register is restored from CFA - CFA_OFFSET. */
10242 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
10243 bool maybe_eh_return)
10245 struct machine_function *m = cfun->machine;
10246 unsigned int regno;
10248 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10249 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
10251 rtx reg = gen_rtx_REG (Pmode, regno);
10254 mem = choose_baseaddr (cfa_offset);
10255 mem = gen_frame_mem (Pmode, mem);
10256 insn = emit_move_insn (reg, mem);
10258 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
10260 /* Previously we'd represented the CFA as an expression
10261 like *(%ebp - 8). We've just popped that value from
10262 the stack, which means we need to reset the CFA to
10263 the drap register. This will remain until we restore
10264 the stack pointer. */
10265 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
10266 RTX_FRAME_RELATED_P (insn) = 1;
10268 /* This means that the DRAP register is valid for addressing. */
10269 m->fs.drap_valid = true;
10272 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
10274 cfa_offset -= UNITS_PER_WORD;
10278 /* Emit code to restore saved registers using MOV insns.
10279 First register is restored from CFA - CFA_OFFSET. */
10281 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
10282 bool maybe_eh_return)
10284 unsigned int regno;
10286 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10287 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
10289 rtx reg = gen_rtx_REG (V4SFmode, regno);
10292 mem = choose_baseaddr (cfa_offset);
10293 mem = gen_rtx_MEM (V4SFmode, mem);
10294 set_mem_align (mem, 128);
10295 emit_move_insn (reg, mem);
10297 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
cfa_offset -= 16;
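/* Editor's note: the 128-bit MEM alignment set above lets the move
   expander select the aligned movaps form rather than movups, the save
   area having been aligned by the prologue; each V4SF slot is 16 bytes,
   hence cfa_offset stepping by 16 instead of UNITS_PER_WORD. */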
10303 /* Restore function stack, frame, and registers. */
10306 ix86_expand_epilogue (int style)
10308 struct machine_function *m = cfun->machine;
10309 struct machine_frame_state frame_state_save = m->fs;
10310 struct ix86_frame frame;
10311 bool restore_regs_via_mov;
10314 ix86_finalize_stack_realign_flags ();
10315 ix86_compute_frame_layout (&frame);
10317 m->fs.sp_valid = (!frame_pointer_needed
10318 || (current_function_sp_is_unchanging
10319 && !stack_realign_fp));
10320 gcc_assert (!m->fs.sp_valid
10321 || m->fs.sp_offset == frame.stack_pointer_offset);
10323 /* The FP must be valid if the frame pointer is present. */
10324 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
10325 gcc_assert (!m->fs.fp_valid
10326 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
10328 /* We must have *some* valid pointer to the stack frame. */
10329 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
10331 /* The DRAP is never valid at this point. */
10332 gcc_assert (!m->fs.drap_valid);
10334 /* See the comment about red zone and frame
10335 pointer usage in ix86_expand_prologue. */
10336 if (frame_pointer_needed && frame.red_zone_size)
10337 emit_insn (gen_memory_blockage ());
10339 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
10340 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
10342 /* Determine the CFA offset of the end of the red-zone. */
10343 m->fs.red_zone_offset = 0;
10344 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
10346 /* The red-zone begins below the return address. */
10347 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
10349 /* When the register save area is in the aligned portion of
10350 the stack, determine the maximum runtime displacement that
10351 matches up with the aligned frame. */
10352 if (stack_realign_drap)
10353 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
10357 /* Special care must be taken for the normal return case of a function
10358 using eh_return: the eax and edx registers are marked as saved, but
10359 not restored along this path. Adjust the save location to match. */
10360 if (crtl->calls_eh_return && style != 2)
10361 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
10363 /* EH_RETURN requires the use of moves to function properly. */
10364 if (crtl->calls_eh_return)
10365 restore_regs_via_mov = true;
10366 /* SEH requires the use of pops to identify the epilogue. */
10367 else if (TARGET_SEH)
10368 restore_regs_via_mov = false;
10369 /* If we're only restoring one register and sp is not valid, then
10370 use a move instruction to restore the register, since it's
10371 less work than reloading sp and popping the register. */
10372 else if (!m->fs.sp_valid && frame.nregs <= 1)
10373 restore_regs_via_mov = true;
10374 else if (TARGET_EPILOGUE_USING_MOVE
10375 && cfun->machine->use_fast_prologue_epilogue
10376 && (frame.nregs > 1
10377 || m->fs.sp_offset != frame.reg_save_offset))
10378 restore_regs_via_mov = true;
10379 else if (frame_pointer_needed
10381 && m->fs.sp_offset != frame.reg_save_offset)
10382 restore_regs_via_mov = true;
10383 else if (frame_pointer_needed
10384 && TARGET_USE_LEAVE
10385 && cfun->machine->use_fast_prologue_epilogue
10386 && frame.nregs == 1)
10387 restore_regs_via_mov = true;
10389 restore_regs_via_mov = false;
10391 if (restore_regs_via_mov || frame.nsseregs)
10393 /* Ensure that the entire register save area is addressable via
10394 the stack pointer, if we will restore via sp. */
10396 && m->fs.sp_offset > 0x7fffffff
10397 && !(m->fs.fp_valid || m->fs.drap_valid)
10398 && (frame.nsseregs + frame.nregs) != 0)
10400 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10401 GEN_INT (m->fs.sp_offset
10402 - frame.sse_reg_save_offset),
10404 m->fs.cfa_reg == stack_pointer_rtx);
10408 /* If there are any SSE registers to restore, then we have to do it
10409 via moves, since there's obviously no pop for SSE regs. */
10410 if (frame.nsseregs)
10411 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
10414 if (restore_regs_via_mov)
10419 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
10421 /* eh_return epilogues need %ecx added to the stack pointer. */
10424 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
10426 /* Stack align doesn't work with eh_return. */
10427 gcc_assert (!stack_realign_drap);
10428 /* Neither do regparm nested functions. */
10429 gcc_assert (!ix86_static_chain_on_stack);
10431 if (frame_pointer_needed)
10433 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
10434 t = plus_constant (t, m->fs.fp_offset - UNITS_PER_WORD);
10435 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
10437 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
10438 insn = emit_move_insn (hard_frame_pointer_rtx, t);
10440 /* Note that we use SA as a temporary CFA, as the return
10441 address is at the proper place relative to it. We
10442 pretend this happens at the FP restore insn because
10443 prior to this insn the FP would be stored at the wrong
10444 offset relative to SA, and after this insn we have no
10445 other reasonable register to use for the CFA. We don't
10446 bother resetting the CFA to the SP for the duration of
10447 the return insn. */
10448 add_reg_note (insn, REG_CFA_DEF_CFA,
10449 plus_constant (sa, UNITS_PER_WORD));
10450 ix86_add_queued_cfa_restore_notes (insn);
10451 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
10452 RTX_FRAME_RELATED_P (insn) = 1;
10454 m->fs.cfa_reg = sa;
10455 m->fs.cfa_offset = UNITS_PER_WORD;
10456 m->fs.fp_valid = false;
10458 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
10459 const0_rtx, style, false);
10463 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
10464 t = plus_constant (t, m->fs.sp_offset - UNITS_PER_WORD);
10465 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
10466 ix86_add_queued_cfa_restore_notes (insn);
10468 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
10469 if (m->fs.cfa_offset != UNITS_PER_WORD)
10471 m->fs.cfa_offset = UNITS_PER_WORD;
10472 add_reg_note (insn, REG_CFA_DEF_CFA,
10473 plus_constant (stack_pointer_rtx,
10475 RTX_FRAME_RELATED_P (insn) = 1;
10478 m->fs.sp_offset = UNITS_PER_WORD;
10479 m->fs.sp_valid = true;
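/* Editor's sketch of the frame-pointer path above, for 64-bit, where
   EH_RETURN_STACKADJ_RTX is %rcx:

       rcx = rbp + rcx + (fp_offset - 8)   -- the new stack pointer
       rbp = *(rbp)                        -- reload saved frame pointer
       rsp = rcx                           -- via pro_epilogue_adjust_stack

   with the CFA temporarily based on %rcx, as the comment explains. */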
10484 /* SEH requires that the function end with (1) a stack adjustment
10485 if necessary, (2) a sequence of pops, and (3) a return or
10486 jump instruction. Prevent insns from the function body from
10487 being scheduled into this sequence. */
10490 /* Prevent a catch region from being adjacent to the standard
10491 epilogue sequence. Unfortunately, neither crtl->uses_eh_lsda nor
10492 several other flags that would be interesting to test are
set up yet. */
10494 if (flag_non_call_exceptions)
10495 emit_insn (gen_nops (const1_rtx));
10497 emit_insn (gen_blockage ());
10500 /* First step is to deallocate the stack frame so that we can
10501 pop the registers. */
10502 if (!m->fs.sp_valid)
10504 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
10505 GEN_INT (m->fs.fp_offset
10506 - frame.reg_save_offset),
10509 else if (m->fs.sp_offset != frame.reg_save_offset)
10511 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10512 GEN_INT (m->fs.sp_offset
10513 - frame.reg_save_offset),
10515 m->fs.cfa_reg == stack_pointer_rtx);
10518 ix86_emit_restore_regs_using_pop ();
10521 /* If we used a frame pointer and haven't already got rid of it,
then do so now. */
10523 if (m->fs.fp_valid)
10525 /* If the stack pointer is valid and pointing at the frame
10526 pointer store address, then we only need a pop. */
10527 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
10528 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10529 /* Leave results in shorter dependency chains on CPUs that are
10530 able to grok it fast. */
10531 else if (TARGET_USE_LEAVE
10532 || optimize_function_for_size_p (cfun)
10533 || !cfun->machine->use_fast_prologue_epilogue)
10534 ix86_emit_leave ();
10537 pro_epilogue_adjust_stack (stack_pointer_rtx,
10538 hard_frame_pointer_rtx,
10539 const0_rtx, style, !using_drap);
10540 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10546 int param_ptr_offset = UNITS_PER_WORD;
10549 gcc_assert (stack_realign_drap);
10551 if (ix86_static_chain_on_stack)
10552 param_ptr_offset += UNITS_PER_WORD;
10553 if (!call_used_regs[REGNO (crtl->drap_reg)])
10554 param_ptr_offset += UNITS_PER_WORD;
10556 insn = emit_insn (gen_rtx_SET
10557 (VOIDmode, stack_pointer_rtx,
10558 gen_rtx_PLUS (Pmode,
10560 GEN_INT (-param_ptr_offset))));
10561 m->fs.cfa_reg = stack_pointer_rtx;
10562 m->fs.cfa_offset = param_ptr_offset;
10563 m->fs.sp_offset = param_ptr_offset;
10564 m->fs.realigned = false;
10566 add_reg_note (insn, REG_CFA_DEF_CFA,
10567 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10568 GEN_INT (param_ptr_offset)));
10569 RTX_FRAME_RELATED_P (insn) = 1;
10571 if (!call_used_regs[REGNO (crtl->drap_reg)])
10572 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
10575 /* At this point the stack pointer must be valid, and we must have
10576 restored all of the registers. We may not have deallocated the
10577 entire stack frame. We've delayed this until now because it may
10578 be possible to merge the local stack deallocation with the
10579 deallocation forced by ix86_static_chain_on_stack. */
10580 gcc_assert (m->fs.sp_valid);
10581 gcc_assert (!m->fs.fp_valid);
10582 gcc_assert (!m->fs.realigned);
10583 if (m->fs.sp_offset != UNITS_PER_WORD)
10585 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10586 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
10590 /* Sibcall epilogues don't want a return instruction. */
10593 m->fs = frame_state_save;
10597 /* Emit vzeroupper if needed. */
10598 if (TARGET_VZEROUPPER
10599 && !TREE_THIS_VOLATILE (cfun->decl)
10600 && !cfun->machine->caller_return_avx256_p)
10601 emit_insn (gen_avx_vzeroupper (GEN_INT (call_no_avx256)));
10603 if (crtl->args.pops_args && crtl->args.size)
10605 rtx popc = GEN_INT (crtl->args.pops_args);
10607 /* i386 can only pop 64K bytes. If asked to pop more, pop the return
10608 address, do an explicit add, and jump indirectly to the caller. */
10610 if (crtl->args.pops_args >= 65536)
10612 rtx ecx = gen_rtx_REG (SImode, CX_REG);
10615 /* There is no "pascal" calling convention in any 64bit ABI. */
10616 gcc_assert (!TARGET_64BIT);
10618 insn = emit_insn (gen_pop (ecx));
10619 m->fs.cfa_offset -= UNITS_PER_WORD;
10620 m->fs.sp_offset -= UNITS_PER_WORD;
10622 add_reg_note (insn, REG_CFA_ADJUST_CFA,
10623 copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
10624 add_reg_note (insn, REG_CFA_REGISTER,
10625 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
10626 RTX_FRAME_RELATED_P (insn) = 1;
10628 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10630 emit_jump_insn (gen_return_indirect_internal (ecx));
10633 emit_jump_insn (gen_return_pop_internal (popc));
10636 emit_jump_insn (gen_return_internal ());
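/* Editor's note on the three return shapes above, assuming the usual
   i386.md patterns:

       ret             -- gen_return_internal
       ret $N          -- gen_return_pop_internal, N = crtl->args.pops_args
       popl %ecx       -- pops_args >= 64K: pop the return address,
       addl $N, %esp   -- deallocate the arguments explicitly,
       jmp *%ecx       -- and return indirectly through %ecx

   The last form exists because the "ret imm16" encoding carries only a
   16-bit immediate. */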
10638 /* Restore the state back to the state from the prologue,
10639 so that it's correct for the next epilogue. */
10640 m->fs = frame_state_save;
10643 /* Reset from the function's potential modifications. */
10646 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
10647 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
10649 if (pic_offset_table_rtx)
10650 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
10652 /* Mach-O doesn't support labels at the end of objects, so if
10653 it looks like we might want one, insert a NOP. */
10655 rtx insn = get_last_insn ();
while (insn
&& NOTE_P (insn)
10658 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
10659 insn = PREV_INSN (insn);
if (insn
&& (LABEL_P (insn)
|| (NOTE_P (insn)
10663 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
10664 fputs ("\tnop\n", file);
10670 /* Return a scratch register to use in the split stack prologue. The
10671 split stack prologue is used for -fsplit-stack. It consists of the first
10672 instructions in the function, emitted even before the regular prologue.
10673 The scratch register can be any caller-saved register which is not
10674 used for parameters or for the static chain. */
10676 static unsigned int
10677 split_stack_prologue_scratch_regno (void)
10686 is_fastcall = (lookup_attribute ("fastcall",
10687 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
10689 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
10693 if (DECL_STATIC_CHAIN (cfun->decl))
10695 sorry ("-fsplit-stack does not support fastcall with "
10696 "nested function");
10697 return INVALID_REGNUM;
10701 else if (regparm < 3)
10703 if (!DECL_STATIC_CHAIN (cfun->decl))
10709 sorry ("-fsplit-stack does not support 2 register "
10710 "parameters for a nested function");
10711 return INVALID_REGNUM;
10718 /* FIXME: We could make this work by pushing a register
10719 around the addition and comparison. */
10720 sorry ("-fsplit-stack does not support 3 register parameters");
10721 return INVALID_REGNUM;
10726 /* A SYMBOL_REF for the function which allocates new stack space for
split stack. */
10729 static GTY(()) rtx split_stack_fn;
10731 /* A SYMBOL_REF for the __morestack function when using the large
model. */
10734 static GTY(()) rtx split_stack_fn_large;
10736 /* Handle -fsplit-stack. These are the first instructions in the
10737 function, even before the regular prologue. */
10740 ix86_expand_split_stack_prologue (void)
10742 struct ix86_frame frame;
10743 HOST_WIDE_INT allocate;
10744 unsigned HOST_WIDE_INT args_size;
10745 rtx label, limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
10746 rtx scratch_reg = NULL_RTX;
10747 rtx varargs_label = NULL_RTX;
10750 gcc_assert (flag_split_stack && reload_completed);
10752 ix86_finalize_stack_realign_flags ();
10753 ix86_compute_frame_layout (&frame);
10754 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
10756 /* This is the label we will branch to if we have enough stack
10757 space. We expect the basic block reordering pass to reverse this
10758 branch if optimizing, so that we branch in the unlikely case. */
10759 label = gen_label_rtx ();
10761 /* We need to compare the stack pointer minus the frame size with
10762 the stack boundary in the TCB. The stack boundary always gives
10763 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
10764 can compare directly. Otherwise we need to do an addition. */
10766 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
10767 UNSPEC_STACK_CHECK);
10768 limit = gen_rtx_CONST (Pmode, limit);
10769 limit = gen_rtx_MEM (Pmode, limit);
10770 if (allocate < SPLIT_STACK_AVAILABLE)
10771 current = stack_pointer_rtx;
10774 unsigned int scratch_regno;
10777 /* We need a scratch register to hold the stack pointer minus
10778 the required frame size. Since this is the very start of the
10779 function, the scratch register can be any caller-saved
10780 register which is not used for parameters. */
10781 offset = GEN_INT (- allocate);
10782 scratch_regno = split_stack_prologue_scratch_regno ();
10783 if (scratch_regno == INVALID_REGNUM)
10785 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
10786 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
10788 /* We don't use ix86_gen_add3 in this case because it will
10789 want to split to lea, but when not optimizing the insn
10790 will not be split after this point. */
10791 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
10792 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10797 emit_move_insn (scratch_reg, offset);
10798 emit_insn (gen_adddi3 (scratch_reg, scratch_reg,
10799 stack_pointer_rtx));
10801 current = scratch_reg;
10804 ix86_expand_branch (GEU, current, limit, label);
10805 jump_insn = get_last_insn ();
10806 JUMP_LABEL (jump_insn) = label;
10808 /* Mark the jump as very likely to be taken. */
10809 add_reg_note (jump_insn, REG_BR_PROB,
10810 GEN_INT (REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100));
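/* Editor's note: REG_BR_PROB_BASE is 10000, so the note above records a
   taken probability of (10000 - 10000/100) / 10000 = 99%. */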
10812 if (split_stack_fn == NULL_RTX)
10813 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
10814 fn = split_stack_fn;
10816 /* Get more stack space. We pass in the desired stack space and the
10817 size of the arguments to copy to the new stack. In 32-bit mode
10818 we push the parameters; __morestack will return on a new stack
10819 anyhow. In 64-bit mode we pass the parameters in r10 and
r11. */
10821 allocate_rtx = GEN_INT (allocate);
10822 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
10823 call_fusage = NULL_RTX;
10828 reg10 = gen_rtx_REG (Pmode, R10_REG);
10829 reg11 = gen_rtx_REG (Pmode, R11_REG);
10831 /* If this function uses a static chain, it will be in %r10.
10832 Preserve it across the call to __morestack. */
10833 if (DECL_STATIC_CHAIN (cfun->decl))
10837 rax = gen_rtx_REG (Pmode, AX_REG);
10838 emit_move_insn (rax, reg10);
10839 use_reg (&call_fusage, rax);
10842 if (ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
10844 HOST_WIDE_INT argval;
10846 /* When using the large model we need to load the address
10847 into a register, and we've run out of registers. So we
10848 switch to a different calling convention, and we call a
10849 different function: __morestack_large. We pass the
10850 argument size in the upper 32 bits of r10 and pass the
10851 frame size in the lower 32 bits. */
10852 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
10853 gcc_assert ((args_size & 0xffffffff) == args_size);
10855 if (split_stack_fn_large == NULL_RTX)
10856 split_stack_fn_large =
10857 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
10859 if (ix86_cmodel == CM_LARGE_PIC)
10863 label = gen_label_rtx ();
10864 emit_label (label);
10865 LABEL_PRESERVE_P (label) = 1;
10866 emit_insn (gen_set_rip_rex64 (reg10, label));
10867 emit_insn (gen_set_got_offset_rex64 (reg11, label));
10868 emit_insn (gen_adddi3 (reg10, reg10, reg11));
10869 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
10871 x = gen_rtx_CONST (Pmode, x);
10872 emit_move_insn (reg11, x);
10873 x = gen_rtx_PLUS (Pmode, reg10, reg11);
10874 x = gen_const_mem (Pmode, x);
10875 emit_move_insn (reg11, x);
10878 emit_move_insn (reg11, split_stack_fn_large);
10882 argval = ((args_size << 16) << 16) + allocate;
10883 emit_move_insn (reg10, GEN_INT (argval));
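/* Editor's example of the packing above: with args_size = 0x10 and
   allocate = 0x200, argval is 0x0000001000000200, so
   __morestack_large_model sees the argument size in the upper 32 bits
   of %r10 and the frame size in the lower 32 bits. The double 16-bit
   shift keeps the expression well-defined even where a single shift by
   32 would match the width of the type. */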
10887 emit_move_insn (reg10, allocate_rtx);
10888 emit_move_insn (reg11, GEN_INT (args_size));
10889 use_reg (&call_fusage, reg11);
10892 use_reg (&call_fusage, reg10);
10896 emit_insn (gen_push (GEN_INT (args_size)));
10897 emit_insn (gen_push (allocate_rtx));
10899 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
10900 GEN_INT (UNITS_PER_WORD), constm1_rtx,
10902 add_function_usage_to (call_insn, call_fusage);
10904 /* In order to make call/return prediction work right, we now need
10905 to execute a return instruction. See
10906 libgcc/config/i386/morestack.S for the details on how this works.
10908 For flow purposes gcc must not see this as a return
10909 instruction--we need control flow to continue at the subsequent
10910 label. Therefore, we use an unspec. */
10911 gcc_assert (crtl->args.pops_args < 65536);
10912 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
10914 /* If we are in 64-bit mode and this function uses a static chain,
10915 we saved %r10 in %rax before calling __morestack. */
10916 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
10917 emit_move_insn (gen_rtx_REG (Pmode, R10_REG),
10918 gen_rtx_REG (Pmode, AX_REG));
10920 /* If this function calls va_start, we need to store a pointer to
10921 the arguments on the old stack, because they may not have been
10922 all copied to the new stack. At this point the old stack can be
10923 found at the frame pointer value used by __morestack, because
10924 __morestack has set that up before calling back to us. Here we
10925 store that pointer in a scratch register, and in
10926 ix86_expand_prologue we store the scratch register in a stack
slot. */
10928 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
10930 unsigned int scratch_regno;
10934 scratch_regno = split_stack_prologue_scratch_regno ();
10935 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
10936 frame_reg = gen_rtx_REG (Pmode, BP_REG);
/* In 64-bit mode, the stack pointed to by the frame pointer looks like:

   old frame pointer
10940 return address within this function
10941 return address of caller of this function
   stack arguments

10943 So we add three words to get to the stack arguments.

   In 32-bit mode it looks like:

   old frame pointer
10947 return address within this function
10948 first argument to __morestack
10949 second argument to __morestack
10950 return address of caller of this function
   stack arguments

10952 So we add five words to get to the stack arguments. */
10954 words = TARGET_64BIT ? 3 : 5;
10955 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
10956 gen_rtx_PLUS (Pmode, frame_reg,
10957 GEN_INT (words * UNITS_PER_WORD))));
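/* Editor's example: in 64-bit mode words = 3 and UNITS_PER_WORD = 8, so
   the scratch register is set to %rbp + 24, the address of the first
   stack argument in the layout sketched above. */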
10959 varargs_label = gen_label_rtx ();
10960 emit_jump_insn (gen_jump (varargs_label));
10961 JUMP_LABEL (get_last_insn ()) = varargs_label;
10966 emit_label (label);
10967 LABEL_NUSES (label) = 1;
10969 /* If this function calls va_start, we now have to set the scratch
10970 register for the case where we do not call __morestack. In this
10971 case we need to set it based on the stack pointer. */
10972 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
10974 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
10975 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10976 GEN_INT (UNITS_PER_WORD))));
10978 emit_label (varargs_label);
10979 LABEL_NUSES (varargs_label) = 1;
10983 /* We may have to tell the dataflow pass that the split stack prologue
10984 is initializing a scratch register. */
10987 ix86_live_on_entry (bitmap regs)
10989 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
10991 gcc_assert (flag_split_stack);
10992 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
10996 /* Extract the parts of an RTL expression that is a valid memory address
10997 for an instruction. Return 0 if the structure of the address is
10998 grossly off. Return -1 if the address contains ASHIFT, so it is not
10999 strictly valid, but is still used for computing the length of the lea instruction. */
11002 ix86_decompose_address (rtx addr, struct ix86_address *out)
11004 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
11005 rtx base_reg, index_reg;
11006 HOST_WIDE_INT scale = 1;
11007 rtx scale_rtx = NULL_RTX;
11010 enum ix86_address_seg seg = SEG_DEFAULT;
11012 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
11014 else if (GET_CODE (addr) == PLUS)
11016 rtx addends[4], op;
11024 addends[n++] = XEXP (op, 1);
11027 while (GET_CODE (op) == PLUS);
11032 for (i = n; i >= 0; --i)
11035 switch (GET_CODE (op))
11040 index = XEXP (op, 0);
11041 scale_rtx = XEXP (op, 1);
11047 index = XEXP (op, 0);
11048 tmp = XEXP (op, 1);
11049 if (!CONST_INT_P (tmp))
11051 scale = INTVAL (tmp);
11052 if ((unsigned HOST_WIDE_INT) scale > 3)
11054 scale = 1 << scale;
11058 if (XINT (op, 1) == UNSPEC_TP
11059 && TARGET_TLS_DIRECT_SEG_REFS
11060 && seg == SEG_DEFAULT)
11061 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
11090 else if (GET_CODE (addr) == MULT)
11092 index = XEXP (addr, 0); /* index*scale */
11093 scale_rtx = XEXP (addr, 1);
11095 else if (GET_CODE (addr) == ASHIFT)
11097 /* We're called for lea too, which implements ashift on occasion. */
11098 index = XEXP (addr, 0);
11099 tmp = XEXP (addr, 1);
11100 if (!CONST_INT_P (tmp))
11102 scale = INTVAL (tmp);
11103 if ((unsigned HOST_WIDE_INT) scale > 3)
11105 scale = 1 << scale;
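/* Editor's example: lea can express a shift directly, so an address like
   (ashift (reg) (const_int 3)) is recorded as index = reg with
   scale = 1 << 3 = 8, exactly as (mult (reg) (const_int 8)) would be. */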
11109 disp = addr; /* displacement */
11111 /* Extract the integral value of scale. */
11114 if (!CONST_INT_P (scale_rtx))
11116 scale = INTVAL (scale_rtx);
11119 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
11120 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
11122 /* Avoid useless 0 displacement. */
11123 if (disp == const0_rtx && (base || index))
11126 /* Allow arg pointer and stack pointer as index if there is no scaling. */
11127 if (base_reg && index_reg && scale == 1
11128 && (index_reg == arg_pointer_rtx
11129 || index_reg == frame_pointer_rtx
11130 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
11133 tmp = base, base = index, index = tmp;
11134 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
11137 /* Special case: %ebp cannot be encoded as a base without a displacement.
Similarly %r13. */
if (!disp
&& base_reg
11141 && (base_reg == hard_frame_pointer_rtx
11142 || base_reg == frame_pointer_rtx
11143 || base_reg == arg_pointer_rtx
11144 || (REG_P (base_reg)
11145 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
11146 || REGNO (base_reg) == R13_REG))))
11149 /* Special case: on K6, [%esi] causes the instruction to be vector
11150 decoded. Avoid this by transforming it to [%esi+0].
11151 Reload calls address legitimization without cfun defined, so we need
11152 to test cfun for being non-NULL. */
11153 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
11154 && base_reg && !index_reg && !disp
11155 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
11158 /* Special case: encode reg+reg instead of reg*2. */
11159 if (!base && index && scale == 2)
11160 base = index, base_reg = index_reg, scale = 1;
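/* Editor's note on the transform above: an index*2 with no base must be
   encoded with a SIB byte and a mandatory 32-bit displacement, while
   base+index needs no displacement at all, saving four bytes. */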
11162 /* Special case: scaling cannot be encoded without base or displacement. */
11163 if (!base && !disp && index && scale != 1)
11167 out->index = index;
11169 out->scale = scale;
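/* Editor's worked example, with hypothetical registers A and B: for

       (plus (plus (mult (reg A) (const_int 4)) (reg B)) (const_int 12))

   the parts come back as base = B, index = A, scale = 4, disp = 12,
   i.e. the x86 address 12(%B,%A,4). */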
11175 /* Return the cost of the memory address x.
11176 For i386, it is better to use a complex address than let gcc copy
11177 the address into a reg and make a new pseudo. But not if the address
11178 requires two regs - that would mean more pseudos with longer
lifetimes. */
11181 ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
11183 struct ix86_address parts;
11185 int ok = ix86_decompose_address (x, &parts);
11189 if (parts.base && GET_CODE (parts.base) == SUBREG)
11190 parts.base = SUBREG_REG (parts.base);
11191 if (parts.index && GET_CODE (parts.index) == SUBREG)
11192 parts.index = SUBREG_REG (parts.index);
11194 /* Attempt to minimize number of registers in the address. */
11196 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
11198 && (!REG_P (parts.index)
11199 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
11203 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
11205 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
11206 && parts.base != parts.index)
11209 /* AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
11210 since its predecode logic can't detect the length of instructions
11211 and it degenerates to vector decoded. Increase the cost of such
11212 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
11213 to split such addresses or even refuse such addresses at all.
11215 The following addressing modes are affected:
[base+scale*index]
[scale*index+disp]
[base+index]
11220 The first and last case may be avoidable by explicitly coding the zero in
11221 the memory address, but I don't have an AMD-K6 machine handy to check this
theory. */
11225 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
11226 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
11227 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
11233 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
11234 this is used to form addresses to local data when -fPIC is in
effect. */
11238 darwin_local_data_pic (rtx disp)
11240 return (GET_CODE (disp) == UNSPEC
11241 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
11244 /* Determine if a given RTX is a valid constant. We already know this
11245 satisfies CONSTANT_P. */
11248 ix86_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
11250 switch (GET_CODE (x))
11255 if (GET_CODE (x) == PLUS)
11257 if (!CONST_INT_P (XEXP (x, 1)))
11262 if (TARGET_MACHO && darwin_local_data_pic (x))
11265 /* Only some unspecs are valid as "constants". */
11266 if (GET_CODE (x) == UNSPEC)
11267 switch (XINT (x, 1))
11270 case UNSPEC_GOTOFF:
11271 case UNSPEC_PLTOFF:
11272 return TARGET_64BIT;
11274 case UNSPEC_NTPOFF:
11275 x = XVECEXP (x, 0, 0);
11276 return (GET_CODE (x) == SYMBOL_REF
11277 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11278 case UNSPEC_DTPOFF:
11279 x = XVECEXP (x, 0, 0);
11280 return (GET_CODE (x) == SYMBOL_REF
11281 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
11286 /* We must have drilled down to a symbol. */
11287 if (GET_CODE (x) == LABEL_REF)
11289 if (GET_CODE (x) != SYMBOL_REF)
11294 /* TLS symbols are never valid. */
11295 if (SYMBOL_REF_TLS_MODEL (x))
11298 /* DLLIMPORT symbols are never valid. */
11299 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
11300 && SYMBOL_REF_DLLIMPORT_P (x))
11304 /* mdynamic-no-pic */
11305 if (MACHO_DYNAMIC_NO_PIC_P)
11306 return machopic_symbol_defined_p (x);
11311 if (GET_MODE (x) == TImode
11312 && x != CONST0_RTX (TImode)
11318 if (!standard_sse_constant_p (x))
11325 /* Otherwise we handle everything else in the move patterns. */
11329 /* Determine if it's legal to put X into the constant pool. This
11330 is not possible for the address of thread-local symbols, which
11331 is checked above. */
11334 ix86_cannot_force_const_mem (enum machine_mode mode, rtx x)
11336 /* We can always put integral constants and vectors in memory. */
11337 switch (GET_CODE (x))
11347 return !ix86_legitimate_constant_p (mode, x);
11351 /* Nonzero if the constant value X is a legitimate general operand
11352 when generating PIC code. It is given that flag_pic is on and
11353 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
11356 legitimate_pic_operand_p (rtx x)
11360 switch (GET_CODE (x))
11363 inner = XEXP (x, 0);
11364 if (GET_CODE (inner) == PLUS
11365 && CONST_INT_P (XEXP (inner, 1)))
11366 inner = XEXP (inner, 0);
11368 /* Only some unspecs are valid as "constants". */
11369 if (GET_CODE (inner) == UNSPEC)
11370 switch (XINT (inner, 1))
11373 case UNSPEC_GOTOFF:
11374 case UNSPEC_PLTOFF:
11375 return TARGET_64BIT;
11377 x = XVECEXP (inner, 0, 0);
11378 return (GET_CODE (x) == SYMBOL_REF
11379 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11380 case UNSPEC_MACHOPIC_OFFSET:
11381 return legitimate_pic_address_disp_p (x);
11389 return legitimate_pic_address_disp_p (x);
11396 /* Determine if a given CONST RTX is a valid memory displacement
in PIC mode. */
11400 legitimate_pic_address_disp_p (rtx disp)
11404 /* In 64bit mode we can allow direct addresses of symbols and labels
11405 when they are not dynamic symbols. */
11408 rtx op0 = disp, op1;
11410 switch (GET_CODE (disp))
11416 if (GET_CODE (XEXP (disp, 0)) != PLUS)
11418 op0 = XEXP (XEXP (disp, 0), 0);
11419 op1 = XEXP (XEXP (disp, 0), 1);
11420 if (!CONST_INT_P (op1)
11421 || INTVAL (op1) >= 16*1024*1024
11422 || INTVAL (op1) < -16*1024*1024)
11424 if (GET_CODE (op0) == LABEL_REF)
11426 if (GET_CODE (op0) != SYMBOL_REF)
11431 /* TLS references should always be enclosed in UNSPEC. */
11432 if (SYMBOL_REF_TLS_MODEL (op0))
11434 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
11435 && ix86_cmodel != CM_LARGE_PIC)
11443 if (GET_CODE (disp) != CONST)
11445 disp = XEXP (disp, 0);
11449 /* It is unsafe to allow PLUS expressions here; they would allow arbitrary
11450 displacements into GOT tables. We should not need these anyway. */
11451 if (GET_CODE (disp) != UNSPEC
11452 || (XINT (disp, 1) != UNSPEC_GOTPCREL
11453 && XINT (disp, 1) != UNSPEC_GOTOFF
11454 && XINT (disp, 1) != UNSPEC_PCREL
11455 && XINT (disp, 1) != UNSPEC_PLTOFF))
11458 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
11459 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
11465 if (GET_CODE (disp) == PLUS)
11467 if (!CONST_INT_P (XEXP (disp, 1)))
11469 disp = XEXP (disp, 0);
11473 if (TARGET_MACHO && darwin_local_data_pic (disp))
11476 if (GET_CODE (disp) != UNSPEC)
11479 switch (XINT (disp, 1))
11484 /* We need to check for both symbols and labels because VxWorks loads
11485 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
details. */
11487 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
11488 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
11489 case UNSPEC_GOTOFF:
11490 /* Refuse GOTOFF in 64bit mode, since it is always 64 bits when used.
11491 The ABI also specifies a 32-bit relocation, but we don't produce it
11492 in the small PIC model at all. */
11493 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
11494 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
11496 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
11498 case UNSPEC_GOTTPOFF:
11499 case UNSPEC_GOTNTPOFF:
11500 case UNSPEC_INDNTPOFF:
11503 disp = XVECEXP (disp, 0, 0);
11504 return (GET_CODE (disp) == SYMBOL_REF
11505 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
11506 case UNSPEC_NTPOFF:
11507 disp = XVECEXP (disp, 0, 0);
11508 return (GET_CODE (disp) == SYMBOL_REF
11509 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
11510 case UNSPEC_DTPOFF:
11511 disp = XVECEXP (disp, 0, 0);
11512 return (GET_CODE (disp) == SYMBOL_REF
11513 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
11519 /* Recognizes RTL expressions that are valid memory addresses for an
11520 instruction. The MODE argument is the machine mode for the MEM
11521 expression that wants to use this address.
11523 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
11524 convert common non-canonical forms to canonical form so that they will
11528 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
11529 rtx addr, bool strict)
11531 struct ix86_address parts;
11532 rtx base, index, disp;
11533 HOST_WIDE_INT scale;
11535 if (ix86_decompose_address (addr, &parts) <= 0)
11536 /* Decomposition failed. */
11540 index = parts.index;
11542 scale = parts.scale;
11544 /* Validate base register.
11546 Don't allow SUBREG's that span more than a word here. It can lead to spill
11547 failures when the base is one word out of a two word structure, which is
11548 represented internally as a DImode int. */
11556 else if (GET_CODE (base) == SUBREG
11557 && REG_P (SUBREG_REG (base))
11558 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
11560 reg = SUBREG_REG (base);
11562 /* Base is not a register. */
11565 if (GET_MODE (base) != Pmode)
11566 /* Base is not in Pmode. */
11569 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
11570 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
11571 /* Base is not valid. */
11575 /* Validate index register.
11577 Don't allow SUBREG's that span more than a word here -- same as above. */
11585 else if (GET_CODE (index) == SUBREG
11586 && REG_P (SUBREG_REG (index))
11587 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
11589 reg = SUBREG_REG (index);
11591 /* Index is not a register. */
11594 if (GET_MODE (index) != Pmode)
11595 /* Index is not in Pmode. */
11598 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
11599 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
11600 /* Index is not valid. */
11604 /* Validate scale factor. */
11608 /* Scale without index. */
11611 if (scale != 2 && scale != 4 && scale != 8)
11612 /* Scale is not a valid multiplier. */
11616 /* Validate displacement. */
11619 if (GET_CODE (disp) == CONST
11620 && GET_CODE (XEXP (disp, 0)) == UNSPEC
11621 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
11622 switch (XINT (XEXP (disp, 0), 1))
11624 /* Refuse GOTOFF and GOT in 64bit mode, since they are always 64 bits
11625 when used. The ABI also specifies 32-bit relocations, but we don't
11626 produce them at all and use IP-relative addressing instead. */
11628 case UNSPEC_GOTOFF:
11629 gcc_assert (flag_pic);
11631 goto is_legitimate_pic;
11633 /* 64bit address unspec. */
11636 case UNSPEC_GOTPCREL:
11638 gcc_assert (flag_pic);
11639 goto is_legitimate_pic;
11641 case UNSPEC_GOTTPOFF:
11642 case UNSPEC_GOTNTPOFF:
11643 case UNSPEC_INDNTPOFF:
11644 case UNSPEC_NTPOFF:
11645 case UNSPEC_DTPOFF:
11648 case UNSPEC_STACK_CHECK:
11649 gcc_assert (flag_split_stack);
11653 /* Invalid address unspec. */
11657 else if (SYMBOLIC_CONST (disp)
11661 && MACHOPIC_INDIRECT
11662 && !machopic_operand_p (disp)
11668 if (TARGET_64BIT && (index || base))
11670 /* foo@dtpoff(%rX) is ok. */
11671 if (GET_CODE (disp) != CONST
11672 || GET_CODE (XEXP (disp, 0)) != PLUS
11673 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
11674 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
11675 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
11676 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
11677 /* Non-constant pic memory reference. */
11680 else if ((!TARGET_MACHO || flag_pic)
11681 && ! legitimate_pic_address_disp_p (disp))
11682 /* Displacement is an invalid pic construct. */
11685 else if (MACHO_DYNAMIC_NO_PIC_P
11686 && !ix86_legitimate_constant_p (Pmode, disp))
11687 /* Displacement must be referenced via a non-lazy pointer. */
11691 /* This code used to verify that a symbolic pic displacement
11692 includes the pic_offset_table_rtx register.
11694 While this is a good idea, unfortunately these constructs may
11695 be created by the "adds using lea" optimization for incorrect
code like:

int a;
int foo (int i)
{
return *(&a + i);
}

11704 This code is nonsensical, but results in addressing the
11705 GOT table with pic_offset_table_rtx as base. We can't
11706 just refuse it easily, since it gets matched by the
11707 "addsi3" pattern, which later gets split to lea when the
11708 output register differs from the input. While this
11709 could be handled by a separate addsi pattern for this case,
11710 one that never results in lea, disabling this test seems to be
11711 the easier and correct fix for the crash. */
11713 else if (GET_CODE (disp) != LABEL_REF
11714 && !CONST_INT_P (disp)
11715 && (GET_CODE (disp) != CONST
11716 || !ix86_legitimate_constant_p (Pmode, disp))
11717 && (GET_CODE (disp) != SYMBOL_REF
11718 || !ix86_legitimate_constant_p (Pmode, disp)))
11719 /* Displacement is not constant. */
11721 else if (TARGET_64BIT
11722 && !x86_64_immediate_operand (disp, VOIDmode))
11723 /* Displacement is out of range. */
11727 /* Everything looks valid. */
11731 /* Determine if a given RTX is a valid constant address. */
11734 constant_address_p (rtx x)
11736 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
11739 /* Return a unique alias set for the GOT. */
11741 static alias_set_type
11742 ix86_GOT_alias_set (void)
11744 static alias_set_type set = -1;
11746 set = new_alias_set ();
11750 /* Return a legitimate reference for ORIG (an address) using the
11751 register REG. If REG is 0, a new pseudo is generated.
11753 There are two types of references that must be handled:
11755 1. Global data references must load the address from the GOT, via
11756 the PIC reg. An insn is emitted to do this load, and the reg is
used as the address of the memory.
11759 2. Static data references, constant pool addresses, and code labels
11760 compute the address as an offset from the GOT, whose base is in
11761 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
11762 differentiate them from global data objects. The returned
11763 address is the PIC reg + an unspec constant.
11765 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
11766 reg also appears in the address. */
11769 legitimize_pic_address (rtx orig, rtx reg)
11772 rtx new_rtx = orig;
11776 if (TARGET_MACHO && !TARGET_64BIT)
11779 reg = gen_reg_rtx (Pmode);
11780 /* Use the generic Mach-O PIC machinery. */
11781 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
11785 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
11787 else if (TARGET_64BIT
11788 && ix86_cmodel != CM_SMALL_PIC
11789 && gotoff_operand (addr, Pmode))
11792 /* This symbol may be referenced via a displacement from the PIC
11793 base address (@GOTOFF). */
11795 if (reload_in_progress)
11796 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11797 if (GET_CODE (addr) == CONST)
11798 addr = XEXP (addr, 0);
11799 if (GET_CODE (addr) == PLUS)
11801 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
11803 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
11806 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
11807 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11809 tmpreg = gen_reg_rtx (Pmode);
11812 emit_move_insn (tmpreg, new_rtx);
11816 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
11817 tmpreg, 1, OPTAB_DIRECT);
11820 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
11822 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
11824 /* This symbol may be referenced via a displacement from the PIC
11825 base address (@GOTOFF). */
11827 if (reload_in_progress)
11828 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11829 if (GET_CODE (addr) == CONST)
11830 addr = XEXP (addr, 0);
11831 if (GET_CODE (addr) == PLUS)
11833 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
11835 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
11838 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
11839 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11840 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
11844 emit_move_insn (reg, new_rtx);
11848 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
11849 /* We can't use @GOTOFF for text labels on VxWorks;
11850 see gotoff_operand. */
11851 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
11853 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
11855 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
11856 return legitimize_dllimport_symbol (addr, true);
11857 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
11858 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
11859 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
11861 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
11862 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
11866 /* For x64 PE-COFF there is no GOT table, so we use the address
directly. */
11868 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
11870 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
11871 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11874 reg = gen_reg_rtx (Pmode);
11875 emit_move_insn (reg, new_rtx);
11878 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
11880 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
11881 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11882 new_rtx = gen_const_mem (Pmode, new_rtx);
11883 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
11886 reg = gen_reg_rtx (Pmode);
11887 /* Use gen_movsi directly; otherwise the address is loaded
11888 into a register for CSE. We don't want to CSE these addresses;
11889 instead we CSE addresses loaded from the GOT table, so skip this. */
11890 emit_insn (gen_movsi (reg, new_rtx));
11895 /* This symbol must be referenced via a load from the
11896 Global Offset Table (@GOT). */
11898 if (reload_in_progress)
11899 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11900 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
11901 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11903 new_rtx = force_reg (Pmode, new_rtx);
11904 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
11905 new_rtx = gen_const_mem (Pmode, new_rtx);
11906 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
11909 reg = gen_reg_rtx (Pmode);
11910 emit_move_insn (reg, new_rtx);
11916 if (CONST_INT_P (addr)
11917 && !x86_64_immediate_operand (addr, VOIDmode))
11921 emit_move_insn (reg, addr);
11925 new_rtx = force_reg (Pmode, addr);
11927 else if (GET_CODE (addr) == CONST)
11929 addr = XEXP (addr, 0);
11931 /* We must match stuff we generate before. Assume the only
11932 unspecs that can get here are ours. Not that we could do
11933 anything with them anyway.... */
11934 if (GET_CODE (addr) == UNSPEC
11935 || (GET_CODE (addr) == PLUS
11936 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
11938 gcc_assert (GET_CODE (addr) == PLUS);
11940 if (GET_CODE (addr) == PLUS)
11942 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
11944 /* Check first to see if this is a constant offset from a @GOTOFF
11945 symbol reference. */
11946 if (gotoff_operand (op0, Pmode)
11947 && CONST_INT_P (op1))
11951 if (reload_in_progress)
11952 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11953 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
11955 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
11956 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11957 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
11961 emit_move_insn (reg, new_rtx);
11967 if (INTVAL (op1) < -16*1024*1024
11968 || INTVAL (op1) >= 16*1024*1024)
11970 if (!x86_64_immediate_operand (op1, Pmode))
11971 op1 = force_reg (Pmode, op1);
11972 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
11978 base = legitimize_pic_address (XEXP (addr, 0), reg);
11979 new_rtx = legitimize_pic_address (XEXP (addr, 1),
11980 base == reg ? NULL_RTX : reg);
11982 if (CONST_INT_P (new_rtx))
11983 new_rtx = plus_constant (base, INTVAL (new_rtx));
11986 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
11988 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
11989 new_rtx = XEXP (new_rtx, 1);
11991 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
11999 /* Load the thread pointer. If TO_REG is true, force it into a register. */
12002 get_thread_pointer (bool to_reg)
12006 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
12010 reg = gen_reg_rtx (Pmode);
12011 insn = gen_rtx_SET (VOIDmode, reg, tp);
12012 insn = emit_insn (insn);
12017 /* Construct the SYMBOL_REF for the tls_get_addr function. */
12019 static GTY(()) rtx ix86_tls_symbol;
12022 ix86_tls_get_addr (void)
12024 if (!ix86_tls_symbol)
12027 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
12028 ? "___tls_get_addr" : "__tls_get_addr");
12030 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
12033 return ix86_tls_symbol;
12036 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
12038 static GTY(()) rtx ix86_tls_module_base_symbol;
12041 ix86_tls_module_base (void)
12043 if (!ix86_tls_module_base_symbol)
12045 ix86_tls_module_base_symbol
12046 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
12048 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
12049 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
12052 return ix86_tls_module_base_symbol;
12055 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
12056 false if we expect this to be used for a memory address and true if
12057 we expect to load the address into a register. */
12060 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
12062 rtx dest, base, off;
12063 rtx pic = NULL_RTX, tp = NULL_RTX;
12068 case TLS_MODEL_GLOBAL_DYNAMIC:
12069 dest = gen_reg_rtx (Pmode);
12074 pic = pic_offset_table_rtx;
12077 pic = gen_reg_rtx (Pmode);
12078 emit_insn (gen_set_got (pic));
12082 if (TARGET_GNU2_TLS)
12085 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
12087 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
12089 tp = get_thread_pointer (true);
12090 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
12092 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
12096 rtx caddr = ix86_tls_get_addr ();
12100 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
12103 emit_call_insn (gen_tls_global_dynamic_64 (rax, x, caddr));
12104 insns = get_insns ();
12107 RTL_CONST_CALL_P (insns) = 1;
12108 emit_libcall_block (insns, dest, rax, x);
12111 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
12115 case TLS_MODEL_LOCAL_DYNAMIC:
12116 base = gen_reg_rtx (Pmode);
12121 pic = pic_offset_table_rtx;
12124 pic = gen_reg_rtx (Pmode);
12125 emit_insn (gen_set_got (pic));
12129 if (TARGET_GNU2_TLS)
12131 rtx tmp = ix86_tls_module_base ();
12134 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
12136 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
12138 tp = get_thread_pointer (true);
12139 set_unique_reg_note (get_last_insn (), REG_EQUIV,
12140 gen_rtx_MINUS (Pmode, tmp, tp));
12144 rtx caddr = ix86_tls_get_addr ();
12148 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, eqv;
12151 emit_call_insn (gen_tls_local_dynamic_base_64 (rax, caddr));
12152 insns = get_insns ();
12155 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
12156 share the LD_BASE result with other LD model accesses. */
12157 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
12158 UNSPEC_TLS_LD_BASE);
12160 RTL_CONST_CALL_P (insns) = 1;
12161 emit_libcall_block (insns, base, rax, eqv);
12164 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
12167 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
12168 off = gen_rtx_CONST (Pmode, off);
12170 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
12172 if (TARGET_GNU2_TLS)
12174 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
12176 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
12180 case TLS_MODEL_INITIAL_EXEC:
12183 if (TARGET_SUN_TLS)
12185 /* The Sun linker took the AMD64 TLS spec literally
12186 and can only handle %rax as destination of the
12187 initial executable code sequence. */
12189 dest = gen_reg_rtx (Pmode);
12190 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
12195 type = UNSPEC_GOTNTPOFF;
12199 if (reload_in_progress)
12200 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12201 pic = pic_offset_table_rtx;
12202 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
12204 else if (!TARGET_ANY_GNU_TLS)
12206 pic = gen_reg_rtx (Pmode);
12207 emit_insn (gen_set_got (pic));
12208 type = UNSPEC_GOTTPOFF;
12213 type = UNSPEC_INDNTPOFF;
12216 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
12217 off = gen_rtx_CONST (Pmode, off);
12219 off = gen_rtx_PLUS (Pmode, pic, off);
12220 off = gen_const_mem (Pmode, off);
12221 set_mem_alias_set (off, ix86_GOT_alias_set ());
12223 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12225 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
12226 off = force_reg (Pmode, off);
12227 return gen_rtx_PLUS (Pmode, base, off);
12231 base = get_thread_pointer (true);
12232 dest = gen_reg_rtx (Pmode);
12233 emit_insn (gen_subsi3 (dest, base, off));
12237 case TLS_MODEL_LOCAL_EXEC:
12238 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
12239 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12240 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
12241 off = gen_rtx_CONST (Pmode, off);
12243 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12245 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
12246 return gen_rtx_PLUS (Pmode, base, off);
12250 base = get_thread_pointer (true);
12251 dest = gen_reg_rtx (Pmode);
12252 emit_insn (gen_subsi3 (dest, base, off));
12257 gcc_unreachable ();
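/* Editor's rough summary of the 64-bit GNU TLS sequences the cases above
   expand to:

       global dynamic: padded lea x@tlsgd(%rip) plus a call to
                       __tls_get_addr
       local dynamic:  one __tls_get_addr call for the module base, then
                       an x@dtpoff displacement per variable
       initial exec:   movq x@gottpoff(%rip), %reg, then a %fs-relative
                       access
       local exec:     a %fs-relative access at x@tpoff directly

   The 32-bit forms are analogous, using %gs and GOT-based relocations. */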
12263 /* Create or return the unique __imp_DECL dllimport symbol corresponding
to DECL. */
12266 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
12267 htab_t dllimport_map;
12270 get_dllimport_decl (tree decl)
12272 struct tree_map *h, in;
12275 const char *prefix;
12276 size_t namelen, prefixlen;
12281 if (!dllimport_map)
12282 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
12284 in.hash = htab_hash_pointer (decl);
12285 in.base.from = decl;
12286 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
12287 h = (struct tree_map *) *loc;
12291 *loc = h = ggc_alloc_tree_map ();
12293 h->base.from = decl;
12294 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
12295 VAR_DECL, NULL, ptr_type_node);
12296 DECL_ARTIFICIAL (to) = 1;
12297 DECL_IGNORED_P (to) = 1;
12298 DECL_EXTERNAL (to) = 1;
12299 TREE_READONLY (to) = 1;
12301 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
12302 name = targetm.strip_name_encoding (name);
12303 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
12304 ? "*__imp_" : "*__imp__";
12305 namelen = strlen (name);
12306 prefixlen = strlen (prefix);
12307 imp_name = (char *) alloca (namelen + prefixlen + 1);
12308 memcpy (imp_name, prefix, prefixlen);
12309 memcpy (imp_name + prefixlen, name, namelen + 1);
12311 name = ggc_alloc_string (imp_name, namelen + prefixlen);
12312 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
12313 SET_SYMBOL_REF_DECL (rtl, to);
12314 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
12316 rtl = gen_const_mem (Pmode, rtl);
12317 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
12319 SET_DECL_RTL (to, rtl);
12320 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
12325 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
12326 true if we require the result to be a register. */
12329 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
12334 gcc_assert (SYMBOL_REF_DECL (symbol));
12335 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
12337 x = DECL_RTL (imp_decl);
12339 x = force_reg (Pmode, x);
12343 /* Try machine-dependent ways of modifying an illegitimate address
12344 to be legitimate. If we find one, return the new, valid address.
12345 This macro is used in only one place: `memory_address' in explow.c.
12347 OLDX is the address as it was before break_out_memory_refs was called.
12348 In some cases it is useful to look at this to decide what needs to be done.
12350 It is always safe for this macro to do nothing. It exists to recognize
12351 opportunities to optimize the output.
12353 For the 80386, we handle X+REG by loading X into a register R and
12354 using R+REG. R will go in a general reg and indexing will be used.
12355 However, if REG is a broken-out memory address or multiplication,
12356 nothing needs to be done because REG can certainly go in a general reg.
12358 When -fpic is used, special handling is needed for symbolic references.
12359 See comments by legitimize_pic_address in i386.c for details. */
12362 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
12363 enum machine_mode mode)
12368 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
12370 return legitimize_tls_address (x, (enum tls_model) log, false);
12371 if (GET_CODE (x) == CONST
12372 && GET_CODE (XEXP (x, 0)) == PLUS
12373 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
12374 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
12376 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
12377 (enum tls_model) log, false);
12378 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
12381 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
12383 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
12384 return legitimize_dllimport_symbol (x, true);
12385 if (GET_CODE (x) == CONST
12386 && GET_CODE (XEXP (x, 0)) == PLUS
12387 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
12388 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
12390 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
12391 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
12395 if (flag_pic && SYMBOLIC_CONST (x))
12396 return legitimize_pic_address (x, 0);
12399 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
12400 return machopic_indirect_data_reference (x, 0);
12403 /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
12404 if (GET_CODE (x) == ASHIFT
12405 && CONST_INT_P (XEXP (x, 1))
12406 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
12409 log = INTVAL (XEXP (x, 1));
12410 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
12411 GEN_INT (1 << log));
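/* Editorial example: (ashift (reg A) (const_int 3)) becomes
   (mult (reg A) (const_int 8)), which address decomposition can map
   directly onto the SIB scale field (1, 2, 4 or 8).  */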
12414 if (GET_CODE (x) == PLUS)
12416 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
12418 if (GET_CODE (XEXP (x, 0)) == ASHIFT
12419 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
12420 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
12423 log = INTVAL (XEXP (XEXP (x, 0), 1));
12424 XEXP (x, 0) = gen_rtx_MULT (Pmode,
12425 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
12426 GEN_INT (1 << log));
12429 if (GET_CODE (XEXP (x, 1)) == ASHIFT
12430 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
12431 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
12434 log = INTVAL (XEXP (XEXP (x, 1), 1));
12435 XEXP (x, 1) = gen_rtx_MULT (Pmode,
12436 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
12437 GEN_INT (1 << log));
12440 /* Put multiply first if it isn't already. */
12441 if (GET_CODE (XEXP (x, 1)) == MULT)
12443 rtx tmp = XEXP (x, 0);
12444 XEXP (x, 0) = XEXP (x, 1);
12445 XEXP (x, 1) = tmp;
12446 changed = 1;
12449 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
12450 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
12451 created by virtual register instantiation, register elimination, and
12452 similar optimizations. */
12453 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
12456 x = gen_rtx_PLUS (Pmode,
12457 gen_rtx_PLUS (Pmode, XEXP (x, 0),
12458 XEXP (XEXP (x, 1), 0)),
12459 XEXP (XEXP (x, 1), 1));
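/* Editorial worked example: (plus (mult (reg I) (const_int 4))
   (plus (reg B) (const_int 8))) is rebracketed as
   (plus (plus (mult (reg I) (const_int 4)) (reg B)) (const_int 8)),
   i.e. the canonical base + index*scale + displacement shape.  */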
12462 /* Canonicalize
12463 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
12464 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
12465 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
12466 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
12467 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
12468 && CONSTANT_P (XEXP (x, 1)))
12471 rtx other = NULL_RTX;
12473 if (CONST_INT_P (XEXP (x, 1)))
12475 constant = XEXP (x, 1);
12476 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
12478 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
12480 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
12481 other = XEXP (x, 1);
12489 x = gen_rtx_PLUS (Pmode,
12490 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
12491 XEXP (XEXP (XEXP (x, 0), 1), 0)),
12492 plus_constant (other, INTVAL (constant)));
12496 if (changed && ix86_legitimate_address_p (mode, x, false))
12499 if (GET_CODE (XEXP (x, 0)) == MULT)
12502 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
12505 if (GET_CODE (XEXP (x, 1)) == MULT)
12508 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
12512 && REG_P (XEXP (x, 1))
12513 && REG_P (XEXP (x, 0)))
12516 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
12519 x = legitimize_pic_address (x, 0);
12522 if (changed && ix86_legitimate_address_p (mode, x, false))
12525 if (REG_P (XEXP (x, 0)))
12527 rtx temp = gen_reg_rtx (Pmode);
12528 rtx val = force_operand (XEXP (x, 1), temp);
12530 emit_move_insn (temp, val);
12532 XEXP (x, 1) = temp;
12536 else if (REG_P (XEXP (x, 1)))
12538 rtx temp = gen_reg_rtx (Pmode);
12539 rtx val = force_operand (XEXP (x, 0), temp);
12541 emit_move_insn (temp, val);
12543 XEXP (x, 0) = temp;
12551 /* Print an integer constant expression in assembler syntax. Addition
12552 and subtraction are the only arithmetic that may appear in these
12553 expressions. FILE is the stdio stream to write to, X is the rtx, and
12554 CODE is the operand print code from the output string. */
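/* Editorial note (illustrative): a (const (unspec [(symbol_ref "foo")]
   UNSPEC_GOTOFF)) operand prints as "foo@GOTOFF", and with print code
   'P' a non-local symbol is given an "@PLT" suffix, as the cases below
   show.  */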
12557 output_pic_addr_const (FILE *file, rtx x, int code)
12561 switch (GET_CODE (x))
12564 gcc_assert (flag_pic);
12569 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
12570 output_addr_const (file, x);
12573 const char *name = XSTR (x, 0);
12575 /* Mark the decl as referenced so that cgraph will
12576 output the function. */
12577 if (SYMBOL_REF_DECL (x))
12578 mark_decl_referenced (SYMBOL_REF_DECL (x));
12581 if (MACHOPIC_INDIRECT
12582 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
12583 name = machopic_indirection_name (x, /*stub_p=*/true);
12585 assemble_name (file, name);
12587 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
12588 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
12589 fputs ("@PLT", file);
12596 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
12597 assemble_name (asm_out_file, buf);
12601 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
12605 /* This used to output parentheses around the expression,
12606 but that does not work on the 386 (either ATT or BSD assembler). */
12607 output_pic_addr_const (file, XEXP (x, 0), code);
12611 if (GET_MODE (x) == VOIDmode)
12613 /* We can use %d if the number is <32 bits and positive. */
12614 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
12615 fprintf (file, "0x%lx%08lx",
12616 (unsigned long) CONST_DOUBLE_HIGH (x),
12617 (unsigned long) CONST_DOUBLE_LOW (x));
12619 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
12622 /* We can't handle floating point constants;
12623 TARGET_PRINT_OPERAND must handle them. */
12624 output_operand_lossage ("floating constant misused");
12628 /* Some assemblers need integer constants to appear first. */
12629 if (CONST_INT_P (XEXP (x, 0)))
12631 output_pic_addr_const (file, XEXP (x, 0), code);
12633 output_pic_addr_const (file, XEXP (x, 1), code);
12637 gcc_assert (CONST_INT_P (XEXP (x, 1)));
12638 output_pic_addr_const (file, XEXP (x, 1), code);
12640 output_pic_addr_const (file, XEXP (x, 0), code);
12646 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
12647 output_pic_addr_const (file, XEXP (x, 0), code);
12649 output_pic_addr_const (file, XEXP (x, 1), code);
12651 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
12655 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
12657 bool f = i386_asm_output_addr_const_extra (file, x);
12662 gcc_assert (XVECLEN (x, 0) == 1);
12663 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
12664 switch (XINT (x, 1))
12667 fputs ("@GOT", file);
12669 case UNSPEC_GOTOFF:
12670 fputs ("@GOTOFF", file);
12672 case UNSPEC_PLTOFF:
12673 fputs ("@PLTOFF", file);
12676 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12677 "(%rip)" : "[rip]", file);
12679 case UNSPEC_GOTPCREL:
12680 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12681 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
12683 case UNSPEC_GOTTPOFF:
12684 /* FIXME: This might be @TPOFF in Sun ld too. */
12685 fputs ("@gottpoff", file);
12688 fputs ("@tpoff", file);
12690 case UNSPEC_NTPOFF:
12692 fputs ("@tpoff", file);
12694 fputs ("@ntpoff", file);
12696 case UNSPEC_DTPOFF:
12697 fputs ("@dtpoff", file);
12699 case UNSPEC_GOTNTPOFF:
12701 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12702 "@gottpoff(%rip)": "@gottpoff[rip]", file);
12704 fputs ("@gotntpoff", file);
12706 case UNSPEC_INDNTPOFF:
12707 fputs ("@indntpoff", file);
12710 case UNSPEC_MACHOPIC_OFFSET:
12712 machopic_output_function_base_name (file);
12716 output_operand_lossage ("invalid UNSPEC as operand");
12722 output_operand_lossage ("invalid expression as operand");
12726 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
12727 We need to emit DTP-relative relocations. */
12729 static void ATTRIBUTE_UNUSED
12730 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
12732 fputs (ASM_LONG, file);
12733 output_addr_const (file, x);
12734 fputs ("@dtpoff", file);
12740 fputs (", 0", file);
12743 gcc_unreachable ();
12747 /* Return true if X is a representation of the PIC register. This copes
12748 with calls from ix86_find_base_term, where the register might have
12749 been replaced by a cselib value. */
12752 ix86_pic_register_p (rtx x)
12754 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
12755 return (pic_offset_table_rtx
12756 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
12758 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
12761 /* Helper function for ix86_delegitimize_address.
12762 Attempt to delegitimize TLS local-exec accesses. */
12765 ix86_delegitimize_tls_address (rtx orig_x)
12767 rtx x = orig_x, unspec;
12768 struct ix86_address addr;
12770 if (!TARGET_TLS_DIRECT_SEG_REFS)
12774 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
12776 if (ix86_decompose_address (x, &addr) == 0
12777 || addr.seg != (TARGET_64BIT ? SEG_FS : SEG_GS)
12778 || addr.disp == NULL_RTX
12779 || GET_CODE (addr.disp) != CONST)
12781 unspec = XEXP (addr.disp, 0);
12782 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
12783 unspec = XEXP (unspec, 0);
12784 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
12786 x = XVECEXP (unspec, 0, 0);
12787 gcc_assert (GET_CODE (x) == SYMBOL_REF);
12788 if (unspec != XEXP (addr.disp, 0))
12789 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
12792 rtx idx = addr.index;
12793 if (addr.scale != 1)
12794 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
12795 x = gen_rtx_PLUS (Pmode, idx, x);
12798 x = gen_rtx_PLUS (Pmode, addr.base, x);
12799 if (MEM_P (orig_x))
12800 x = replace_equiv_address_nv (orig_x, x);
12804 /* In the name of slightly smaller debug output, and to cater to
12805 general assembler lossage, recognize PIC+GOTOFF and turn it back
12806 into a direct symbol reference.
12808 On Darwin, this is necessary to avoid a crash, because Darwin
12809 has a different PIC label for each routine but the DWARF debugging
12810 information is not associated with any particular routine, so it's
12811 necessary to remove references to the PIC label from RTL stored by
12812 the DWARF output code. */
12815 ix86_delegitimize_address (rtx x)
12817 rtx orig_x = delegitimize_mem_from_attrs (x);
12818 /* addend is NULL or some rtx if x is something+GOTOFF where
12819 something doesn't include the PIC register. */
12820 rtx addend = NULL_RTX;
12821 /* reg_addend is NULL or a multiple of some register. */
12822 rtx reg_addend = NULL_RTX;
12823 /* const_addend is NULL or a const_int. */
12824 rtx const_addend = NULL_RTX;
12825 /* This is the result, or NULL. */
12826 rtx result = NULL_RTX;
12835 if (GET_CODE (x) != CONST
12836 || GET_CODE (XEXP (x, 0)) != UNSPEC
12837 || (XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
12838 && XINT (XEXP (x, 0), 1) != UNSPEC_PCREL)
12839 || !MEM_P (orig_x))
12840 return ix86_delegitimize_tls_address (orig_x);
12841 x = XVECEXP (XEXP (x, 0), 0, 0);
12842 if (GET_MODE (orig_x) != Pmode)
12844 x = simplify_gen_subreg (GET_MODE (orig_x), x, Pmode, 0);
12851 if (GET_CODE (x) != PLUS
12852 || GET_CODE (XEXP (x, 1)) != CONST)
12853 return ix86_delegitimize_tls_address (orig_x);
12855 if (ix86_pic_register_p (XEXP (x, 0)))
12856 /* %ebx + GOT/GOTOFF */
12858 else if (GET_CODE (XEXP (x, 0)) == PLUS)
12860 /* %ebx + %reg * scale + GOT/GOTOFF */
12861 reg_addend = XEXP (x, 0);
12862 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
12863 reg_addend = XEXP (reg_addend, 1);
12864 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
12865 reg_addend = XEXP (reg_addend, 0);
12868 reg_addend = NULL_RTX;
12869 addend = XEXP (x, 0);
12873 addend = XEXP (x, 0);
12875 x = XEXP (XEXP (x, 1), 0);
12876 if (GET_CODE (x) == PLUS
12877 && CONST_INT_P (XEXP (x, 1)))
12879 const_addend = XEXP (x, 1);
12883 if (GET_CODE (x) == UNSPEC
12884 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
12885 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
12886 result = XVECEXP (x, 0, 0);
12888 if (TARGET_MACHO && darwin_local_data_pic (x)
12889 && !MEM_P (orig_x))
12890 result = XVECEXP (x, 0, 0);
12893 return ix86_delegitimize_tls_address (orig_x);
12896 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
12898 result = gen_rtx_PLUS (Pmode, reg_addend, result);
12901 /* If the rest of original X doesn't involve the PIC register, add
12902 addend and subtract pic_offset_table_rtx. This can happen e.g.
12904 leal (%ebx, %ecx, 4), %ecx
12906 movl foo@GOTOFF(%ecx), %edx
12907 in which case we return (%ecx - %ebx) + foo. */
12908 if (pic_offset_table_rtx)
12909 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
12910 pic_offset_table_rtx),
12915 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
12917 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
12918 if (result == NULL_RTX)
12924 /* If X is a machine specific address (i.e. a symbol or label being
12925 referenced as a displacement from the GOT implemented using an
12926 UNSPEC), then return the base term. Otherwise return X. */
12929 ix86_find_base_term (rtx x)
12935 if (GET_CODE (x) != CONST)
12937 term = XEXP (x, 0);
12938 if (GET_CODE (term) == PLUS
12939 && (CONST_INT_P (XEXP (term, 1))
12940 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
12941 term = XEXP (term, 0);
12942 if (GET_CODE (term) != UNSPEC
12943 || (XINT (term, 1) != UNSPEC_GOTPCREL
12944 && XINT (term, 1) != UNSPEC_PCREL))
12947 return XVECEXP (term, 0, 0);
12950 return ix86_delegitimize_address (x);
12954 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
12955 int fp, FILE *file)
12957 const char *suffix;
12959 if (mode == CCFPmode || mode == CCFPUmode)
12961 code = ix86_fp_compare_code_to_integer (code);
12965 code = reverse_condition (code);
13016 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
13020 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
13021 Those same assemblers have the same but opposite lossage on cmov. */
13022 if (mode == CCmode)
13023 suffix = fp ? "nbe" : "a";
13024 else if (mode == CCCmode)
13027 gcc_unreachable ();
13043 gcc_unreachable ();
13047 gcc_assert (mode == CCmode || mode == CCCmode);
13064 gcc_unreachable ();
13068 /* ??? As above. */
13069 gcc_assert (mode == CCmode || mode == CCCmode);
13070 suffix = fp ? "nb" : "ae";
13073 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
13077 /* ??? As above. */
13078 if (mode == CCmode)
13080 else if (mode == CCCmode)
13081 suffix = fp ? "nb" : "ae";
13083 gcc_unreachable ();
13086 suffix = fp ? "u" : "p";
13089 suffix = fp ? "nu" : "np";
13092 gcc_unreachable ();
13094 fputs (suffix, file);
13097 /* Print the name of register X to FILE based on its machine mode and number.
13098 If CODE is 'w', pretend the mode is HImode.
13099 If CODE is 'b', pretend the mode is QImode.
13100 If CODE is 'k', pretend the mode is SImode.
13101 If CODE is 'q', pretend the mode is DImode.
13102 If CODE is 'x', pretend the mode is V4SFmode.
13103 If CODE is 't', pretend the mode is V8SFmode.
13104 If CODE is 'h', pretend the reg is the 'high' byte register.
13105 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
13106 If CODE is 'd', duplicate the operand for AVX instruction.
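/* Editorial examples (illustrative): for the AX register, code 'b'
   prints "al", 'w' prints "ax", 'k' prints "eax", 'q' prints "rax"
   (64-bit targets only) and 'h' prints "ah"; in AT&T syntax each is
   preceded by '%'.  */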
13110 print_reg (rtx x, int code, FILE *file)
13113 bool duplicated = code == 'd' && TARGET_AVX;
13115 gcc_assert (x == pc_rtx
13116 || (REGNO (x) != ARG_POINTER_REGNUM
13117 && REGNO (x) != FRAME_POINTER_REGNUM
13118 && REGNO (x) != FLAGS_REG
13119 && REGNO (x) != FPSR_REG
13120 && REGNO (x) != FPCR_REG));
13122 if (ASSEMBLER_DIALECT == ASM_ATT)
13127 gcc_assert (TARGET_64BIT);
13128 fputs ("rip", file);
13132 if (code == 'w' || MMX_REG_P (x))
13134 else if (code == 'b')
13136 else if (code == 'k')
13138 else if (code == 'q')
13140 else if (code == 'y')
13142 else if (code == 'h')
13144 else if (code == 'x')
13146 else if (code == 't')
13149 code = GET_MODE_SIZE (GET_MODE (x));
13151 /* Irritatingly, AMD extended registers use a different naming convention
13152 from the normal registers.  */
13153 if (REX_INT_REG_P (x))
13155 gcc_assert (TARGET_64BIT);
13159 error ("extended registers have no high halves");
13162 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
13165 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
13168 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
13171 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
13174 error ("unsupported operand size for extended register");
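/* Editorial note: so the first extended register prints as "r8",
   "r8d", "r8w" or "r8b" depending on the operand size; there is no
   high-byte form, hence the error for code 'h' above.  */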
13184 if (STACK_TOP_P (x))
13193 if (! ANY_FP_REG_P (x))
13194 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
13199 reg = hi_reg_name[REGNO (x)];
13202 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
13204 reg = qi_reg_name[REGNO (x)];
13207 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
13209 reg = qi_high_reg_name[REGNO (x)];
13214 gcc_assert (!duplicated);
13216 fputs (hi_reg_name[REGNO (x)] + 1, file);
13221 gcc_unreachable ();
13227 if (ASSEMBLER_DIALECT == ASM_ATT)
13228 fprintf (file, ", %%%s", reg);
13230 fprintf (file, ", %s", reg);
13234 /* Locate some local-dynamic symbol still in use by this function
13235 so that we can print its name in some tls_local_dynamic_base
13236 pattern.  */
13239 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
13243 if (GET_CODE (x) == SYMBOL_REF
13244 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
13246 cfun->machine->some_ld_name = XSTR (x, 0);
13253 static const char *
13254 get_some_local_dynamic_name (void)
13258 if (cfun->machine->some_ld_name)
13259 return cfun->machine->some_ld_name;
13261 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
13262 if (NONDEBUG_INSN_P (insn)
13263 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
13264 return cfun->machine->some_ld_name;
13269 /* Meaning of CODE:
13270 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
13271 C -- print opcode suffix for set/cmov insn.
13272 c -- like C, but print reversed condition
13273 F,f -- likewise, but for floating-point.
13274 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
13276 R -- print the prefix for register names.
13277 z -- print the opcode suffix for the size of the current operand.
13278 Z -- likewise, with special suffixes for x87 instructions.
13279 * -- print a star (in certain assembler syntax)
13280 A -- print an absolute memory reference.
13281 w -- print the operand as if it's a "word" (HImode) even if it isn't.
13282 s -- print a shift double count, followed by the assembler's argument
13284 b -- print the QImode name of the register for the indicated operand.
13285 %b0 would print %al if operands[0] is reg 0.
13286 w -- likewise, print the HImode name of the register.
13287 k -- likewise, print the SImode name of the register.
13288 q -- likewise, print the DImode name of the register.
13289 x -- likewise, print the V4SFmode name of the register.
13290 t -- likewise, print the V8SFmode name of the register.
13291 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
13292 y -- print "st(0)" instead of "st" as a register.
13293 d -- print duplicated register operand for AVX instruction.
13294 D -- print condition for SSE cmp instruction.
13295 P -- if PIC, print an @PLT suffix.
13296 p -- print raw symbol name.
13297 X -- don't print any sort of PIC '@' suffix for a symbol.
13298 & -- print some in-use local-dynamic symbol name.
13299 H -- print a memory address offset by 8; used for sse high-parts
13300 Y -- print condition for XOP pcom* instruction.
13301 + -- print a branch hint as 'cs' or 'ds' prefix
13302 ; -- print a semicolon (after prefixes due to a bug in older gas).
13303 @ -- print the segment register of a thread base pointer load
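/* Editorial example of the size-suffix codes (illustrative, assuming
   an AT&T-dialect template): for an SImode operand, "%z0" expands to
   "l", so a template fragment "mov%z0" emits "movl"; for a DImode
   operand it emits "movq".  */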
13307 ix86_print_operand (FILE *file, rtx x, int code)
13314 if (ASSEMBLER_DIALECT == ASM_ATT)
13320 const char *name = get_some_local_dynamic_name ();
13322 output_operand_lossage ("'%%&' used without any "
13323 "local dynamic TLS references");
13325 assemble_name (file, name);
13330 switch (ASSEMBLER_DIALECT)
13337 /* Intel syntax. For absolute addresses, registers should not
13338 be surrounded by braces. */
13342 ix86_print_operand (file, x, 0);
13349 gcc_unreachable ();
13352 ix86_print_operand (file, x, 0);
13357 if (ASSEMBLER_DIALECT == ASM_ATT)
13362 if (ASSEMBLER_DIALECT == ASM_ATT)
13367 if (ASSEMBLER_DIALECT == ASM_ATT)
13372 if (ASSEMBLER_DIALECT == ASM_ATT)
13377 if (ASSEMBLER_DIALECT == ASM_ATT)
13382 if (ASSEMBLER_DIALECT == ASM_ATT)
13387 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
13389 /* Opcodes don't get size suffixes if using Intel opcodes. */
13390 if (ASSEMBLER_DIALECT == ASM_INTEL)
13393 switch (GET_MODE_SIZE (GET_MODE (x)))
13412 output_operand_lossage
13413 ("invalid operand size for operand code '%c'", code);
13418 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
13420 (0, "non-integer operand used with operand code '%c'", code);
13424 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
13425 if (ASSEMBLER_DIALECT == ASM_INTEL)
13428 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
13430 switch (GET_MODE_SIZE (GET_MODE (x)))
13433 #ifdef HAVE_AS_IX86_FILDS
13443 #ifdef HAVE_AS_IX86_FILDQ
13446 fputs ("ll", file);
13454 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
13456 /* 387 opcodes don't get size suffixes
13457 if the operands are registers. */
13458 if (STACK_REG_P (x))
13461 switch (GET_MODE_SIZE (GET_MODE (x)))
13482 output_operand_lossage
13483 ("invalid operand type used with operand code '%c'", code);
13487 output_operand_lossage
13488 ("invalid operand size for operand code '%c'", code);
13506 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
13508 ix86_print_operand (file, x, 0);
13509 fputs (", ", file);
13514 /* A little bit of brain damage here. The SSE compare instructions
13515 use completely different names for the comparisons than the
13516 fp conditional moves do.  */
13519 switch (GET_CODE (x))
13522 fputs ("eq", file);
13525 fputs ("eq_us", file);
13528 fputs ("lt", file);
13531 fputs ("nge", file);
13534 fputs ("le", file);
13537 fputs ("ngt", file);
13540 fputs ("unord", file);
13543 fputs ("neq", file);
13546 fputs ("neq_oq", file);
13549 fputs ("ge", file);
13552 fputs ("nlt", file);
13555 fputs ("gt", file);
13558 fputs ("nle", file);
13561 fputs ("ord", file);
13564 output_operand_lossage ("operand is not a condition code, "
13565 "invalid operand code 'D'");
13571 switch (GET_CODE (x))
13575 fputs ("eq", file);
13579 fputs ("lt", file);
13583 fputs ("le", file);
13586 fputs ("unord", file);
13590 fputs ("neq", file);
13594 fputs ("nlt", file);
13598 fputs ("nle", file);
13601 fputs ("ord", file);
13604 output_operand_lossage ("operand is not a condition code, "
13605 "invalid operand code 'D'");
13611 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13612 if (ASSEMBLER_DIALECT == ASM_ATT)
13614 switch (GET_MODE (x))
13616 case HImode: putc ('w', file); break;
13618 case SFmode: putc ('l', file); break;
13620 case DFmode: putc ('q', file); break;
13621 default: gcc_unreachable ();
13628 if (!COMPARISON_P (x))
13630 output_operand_lossage ("operand is neither a constant nor a "
13631 "condition code, invalid operand code "
13635 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
13638 if (!COMPARISON_P (x))
13640 output_operand_lossage ("operand is neither a constant nor a "
13641 "condition code, invalid operand code "
13645 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13646 if (ASSEMBLER_DIALECT == ASM_ATT)
13649 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
13652 /* Like above, but reverse condition */
13654 /* Check to see if argument to %c is really a constant
13655 and not a condition code which needs to be reversed. */
13656 if (!COMPARISON_P (x))
13658 output_operand_lossage ("operand is neither a constant nor a "
13659 "condition code, invalid operand "
13663 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
13666 if (!COMPARISON_P (x))
13668 output_operand_lossage ("operand is neither a constant nor a "
13669 "condition code, invalid operand "
13673 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13674 if (ASSEMBLER_DIALECT == ASM_ATT)
13677 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
13681 /* It doesn't actually matter what mode we use here, as we're
13682 only going to use this for printing. */
13683 x = adjust_address_nv (x, DImode, 8);
13691 || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS)
13694 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
13697 int pred_val = INTVAL (XEXP (x, 0));
13699 if (pred_val < REG_BR_PROB_BASE * 45 / 100
13700 || pred_val > REG_BR_PROB_BASE * 55 / 100)
13702 int taken = pred_val > REG_BR_PROB_BASE / 2;
13703 int cputaken = final_forward_branch_p (current_output_insn) == 0;
13705 /* Emit hints only where the default branch prediction
13706 heuristics would fail.  */
13707 if (taken != cputaken)
13709 /* We use 3e (DS) prefix for taken branches and
13710 2e (CS) prefix for not taken branches. */
13712 fputs ("ds ; ", file);
13714 fputs ("cs ; ", file);
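/* Editorial note: 0x3e is the DS segment-override byte and 0x2e the
   CS byte; e.g. a forward branch we predict taken, against the
   static forward-not-taken rule, is emitted as "ds ; jne .L2"
   (illustrative label).  */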
13722 switch (GET_CODE (x))
13725 fputs ("neq", file);
13728 fputs ("eq", file);
13732 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
13736 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
13740 fputs ("le", file);
13744 fputs ("lt", file);
13747 fputs ("unord", file);
13750 fputs ("ord", file);
13753 fputs ("ueq", file);
13756 fputs ("nlt", file);
13759 fputs ("nle", file);
13762 fputs ("ule", file);
13765 fputs ("ult", file);
13768 fputs ("une", file);
13771 output_operand_lossage ("operand is not a condition code, "
13772 "invalid operand code 'Y'");
13778 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
13784 if (ASSEMBLER_DIALECT == ASM_ATT)
13787 /* The kernel uses a different segment register for performance
13788 reasons; a system call would not have to trash the userspace
13789 segment register, which would be expensive. */
13790 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
13791 fputs ("fs", file);
13793 fputs ("gs", file);
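/* Editorial note (assumed usage): a template containing "%@" thus
   expands to "fs" or "gs", so a thread-pointer load prints as, e.g.,
   "movq %fs:0, %rax" in 64-bit user code.  */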
13797 output_operand_lossage ("invalid operand code '%c'", code);
13802 print_reg (x, code, file);
13804 else if (MEM_P (x))
13806 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
13807 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
13808 && GET_MODE (x) != BLKmode)
13811 switch (GET_MODE_SIZE (GET_MODE (x)))
13813 case 1: size = "BYTE"; break;
13814 case 2: size = "WORD"; break;
13815 case 4: size = "DWORD"; break;
13816 case 8: size = "QWORD"; break;
13817 case 12: size = "TBYTE"; break;
13819 if (GET_MODE (x) == XFmode)
13824 case 32: size = "YMMWORD"; break;
13826 gcc_unreachable ();
13829 /* Check for explicit size override (codes 'b', 'w' and 'k') */
13832 else if (code == 'w')
13834 else if (code == 'k')
13837 fputs (size, file);
13838 fputs (" PTR ", file);
13842 /* Avoid (%rip) for call operands. */
13843 if (CONSTANT_ADDRESS_P (x) && code == 'P'
13844 && !CONST_INT_P (x))
13845 output_addr_const (file, x);
13846 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
13847 output_operand_lossage ("invalid constraints for operand");
13849 output_address (x);
13852 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
13857 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
13858 REAL_VALUE_TO_TARGET_SINGLE (r, l);
13860 if (ASSEMBLER_DIALECT == ASM_ATT)
13862 /* Sign extend 32bit SFmode immediate to 8 bytes. */
13864 fprintf (file, "0x%08llx", (unsigned long long) (int) l);
13866 fprintf (file, "0x%08x", (unsigned int) l);
13869 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
13874 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
13875 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
13877 if (ASSEMBLER_DIALECT == ASM_ATT)
13879 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
13882 /* These float cases don't actually occur as immediate operands. */
13883 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
13887 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
13888 fputs (dstr, file);
13893 /* We have patterns that allow zero sets of memory, for instance.
13894 In 64-bit mode, we should probably support all 8-byte vectors,
13895 since we can in fact encode that into an immediate. */
13896 if (GET_CODE (x) == CONST_VECTOR)
13898 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
13902 if (code != 'P' && code != 'p')
13904 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
13906 if (ASSEMBLER_DIALECT == ASM_ATT)
13909 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
13910 || GET_CODE (x) == LABEL_REF)
13912 if (ASSEMBLER_DIALECT == ASM_ATT)
13915 fputs ("OFFSET FLAT:", file);
13918 if (CONST_INT_P (x))
13919 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
13920 else if (flag_pic || MACHOPIC_INDIRECT)
13921 output_pic_addr_const (file, x, code);
13923 output_addr_const (file, x);
13928 ix86_print_operand_punct_valid_p (unsigned char code)
13930 return (code == '@' || code == '*' || code == '+'
13931 || code == '&' || code == ';');
13934 /* Print a memory operand whose address is ADDR. */
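/* Editorial examples (illustrative): base %ebp, index %eax, scale 4
   and displacement -4 print as "-4(%ebp,%eax,4)" in AT&T syntax and
   as "[ebp+eax*4-4]" in Intel syntax.  */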
13937 ix86_print_operand_address (FILE *file, rtx addr)
13939 struct ix86_address parts;
13940 rtx base, index, disp;
13942 int ok = ix86_decompose_address (addr, &parts);
13947 index = parts.index;
13949 scale = parts.scale;
13957 if (ASSEMBLER_DIALECT == ASM_ATT)
13959 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
13962 gcc_unreachable ();
13965 /* Use the one-byte-shorter RIP relative addressing for 64-bit mode. */
13966 if (TARGET_64BIT && !base && !index)
13970 if (GET_CODE (disp) == CONST
13971 && GET_CODE (XEXP (disp, 0)) == PLUS
13972 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
13973 symbol = XEXP (XEXP (disp, 0), 0);
13975 if (GET_CODE (symbol) == LABEL_REF
13976 || (GET_CODE (symbol) == SYMBOL_REF
13977 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
13980 if (!base && !index)
13982 /* Displacement-only addresses require special attention. */
13984 if (CONST_INT_P (disp))
13986 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
13987 fputs ("ds:", file);
13988 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
13991 output_pic_addr_const (file, disp, 0);
13993 output_addr_const (file, disp);
13997 if (ASSEMBLER_DIALECT == ASM_ATT)
14002 output_pic_addr_const (file, disp, 0);
14003 else if (GET_CODE (disp) == LABEL_REF)
14004 output_asm_label (disp);
14006 output_addr_const (file, disp);
14011 print_reg (base, 0, file);
14015 print_reg (index, 0, file);
14017 fprintf (file, ",%d", scale);
14023 rtx offset = NULL_RTX;
14027 /* Pull out the offset of a symbol; print any symbol itself. */
14028 if (GET_CODE (disp) == CONST
14029 && GET_CODE (XEXP (disp, 0)) == PLUS
14030 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
14032 offset = XEXP (XEXP (disp, 0), 1);
14033 disp = gen_rtx_CONST (VOIDmode,
14034 XEXP (XEXP (disp, 0), 0));
14038 output_pic_addr_const (file, disp, 0);
14039 else if (GET_CODE (disp) == LABEL_REF)
14040 output_asm_label (disp);
14041 else if (CONST_INT_P (disp))
14044 output_addr_const (file, disp);
14050 print_reg (base, 0, file);
14053 if (INTVAL (offset) >= 0)
14055 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
14059 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
14066 print_reg (index, 0, file);
14068 fprintf (file, "*%d", scale);
14075 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
14078 i386_asm_output_addr_const_extra (FILE *file, rtx x)
14082 if (GET_CODE (x) != UNSPEC)
14085 op = XVECEXP (x, 0, 0);
14086 switch (XINT (x, 1))
14088 case UNSPEC_GOTTPOFF:
14089 output_addr_const (file, op);
14090 /* FIXME: This might be @TPOFF in Sun ld. */
14091 fputs ("@gottpoff", file);
14094 output_addr_const (file, op);
14095 fputs ("@tpoff", file);
14097 case UNSPEC_NTPOFF:
14098 output_addr_const (file, op);
14100 fputs ("@tpoff", file);
14102 fputs ("@ntpoff", file);
14104 case UNSPEC_DTPOFF:
14105 output_addr_const (file, op);
14106 fputs ("@dtpoff", file);
14108 case UNSPEC_GOTNTPOFF:
14109 output_addr_const (file, op);
14111 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14112 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
14114 fputs ("@gotntpoff", file);
14116 case UNSPEC_INDNTPOFF:
14117 output_addr_const (file, op);
14118 fputs ("@indntpoff", file);
14121 case UNSPEC_MACHOPIC_OFFSET:
14122 output_addr_const (file, op);
14124 machopic_output_function_base_name (file);
14128 case UNSPEC_STACK_CHECK:
14132 gcc_assert (flag_split_stack);
14134 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
14135 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
14137 gcc_unreachable ();
14140 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
14151 /* Split one or more double-mode RTL references into pairs of half-mode
14152 references. The RTL can be REG, offsettable MEM, integer constant, or
14153 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
14154 split and "num" is its length. lo_half and hi_half are output arrays
14155 that parallel "operands". */
14158 split_double_mode (enum machine_mode mode, rtx operands[],
14159 int num, rtx lo_half[], rtx hi_half[])
14161 enum machine_mode half_mode;
14167 half_mode = DImode;
14170 half_mode = SImode;
14173 gcc_unreachable ();
14176 byte = GET_MODE_SIZE (half_mode);
14180 rtx op = operands[num];
14182 /* simplify_subreg refuses to split volatile memory addresses,
14183 but we still have to handle them.  */
14186 lo_half[num] = adjust_address (op, half_mode, 0);
14187 hi_half[num] = adjust_address (op, half_mode, byte);
14191 lo_half[num] = simplify_gen_subreg (half_mode, op,
14192 GET_MODE (op) == VOIDmode
14193 ? mode : GET_MODE (op), 0);
14194 hi_half[num] = simplify_gen_subreg (half_mode, op,
14195 GET_MODE (op) == VOIDmode
14196 ? mode : GET_MODE (op), byte);
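/* Editorial example: splitting a DImode value on a 32-bit target
   yields SImode halves at byte offsets 0 and 4; e.g. the constant
   0x100000002 splits into lo_half 2 and hi_half 1.  */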
14201 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
14202 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
14203 is the expression of the binary operation. The output may either be
14204 emitted here, or returned to the caller, like all output_* functions.
14206 There is no guarantee that the operands are the same mode, as they
14207 might be within FLOAT or FLOAT_EXTEND expressions. */
14209 #ifndef SYSV386_COMPAT
14210 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
14211 wants to fix the assemblers because that causes incompatibility
14212 with gcc. No-one wants to fix gcc because that causes
14213 incompatibility with assemblers... You can use the option of
14214 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
14215 #define SYSV386_COMPAT 1
14219 output_387_binary_op (rtx insn, rtx *operands)
14221 static char buf[40];
14224 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
14226 #ifdef ENABLE_CHECKING
14227 /* Even if we do not want to check the inputs, this documents the input
14228 constraints, which helps in understanding the following code.  */
14229 if (STACK_REG_P (operands[0])
14230 && ((REG_P (operands[1])
14231 && REGNO (operands[0]) == REGNO (operands[1])
14232 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
14233 || (REG_P (operands[2])
14234 && REGNO (operands[0]) == REGNO (operands[2])
14235 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
14236 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
14239 gcc_assert (is_sse);
14242 switch (GET_CODE (operands[3]))
14245 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14246 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14254 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14255 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14263 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14264 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14272 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14273 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14281 gcc_unreachable ();
14288 strcpy (buf, ssep);
14289 if (GET_MODE (operands[0]) == SFmode)
14290 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
14292 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
14296 strcpy (buf, ssep + 1);
14297 if (GET_MODE (operands[0]) == SFmode)
14298 strcat (buf, "ss\t{%2, %0|%0, %2}");
14300 strcat (buf, "sd\t{%2, %0|%0, %2}");
14306 switch (GET_CODE (operands[3]))
14310 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
14312 rtx temp = operands[2];
14313 operands[2] = operands[1];
14314 operands[1] = temp;
14317 /* We now know operands[0] == operands[1].  */
14319 if (MEM_P (operands[2]))
14325 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
14327 if (STACK_TOP_P (operands[0]))
14328 /* How is it that we are storing to a dead operand[2]?
14329 Well, presumably operands[1] is dead too. We can't
14330 store the result to st(0) as st(0) gets popped on this
14331 instruction. Instead store to operands[2] (which I
14332 think has to be st(1)). st(1) will be popped later.
14333 gcc <= 2.8.1 didn't have this check and generated
14334 assembly code that the Unixware assembler rejected. */
14335 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
14337 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
14341 if (STACK_TOP_P (operands[0]))
14342 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
14344 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
14349 if (MEM_P (operands[1]))
14355 if (MEM_P (operands[2]))
14361 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
14364 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
14365 derived assemblers, confusingly reverse the direction of
14366 the operation for fsub{r} and fdiv{r} when the
14367 destination register is not st(0). The Intel assembler
14368 doesn't have this brain damage. Read !SYSV386_COMPAT to
14369 figure out what the hardware really does. */
14370 if (STACK_TOP_P (operands[0]))
14371 p = "{p\t%0, %2|rp\t%2, %0}";
14373 p = "{rp\t%2, %0|p\t%0, %2}";
14375 if (STACK_TOP_P (operands[0]))
14376 /* As above for fmul/fadd, we can't store to st(0). */
14377 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
14379 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
14384 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
14387 if (STACK_TOP_P (operands[0]))
14388 p = "{rp\t%0, %1|p\t%1, %0}";
14390 p = "{p\t%1, %0|rp\t%0, %1}";
14392 if (STACK_TOP_P (operands[0]))
14393 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
14395 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
14400 if (STACK_TOP_P (operands[0]))
14402 if (STACK_TOP_P (operands[1]))
14403 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
14405 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
14408 else if (STACK_TOP_P (operands[1]))
14411 p = "{\t%1, %0|r\t%0, %1}";
14413 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
14419 p = "{r\t%2, %0|\t%0, %2}";
14421 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
14427 gcc_unreachable ();
14434 /* Return the mode needed for ENTITY at INSN in the optimize_mode_switching pass. */
14437 ix86_mode_needed (int entity, rtx insn)
14439 enum attr_i387_cw mode;
14441 /* The mode UNINITIALIZED is used to store the control word after a
14442 function call or ASM pattern. The mode ANY specifies that the
14443 function has no requirements on the control word and makes no
14444 changes to the bits we are interested in.  */
14447 || (NONJUMP_INSN_P (insn)
14448 && (asm_noperands (PATTERN (insn)) >= 0
14449 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
14450 return I387_CW_UNINITIALIZED;
14452 if (recog_memoized (insn) < 0)
14453 return I387_CW_ANY;
14455 mode = get_attr_i387_cw (insn);
14460 if (mode == I387_CW_TRUNC)
14465 if (mode == I387_CW_FLOOR)
14470 if (mode == I387_CW_CEIL)
14475 if (mode == I387_CW_MASK_PM)
14480 gcc_unreachable ();
14483 return I387_CW_ANY;
14486 /* Output code to initialize control word copies used by trunc?f?i and
14487 rounding patterns. CURRENT_MODE is set to the current control word,
14488 while NEW_MODE is set to the new control word.  */
14491 emit_i387_cw_initialization (int mode)
14493 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
14496 enum ix86_stack_slot slot;
14498 rtx reg = gen_reg_rtx (HImode);
14500 emit_insn (gen_x86_fnstcw_1 (stored_mode));
14501 emit_move_insn (reg, copy_rtx (stored_mode));
14503 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
14504 || optimize_function_for_size_p (cfun))
14508 case I387_CW_TRUNC:
14509 /* round toward zero (truncate) */
14510 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
14511 slot = SLOT_CW_TRUNC;
14514 case I387_CW_FLOOR:
14515 /* round down toward -oo */
14516 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
14517 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
14518 slot = SLOT_CW_FLOOR;
14522 /* round up toward +oo */
14523 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
14524 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
14525 slot = SLOT_CW_CEIL;
14528 case I387_CW_MASK_PM:
14529 /* mask precision exception for nearbyint() */
14530 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
14531 slot = SLOT_CW_MASK_PM;
14535 gcc_unreachable ();
14542 case I387_CW_TRUNC:
14543 /* round toward zero (truncate) */
14544 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
14545 slot = SLOT_CW_TRUNC;
14548 case I387_CW_FLOOR:
14549 /* round down toward -oo */
14550 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
14551 slot = SLOT_CW_FLOOR;
14555 /* round up toward +oo */
14556 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
14557 slot = SLOT_CW_CEIL;
14560 case I387_CW_MASK_PM:
14561 /* mask precision exception for nearbyint() */
14562 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
14563 slot = SLOT_CW_MASK_PM;
14567 gcc_unreachable ();
14571 gcc_assert (slot < MAX_386_STACK_LOCALS);
14573 new_mode = assign_386_stack_local (HImode, slot);
14574 emit_move_insn (new_mode, reg);
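/* Editorial summary of the bits used above: the x87 rounding-control
   field is bits 10-11 of the control word (mask 0x0c00), with
   00 = round to nearest, 01 = round down, 10 = round up,
   11 = truncate; bit 5 (0x0020) masks the precision exception for
   nearbyint.  */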
14577 /* Output code for INSN to convert a float to a signed int. OPERANDS
14578 are the insn operands. The output may be [HSD]Imode and the input
14579 operand may be [SDX]Fmode. */
14582 output_fix_trunc (rtx insn, rtx *operands, bool fisttp)
14584 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
14585 int dimode_p = GET_MODE (operands[0]) == DImode;
14586 int round_mode = get_attr_i387_cw (insn);
14588 /* Jump through a hoop or two for DImode, since the hardware has no
14589 non-popping instruction. We used to do this a different way, but
14590 that was somewhat fragile and broke with post-reload splitters. */
14591 if ((dimode_p || fisttp) && !stack_top_dies)
14592 output_asm_insn ("fld\t%y1", operands);
14594 gcc_assert (STACK_TOP_P (operands[1]));
14595 gcc_assert (MEM_P (operands[0]));
14596 gcc_assert (GET_MODE (operands[1]) != TFmode);
14599 output_asm_insn ("fisttp%Z0\t%0", operands);
14602 if (round_mode != I387_CW_ANY)
14603 output_asm_insn ("fldcw\t%3", operands);
14604 if (stack_top_dies || dimode_p)
14605 output_asm_insn ("fistp%Z0\t%0", operands);
14607 output_asm_insn ("fist%Z0\t%0", operands);
14608 if (round_mode != I387_CW_ANY)
14609 output_asm_insn ("fldcw\t%2", operands);
14615 /* Output code for x87 ffreep insn. The OPNO argument, which may only
14616 have the values zero or one, indicates the ffreep insn's operand
14617 from the OPERANDS array. */
14619 static const char *
14620 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
14622 if (TARGET_USE_FFREEP)
14623 #ifdef HAVE_AS_IX86_FFREEP
14624 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
14627 static char retval[32];
14628 int regno = REGNO (operands[opno]);
14630 gcc_assert (FP_REGNO_P (regno));
14632 regno -= FIRST_STACK_REG;
14634 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
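/* Editorial note: DF C0+i is the raw encoding of "ffreep %st(i)"; as
   ASM_SHORT emits the 16-bit value little-endian, "0xc0df" lays down
   the bytes DF C0, so no assembler support for the mnemonic is
   needed.  */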
14639 return opno ? "fstp\t%y1" : "fstp\t%y0";
14643 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
14644 should be used. UNORDERED_P is true when fucom should be used. */
14647 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
14649 int stack_top_dies;
14650 rtx cmp_op0, cmp_op1;
14651 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
14655 cmp_op0 = operands[0];
14656 cmp_op1 = operands[1];
14660 cmp_op0 = operands[1];
14661 cmp_op1 = operands[2];
14666 static const char ucomiss[] = "vucomiss\t{%1, %0|%0, %1}";
14667 static const char ucomisd[] = "vucomisd\t{%1, %0|%0, %1}";
14668 static const char comiss[] = "vcomiss\t{%1, %0|%0, %1}";
14669 static const char comisd[] = "vcomisd\t{%1, %0|%0, %1}";
14671 if (GET_MODE (operands[0]) == SFmode)
14673 return &ucomiss[TARGET_AVX ? 0 : 1];
14675 return &comiss[TARGET_AVX ? 0 : 1];
14678 return &ucomisd[TARGET_AVX ? 0 : 1];
14680 return &comisd[TARGET_AVX ? 0 : 1];
14683 gcc_assert (STACK_TOP_P (cmp_op0));
14685 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
14687 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
14689 if (stack_top_dies)
14691 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
14692 return output_387_ffreep (operands, 1);
14695 return "ftst\n\tfnstsw\t%0";
14698 if (STACK_REG_P (cmp_op1)
14700 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
14701 && REGNO (cmp_op1) != FIRST_STACK_REG)
14703 /* If the top of the 387 stack dies, and the other operand
14704 is also a stack register that dies, then this must be an
14705 `fcompp' float compare.  */
14709 /* There is no double popping fcomi variant. Fortunately,
14710 eflags is immune from the fstp's cc clobbering. */
14712 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
14714 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
14715 return output_387_ffreep (operands, 0);
14720 return "fucompp\n\tfnstsw\t%0";
14722 return "fcompp\n\tfnstsw\t%0";
14727 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
14729 static const char * const alt[16] =
14731 "fcom%Z2\t%y2\n\tfnstsw\t%0",
14732 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
14733 "fucom%Z2\t%y2\n\tfnstsw\t%0",
14734 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
14736 "ficom%Z2\t%y2\n\tfnstsw\t%0",
14737 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
14741 "fcomi\t{%y1, %0|%0, %y1}",
14742 "fcomip\t{%y1, %0|%0, %y1}",
14743 "fucomi\t{%y1, %0|%0, %y1}",
14744 "fucomip\t{%y1, %0|%0, %y1}",
14755 mask = eflags_p << 3;
14756 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
14757 mask |= unordered_p << 1;
14758 mask |= stack_top_dies;
14760 gcc_assert (mask < 16);
14769 ix86_output_addr_vec_elt (FILE *file, int value)
14771 const char *directive = ASM_LONG;
14775 directive = ASM_QUAD;
14777 gcc_assert (!TARGET_64BIT);
14780 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
14784 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
14786 const char *directive = ASM_LONG;
14789 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
14790 directive = ASM_QUAD;
14792 gcc_assert (!TARGET_64BIT);
14794 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
14795 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
14796 fprintf (file, "%s%s%d-%s%d\n",
14797 directive, LPREFIX, value, LPREFIX, rel);
14798 else if (HAVE_AS_GOTOFF_IN_DATA)
14799 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
14801 else if (TARGET_MACHO)
14803 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
14804 machopic_output_function_base_name (file);
14809 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
14810 GOT_SYMBOL_NAME, LPREFIX, value);
14813 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
14814 for the target.  */
14817 ix86_expand_clear (rtx dest)
14821 /* We play register width games, which are only valid after reload. */
14822 gcc_assert (reload_completed);
14824 /* Avoid HImode and its attendant prefix byte. */
14825 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
14826 dest = gen_rtx_REG (SImode, REGNO (dest));
14827 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
14829 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
14830 if (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ())
14832 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
14833 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
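/* Editorial example: clearing SImode register AX thus emits
   (parallel [(set (reg:SI ax) (const_int 0))
              (clobber (reg:CC flags))])
   and assembles to "xorl %eax, %eax"; on mov0-tuned targets not
   optimizing this insn for speed, the clobber is omitted and the
   flags-preserving "movl $0, %eax" form is used instead.  */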
14839 /* X is an unchanging MEM. If it is a constant pool reference, return
14840 the constant pool rtx, else NULL. */
14843 maybe_get_pool_constant (rtx x)
14845 x = ix86_delegitimize_address (XEXP (x, 0));
14847 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
14848 return get_pool_constant (x);
14854 ix86_expand_move (enum machine_mode mode, rtx operands[])
14857 enum tls_model model;
14862 if (GET_CODE (op1) == SYMBOL_REF)
14864 model = SYMBOL_REF_TLS_MODEL (op1);
14867 op1 = legitimize_tls_address (op1, model, true);
14868 op1 = force_operand (op1, op0);
14872 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
14873 && SYMBOL_REF_DLLIMPORT_P (op1))
14874 op1 = legitimize_dllimport_symbol (op1, false);
14876 else if (GET_CODE (op1) == CONST
14877 && GET_CODE (XEXP (op1, 0)) == PLUS
14878 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
14880 rtx addend = XEXP (XEXP (op1, 0), 1);
14881 rtx symbol = XEXP (XEXP (op1, 0), 0);
14884 model = SYMBOL_REF_TLS_MODEL (symbol);
14886 tmp = legitimize_tls_address (symbol, model, true);
14887 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
14888 && SYMBOL_REF_DLLIMPORT_P (symbol))
14889 tmp = legitimize_dllimport_symbol (symbol, true);
14893 tmp = force_operand (tmp, NULL);
14894 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
14895 op0, 1, OPTAB_DIRECT);
14901 if ((flag_pic || MACHOPIC_INDIRECT)
14902 && mode == Pmode && symbolic_operand (op1, Pmode))
14904 if (TARGET_MACHO && !TARGET_64BIT)
14907 /* dynamic-no-pic */
14908 if (MACHOPIC_INDIRECT)
14910 rtx temp = ((reload_in_progress
14911 || ((op0 && REG_P (op0))
14913 ? op0 : gen_reg_rtx (Pmode));
14914 op1 = machopic_indirect_data_reference (op1, temp);
14916 op1 = machopic_legitimize_pic_address (op1, mode,
14917 temp == op1 ? 0 : temp);
14919 if (op0 != op1 && GET_CODE (op0) != MEM)
14921 rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
14925 if (GET_CODE (op0) == MEM)
14926 op1 = force_reg (Pmode, op1);
14930 if (GET_CODE (temp) != REG)
14931 temp = gen_reg_rtx (Pmode);
14932 temp = legitimize_pic_address (op1, temp);
14937 /* dynamic-no-pic */
14943 op1 = force_reg (Pmode, op1);
14944 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
14946 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
14947 op1 = legitimize_pic_address (op1, reg);
14956 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
14957 || !push_operand (op0, mode))
14959 op1 = force_reg (mode, op1);
14961 if (push_operand (op0, mode)
14962 && ! general_no_elim_operand (op1, mode))
14963 op1 = copy_to_mode_reg (mode, op1);
14965 /* Force large constants in 64-bit compilation into registers
14966 to get them CSEd.  */
14967 if (can_create_pseudo_p ()
14968 && (mode == DImode) && TARGET_64BIT
14969 && immediate_operand (op1, mode)
14970 && !x86_64_zext_immediate_operand (op1, VOIDmode)
14971 && !register_operand (op0, mode)
14973 op1 = copy_to_mode_reg (mode, op1);
14975 if (can_create_pseudo_p ()
14976 && FLOAT_MODE_P (mode)
14977 && GET_CODE (op1) == CONST_DOUBLE)
14979 /* If we are loading a floating point constant to a register,
14980 force the value to memory now, since we'll get better code
14981 out the back end. */
14983 op1 = validize_mem (force_const_mem (mode, op1));
14984 if (!register_operand (op0, mode))
14986 rtx temp = gen_reg_rtx (mode);
14987 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
14988 emit_move_insn (op0, temp);
14994 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
14998 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
15000 rtx op0 = operands[0], op1 = operands[1];
15001 unsigned int align = GET_MODE_ALIGNMENT (mode);
15003 /* Force constants other than zero into memory. We do not know how
15004 the instructions used to build constants modify the upper 64 bits
15005 of the register; once we have that information we may be able
15006 to handle some of them more efficiently.  */
15007 if (can_create_pseudo_p ()
15008 && register_operand (op0, mode)
15009 && (CONSTANT_P (op1)
15010 || (GET_CODE (op1) == SUBREG
15011 && CONSTANT_P (SUBREG_REG (op1))))
15012 && !standard_sse_constant_p (op1))
15013 op1 = validize_mem (force_const_mem (mode, op1));
15015 /* We need to check memory alignment for SSE mode since attributes
15016 can make operands unaligned.  */
15017 if (can_create_pseudo_p ()
15018 && SSE_REG_MODE_P (mode)
15019 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
15020 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
15024 /* ix86_expand_vector_move_misalign() does not like constants ... */
15025 if (CONSTANT_P (op1)
15026 || (GET_CODE (op1) == SUBREG
15027 && CONSTANT_P (SUBREG_REG (op1))))
15028 op1 = validize_mem (force_const_mem (mode, op1));
15030 /* ... nor both arguments in memory. */
15031 if (!register_operand (op0, mode)
15032 && !register_operand (op1, mode))
15033 op1 = force_reg (mode, op1);
15035 tmp[0] = op0; tmp[1] = op1;
15036 ix86_expand_vector_move_misalign (mode, tmp);
15040 /* Make operand1 a register if it isn't already. */
15041 if (can_create_pseudo_p ()
15042 && !register_operand (op0, mode)
15043 && !register_operand (op1, mode))
15045 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
15049 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
15052 /* Split 32-byte AVX unaligned load and store if needed. */
15055 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
15058 rtx (*extract) (rtx, rtx, rtx);
15059 rtx (*move_unaligned) (rtx, rtx);
15060 enum machine_mode mode;
15062 switch (GET_MODE (op0))
15065 gcc_unreachable ();
15067 extract = gen_avx_vextractf128v32qi;
15068 move_unaligned = gen_avx_movdqu256;
15072 extract = gen_avx_vextractf128v8sf;
15073 move_unaligned = gen_avx_movups256;
15077 extract = gen_avx_vextractf128v4df;
15078 move_unaligned = gen_avx_movupd256;
15083 if (MEM_P (op1) && TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
15085 rtx r = gen_reg_rtx (mode);
15086 m = adjust_address (op1, mode, 0);
15087 emit_move_insn (r, m);
15088 m = adjust_address (op1, mode, 16);
15089 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
15090 emit_move_insn (op0, r);
15092 else if (MEM_P (op0) && TARGET_AVX256_SPLIT_UNALIGNED_STORE)
15094 m = adjust_address (op0, mode, 0);
15095 emit_insn (extract (m, op1, const0_rtx));
15096 m = adjust_address (op0, mode, 16);
15097 emit_insn (extract (m, op1, const1_rtx));
15100 emit_insn (move_unaligned (op0, op1));
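/* Editorial note (illustrative): a split misaligned 32-byte load
   becomes two 16-byte loads whose results are joined with vec_concat
   into the ymm destination; a split store extracts the low and high
   128-bit halves with vextractf128 and stores each separately.  */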
15103 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
15104 straight to ix86_expand_vector_move. */
15105 /* Code generation for scalar reg-reg moves of single and double precision data:
15106 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
15110 if (x86_sse_partial_reg_dependency == true)
15115 Code generation for scalar loads of double precision data:
15116 if (x86_sse_split_regs == true)
15117 movlpd mem, reg (gas syntax)
15121 Code generation for unaligned packed loads of single precision data
15122 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
15123 if (x86_sse_unaligned_move_optimal)
15126 if (x86_sse_partial_reg_dependency == true)
15138 Code generation for unaligned packed loads of double precision data
15139 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
15140 if (x86_sse_unaligned_move_optimal)
15143 if (x86_sse_split_regs == true)
15156 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
15165 switch (GET_MODE_CLASS (mode))
15167 case MODE_VECTOR_INT:
15169 switch (GET_MODE_SIZE (mode))
15172 /* If we're optimizing for size, movups is the smallest. */
15173 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15175 op0 = gen_lowpart (V4SFmode, op0);
15176 op1 = gen_lowpart (V4SFmode, op1);
15177 emit_insn (gen_sse_movups (op0, op1));
15180 op0 = gen_lowpart (V16QImode, op0);
15181 op1 = gen_lowpart (V16QImode, op1);
15182 emit_insn (gen_sse2_movdqu (op0, op1));
15185 op0 = gen_lowpart (V32QImode, op0);
15186 op1 = gen_lowpart (V32QImode, op1);
15187 ix86_avx256_split_vector_move_misalign (op0, op1);
15190 gcc_unreachable ();
15193 case MODE_VECTOR_FLOAT:
15194 op0 = gen_lowpart (mode, op0);
15195 op1 = gen_lowpart (mode, op1);
15200 emit_insn (gen_sse_movups (op0, op1));
15203 ix86_avx256_split_vector_move_misalign (op0, op1);
15206 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15208 op0 = gen_lowpart (V4SFmode, op0);
15209 op1 = gen_lowpart (V4SFmode, op1);
15210 emit_insn (gen_sse_movups (op0, op1));
15213 emit_insn (gen_sse2_movupd (op0, op1));
15216 ix86_avx256_split_vector_move_misalign (op0, op1);
15219 gcc_unreachable ();
15224 gcc_unreachable ();
15232 /* If we're optimizing for size, movups is the smallest. */
15233 if (optimize_insn_for_size_p ()
15234 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15236 op0 = gen_lowpart (V4SFmode, op0);
15237 op1 = gen_lowpart (V4SFmode, op1);
15238 emit_insn (gen_sse_movups (op0, op1));
15242 /* ??? If we have typed data, then it would appear that using
15243 movdqu is the only way to get unaligned data loaded with
15244 integer registers. */
15245 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
15247 op0 = gen_lowpart (V16QImode, op0);
15248 op1 = gen_lowpart (V16QImode, op1);
15249 emit_insn (gen_sse2_movdqu (op0, op1));
15253 if (TARGET_SSE2 && mode == V2DFmode)
15257 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
15259 op0 = gen_lowpart (V2DFmode, op0);
15260 op1 = gen_lowpart (V2DFmode, op1);
15261 emit_insn (gen_sse2_movupd (op0, op1));
15265 /* When SSE registers are split into halves, we can avoid
15266 writing to the top half twice. */
15267 if (TARGET_SSE_SPLIT_REGS)
15269 emit_clobber (op0);
15274 /* ??? Not sure about the best option for the Intel chips.
15275 The following would seem to satisfy; the register is
15276 entirely cleared, breaking the dependency chain. We
15277 then store to the upper half, with a dependency depth
15278 of one. A rumor has it that Intel recommends two movsd
15279 followed by an unpacklpd, but this is unconfirmed. And
15280 given that the dependency depth of the unpacklpd would
15281 still be one, I'm not sure why this would be better. */
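/* I.e. the sequence below comes out as roughly (illustrative):

       movsd  (mem), %xmm0	; load low half, upper half zeroed
       movhpd 8(mem), %xmm0	; fill upper half, dependency depth one
*/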
15282 zero = CONST0_RTX (V2DFmode);
15285 m = adjust_address (op1, DFmode, 0);
15286 emit_insn (gen_sse2_loadlpd (op0, zero, m));
15287 m = adjust_address (op1, DFmode, 8);
15288 emit_insn (gen_sse2_loadhpd (op0, op0, m));
15292 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
15294 op0 = gen_lowpart (V4SFmode, op0);
15295 op1 = gen_lowpart (V4SFmode, op1);
15296 emit_insn (gen_sse_movups (op0, op1));
15300 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
15301 emit_move_insn (op0, CONST0_RTX (mode));
15303 emit_clobber (op0);
15305 if (mode != V4SFmode)
15306 op0 = gen_lowpart (V4SFmode, op0);
15307 m = adjust_address (op1, V2SFmode, 0);
15308 emit_insn (gen_sse_loadlps (op0, op0, m));
15309 m = adjust_address (op1, V2SFmode, 8);
15310 emit_insn (gen_sse_loadhps (op0, op0, m));
15313 else if (MEM_P (op0))
15315 /* If we're optimizing for size, movups is the smallest. */
15316 if (optimize_insn_for_size_p ()
15317 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15319 op0 = gen_lowpart (V4SFmode, op0);
15320 op1 = gen_lowpart (V4SFmode, op1);
15321 emit_insn (gen_sse_movups (op0, op1));
15325 /* ??? Similar to above, only less clear because of
15326 "typeless stores". */
15327 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
15328 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
15330 op0 = gen_lowpart (V16QImode, op0);
15331 op1 = gen_lowpart (V16QImode, op1);
15332 emit_insn (gen_sse2_movdqu (op0, op1));
15336 if (TARGET_SSE2 && mode == V2DFmode)
15338 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
15340 op0 = gen_lowpart (V2DFmode, op0);
15341 op1 = gen_lowpart (V2DFmode, op1);
15342 emit_insn (gen_sse2_movupd (op0, op1));
15346 m = adjust_address (op0, DFmode, 0);
15347 emit_insn (gen_sse2_storelpd (m, op1));
15348 m = adjust_address (op0, DFmode, 8);
15349 emit_insn (gen_sse2_storehpd (m, op1));
15354 if (mode != V4SFmode)
15355 op1 = gen_lowpart (V4SFmode, op1);
15357 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
15359 op0 = gen_lowpart (V4SFmode, op0);
15360 emit_insn (gen_sse_movups (op0, op1));
15364 m = adjust_address (op0, V2SFmode, 0);
15365 emit_insn (gen_sse_storelps (m, op1));
15366 m = adjust_address (op0, V2SFmode, 8);
15367 emit_insn (gen_sse_storehps (m, op1));
15372 gcc_unreachable ();
15375 /* Expand a push in MODE. This is some mode for which we do not support
15376 proper push instructions, at least from the registers that we expect
15377 the value to live in. */
15380 ix86_expand_push (enum machine_mode mode, rtx x)
15384 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
15385 GEN_INT (-GET_MODE_SIZE (mode)),
15386 stack_pointer_rtx, 1, OPTAB_DIRECT);
15387 if (tmp != stack_pointer_rtx)
15388 emit_move_insn (stack_pointer_rtx, tmp);
15390 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
15392 /* When we push an operand onto the stack, it has to be aligned at least
15393 at the function argument boundary. However, since we don't have
15394 the argument type, we can't determine the actual argument
15395 boundary. */
15396 emit_move_insn (tmp, x);
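/* Illustrative sketch: the expansion above is just a stack-pointer
   adjustment followed by an ordinary move, e.g. for a 16-byte mode

       subl $16, %esp
       ... move the value to (%esp), possibly in pieces ...

   since no push instruction of that width exists from the registers
   the value is expected to live in.  */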
15399 /* Helper function of ix86_fixup_binary_operands to canonicalize
15400 operand order. Returns true if the operands should be swapped. */
15403 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
15406 rtx dst = operands[0];
15407 rtx src1 = operands[1];
15408 rtx src2 = operands[2];
15410 /* If the operation is not commutative, we can't do anything. */
15411 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
15414 /* Highest priority is that src1 should match dst. */
15415 if (rtx_equal_p (dst, src1))
15417 if (rtx_equal_p (dst, src2))
15420 /* Next highest priority is that immediate constants come second. */
15421 if (immediate_operand (src2, mode))
15423 if (immediate_operand (src1, mode))
15426 /* Lowest priority is that memory references should come second. */
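/* Example (illustrative): for a = b + a the operands are swapped to
   a = a + b so that src1 matches dst, which lets the two-address
   "addl %ebx, %eax" form be used directly.  */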
15436 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
15437 destination to use for the operation. If different from the true
15438 destination in operands[0], a copy operation will be required. */
15441 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
15444 rtx dst = operands[0];
15445 rtx src1 = operands[1];
15446 rtx src2 = operands[2];
15448 /* Canonicalize operand order. */
15449 if (ix86_swap_binary_operands_p (code, mode, operands))
15453 /* It is invalid to swap operands of different modes. */
15454 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
15461 /* Both source operands cannot be in memory. */
15462 if (MEM_P (src1) && MEM_P (src2))
15464 /* Optimization: Only read from memory once. */
15465 if (rtx_equal_p (src1, src2))
15467 src2 = force_reg (mode, src2);
15471 src2 = force_reg (mode, src2);
15474 /* If the destination is memory, and we do not have matching source
15475 operands, do things in registers. */
15476 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
15477 dst = gen_reg_rtx (mode);
15479 /* Source 1 cannot be a constant. */
15480 if (CONSTANT_P (src1))
15481 src1 = force_reg (mode, src1);
15483 /* Source 1 cannot be a non-matching memory. */
15484 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
15485 src1 = force_reg (mode, src1);
15487 operands[1] = src1;
15488 operands[2] = src2;
15492 /* Similarly, but assume that the destination has already been
15493 set up properly. */
15496 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
15497 enum machine_mode mode, rtx operands[])
15499 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
15500 gcc_assert (dst == operands[0]);
15503 /* Attempt to expand a binary operator. Make the expansion closer to the
15504 actual machine, than just general_operand, which will allow 3 separate
15505 memory references (one output, two input) in a single insn. */
15508 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
15511 rtx src1, src2, dst, op, clob;
15513 dst = ix86_fixup_binary_operands (code, mode, operands);
15514 src1 = operands[1];
15515 src2 = operands[2];
15517 /* Emit the instruction. */
15519 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
15520 if (reload_in_progress)
15522 /* Reload doesn't know about the flags register, and doesn't know that
15523 it doesn't want to clobber it. We can only do this with PLUS. */
15524 gcc_assert (code == PLUS);
15527 else if (reload_completed
15529 && !rtx_equal_p (dst, src1))
15531 /* This is going to be an LEA; avoid splitting it later. */
15536 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15537 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
15540 /* Fix up the destination if needed. */
15541 if (dst != operands[0])
15542 emit_move_insn (operands[0], dst);
15545 /* Return TRUE or FALSE depending on whether the binary operator meets the
15546 appropriate constraints. */
15549 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
15552 rtx dst = operands[0];
15553 rtx src1 = operands[1];
15554 rtx src2 = operands[2];
15556 /* Both source operands cannot be in memory. */
15557 if (MEM_P (src1) && MEM_P (src2))
15560 /* Canonicalize operand order for commutative operators. */
15561 if (ix86_swap_binary_operands_p (code, mode, operands))
15568 /* If the destination is memory, we must have a matching source operand. */
15569 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
15572 /* Source 1 cannot be a constant. */
15573 if (CONSTANT_P (src1))
15576 /* Source 1 cannot be a non-matching memory. */
15577 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
15579 /* Support "andhi/andsi/anddi" as a zero-extending move. */
15580 return (code == AND
15583 || (TARGET_64BIT && mode == DImode))
15584 && CONST_INT_P (src2)
15585 && (INTVAL (src2) == 0xff
15586 || INTVAL (src2) == 0xffff));
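/* Example (illustrative): the zero-extend special case above lets
   (set (reg:SI ...) (and:SI (mem:SI ...) (const_int 255))) pass even
   though src1 is a non-matching memory operand, since it can be
   emitted as a single movzbl load.  */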
15592 /* Attempt to expand a unary operator. Make the expansion closer to the
15593 actual machine, than just general_operand, which will allow 2 separate
15594 memory references (one output, one input) in a single insn. */
15597 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
15600 int matching_memory;
15601 rtx src, dst, op, clob;
15606 /* If the destination is memory, and we do not have matching source
15607 operands, do things in registers. */
15608 matching_memory = 0;
15611 if (rtx_equal_p (dst, src))
15612 matching_memory = 1;
15614 dst = gen_reg_rtx (mode);
15617 /* When source operand is memory, destination must match. */
15618 if (MEM_P (src) && !matching_memory)
15619 src = force_reg (mode, src);
15621 /* Emit the instruction. */
15623 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
15624 if (reload_in_progress || code == NOT)
15626 /* Reload doesn't know about the flags register, and doesn't know that
15627 it doesn't want to clobber it. */
15628 gcc_assert (code == NOT);
15633 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15634 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
15637 /* Fix up the destination if needed. */
15638 if (dst != operands[0])
15639 emit_move_insn (operands[0], dst);
15642 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
15643 divisor are within the range [0-255]. */
15646 ix86_split_idivmod (enum machine_mode mode, rtx operands[],
15649 rtx end_label, qimode_label;
15650 rtx insn, div, mod;
15651 rtx scratch, tmp0, tmp1, tmp2;
15652 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
15653 rtx (*gen_zero_extend) (rtx, rtx);
15654 rtx (*gen_test_ccno_1) (rtx, rtx);
15659 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
15660 gen_test_ccno_1 = gen_testsi_ccno_1;
15661 gen_zero_extend = gen_zero_extendqisi2;
15664 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
15665 gen_test_ccno_1 = gen_testdi_ccno_1;
15666 gen_zero_extend = gen_zero_extendqidi2;
15669 gcc_unreachable ();
15672 end_label = gen_label_rtx ();
15673 qimode_label = gen_label_rtx ();
15675 scratch = gen_reg_rtx (mode);
15677 /* Use 8bit unsigned divmod if dividend and divisor are within
15678 the range [0-255]. */
15679 emit_move_insn (scratch, operands[2]);
15680 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
15681 scratch, 1, OPTAB_DIRECT);
15682 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
15683 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
15684 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
15685 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
15686 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
15688 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
15689 predict_jump (REG_BR_PROB_BASE * 50 / 100);
15690 JUMP_LABEL (insn) = qimode_label;
15692 /* Generate original signed/unsigned divmod. */
15693 div = gen_divmod4_1 (operands[0], operands[1],
15694 operands[2], operands[3]);
15697 /* Branch to the end. */
15698 emit_jump_insn (gen_jump (end_label));
15701 /* Generate 8bit unsigned divide. */
15702 emit_label (qimode_label);
15703 /* Don't use operands[0] for result of 8bit divide since not all
15704 registers support QImode ZERO_EXTRACT. */
15705 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
15706 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
15707 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
15708 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
15712 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
15713 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
15717 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
15718 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
15721 /* Extract remainder from AH. */
15722 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
15723 if (REG_P (operands[1]))
15724 insn = emit_move_insn (operands[1], tmp1);
15727 /* Need a new scratch register since the old one holds the result
15728 of the 8bit divide. */
15729 scratch = gen_reg_rtx (mode);
15730 emit_move_insn (scratch, tmp1);
15731 insn = emit_move_insn (operands[1], scratch);
15733 set_unique_reg_note (insn, REG_EQUAL, mod);
15735 /* Zero extend quotient from AL. */
15736 tmp1 = gen_lowpart (QImode, tmp0);
15737 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
15738 set_unique_reg_note (insn, REG_EQUAL, div);
15740 emit_label (end_label);
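/* Sketch of the emitted control flow (illustrative, not literal
   output):

       mov    op2, scratch
       or     op3, scratch
       test   $-0x100, scratch	; both operands in [0-255]?
       je     .Lqimode
       div    ...		; full-width signed/unsigned divide
       jmp    .Lend
   .Lqimode:
       divb   ...		; 8-bit divide: AL = quotient, AH = remainder
   .Lend:
*/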
15743 #define LEA_SEARCH_THRESHOLD 12
15745 /* Search backward for non-agu definition of register number REGNO1
15746 or register number REGNO2 in INSN's basic block until we
15747 1. pass LEA_SEARCH_THRESHOLD instructions, or
15748 2. reach the BB boundary, or
15749 3. reach an agu definition.
15750 Returns the distance between the non-agu definition point and INSN.
15751 If no definition point, returns -1. */
15754 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
15757 basic_block bb = BLOCK_FOR_INSN (insn);
15760 enum attr_type insn_type;
15762 if (insn != BB_HEAD (bb))
15764 rtx prev = PREV_INSN (insn);
15765 while (prev && distance < LEA_SEARCH_THRESHOLD)
15767 if (NONDEBUG_INSN_P (prev))
15770 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
15771 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
15772 && !DF_REF_IS_ARTIFICIAL (*def_rec)
15773 && (regno1 == DF_REF_REGNO (*def_rec)
15774 || regno2 == DF_REF_REGNO (*def_rec)))
15776 insn_type = get_attr_type (prev);
15777 if (insn_type != TYPE_LEA)
15781 if (prev == BB_HEAD (bb))
15783 prev = PREV_INSN (prev);
15787 if (distance < LEA_SEARCH_THRESHOLD)
15791 bool simple_loop = false;
15793 FOR_EACH_EDGE (e, ei, bb->preds)
15796 simple_loop = true;
15802 rtx prev = BB_END (bb);
15805 && distance < LEA_SEARCH_THRESHOLD)
15807 if (NONDEBUG_INSN_P (prev))
15810 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
15811 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
15812 && !DF_REF_IS_ARTIFICIAL (*def_rec)
15813 && (regno1 == DF_REF_REGNO (*def_rec)
15814 || regno2 == DF_REF_REGNO (*def_rec)))
15816 insn_type = get_attr_type (prev);
15817 if (insn_type != TYPE_LEA)
15821 prev = PREV_INSN (prev);
15829 /* get_attr_type may modify recog data. We want to make sure
15830 that recog data is valid for instruction INSN, on which
15831 distance_non_agu_define is called. INSN is unchanged here. */
15832 extract_insn_cached (insn);
15836 /* Return the distance between INSN and the next insn that uses
15837 register number REGNO0 in a memory address. Return -1 if no such
15838 use is found within LEA_SEARCH_THRESHOLD or if REGNO0 is set. */
15841 distance_agu_use (unsigned int regno0, rtx insn)
15843 basic_block bb = BLOCK_FOR_INSN (insn);
15848 if (insn != BB_END (bb))
15850 rtx next = NEXT_INSN (insn);
15851 while (next && distance < LEA_SEARCH_THRESHOLD)
15853 if (NONDEBUG_INSN_P (next))
15857 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
15858 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
15859 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
15860 && regno0 == DF_REF_REGNO (*use_rec))
15862 /* Return DISTANCE if OP0 is used in memory
15863 address in NEXT. */
15867 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
15868 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
15869 && !DF_REF_IS_ARTIFICIAL (*def_rec)
15870 && regno0 == DF_REF_REGNO (*def_rec))
15872 /* Return -1 if OP0 is set in NEXT. */
15876 if (next == BB_END (bb))
15878 next = NEXT_INSN (next);
15882 if (distance < LEA_SEARCH_THRESHOLD)
15886 bool simple_loop = false;
15888 FOR_EACH_EDGE (e, ei, bb->succs)
15891 simple_loop = true;
15897 rtx next = BB_HEAD (bb);
15900 && distance < LEA_SEARCH_THRESHOLD)
15902 if (NONDEBUG_INSN_P (next))
15906 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
15907 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
15908 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
15909 && regno0 == DF_REF_REGNO (*use_rec))
15911 /* Return DISTANCE if OP0 is used in memory
15912 address in NEXT. */
15916 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
15917 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
15918 && !DF_REF_IS_ARTIFICIAL (*def_rec)
15919 && regno0 == DF_REF_REGNO (*def_rec))
15921 /* Return -1 if OP0 is set in NEXT. */
15926 next = NEXT_INSN (next);
15934 /* Define this macro to tune LEA priority vs ADD; it takes effect when
15935 there is a choice between LEA and ADD.
15936 Negative value: ADD is preferred over LEA
15938 Positive value: LEA is preferred over ADD */
15939 #define IX86_LEA_PRIORITY 2
15941 /* Return true if it is ok to optimize an ADD operation to LEA
15942 operation to avoid flag register consumption. For most processors,
15943 ADD is faster than LEA. For processors like ATOM, if the
15944 destination register of LEA holds an actual address which will be
15945 used soon, LEA is better; otherwise ADD is better. */
15948 ix86_lea_for_add_ok (rtx insn, rtx operands[])
15950 unsigned int regno0 = true_regnum (operands[0]);
15951 unsigned int regno1 = true_regnum (operands[1]);
15952 unsigned int regno2 = true_regnum (operands[2]);
15954 /* If a = b + c, (a != b && a != c), we must use the lea form. */
15955 if (regno0 != regno1 && regno0 != regno2)
15958 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
15962 int dist_define, dist_use;
15964 /* Return false if REGNO0 isn't used in memory address. */
15965 dist_use = distance_agu_use (regno0, insn);
15969 dist_define = distance_non_agu_define (regno1, regno2, insn);
15970 if (dist_define <= 0)
15973 /* If this insn has both backward non-agu dependence and forward
15974 agu dependence, the one with the shorter distance takes effect. */
15975 if ((dist_define + IX86_LEA_PRIORITY) < dist_use)
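/* Example with hypothetical distances: dist_define == 3 and
   dist_use == 6 gives 3 + IX86_LEA_PRIORITY < 6, so the backward
   non-agu dependence is the nearer one and ADD is kept; with
   dist_use == 4 the test fails and LEA is chosen instead.  */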
15982 /* Return true if the destination reg of SET_BODY is the shift count of
15983 USE_BODY. */
15986 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
15992 /* Retrieve destination of SET_BODY. */
15993 switch (GET_CODE (set_body))
15996 set_dest = SET_DEST (set_body);
15997 if (!set_dest || !REG_P (set_dest))
16001 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
16002 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
16010 /* Retrieve shift count of USE_BODY. */
16011 switch (GET_CODE (use_body))
16014 shift_rtx = XEXP (use_body, 1);
16017 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
16018 if (ix86_dep_by_shift_count_body (set_body,
16019 XVECEXP (use_body, 0, i)))
16027 && (GET_CODE (shift_rtx) == ASHIFT
16028 || GET_CODE (shift_rtx) == LSHIFTRT
16029 || GET_CODE (shift_rtx) == ASHIFTRT
16030 || GET_CODE (shift_rtx) == ROTATE
16031 || GET_CODE (shift_rtx) == ROTATERT))
16033 rtx shift_count = XEXP (shift_rtx, 1);
16035 /* Return true if shift count is dest of SET_BODY. */
16036 if (REG_P (shift_count)
16037 && true_regnum (set_dest) == true_regnum (shift_count))
16044 /* Return true if the destination reg of SET_INSN is the shift count of
16045 USE_INSN. */
16048 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
16050 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
16051 PATTERN (use_insn));
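/* Example (illustrative):

       movl %eax, %ecx		; SET_INSN: defines %ecx
       sall %cl, %edx		; USE_INSN: shift count is %cl

   Here the shift count of USE_INSN is the destination of SET_INSN
   (true_regnum maps %cl and %ecx to the same hard register), so this
   returns true.  */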
16054 /* Return TRUE or FALSE depending on whether the unary operator meets the
16055 appropriate constraints. */
16058 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
16059 enum machine_mode mode ATTRIBUTE_UNUSED,
16060 rtx operands[2] ATTRIBUTE_UNUSED)
16062 /* If one of operands is memory, source and destination must match. */
16063 if ((MEM_P (operands[0])
16064 || MEM_P (operands[1]))
16065 && ! rtx_equal_p (operands[0], operands[1]))
16070 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
16071 are ok, keeping in mind the possible movddup alternative. */
16074 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
16076 if (MEM_P (operands[0]))
16077 return rtx_equal_p (operands[0], operands[1 + high]);
16078 if (MEM_P (operands[1]) && MEM_P (operands[2]))
16079 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
16083 /* Post-reload splitter for converting an SF or DFmode value in an
16084 SSE register into an unsigned SImode. */
16087 ix86_split_convert_uns_si_sse (rtx operands[])
16089 enum machine_mode vecmode;
16090 rtx value, large, zero_or_two31, input, two31, x;
16092 large = operands[1];
16093 zero_or_two31 = operands[2];
16094 input = operands[3];
16095 two31 = operands[4];
16096 vecmode = GET_MODE (large);
16097 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
16099 /* Load up the value into the low element. We must ensure that the other
16100 elements are valid floats -- zero is the easiest such value. */
16103 if (vecmode == V4SFmode)
16104 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
16106 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
16110 input = gen_rtx_REG (vecmode, REGNO (input));
16111 emit_move_insn (value, CONST0_RTX (vecmode));
16112 if (vecmode == V4SFmode)
16113 emit_insn (gen_sse_movss (value, value, input));
16115 emit_insn (gen_sse2_movsd (value, value, input));
16118 emit_move_insn (large, two31);
16119 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
16121 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
16122 emit_insn (gen_rtx_SET (VOIDmode, large, x));
16124 x = gen_rtx_AND (vecmode, zero_or_two31, large);
16125 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
16127 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
16128 emit_insn (gen_rtx_SET (VOIDmode, value, x));
16130 large = gen_rtx_REG (V4SImode, REGNO (large));
16131 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
16133 x = gen_rtx_REG (V4SImode, REGNO (value));
16134 if (vecmode == V4SFmode)
16135 emit_insn (gen_sse2_cvttps2dq (x, value));
16137 emit_insn (gen_sse2_cvttpd2dq (x, value));
16140 emit_insn (gen_xorv4si3 (value, value, large));
16143 /* Convert an unsigned DImode value into a DFmode, using only SSE.
16144 Expects the 64-bit DImode to be supplied in a pair of integral
16145 registers. Requires SSE2; will use SSE3 if available. For x86_32,
16146 -mfpmath=sse, !optimize_size only. */
16149 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
16151 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
16152 rtx int_xmm, fp_xmm;
16153 rtx biases, exponents;
16156 int_xmm = gen_reg_rtx (V4SImode);
16157 if (TARGET_INTER_UNIT_MOVES)
16158 emit_insn (gen_movdi_to_sse (int_xmm, input));
16159 else if (TARGET_SSE_SPLIT_REGS)
16161 emit_clobber (int_xmm);
16162 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
16166 x = gen_reg_rtx (V2DImode);
16167 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
16168 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
16171 x = gen_rtx_CONST_VECTOR (V4SImode,
16172 gen_rtvec (4, GEN_INT (0x43300000UL),
16173 GEN_INT (0x45300000UL),
16174 const0_rtx, const0_rtx));
16175 exponents = validize_mem (force_const_mem (V4SImode, x));
16177 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
16178 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
16180 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
16181 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
16182 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
16183 (0x1.0p84 + double(fp_value_hi_xmm)).
16184 Note these exponents differ by 32. */
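/* Worked example (illustrative): for input 0x0000000500000002 the low
   lane becomes 0x1.0p52 + 2.0 and the high lane 0x1.0p84 + 5.0 * 2^32;
   subtracting the biases below leaves 2.0 and 5.0 * 2^32, and their
   sum is the exact DF value of the unsigned 64-bit input.  */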
16186 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
16188 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
16189 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
16190 real_ldexp (&bias_lo_rvt, &dconst1, 52);
16191 real_ldexp (&bias_hi_rvt, &dconst1, 84);
16192 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
16193 x = const_double_from_real_value (bias_hi_rvt, DFmode);
16194 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
16195 biases = validize_mem (force_const_mem (V2DFmode, biases));
16196 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
16198 /* Add the upper and lower DFmode values together. */
16200 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
16203 x = copy_to_mode_reg (V2DFmode, fp_xmm);
16204 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
16205 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
16208 ix86_expand_vector_extract (false, target, fp_xmm, 0);
16211 /* Not used, but eases macroization of patterns. */
16213 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
16214 rtx input ATTRIBUTE_UNUSED)
16216 gcc_unreachable ();
16219 /* Convert an unsigned SImode value into a DFmode. Only currently used
16220 for SSE, but applicable anywhere. */
16223 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
16225 REAL_VALUE_TYPE TWO31r;
16228 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
16229 NULL, 1, OPTAB_DIRECT);
16231 fp = gen_reg_rtx (DFmode);
16232 emit_insn (gen_floatsidf2 (fp, x));
16234 real_ldexp (&TWO31r, &dconst1, 31);
16235 x = const_double_from_real_value (TWO31r, DFmode);
16237 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
16239 emit_move_insn (target, x);
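/* Restating the trick above (illustrative): the code computes
   (DF)(input - 2^31) + 2^31.0, where the subtraction is a wrapping
   32-bit add of -2^31 so the intermediate fits the signed conversion
   range.  E.g. input 0xffffffff -> 0x7fffffff -> 2147483647.0
   + 2147483648.0 = 4294967295.0.  */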
16242 /* Convert a signed DImode value into a DFmode. Only used for SSE in
16243 32-bit mode; otherwise we have a direct convert instruction. */
16246 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
16248 REAL_VALUE_TYPE TWO32r;
16249 rtx fp_lo, fp_hi, x;
16251 fp_lo = gen_reg_rtx (DFmode);
16252 fp_hi = gen_reg_rtx (DFmode);
16254 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
16256 real_ldexp (&TWO32r, &dconst1, 32);
16257 x = const_double_from_real_value (TWO32r, DFmode);
16258 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
16260 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
16262 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
16265 emit_move_insn (target, x);
16268 /* Convert an unsigned SImode value into a SFmode, using only SSE.
16269 For x86_32, -mfpmath=sse, !optimize_size only. */
16271 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
16273 REAL_VALUE_TYPE ONE16r;
16274 rtx fp_hi, fp_lo, int_hi, int_lo, x;
16276 real_ldexp (&ONE16r, &dconst1, 16);
16277 x = const_double_from_real_value (ONE16r, SFmode);
16278 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
16279 NULL, 0, OPTAB_DIRECT);
16280 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
16281 NULL, 0, OPTAB_DIRECT);
16282 fp_hi = gen_reg_rtx (SFmode);
16283 fp_lo = gen_reg_rtx (SFmode);
16284 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
16285 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
16286 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
16288 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
16290 if (!rtx_equal_p (target, fp_hi))
16291 emit_move_insn (target, fp_hi);
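/* Restated (illustrative): result = (SF)(input >> 16) * 65536.0f
   + (SF)(input & 0xffff); each 16-bit half converts exactly, and only
   the final add can round.  */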
16294 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
16295 then replicate the value for all elements of the vector
16299 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
16306 v = gen_rtvec (4, value, value, value, value);
16307 return gen_rtx_CONST_VECTOR (V4SImode, v);
16311 v = gen_rtvec (2, value, value);
16312 return gen_rtx_CONST_VECTOR (V2DImode, v);
16316 v = gen_rtvec (8, value, value, value, value,
16317 value, value, value, value);
16319 v = gen_rtvec (8, value, CONST0_RTX (SFmode),
16320 CONST0_RTX (SFmode), CONST0_RTX (SFmode),
16321 CONST0_RTX (SFmode), CONST0_RTX (SFmode),
16322 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
16323 return gen_rtx_CONST_VECTOR (V8SFmode, v);
16327 v = gen_rtvec (4, value, value, value, value);
16329 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
16330 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
16331 return gen_rtx_CONST_VECTOR (V4SFmode, v);
16335 v = gen_rtvec (4, value, value, value, value);
16337 v = gen_rtvec (4, value, CONST0_RTX (DFmode),
16338 CONST0_RTX (DFmode), CONST0_RTX (DFmode));
16339 return gen_rtx_CONST_VECTOR (V4DFmode, v);
16343 v = gen_rtvec (2, value, value);
16345 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
16346 return gen_rtx_CONST_VECTOR (V2DFmode, v);
16349 gcc_unreachable ();
16353 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
16354 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
16355 for an SSE register. If VECT is true, then replicate the mask for
16356 all elements of the vector register. If INVERT is true, then create
16357 a mask excluding the sign bit. */
16360 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
16362 enum machine_mode vec_mode, imode;
16363 HOST_WIDE_INT hi, lo;
16368 /* Find the sign bit, sign extended to 2*HWI. */
16375 mode = GET_MODE_INNER (mode);
16377 lo = 0x80000000, hi = lo < 0;
16384 mode = GET_MODE_INNER (mode);
16386 if (HOST_BITS_PER_WIDE_INT >= 64)
16387 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
16389 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
16394 vec_mode = VOIDmode;
16395 if (HOST_BITS_PER_WIDE_INT >= 64)
16398 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
16405 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
16409 lo = ~lo, hi = ~hi;
16415 mask = immed_double_const (lo, hi, imode);
16417 vec = gen_rtvec (2, v, mask);
16418 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
16419 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
16426 gcc_unreachable ();
16430 lo = ~lo, hi = ~hi;
16432 /* Force this value into the low part of a fp vector constant. */
16433 mask = immed_double_const (lo, hi, imode);
16434 mask = gen_lowpart (mode, mask);
16436 if (vec_mode == VOIDmode)
16437 return force_reg (mode, mask);
16439 v = ix86_build_const_vector (vec_mode, vect, mask);
16440 return force_reg (vec_mode, v);
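/* Example (illustrative): for SFmode the scalar mask is 0x80000000
   (0x7fffffff when INVERT), and with VECT set it is replicated into
   every lane, e.g. { 0x80000000, 0x80000000, 0x80000000, 0x80000000 }
   for V4SFmode.  */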
16443 /* Generate code for floating point ABS or NEG. */
16446 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
16449 rtx mask, set, dst, src;
16450 bool use_sse = false;
16451 bool vector_mode = VECTOR_MODE_P (mode);
16452 enum machine_mode vmode = mode;
16456 else if (mode == TFmode)
16458 else if (TARGET_SSE_MATH)
16460 use_sse = SSE_FLOAT_MODE_P (mode);
16461 if (mode == SFmode)
16463 else if (mode == DFmode)
16467 /* NEG and ABS performed with SSE use bitwise mask operations.
16468 Create the appropriate mask now. */
16470 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
16477 set = gen_rtx_fmt_e (code, mode, src);
16478 set = gen_rtx_SET (VOIDmode, dst, set);
16485 use = gen_rtx_USE (VOIDmode, mask);
16487 par = gen_rtvec (2, set, use);
16490 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16491 par = gen_rtvec (3, set, use, clob);
16493 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
16499 /* Expand a copysign operation. Special case operand 0 being a constant. */
16502 ix86_expand_copysign (rtx operands[])
16504 enum machine_mode mode, vmode;
16505 rtx dest, op0, op1, mask, nmask;
16507 dest = operands[0];
16511 mode = GET_MODE (dest);
16513 if (mode == SFmode)
16515 else if (mode == DFmode)
16520 if (GET_CODE (op0) == CONST_DOUBLE)
16522 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
16524 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
16525 op0 = simplify_unary_operation (ABS, mode, op0, mode);
16527 if (mode == SFmode || mode == DFmode)
16529 if (op0 == CONST0_RTX (mode))
16530 op0 = CONST0_RTX (vmode);
16533 rtx v = ix86_build_const_vector (vmode, false, op0);
16535 op0 = force_reg (vmode, v);
16538 else if (op0 != CONST0_RTX (mode))
16539 op0 = force_reg (mode, op0);
16541 mask = ix86_build_signbit_mask (vmode, 0, 0);
16543 if (mode == SFmode)
16544 copysign_insn = gen_copysignsf3_const;
16545 else if (mode == DFmode)
16546 copysign_insn = gen_copysigndf3_const;
16548 copysign_insn = gen_copysigntf3_const;
16550 emit_insn (copysign_insn (dest, op0, op1, mask));
16554 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
16556 nmask = ix86_build_signbit_mask (vmode, 0, 1);
16557 mask = ix86_build_signbit_mask (vmode, 0, 0);
16559 if (mode == SFmode)
16560 copysign_insn = gen_copysignsf3_var;
16561 else if (mode == DFmode)
16562 copysign_insn = gen_copysigndf3_var;
16564 copysign_insn = gen_copysigntf3_var;
16566 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
16570 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
16571 be a constant, and so has already been expanded into a vector constant. */
16574 ix86_split_copysign_const (rtx operands[])
16576 enum machine_mode mode, vmode;
16577 rtx dest, op0, mask, x;
16579 dest = operands[0];
16581 mask = operands[3];
16583 mode = GET_MODE (dest);
16584 vmode = GET_MODE (mask);
16586 dest = simplify_gen_subreg (vmode, dest, mode, 0);
16587 x = gen_rtx_AND (vmode, dest, mask);
16588 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16590 if (op0 != CONST0_RTX (vmode))
16592 x = gen_rtx_IOR (vmode, dest, op0);
16593 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16597 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
16598 so we have to do two masks. */
16601 ix86_split_copysign_var (rtx operands[])
16603 enum machine_mode mode, vmode;
16604 rtx dest, scratch, op0, op1, mask, nmask, x;
16606 dest = operands[0];
16607 scratch = operands[1];
16610 nmask = operands[4];
16611 mask = operands[5];
16613 mode = GET_MODE (dest);
16614 vmode = GET_MODE (mask);
16616 if (rtx_equal_p (op0, op1))
16618 /* Shouldn't happen often (it's useless, obviously), but when it does
16619 we'd generate incorrect code if we continue below. */
16620 emit_move_insn (dest, op0);
16624 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
16626 gcc_assert (REGNO (op1) == REGNO (scratch));
16628 x = gen_rtx_AND (vmode, scratch, mask);
16629 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
16632 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
16633 x = gen_rtx_NOT (vmode, dest);
16634 x = gen_rtx_AND (vmode, x, op0);
16635 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16639 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
16641 x = gen_rtx_AND (vmode, scratch, mask);
16643 else /* alternative 2,4 */
16645 gcc_assert (REGNO (mask) == REGNO (scratch));
16646 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
16647 x = gen_rtx_AND (vmode, scratch, op1);
16649 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
16651 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
16653 dest = simplify_gen_subreg (vmode, op0, mode, 0);
16654 x = gen_rtx_AND (vmode, dest, nmask);
16656 else /* alternative 3,4 */
16658 gcc_assert (REGNO (nmask) == REGNO (dest));
16660 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
16661 x = gen_rtx_AND (vmode, dest, op0);
16663 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16666 x = gen_rtx_IOR (vmode, dest, scratch);
16667 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16670 /* Return TRUE or FALSE depending on whether the first SET in INSN
16671 has source and destination with matching CC modes, and that the
16672 CC mode is at least as constrained as REQ_MODE. */
16675 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
16678 enum machine_mode set_mode;
16680 set = PATTERN (insn);
16681 if (GET_CODE (set) == PARALLEL)
16682 set = XVECEXP (set, 0, 0);
16683 gcc_assert (GET_CODE (set) == SET);
16684 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
16686 set_mode = GET_MODE (SET_DEST (set));
16690 if (req_mode != CCNOmode
16691 && (req_mode != CCmode
16692 || XEXP (SET_SRC (set), 1) != const0_rtx))
16696 if (req_mode == CCGCmode)
16700 if (req_mode == CCGOCmode || req_mode == CCNOmode)
16704 if (req_mode == CCZmode)
16714 if (set_mode != req_mode)
16719 gcc_unreachable ();
16722 return GET_MODE (SET_SRC (set)) == set_mode;
16725 /* Generate insn patterns to do an integer compare of OPERANDS. */
16728 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
16730 enum machine_mode cmpmode;
16733 cmpmode = SELECT_CC_MODE (code, op0, op1);
16734 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
16736 /* This is very simple, but making the interface the same as in the
16737 FP case makes the rest of the code easier. */
16738 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
16739 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
16741 /* Return the test that should be put into the flags user, i.e.
16742 the bcc, scc, or cmov instruction. */
16743 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
16746 /* Figure out whether to use ordered or unordered fp comparisons.
16747 Return the appropriate mode to use. */
16750 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
16752 /* ??? In order to make all comparisons reversible, we do all comparisons
16753 non-trapping when compiling for IEEE. Once gcc is able to distinguish
16754 all forms of trapping and nontrapping comparisons, we can make inequality
16755 comparisons trapping again, since it results in better code when using
16756 FCOM based compares. */
16757 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
16761 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
16763 enum machine_mode mode = GET_MODE (op0);
16765 if (SCALAR_FLOAT_MODE_P (mode))
16767 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
16768 return ix86_fp_compare_mode (code);
16773 /* Only zero flag is needed. */
16774 case EQ: /* ZF=0 */
16775 case NE: /* ZF!=0 */
16777 /* Codes needing carry flag. */
16778 case GEU: /* CF=0 */
16779 case LTU: /* CF=1 */
16780 /* Detect overflow checks. They need just the carry flag. */
16781 if (GET_CODE (op0) == PLUS
16782 && rtx_equal_p (op1, XEXP (op0, 0)))
16786 case GTU: /* CF=0 & ZF=0 */
16787 case LEU: /* CF=1 | ZF=1 */
16788 /* Detect overflow checks. They need just the carry flag. */
16789 if (GET_CODE (op0) == MINUS
16790 && rtx_equal_p (op1, XEXP (op0, 0)))
16794 /* Codes possibly doable only with sign flag when
16795 comparing against zero. */
16796 case GE: /* SF=OF or SF=0 */
16797 case LT: /* SF<>OF or SF=1 */
16798 if (op1 == const0_rtx)
16801 /* For other cases Carry flag is not required. */
16803 /* Codes doable only with the sign flag when comparing
16804 against zero, but we lack a jump instruction for it,
16805 so we need to use relational tests against overflow,
16806 which thus needs to be zero.
16807 case GT: /* ZF=0 & SF=OF */
16808 case LE: /* ZF=1 | SF<>OF */
16809 if (op1 == const0_rtx)
16813 /* The strcmp pattern does (use flags), and combine may ask us for the
16814 proper mode. */
16818 gcc_unreachable ();
16822 /* Return the fixed registers used for condition codes. */
16825 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
16832 /* If two condition code modes are compatible, return a condition code
16833 mode which is compatible with both. Otherwise, return
16836 static enum machine_mode
16837 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
16842 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
16845 if ((m1 == CCGCmode && m2 == CCGOCmode)
16846 || (m1 == CCGOCmode && m2 == CCGCmode))
16852 gcc_unreachable ();
16882 /* These are only compatible with themselves, which we already
16883 know. */
16889 /* Return a comparison we can do that is equivalent to
16890 swap_condition (code), except possibly for orderedness.
16891 Never change orderedness if TARGET_IEEE_FP, returning
16892 UNKNOWN in that case if necessary. */
16894 static enum rtx_code
16895 ix86_fp_swap_condition (enum rtx_code code)
16899 case GT: /* GTU - CF=0 & ZF=0 */
16900 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
16901 case GE: /* GEU - CF=0 */
16902 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
16903 case UNLT: /* LTU - CF=1 */
16904 return TARGET_IEEE_FP ? UNKNOWN : GT;
16905 case UNLE: /* LEU - CF=1 | ZF=1 */
16906 return TARGET_IEEE_FP ? UNKNOWN : GE;
16908 return swap_condition (code);
16912 /* Return cost of comparison CODE using the best strategy for performance.
16913 All following functions use the number of instructions as the cost metric.
16914 In the future this should be tweaked to compute bytes for optimize_size and
16915 take into account performance of various instructions on various CPUs. */
16918 ix86_fp_comparison_cost (enum rtx_code code)
16922 /* The cost of code using bit-twiddling on %ah. */
16939 arith_cost = TARGET_IEEE_FP ? 5 : 4;
16943 arith_cost = TARGET_IEEE_FP ? 6 : 4;
16946 gcc_unreachable ();
16949 switch (ix86_fp_comparison_strategy (code))
16951 case IX86_FPCMP_COMI:
16952 return arith_cost > 4 ? 3 : 2;
16953 case IX86_FPCMP_SAHF:
16954 return arith_cost > 4 ? 4 : 3;
16960 /* Return the strategy to use for floating-point comparisons. We assume
16961 that fcomi is always preferable where available, since that is also true at size
16962 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
16964 enum ix86_fpcmp_strategy
16965 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
16967 /* Do fcomi/sahf based test when profitable. */
16970 return IX86_FPCMP_COMI;
16972 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_function_for_size_p (cfun)))
16973 return IX86_FPCMP_SAHF;
16975 return IX86_FPCMP_ARITH;
16978 /* Swap, force into registers, or otherwise massage the two operands
16979 to a fp comparison. The operands are updated in place; the new
16980 comparison code is returned. */
16982 static enum rtx_code
16983 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
16985 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
16986 rtx op0 = *pop0, op1 = *pop1;
16987 enum machine_mode op_mode = GET_MODE (op0);
16988 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
16990 /* All of the unordered compare instructions only work on registers.
16991 The same is true of the fcomi compare instructions. The XFmode
16992 compare instructions require registers except when comparing
16993 against zero or when converting operand 1 from fixed point to
16997 && (fpcmp_mode == CCFPUmode
16998 || (op_mode == XFmode
16999 && ! (standard_80387_constant_p (op0) == 1
17000 || standard_80387_constant_p (op1) == 1)
17001 && GET_CODE (op1) != FLOAT)
17002 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
17004 op0 = force_reg (op_mode, op0);
17005 op1 = force_reg (op_mode, op1);
17009 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
17010 things around if they appear profitable, otherwise force op0
17011 into a register. */
17013 if (standard_80387_constant_p (op0) == 0
17015 && ! (standard_80387_constant_p (op1) == 0
17018 enum rtx_code new_code = ix86_fp_swap_condition (code);
17019 if (new_code != UNKNOWN)
17022 tmp = op0, op0 = op1, op1 = tmp;
17028 op0 = force_reg (op_mode, op0);
17030 if (CONSTANT_P (op1))
17032 int tmp = standard_80387_constant_p (op1);
17034 op1 = validize_mem (force_const_mem (op_mode, op1));
17038 op1 = force_reg (op_mode, op1);
17041 op1 = force_reg (op_mode, op1);
17045 /* Try to rearrange the comparison to make it cheaper. */
17046 if (ix86_fp_comparison_cost (code)
17047 > ix86_fp_comparison_cost (swap_condition (code))
17048 && (REG_P (op1) || can_create_pseudo_p ()))
17051 tmp = op0, op0 = op1, op1 = tmp;
17052 code = swap_condition (code);
17054 op0 = force_reg (op_mode, op0);
17062 /* Convert comparison codes we use to represent FP comparison to integer
17063 code that will result in proper branch. Return UNKNOWN if no such code
17067 ix86_fp_compare_code_to_integer (enum rtx_code code)
17096 /* Generate insn patterns to do a floating point compare of OPERANDS. */
17099 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
17101 enum machine_mode fpcmp_mode, intcmp_mode;
17104 fpcmp_mode = ix86_fp_compare_mode (code);
17105 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
17107 /* Do fcomi/sahf based test when profitable. */
17108 switch (ix86_fp_comparison_strategy (code))
17110 case IX86_FPCMP_COMI:
17111 intcmp_mode = fpcmp_mode;
17112 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
17113 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
17118 case IX86_FPCMP_SAHF:
17119 intcmp_mode = fpcmp_mode;
17120 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
17121 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
17125 scratch = gen_reg_rtx (HImode);
17126 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
17127 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
17130 case IX86_FPCMP_ARITH:
17131 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
17132 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
17133 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
17135 scratch = gen_reg_rtx (HImode);
17136 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
17138 /* In the unordered case, we have to check C2 for NaNs, which
17139 doesn't happen to work out to anything nice combination-wise.
17140 So do some bit twiddling on the value we've got in AH to come
17141 up with an appropriate set of condition codes. */
17143 intcmp_mode = CCNOmode;
17148 if (code == GT || !TARGET_IEEE_FP)
17150 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
17155 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17156 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
17157 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
17158 intcmp_mode = CCmode;
17164 if (code == LT && TARGET_IEEE_FP)
17166 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17167 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
17168 intcmp_mode = CCmode;
17173 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
17179 if (code == GE || !TARGET_IEEE_FP)
17181 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
17186 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17187 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
17193 if (code == LE && TARGET_IEEE_FP)
17195 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17196 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
17197 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
17198 intcmp_mode = CCmode;
17203 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
17209 if (code == EQ && TARGET_IEEE_FP)
17211 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17212 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
17213 intcmp_mode = CCmode;
17218 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
17224 if (code == NE && TARGET_IEEE_FP)
17226 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17227 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
17233 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
17239 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
17243 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
17248 gcc_unreachable ();
17256 /* Return the test that should be put into the flags user, i.e.
17257 the bcc, scc, or cmov instruction. */
17258 return gen_rtx_fmt_ee (code, VOIDmode,
17259 gen_rtx_REG (intcmp_mode, FLAGS_REG),
17264 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
17268 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
17269 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
17271 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
17273 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
17274 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
17277 ret = ix86_expand_int_compare (code, op0, op1);
17283 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
17285 enum machine_mode mode = GET_MODE (op0);
17297 tmp = ix86_expand_compare (code, op0, op1);
17298 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
17299 gen_rtx_LABEL_REF (VOIDmode, label),
17301 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
17308 /* Expand DImode branch into multiple compare+branch. */
17310 rtx lo[2], hi[2], label2;
17311 enum rtx_code code1, code2, code3;
17312 enum machine_mode submode;
17314 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
17316 tmp = op0, op0 = op1, op1 = tmp;
17317 code = swap_condition (code);
17320 split_double_mode (mode, &op0, 1, lo+0, hi+0);
17321 split_double_mode (mode, &op1, 1, lo+1, hi+1);
17323 submode = mode == DImode ? SImode : DImode;
17325 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
17326 avoid two branches. This costs one extra insn, so disable when
17327 optimizing for size. */
17329 if ((code == EQ || code == NE)
17330 && (!optimize_insn_for_size_p ()
17331 || hi[1] == const0_rtx || lo[1] == const0_rtx))
17336 if (hi[1] != const0_rtx)
17337 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
17338 NULL_RTX, 0, OPTAB_WIDEN);
17341 if (lo[1] != const0_rtx)
17342 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
17343 NULL_RTX, 0, OPTAB_WIDEN);
17345 tmp = expand_binop (submode, ior_optab, xor1, xor0,
17346 NULL_RTX, 0, OPTAB_WIDEN);
17348 ix86_expand_branch (code, tmp, const0_rtx, label);
17352 /* Otherwise, if we are doing less-than or greater-or-equal and
17353 op1 is a constant whose low word is zero, then we can just
17354 examine the high word. Similarly for a low word of -1 and
17355 less-or-equal or greater-than. */
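/* Example (illustrative): for a DImode unsigned "a < 0x500000000" on
   ia32 the low word of the constant is zero, so only the high-word
   compare "hi(a) < 5" needs to be emitted.  */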
17357 if (CONST_INT_P (hi[1]))
17360 case LT: case LTU: case GE: case GEU:
17361 if (lo[1] == const0_rtx)
17363 ix86_expand_branch (code, hi[0], hi[1], label);
17367 case LE: case LEU: case GT: case GTU:
17368 if (lo[1] == constm1_rtx)
17370 ix86_expand_branch (code, hi[0], hi[1], label);
17378 /* Otherwise, we need two or three jumps. */
17380 label2 = gen_label_rtx ();
17383 code2 = swap_condition (code);
17384 code3 = unsigned_condition (code);
17388 case LT: case GT: case LTU: case GTU:
17391 case LE: code1 = LT; code2 = GT; break;
17392 case GE: code1 = GT; code2 = LT; break;
17393 case LEU: code1 = LTU; code2 = GTU; break;
17394 case GEU: code1 = GTU; code2 = LTU; break;
17396 case EQ: code1 = UNKNOWN; code2 = NE; break;
17397 case NE: code2 = UNKNOWN; break;
17400 gcc_unreachable ();
17405 * if (hi(a) < hi(b)) goto true;
17406 * if (hi(a) > hi(b)) goto false;
17407 * if (lo(a) < lo(b)) goto true;
17411 if (code1 != UNKNOWN)
17412 ix86_expand_branch (code1, hi[0], hi[1], label);
17413 if (code2 != UNKNOWN)
17414 ix86_expand_branch (code2, hi[0], hi[1], label2);
17416 ix86_expand_branch (code3, lo[0], lo[1], label);
17418 if (code2 != UNKNOWN)
17419 emit_label (label2);
17424 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
17429 /* Split branch based on floating point condition. */
17431 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
17432 rtx target1, rtx target2, rtx tmp, rtx pushed)
17437 if (target2 != pc_rtx)
17440 code = reverse_condition_maybe_unordered (code);
17445 condition = ix86_expand_fp_compare (code, op1, op2,
17448 /* Remove pushed operand from stack. */
17450 ix86_free_from_memory (GET_MODE (pushed));
17452 i = emit_jump_insn (gen_rtx_SET
17454 gen_rtx_IF_THEN_ELSE (VOIDmode,
17455 condition, target1, target2)));
17456 if (split_branch_probability >= 0)
17457 add_reg_note (i, REG_BR_PROB, GEN_INT (split_branch_probability));
17461 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
17465 gcc_assert (GET_MODE (dest) == QImode);
17467 ret = ix86_expand_compare (code, op0, op1);
17468 PUT_MODE (ret, QImode);
17469 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
17472 /* Expand comparison setting or clearing carry flag. Return true when
17473 successful and set pop for the operation. */
17475 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
17477 enum machine_mode mode =
17478 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
17480 /* Do not handle double-mode compares that go through the special path. */
17481 if (mode == (TARGET_64BIT ? TImode : DImode))
17484 if (SCALAR_FLOAT_MODE_P (mode))
17486 rtx compare_op, compare_seq;
17488 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
17490 /* Shortcut: the following common codes never translate
17491 into carry-flag compares. */
17492 if (code == EQ || code == NE || code == UNEQ || code == LTGT
17493 || code == ORDERED || code == UNORDERED)
17496 /* These comparisons require the zero flag; swap operands so they won't. */
17497 if ((code == GT || code == UNLE || code == LE || code == UNGT)
17498 && !TARGET_IEEE_FP)
17503 code = swap_condition (code);
17506 /* Try to expand the comparison and verify that we end up with
17507 a carry-flag-based comparison. This fails only when
17508 we decide to expand the comparison using arithmetic, which is
17509 not a very common scenario. */
17511 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
17512 compare_seq = get_insns ();
17515 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
17516 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
17517 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
17519 code = GET_CODE (compare_op);
17521 if (code != LTU && code != GEU)
17524 emit_insn (compare_seq);
17529 if (!INTEGRAL_MODE_P (mode))
17538 /* Convert a==0 into (unsigned)a<1. */
17541 if (op1 != const0_rtx)
17544 code = (code == EQ ? LTU : GEU);
17547 /* Convert a>b into b<a or a>=b-1. */
17550 if (CONST_INT_P (op1))
17552 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
17553 /* Bail out on overflow. We can still swap operands, but that
17554 would force loading the constant into a register. */
17555 if (op1 == const0_rtx
17556 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
17558 code = (code == GTU ? GEU : LTU);
17565 code = (code == GTU ? LTU : GEU);
17569 /* Convert a>=0 into (unsigned)a<0x80000000. */
17572 if (mode == DImode || op1 != const0_rtx)
17574 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
17575 code = (code == LT ? GEU : LTU);
17579 if (mode == DImode || op1 != constm1_rtx)
17581 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
17582 code = (code == LE ? GEU : LTU);
17588 /* Swapping operands may cause a constant to appear as the first operand. */
17589 if (!nonimmediate_operand (op0, VOIDmode))
17591 if (!can_create_pseudo_p ())
17593 op0 = force_reg (mode, op0);
17595 *pop = ix86_expand_compare (code, op0, op1);
17596 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
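/* Examples of the conversions above (illustrative): "a == 0" becomes
   "(unsigned) a < 1" and "a >= 0" becomes "(unsigned) a < 0x80000000",
   both of which a single cmp can answer through the carry flag
   alone.  */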
17601 ix86_expand_int_movcc (rtx operands[])
17603 enum rtx_code code = GET_CODE (operands[1]), compare_code;
17604 rtx compare_seq, compare_op;
17605 enum machine_mode mode = GET_MODE (operands[0]);
17606 bool sign_bit_compare_p = false;
17607 rtx op0 = XEXP (operands[1], 0);
17608 rtx op1 = XEXP (operands[1], 1);
17611 compare_op = ix86_expand_compare (code, op0, op1);
17612 compare_seq = get_insns ();
17615 compare_code = GET_CODE (compare_op);
17617 if ((op1 == const0_rtx && (code == GE || code == LT))
17618 || (op1 == constm1_rtx && (code == GT || code == LE)))
17619 sign_bit_compare_p = true;
17621 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
17622 HImode insns, we'd be swallowed in word prefix ops. */
17624 if ((mode != HImode || TARGET_FAST_PREFIX)
17625 && (mode != (TARGET_64BIT ? TImode : DImode))
17626 && CONST_INT_P (operands[2])
17627 && CONST_INT_P (operands[3]))
17629 rtx out = operands[0];
17630 HOST_WIDE_INT ct = INTVAL (operands[2]);
17631 HOST_WIDE_INT cf = INTVAL (operands[3]);
17632 HOST_WIDE_INT diff;
17635 /* Sign bit compares are better done using shifts than by using
17636 sbb. */
17637 if (sign_bit_compare_p
17638 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
17640 /* Detect overlap between destination and compare sources. */
17643 if (!sign_bit_compare_p)
17646 bool fpcmp = false;
17648 compare_code = GET_CODE (compare_op);
17650 flags = XEXP (compare_op, 0);
17652 if (GET_MODE (flags) == CCFPmode
17653 || GET_MODE (flags) == CCFPUmode)
17657 = ix86_fp_compare_code_to_integer (compare_code);
17660 /* To simplify the rest of the code, restrict to the GEU case. */
17661 if (compare_code == LTU)
17663 HOST_WIDE_INT tmp = ct;
17666 compare_code = reverse_condition (compare_code);
17667 code = reverse_condition (code);
17672 PUT_CODE (compare_op,
17673 reverse_condition_maybe_unordered
17674 (GET_CODE (compare_op)));
17676 PUT_CODE (compare_op,
17677 reverse_condition (GET_CODE (compare_op)));
17681 if (reg_overlap_mentioned_p (out, op0)
17682 || reg_overlap_mentioned_p (out, op1))
17683 tmp = gen_reg_rtx (mode);
17685 if (mode == DImode)
17686 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
17688 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
17689 flags, compare_op));
17693 if (code == GT || code == GE)
17694 code = reverse_condition (code);
17697 HOST_WIDE_INT tmp = ct;
17702 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
17715 tmp = expand_simple_binop (mode, PLUS,
17717 copy_rtx (tmp), 1, OPTAB_DIRECT);
17728 tmp = expand_simple_binop (mode, IOR,
17730 copy_rtx (tmp), 1, OPTAB_DIRECT);
17732 else if (diff == -1 && ct)
17742 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
17744 tmp = expand_simple_binop (mode, PLUS,
17745 copy_rtx (tmp), GEN_INT (cf),
17746 copy_rtx (tmp), 1, OPTAB_DIRECT);
17754 * andl cf - ct, dest
17764 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
17767 tmp = expand_simple_binop (mode, AND,
17769 gen_int_mode (cf - ct, mode),
17770 copy_rtx (tmp), 1, OPTAB_DIRECT);
17772 tmp = expand_simple_binop (mode, PLUS,
17773 copy_rtx (tmp), GEN_INT (ct),
17774 copy_rtx (tmp), 1, OPTAB_DIRECT);
17777 if (!rtx_equal_p (tmp, out))
17778 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
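/* Worked example of the sbb sequence built above, selecting between two
   constants A and B (a sketch; the actual register choice is up to
   reload):

	cmpl	op1, op0	; carry flag holds the GEU/LTU result
	sbbl	%eax, %eax	; eax = CF ? -1 : 0
	andl	$(A - B), %eax
	addl	$B, %eax	; eax = CF ? A : B

   e.g. A = 7, B = 3 gives "andl $4" / "addl $3": 7 when the carry is
   set, 3 otherwise. The diff == 1 and diff == -1 special cases above
   shrink the tail to a single add, an ior, or a not/add pair.  */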
17785 enum machine_mode cmp_mode = GET_MODE (op0);
17788 tmp = ct, ct = cf, cf = tmp;
17791 if (SCALAR_FLOAT_MODE_P (cmp_mode))
17793 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
17795 /* We may be reversing an unordered compare to a normal compare, which
17796 is not valid in general (we may convert a non-trapping condition
17797 to a trapping one); however, on i386 we currently emit all
17798 comparisons unordered. */
17799 compare_code = reverse_condition_maybe_unordered (compare_code);
17800 code = reverse_condition_maybe_unordered (code);
17804 compare_code = reverse_condition (compare_code);
17805 code = reverse_condition (code);
17809 compare_code = UNKNOWN;
17810 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
17811 && CONST_INT_P (op1))
17813 if (op1 == const0_rtx
17814 && (code == LT || code == GE))
17815 compare_code = code;
17816 else if (op1 == constm1_rtx)
17820 else if (code == GT)
17825 /* Optimize dest = (op0 < 0) ? -1 : cf. */
17826 if (compare_code != UNKNOWN
17827 && GET_MODE (op0) == GET_MODE (out)
17828 && (cf == -1 || ct == -1))
17830 /* If lea code below could be used, only optimize
17831 if it results in a 2 insn sequence. */
17833 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
17834 || diff == 3 || diff == 5 || diff == 9)
17835 || (compare_code == LT && ct == -1)
17836 || (compare_code == GE && cf == -1))
17839 * notl op1 (if necessary)
17847 code = reverse_condition (code);
17850 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
17852 out = expand_simple_binop (mode, IOR,
17854 out, 1, OPTAB_DIRECT);
17855 if (out != operands[0])
17856 emit_move_insn (operands[0], out);
17863 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
17864 || diff == 3 || diff == 5 || diff == 9)
17865 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
17867 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
17873 * lea cf(dest*(ct-cf)),dest
17877 * This also catches the degenerate setcc-only case.
17883 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
17886 /* On x86_64 the lea instruction operates on Pmode, so we need
17887 to get the arithmetic done in the proper mode to match. */
17889 tmp = copy_rtx (out);
17893 out1 = copy_rtx (out);
17894 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
17898 tmp = gen_rtx_PLUS (mode, tmp, out1);
17904 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
17907 if (!rtx_equal_p (tmp, out))
17910 out = force_operand (tmp, copy_rtx (out));
17912 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
17914 if (!rtx_equal_p (out, operands[0]))
17915 emit_move_insn (operands[0], copy_rtx (out));
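/* E.g. for dest = cond ? 8 : 3 we have diff == 5, and the store-flag
   plus lea shape above becomes (sketch, registers arbitrary):

	xorl	%eax, %eax
	setcc	%al			; eax = cond ? 1 : 0
	leal	3(%eax,%eax,4), %eax	; eax = 3 + 5*eax = cond ? 8 : 3

   which is why only diff values expressible through an lea address
   (1, 2, 3, 4, 5, 8, 9) are accepted here.  */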
17921 * General case: Jumpful:
17922 * xorl dest,dest cmpl op1, op2
17923 * cmpl op1, op2 movl ct, dest
17924 * setcc dest jcc 1f
17925 * decl dest movl cf, dest
17926 * andl (cf-ct),dest 1:
17929 * Size 20. Size 14.
17931 * This is reasonably steep, but branch mispredict costs are
17932 * high on modern cpus, so consider failing only if optimizing
17933 * for size. */
17936 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
17937 && BRANCH_COST (optimize_insn_for_speed_p (),
17942 enum machine_mode cmp_mode = GET_MODE (op0);
17947 if (SCALAR_FLOAT_MODE_P (cmp_mode))
17949 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
17951 /* We may be reversing an unordered compare to a normal compare,
17952 which is not valid in general (we may convert a non-trapping
17953 condition to a trapping one); however, on i386 we currently
17954 emit all comparisons unordered. */
17955 code = reverse_condition_maybe_unordered (code);
17959 code = reverse_condition (code);
17960 if (compare_code != UNKNOWN)
17961 compare_code = reverse_condition (compare_code);
17965 if (compare_code != UNKNOWN)
17967 /* notl op1 (if needed)
17972 For x < 0 (resp. x <= -1) there will be no notl,
17973 so if possible swap the constants to get rid of the
17974 complement.
17975 True/false will be -1/0 while code below (store flag
17976 followed by decrement) is 0/-1, so the constants need
17977 to be exchanged once more. */
17979 if (compare_code == GE || !cf)
17981 code = reverse_condition (code);
17986 HOST_WIDE_INT tmp = cf;
17991 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
17995 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
17997 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
17999 copy_rtx (out), 1, OPTAB_DIRECT);
18002 out = expand_simple_binop (mode, AND, copy_rtx (out),
18003 gen_int_mode (cf - ct, mode),
18004 copy_rtx (out), 1, OPTAB_DIRECT);
18006 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
18007 copy_rtx (out), 1, OPTAB_DIRECT);
18008 if (!rtx_equal_p (out, operands[0]))
18009 emit_move_insn (operands[0], copy_rtx (out));
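/* In C terms the branchless sequence just emitted computes (sketch):

     t  = (cond);       -- setcc: 0 or 1
     t -= 1;            -- false -> -1, true -> 0
     t &= cf - ct;      -- false -> cf - ct, true -> 0
     t += ct;           -- false -> cf, true -> ct

   matching the xorl/setcc/decl/andl/addl pattern in the size
   comparison above.  */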
18015 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
18017 /* Try a few things more with specific constants and a variable. */
18020 rtx var, orig_out, out, tmp;
18022 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
18025 /* If one of the two operands is an interesting constant, load a
18026 constant with the above and mask it in with a logical operation. */
18028 if (CONST_INT_P (operands[2]))
18031 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
18032 operands[3] = constm1_rtx, op = and_optab;
18033 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
18034 operands[3] = const0_rtx, op = ior_optab;
18038 else if (CONST_INT_P (operands[3]))
18041 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
18042 operands[2] = constm1_rtx, op = and_optab;
18043 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
18044 operands[2] = const0_rtx, op = ior_optab;
18051 orig_out = operands[0];
18052 tmp = gen_reg_rtx (mode);
18055 /* Recurse to get the constant loaded. */
18056 if (ix86_expand_int_movcc (operands) == 0)
18059 /* Mask in the interesting variable. */
18060 out = expand_binop (mode, op, var, tmp, orig_out, 0,
18062 if (!rtx_equal_p (out, orig_out))
18063 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
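/* E.g. for dest = cond ? 0 : v the recursive call loads
   tmp = cond ? 0 : -1 using the constant patterns above, and a single
   "and" then yields dest = v & tmp = cond ? 0 : v. The ior case is
   symmetric: tmp = cond ? -1 : 0 gives dest = v | tmp = cond ? -1 : v.
   (An illustrative sketch of the transformation.)  */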
18068 /*
18069 * For comparison with above,
18070 *
18071 * movl cf,dest
18072 * movl ct,tmp
18073 * cmpl op1,op2
18074 * cmovcc tmp,dest
18075 *
18076 * Size 15.
18077 */
18079 if (! nonimmediate_operand (operands[2], mode))
18080 operands[2] = force_reg (mode, operands[2]);
18081 if (! nonimmediate_operand (operands[3], mode))
18082 operands[3] = force_reg (mode, operands[3]);
18084 if (! register_operand (operands[2], VOIDmode)
18085 && (mode == QImode
18086 || ! register_operand (operands[3], VOIDmode)))
18087 operands[2] = force_reg (mode, operands[2]);
18089 if (mode == QImode
18090 && ! register_operand (operands[3], VOIDmode))
18091 operands[3] = force_reg (mode, operands[3]);
18093 emit_insn (compare_seq);
18094 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
18095 gen_rtx_IF_THEN_ELSE (mode,
18096 compare_op, operands[2],
18101 /* Swap, force into registers, or otherwise massage the two operands
18102 to an sse comparison with a mask result. Thus we differ a bit from
18103 ix86_prepare_fp_compare_args which expects to produce a flags result.
18105 The DEST operand exists to help determine whether to commute commutative
18106 operators. The POP0/POP1 operands are updated in place. The new
18107 comparison code is returned, or UNKNOWN if not implementable. */
18109 static enum rtx_code
18110 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
18111 rtx *pop0, rtx *pop1)
18119 /* We have no LTGT as an operator. We could implement it with
18120 NE & ORDERED, but this requires an extra temporary. It's
18121 not clear that it's worth it. */
18128 /* These are supported directly. */
18135 /* For commutative operators, try to canonicalize the destination
18136 operand to be first in the comparison - this helps reload to
18137 avoid extra moves. */
18138 if (!dest || !rtx_equal_p (dest, *pop1))
18146 /* These are not supported directly. Swap the comparison operands
18147 to transform into something that is supported. */
18151 code = swap_condition (code);
18155 gcc_unreachable ();
18161 /* Detect conditional moves that exactly match min/max operational
18162 semantics. Note that this is IEEE safe, as long as we don't
18163 interchange the operands.
18165 Returns FALSE if this conditional move doesn't match a MIN/MAX,
18166 and TRUE if the operation is successful and instructions are emitted. */
18169 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
18170 rtx cmp_op1, rtx if_true, rtx if_false)
18172 enum machine_mode mode;
18178 else if (code == UNGE)
18181 if_true = if_false;
18187 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
18189 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
18194 mode = GET_MODE (dest);
18196 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
18197 but MODE may be a vector mode and thus not appropriate. */
18198 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
18200 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
18203 if_true = force_reg (mode, if_true);
18204 v = gen_rtvec (2, if_true, if_false);
18205 tmp = gen_rtx_UNSPEC (mode, v, u);
18209 code = is_min ? SMIN : SMAX;
18210 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
18213 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
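/* The operand order is what makes this IEEE safe: minss/minps return
   their second operand whenever the comparison is false or unordered,
   so "a < b ? a : b" matches minss exactly (returning b for NaNs and
   for the -0.0/+0.0 tie), while the swapped form would not. Sketch of
   the scalar semantics:

     minss (a, b) == (a < b) ? a : b

   hence the strict rtx_equal_p operand matching above.  */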
18217 /* Expand an sse vector comparison. Return the register with the result. */
18220 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
18221 rtx op_true, rtx op_false)
18223 enum machine_mode mode = GET_MODE (dest);
18226 cmp_op0 = force_reg (mode, cmp_op0);
18227 if (!nonimmediate_operand (cmp_op1, mode))
18228 cmp_op1 = force_reg (mode, cmp_op1);
18231 || reg_overlap_mentioned_p (dest, op_true)
18232 || reg_overlap_mentioned_p (dest, op_false))
18233 dest = gen_reg_rtx (mode);
18235 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
18236 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18241 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
18242 operations. This is used for both scalar and vector conditional moves. */
18245 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
18247 enum machine_mode mode = GET_MODE (dest);
18250 if (op_false == CONST0_RTX (mode))
18252 op_true = force_reg (mode, op_true);
18253 x = gen_rtx_AND (mode, cmp, op_true);
18254 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18256 else if (op_true == CONST0_RTX (mode))
18258 op_false = force_reg (mode, op_false);
18259 x = gen_rtx_NOT (mode, cmp);
18260 x = gen_rtx_AND (mode, x, op_false);
18261 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18263 else if (TARGET_XOP)
18265 rtx pcmov = gen_rtx_SET (mode, dest,
18266 gen_rtx_IF_THEN_ELSE (mode, cmp,
18273 op_true = force_reg (mode, op_true);
18274 op_false = force_reg (mode, op_false);
18276 t2 = gen_reg_rtx (mode);
18278 t3 = gen_reg_rtx (mode);
18282 x = gen_rtx_AND (mode, op_true, cmp);
18283 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
18285 x = gen_rtx_NOT (mode, cmp);
18286 x = gen_rtx_AND (mode, x, op_false);
18287 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
18289 x = gen_rtx_IOR (mode, t3, t2);
18290 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
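/* Per-lane view of the and/andnot/ior fallback just emitted, where
   CMP is all-ones in lanes where the predicate held (sketch):

     t2   =  cmp & op_true;     -- pand / andps
     t3   = ~cmp & op_false;    -- pandn / andnps
     dest =  t2 | t3;           -- por / orps

   i.e. each lane selects op_true under the mask, op_false otherwise.  */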
18294 /* Expand a floating-point conditional move. Return true if successful. */
18297 ix86_expand_fp_movcc (rtx operands[])
18299 enum machine_mode mode = GET_MODE (operands[0]);
18300 enum rtx_code code = GET_CODE (operands[1]);
18301 rtx tmp, compare_op;
18302 rtx op0 = XEXP (operands[1], 0);
18303 rtx op1 = XEXP (operands[1], 1);
18305 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
18307 enum machine_mode cmode;
18309 /* Since we've no cmove for sse registers, don't force bad register
18310 allocation just to gain access to it. Deny movcc when the
18311 comparison mode doesn't match the move mode. */
18312 cmode = GET_MODE (op0);
18313 if (cmode == VOIDmode)
18314 cmode = GET_MODE (op1);
18318 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
18319 if (code == UNKNOWN)
18322 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
18323 operands[2], operands[3]))
18326 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
18327 operands[2], operands[3]);
18328 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
18332 /* The floating point conditional move instructions don't directly
18333 support conditions resulting from a signed integer comparison. */
18335 compare_op = ix86_expand_compare (code, op0, op1);
18336 if (!fcmov_comparison_operator (compare_op, VOIDmode))
18338 tmp = gen_reg_rtx (QImode);
18339 ix86_expand_setcc (tmp, code, op0, op1);
18341 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
18344 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
18345 gen_rtx_IF_THEN_ELSE (mode, compare_op,
18346 operands[2], operands[3])));
18351 /* Expand a floating-point vector conditional move; a vcond operation
18352 rather than a movcc operation. */
18355 ix86_expand_fp_vcond (rtx operands[])
18357 enum rtx_code code = GET_CODE (operands[3]);
18360 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
18361 &operands[4], &operands[5]);
18362 if (code == UNKNOWN)
18365 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
18366 operands[5], operands[1], operands[2]))
18369 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
18370 operands[1], operands[2]);
18371 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
18375 /* Expand a signed/unsigned integral vector conditional move. */
18378 ix86_expand_int_vcond (rtx operands[])
18380 enum machine_mode mode = GET_MODE (operands[0]);
18381 enum rtx_code code = GET_CODE (operands[3]);
18382 bool negate = false;
18385 cop0 = operands[4];
18386 cop1 = operands[5];
18388 /* XOP supports all of the comparisons on all vector int types. */
18391 /* Canonicalize the comparison to EQ, GT, GTU. */
18402 code = reverse_condition (code);
18408 code = reverse_condition (code);
18414 code = swap_condition (code);
18415 x = cop0, cop0 = cop1, cop1 = x;
18419 gcc_unreachable ();
18422 /* Only SSE4.1/SSE4.2 supports V2DImode. */
18423 if (mode == V2DImode)
18428 /* SSE4.1 supports EQ. */
18429 if (!TARGET_SSE4_1)
18435 /* SSE4.2 supports GT/GTU. */
18436 if (!TARGET_SSE4_2)
18441 gcc_unreachable ();
18445 /* Unsigned parallel compare is not supported by the hardware.
18446 Play some tricks to turn this into a signed comparison
18447 against 0. */
18450 cop0 = force_reg (mode, cop0);
18458 rtx (*gen_sub3) (rtx, rtx, rtx);
18460 /* Subtract (-(INT MAX) - 1) from both operands to make
18461 them signed. */
18462 mask = ix86_build_signbit_mask (mode, true, false);
18463 gen_sub3 = (mode == V4SImode
18464 ? gen_subv4si3 : gen_subv2di3);
18465 t1 = gen_reg_rtx (mode);
18466 emit_insn (gen_sub3 (t1, cop0, mask));
18468 t2 = gen_reg_rtx (mode);
18469 emit_insn (gen_sub3 (t2, cop1, mask));
18479 /* Perform a parallel unsigned saturating subtraction. */
18480 x = gen_reg_rtx (mode);
18481 emit_insn (gen_rtx_SET (VOIDmode, x,
18482 gen_rtx_US_MINUS (mode, cop0, cop1)));
18485 cop1 = CONST0_RTX (mode);
18491 gcc_unreachable ();
18496 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
18497 operands[1+negate], operands[2-negate]);
18499 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
18500 operands[2-negate]);
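/* The two unsigned-compare tricks above, in scalar terms (sketch,
   shown for 32-bit lanes):

     a >u b   <=>   (signed) (a - 0x80000000) > (signed) (b - 0x80000000)

   -- subtracting the sign-bit mask is the same as xoring it in, so
   both operands are biased into signed range -- and, for the narrow
   lanes that have psubusb/psubusw,

     a >u b   <=>   (a -us b) != 0

   where -us is unsigned saturating subtraction.  */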
18504 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
18505 true if we should do zero extension, else sign extension. HIGH_P is
18506 true if we want the N/2 high elements, else the low elements. */
18509 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
18511 enum machine_mode imode = GET_MODE (operands[1]);
18516 rtx (*unpack)(rtx, rtx);
18522 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
18524 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
18528 unpack = gen_sse4_1_zero_extendv4hiv4si2;
18530 unpack = gen_sse4_1_sign_extendv4hiv4si2;
18534 unpack = gen_sse4_1_zero_extendv2siv2di2;
18536 unpack = gen_sse4_1_sign_extendv2siv2di2;
18539 gcc_unreachable ();
18544 /* Shift higher 8 bytes to lower 8 bytes. */
18545 tmp = gen_reg_rtx (imode);
18546 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, tmp),
18547 gen_lowpart (V1TImode, operands[1]),
18553 emit_insn (unpack (operands[0], tmp));
18557 rtx (*unpack)(rtx, rtx, rtx);
18563 unpack = gen_vec_interleave_highv16qi;
18565 unpack = gen_vec_interleave_lowv16qi;
18569 unpack = gen_vec_interleave_highv8hi;
18571 unpack = gen_vec_interleave_lowv8hi;
18575 unpack = gen_vec_interleave_highv4si;
18577 unpack = gen_vec_interleave_lowv4si;
18580 gcc_unreachable ();
18583 dest = gen_lowpart (imode, operands[0]);
18586 tmp = force_reg (imode, CONST0_RTX (imode));
18588 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
18589 operands[1], pc_rtx, pc_rtx);
18591 emit_insn (unpack (dest, operands[1], tmp));
18595 /* Expand conditional increment or decrement using adc/sbb instructions.
18596 The default case, using setcc followed by a conditional move, can be
18597 done by generic code. */
18599 ix86_expand_int_addcc (rtx operands[])
18601 enum rtx_code code = GET_CODE (operands[1]);
18603 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
18605 rtx val = const0_rtx;
18606 bool fpcmp = false;
18607 enum machine_mode mode;
18608 rtx op0 = XEXP (operands[1], 0);
18609 rtx op1 = XEXP (operands[1], 1);
18611 if (operands[3] != const1_rtx
18612 && operands[3] != constm1_rtx)
18614 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
18616 code = GET_CODE (compare_op);
18618 flags = XEXP (compare_op, 0);
18620 if (GET_MODE (flags) == CCFPmode
18621 || GET_MODE (flags) == CCFPUmode)
18624 code = ix86_fp_compare_code_to_integer (code);
18631 PUT_CODE (compare_op,
18632 reverse_condition_maybe_unordered
18633 (GET_CODE (compare_op)));
18635 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
18638 mode = GET_MODE (operands[0]);
18640 /* Construct either adc or sbb insn. */
18641 if ((code == LTU) == (operands[3] == constm1_rtx))
18646 insn = gen_subqi3_carry;
18649 insn = gen_subhi3_carry;
18652 insn = gen_subsi3_carry;
18655 insn = gen_subdi3_carry;
18658 gcc_unreachable ();
18666 insn = gen_addqi3_carry;
18669 insn = gen_addhi3_carry;
18672 insn = gen_addsi3_carry;
18675 insn = gen_adddi3_carry;
18678 gcc_unreachable ();
18681 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
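/* E.g. for dest = x + (a <u b) the whole expansion is just (sketch,
   registers arbitrary):

	cmpl	%ebx, %eax	; CF = (a <u b)
	adcl	$0, %ecx	; x += CF

   and the decrement flavour uses "sbbl $0" the same way, which is why
   only the +1/-1 cases are accepted at the top of this function.  */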
18687 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
18688 but works for floating point parameters and non-offsettable memories.
18689 For pushes, it returns just stack offsets; the values will be saved
18690 in the right order. At most four parts are generated. */
18693 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
18698 size = mode == XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
18700 size = (GET_MODE_SIZE (mode) + 4) / 8;
18702 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
18703 gcc_assert (size >= 2 && size <= 4);
18705 /* Optimize constant pool reference to immediates. This is used by fp
18706 moves, which force all constants to memory to allow combining. */
18707 if (MEM_P (operand) && MEM_READONLY_P (operand))
18709 rtx tmp = maybe_get_pool_constant (operand);
18714 if (MEM_P (operand) && !offsettable_memref_p (operand))
18716 /* The only non-offsettable memories we handle are pushes. */
18717 int ok = push_operand (operand, VOIDmode);
18721 operand = copy_rtx (operand);
18722 PUT_MODE (operand, Pmode);
18723 parts[0] = parts[1] = parts[2] = parts[3] = operand;
18727 if (GET_CODE (operand) == CONST_VECTOR)
18729 enum machine_mode imode = int_mode_for_mode (mode);
18730 /* Caution: if we looked through a constant pool memory above,
18731 the operand may actually have a different mode now. That's
18732 ok, since we want to pun this all the way back to an integer. */
18733 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
18734 gcc_assert (operand != NULL);
18740 if (mode == DImode)
18741 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
18746 if (REG_P (operand))
18748 gcc_assert (reload_completed);
18749 for (i = 0; i < size; i++)
18750 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
18752 else if (offsettable_memref_p (operand))
18754 operand = adjust_address (operand, SImode, 0);
18755 parts[0] = operand;
18756 for (i = 1; i < size; i++)
18757 parts[i] = adjust_address (operand, SImode, 4 * i);
18759 else if (GET_CODE (operand) == CONST_DOUBLE)
18764 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
18768 real_to_target (l, &r, mode);
18769 parts[3] = gen_int_mode (l[3], SImode);
18770 parts[2] = gen_int_mode (l[2], SImode);
18773 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
18774 parts[2] = gen_int_mode (l[2], SImode);
18777 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
18780 gcc_unreachable ();
18782 parts[1] = gen_int_mode (l[1], SImode);
18783 parts[0] = gen_int_mode (l[0], SImode);
18786 gcc_unreachable ();
18791 if (mode == TImode)
18792 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
18793 if (mode == XFmode || mode == TFmode)
18795 enum machine_mode upper_mode = mode == XFmode ? SImode : DImode;
18796 if (REG_P (operand))
18798 gcc_assert (reload_completed);
18799 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
18800 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
18802 else if (offsettable_memref_p (operand))
18804 operand = adjust_address (operand, DImode, 0);
18805 parts[0] = operand;
18806 parts[1] = adjust_address (operand, upper_mode, 8);
18808 else if (GET_CODE (operand) == CONST_DOUBLE)
18813 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
18814 real_to_target (l, &r, mode);
18816 /* Do not use shift by 32 to avoid warning on 32bit systems. */
18817 if (HOST_BITS_PER_WIDE_INT >= 64)
18820 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
18821 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
18824 parts[0] = immed_double_const (l[0], l[1], DImode);
18826 if (upper_mode == SImode)
18827 parts[1] = gen_int_mode (l[2], SImode);
18828 else if (HOST_BITS_PER_WIDE_INT >= 64)
18831 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
18832 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
18835 parts[1] = immed_double_const (l[2], l[3], DImode);
18838 gcc_unreachable ();
18845 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
18846 Return false when normal moves are needed; true when all required
18847 insns have been emitted. Operands 2-4 contain the input values
18848 in the correct order; operands 5-7 contain the output values. */
18851 ix86_split_long_move (rtx operands[])
18856 int collisions = 0;
18857 enum machine_mode mode = GET_MODE (operands[0]);
18858 bool collisionparts[4];
18860 /* The DFmode expanders may ask us to move a double.
18861 For 64bit targets this is a single move. By hiding the fact
18862 here we simplify the i386.md splitters. */
18863 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
18865 /* Optimize constant pool reference to immediates. This is used by
18866 fp moves, which force all constants to memory to allow combining. */
18868 if (MEM_P (operands[1])
18869 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
18870 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
18871 operands[1] = get_pool_constant (XEXP (operands[1], 0));
18872 if (push_operand (operands[0], VOIDmode))
18874 operands[0] = copy_rtx (operands[0]);
18875 PUT_MODE (operands[0], Pmode);
18878 operands[0] = gen_lowpart (DImode, operands[0]);
18879 operands[1] = gen_lowpart (DImode, operands[1]);
18880 emit_move_insn (operands[0], operands[1]);
18884 /* The only non-offsettable memory we handle is a push. */
18885 if (push_operand (operands[0], VOIDmode))
18888 gcc_assert (!MEM_P (operands[0])
18889 || offsettable_memref_p (operands[0]));
18891 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
18892 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
18894 /* When emitting push, take care for source operands on the stack. */
18895 if (push && MEM_P (operands[1])
18896 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
18898 rtx src_base = XEXP (part[1][nparts - 1], 0);
18900 /* Compensate for the stack decrement by 4. */
18901 if (!TARGET_64BIT && nparts == 3
18902 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
18903 src_base = plus_constant (src_base, 4);
18905 /* src_base refers to the stack pointer and is
18906 automatically decreased by emitted push. */
18907 for (i = 0; i < nparts; i++)
18908 part[1][i] = change_address (part[1][i],
18909 GET_MODE (part[1][i]), src_base);
18912 /* We need to do the copy in the right order in case an address register
18913 of the source overlaps the destination. */
18914 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
18918 for (i = 0; i < nparts; i++)
18921 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
18922 if (collisionparts[i])
18926 /* Collision in the middle part can be handled by reordering. */
18927 if (collisions == 1 && nparts == 3 && collisionparts [1])
18929 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
18930 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
18932 else if (collisions == 1
18934 && (collisionparts [1] || collisionparts [2]))
18936 if (collisionparts [1])
18938 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
18939 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
18943 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
18944 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
18948 /* If there are more collisions, we can't handle it by reordering.
18949 Do an lea to the last part and use only one colliding move. */
18950 else if (collisions > 1)
18956 base = part[0][nparts - 1];
18958 /* Handle the case when the last part isn't valid for lea.
18959 Happens in 64-bit mode storing the 12-byte XFmode. */
18960 if (GET_MODE (base) != Pmode)
18961 base = gen_rtx_REG (Pmode, REGNO (base));
18963 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
18964 part[1][0] = replace_equiv_address (part[1][0], base);
18965 for (i = 1; i < nparts; i++)
18967 tmp = plus_constant (base, UNITS_PER_WORD * i);
18968 part[1][i] = replace_equiv_address (part[1][i], tmp);
18979 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
18980 emit_insn (gen_addsi3 (stack_pointer_rtx,
18981 stack_pointer_rtx, GEN_INT (-4)));
18982 emit_move_insn (part[0][2], part[1][2]);
18984 else if (nparts == 4)
18986 emit_move_insn (part[0][3], part[1][3]);
18987 emit_move_insn (part[0][2], part[1][2]);
18992 /* In 64bit mode we don't have a 32bit push available. If this is a
18993 register, it is OK -- we will just use the larger counterpart. We also
18994 retype memory -- this comes from an attempt to avoid a REX prefix on
18995 moving the second half of a TFmode value. */
18996 if (GET_MODE (part[1][1]) == SImode)
18998 switch (GET_CODE (part[1][1]))
19001 part[1][1] = adjust_address (part[1][1], DImode, 0);
19005 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
19009 gcc_unreachable ();
19012 if (GET_MODE (part[1][0]) == SImode)
19013 part[1][0] = part[1][1];
19016 emit_move_insn (part[0][1], part[1][1]);
19017 emit_move_insn (part[0][0], part[1][0]);
19021 /* Choose the correct order so as not to overwrite the source before it is copied. */
19022 if ((REG_P (part[0][0])
19023 && REG_P (part[1][1])
19024 && (REGNO (part[0][0]) == REGNO (part[1][1])
19026 && REGNO (part[0][0]) == REGNO (part[1][2]))
19028 && REGNO (part[0][0]) == REGNO (part[1][3]))))
19030 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
19032 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
19034 operands[2 + i] = part[0][j];
19035 operands[6 + i] = part[1][j];
19040 for (i = 0; i < nparts; i++)
19042 operands[2 + i] = part[0][i];
19043 operands[6 + i] = part[1][i];
19047 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
19048 if (optimize_insn_for_size_p ())
19050 for (j = 0; j < nparts - 1; j++)
19051 if (CONST_INT_P (operands[6 + j])
19052 && operands[6 + j] != const0_rtx
19053 && REG_P (operands[2 + j]))
19054 for (i = j; i < nparts - 1; i++)
19055 if (CONST_INT_P (operands[7 + i])
19056 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
19057 operands[7 + i] = operands[2 + j];
19060 for (i = 0; i < nparts; i++)
19061 emit_move_insn (operands[2 + i], operands[6 + i]);
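/* E.g. a size-optimized DImode store of the immediate pair {5, 5}
   becomes (illustrative sketch):

	movl	$5, %eax
	movl	%eax, <high destination>

   reusing the register that already holds the constant instead of
   encoding the same immediate twice.  */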
19066 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
19067 left shift by a constant, either using a single shift or
19068 a sequence of add instructions. */
19071 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
19073 rtx (*insn)(rtx, rtx, rtx);
19076 || (count * ix86_cost->add <= ix86_cost->shift_const
19077 && !optimize_insn_for_size_p ()))
19079 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
19080 while (count-- > 0)
19081 emit_insn (insn (operand, operand, operand));
19085 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
19086 emit_insn (insn (operand, operand, GEN_INT (count)));
19091 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
19093 rtx (*gen_ashl3)(rtx, rtx, rtx);
19094 rtx (*gen_shld)(rtx, rtx, rtx);
19095 int half_width = GET_MODE_BITSIZE (mode) >> 1;
19097 rtx low[2], high[2];
19100 if (CONST_INT_P (operands[2]))
19102 split_double_mode (mode, operands, 2, low, high);
19103 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
19105 if (count >= half_width)
19107 emit_move_insn (high[0], low[1]);
19108 emit_move_insn (low[0], const0_rtx);
19110 if (count > half_width)
19111 ix86_expand_ashl_const (high[0], count - half_width, mode);
19115 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
19117 if (!rtx_equal_p (operands[0], operands[1]))
19118 emit_move_insn (operands[0], operands[1]);
19120 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
19121 ix86_expand_ashl_const (low[0], count, mode);
19126 split_double_mode (mode, operands, 1, low, high);
19128 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
19130 if (operands[1] == const1_rtx)
19132 /* Assuming we've chosen QImode-capable registers, 1 << N can be
19133 done with two 32/64-bit shifts, no branches, no cmoves. */
19134 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
19136 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
19138 ix86_expand_clear (low[0]);
19139 ix86_expand_clear (high[0]);
19140 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
19142 d = gen_lowpart (QImode, low[0]);
19143 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
19144 s = gen_rtx_EQ (QImode, flags, const0_rtx);
19145 emit_insn (gen_rtx_SET (VOIDmode, d, s));
19147 d = gen_lowpart (QImode, high[0]);
19148 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
19149 s = gen_rtx_NE (QImode, flags, const0_rtx);
19150 emit_insn (gen_rtx_SET (VOIDmode, d, s));
19153 /* Otherwise, we can get the same results by manually performing
19154 a bit extract operation on bit 5/6, and then performing the two
19155 shifts. The two methods of getting 0/1 into low/high are exactly
19156 the same size. Avoiding the shift in the bit extract case helps
19157 pentium4 a bit; no one else seems to care much either way. */
19160 enum machine_mode half_mode;
19161 rtx (*gen_lshr3)(rtx, rtx, rtx);
19162 rtx (*gen_and3)(rtx, rtx, rtx);
19163 rtx (*gen_xor3)(rtx, rtx, rtx);
19164 HOST_WIDE_INT bits;
19167 if (mode == DImode)
19169 half_mode = SImode;
19170 gen_lshr3 = gen_lshrsi3;
19171 gen_and3 = gen_andsi3;
19172 gen_xor3 = gen_xorsi3;
19177 half_mode = DImode;
19178 gen_lshr3 = gen_lshrdi3;
19179 gen_and3 = gen_anddi3;
19180 gen_xor3 = gen_xordi3;
19184 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
19185 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
19187 x = gen_lowpart (half_mode, operands[2]);
19188 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
19190 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
19191 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
19192 emit_move_insn (low[0], high[0]);
19193 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
19196 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
19197 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
19201 if (operands[1] == constm1_rtx)
19203 /* For -1 << N, we can avoid the shld instruction, because we
19204 know that we're shifting 0...31/63 ones into a -1. */
19205 emit_move_insn (low[0], constm1_rtx);
19206 if (optimize_insn_for_size_p ())
19207 emit_move_insn (high[0], low[0]);
19209 emit_move_insn (high[0], constm1_rtx);
19213 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
19215 if (!rtx_equal_p (operands[0], operands[1]))
19216 emit_move_insn (operands[0], operands[1]);
19218 split_double_mode (mode, operands, 1, low, high);
19219 emit_insn (gen_shld (high[0], low[0], operands[2]));
19222 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
19224 if (TARGET_CMOVE && scratch)
19226 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
19227 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
19229 ix86_expand_clear (scratch);
19230 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
19234 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
19235 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
19237 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
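/* Shape of the variable-count case for a DImode shift on a 32-bit
   target (sketch, count in %cl):

	shldl	%cl, %eax, %edx	; high = high << cl | low >> (32 - cl)
	sall	%cl, %eax	; low <<= cl

   Since the hardware masks shift counts to 5 bits, the
   gen_x86_shift*_adj sequence afterwards tests bit 5 of the count and,
   when it is set, moves low into high and clears low.  */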
19242 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
19244 rtx (*gen_ashr3)(rtx, rtx, rtx)
19245 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
19246 rtx (*gen_shrd)(rtx, rtx, rtx);
19247 int half_width = GET_MODE_BITSIZE (mode) >> 1;
19249 rtx low[2], high[2];
19252 if (CONST_INT_P (operands[2]))
19254 split_double_mode (mode, operands, 2, low, high);
19255 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
19257 if (count == GET_MODE_BITSIZE (mode) - 1)
19259 emit_move_insn (high[0], high[1]);
19260 emit_insn (gen_ashr3 (high[0], high[0],
19261 GEN_INT (half_width - 1)));
19262 emit_move_insn (low[0], high[0]);
19265 else if (count >= half_width)
19267 emit_move_insn (low[0], high[1]);
19268 emit_move_insn (high[0], low[0]);
19269 emit_insn (gen_ashr3 (high[0], high[0],
19270 GEN_INT (half_width - 1)));
19272 if (count > half_width)
19273 emit_insn (gen_ashr3 (low[0], low[0],
19274 GEN_INT (count - half_width)));
19278 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19280 if (!rtx_equal_p (operands[0], operands[1]))
19281 emit_move_insn (operands[0], operands[1]);
19283 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
19284 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
19289 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19291 if (!rtx_equal_p (operands[0], operands[1]))
19292 emit_move_insn (operands[0], operands[1]);
19294 split_double_mode (mode, operands, 1, low, high);
19296 emit_insn (gen_shrd (low[0], high[0], operands[2]));
19297 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
19299 if (TARGET_CMOVE && scratch)
19301 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
19302 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
19304 emit_move_insn (scratch, high[0]);
19305 emit_insn (gen_ashr3 (scratch, scratch,
19306 GEN_INT (half_width - 1)));
19307 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
19312 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
19313 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
19315 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
19321 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
19323 rtx (*gen_lshr3)(rtx, rtx, rtx)
19324 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
19325 rtx (*gen_shrd)(rtx, rtx, rtx);
19326 int half_width = GET_MODE_BITSIZE (mode) >> 1;
19328 rtx low[2], high[2];
19331 if (CONST_INT_P (operands[2]))
19333 split_double_mode (mode, operands, 2, low, high);
19334 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
19336 if (count >= half_width)
19338 emit_move_insn (low[0], high[1]);
19339 ix86_expand_clear (high[0]);
19341 if (count > half_width)
19342 emit_insn (gen_lshr3 (low[0], low[0],
19343 GEN_INT (count - half_width)));
19347 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19349 if (!rtx_equal_p (operands[0], operands[1]))
19350 emit_move_insn (operands[0], operands[1]);
19352 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
19353 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
19358 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19360 if (!rtx_equal_p (operands[0], operands[1]))
19361 emit_move_insn (operands[0], operands[1]);
19363 split_double_mode (mode, operands, 1, low, high);
19365 emit_insn (gen_shrd (low[0], high[0], operands[2]));
19366 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
19368 if (TARGET_CMOVE && scratch)
19370 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
19371 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
19373 ix86_expand_clear (scratch);
19374 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
19379 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
19380 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
19382 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
19387 /* Predict the just-emitted jump instruction to be taken with probability PROB. */
19389 predict_jump (int prob)
19391 rtx insn = get_last_insn ();
19392 gcc_assert (JUMP_P (insn));
19393 add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
19396 /* Helper function for the string operations below. Tests whether
19397 (VARIABLE & VALUE) is zero and, if so, emits a jump to the returned label. */
19399 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
19401 rtx label = gen_label_rtx ();
19402 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
19403 if (GET_MODE (variable) == DImode)
19404 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
19406 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
19407 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
19410 predict_jump (REG_BR_PROB_BASE * 50 / 100);
19412 predict_jump (REG_BR_PROB_BASE * 90 / 100);
19416 /* Decrease COUNTREG by VALUE. */
19418 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
19420 rtx (*gen_add)(rtx, rtx, rtx)
19421 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
19423 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
19426 /* Zero extend the possibly-SImode EXP to a Pmode register. */
19428 ix86_zero_extend_to_Pmode (rtx exp)
19431 if (GET_MODE (exp) == VOIDmode)
19432 return force_reg (Pmode, exp);
19433 if (GET_MODE (exp) == Pmode)
19434 return copy_to_mode_reg (Pmode, exp);
19435 r = gen_reg_rtx (Pmode);
19436 emit_insn (gen_zero_extendsidi2 (r, exp));
19440 /* Divide COUNTREG by SCALE. */
19442 scale_counter (rtx countreg, int scale)
19448 if (CONST_INT_P (countreg))
19449 return GEN_INT (INTVAL (countreg) / scale);
19450 gcc_assert (REG_P (countreg));
19452 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
19453 GEN_INT (exact_log2 (scale)),
19454 NULL, 1, OPTAB_DIRECT);
19458 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
19459 DImode for constant loop counts. */
19461 static enum machine_mode
19462 counter_mode (rtx count_exp)
19464 if (GET_MODE (count_exp) != VOIDmode)
19465 return GET_MODE (count_exp);
19466 if (!CONST_INT_P (count_exp))
19468 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
19473 /* When SRCPTR is non-NULL, output a simple loop to copy memory pointed to
19474 by SRCPTR to DESTPTR in chunks of MODE, unrolled UNROLL times; the overall
19475 size is COUNT, specified in bytes. When SRCPTR is NULL, output the
19476 equivalent loop to set memory to VALUE (supposed to be in MODE).
19478 The size is rounded down to a whole number of chunks moved at once.
19479 SRCMEM and DESTMEM provide the MEM rtxes to feed proper aliasing info. */
19483 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
19484 rtx destptr, rtx srcptr, rtx value,
19485 rtx count, enum machine_mode mode, int unroll,
19488 rtx out_label, top_label, iter, tmp;
19489 enum machine_mode iter_mode = counter_mode (count);
19490 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
19491 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
19497 top_label = gen_label_rtx ();
19498 out_label = gen_label_rtx ();
19499 iter = gen_reg_rtx (iter_mode);
19501 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
19502 NULL, 1, OPTAB_DIRECT);
19503 /* Those two should combine. */
19504 if (piece_size == const1_rtx)
19506 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
19508 predict_jump (REG_BR_PROB_BASE * 10 / 100);
19510 emit_move_insn (iter, const0_rtx);
19512 emit_label (top_label);
19514 tmp = convert_modes (Pmode, iter_mode, iter, true);
19515 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
19516 destmem = change_address (destmem, mode, x_addr);
19520 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
19521 srcmem = change_address (srcmem, mode, y_addr);
19523 /* When unrolling for chips that reorder memory reads and writes,
19524 we can save registers by using a single temporary.
19525 Also, using 4 temporaries is overkill in 32bit mode. */
19526 if (!TARGET_64BIT && 0)
19528 for (i = 0; i < unroll; i++)
19533 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
19535 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
19537 emit_move_insn (destmem, srcmem);
19543 gcc_assert (unroll <= 4);
19544 for (i = 0; i < unroll; i++)
19546 tmpreg[i] = gen_reg_rtx (mode);
19550 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
19552 emit_move_insn (tmpreg[i], srcmem);
19554 for (i = 0; i < unroll; i++)
19559 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
19561 emit_move_insn (destmem, tmpreg[i]);
19566 for (i = 0; i < unroll; i++)
19570 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
19571 emit_move_insn (destmem, value);
19574 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
19575 true, OPTAB_LIB_WIDEN);
19577 emit_move_insn (iter, tmp);
19579 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
19581 if (expected_size != -1)
19583 expected_size /= GET_MODE_SIZE (mode) * unroll;
19584 if (expected_size == 0)
19586 else if (expected_size > REG_BR_PROB_BASE)
19587 predict_jump (REG_BR_PROB_BASE - 1);
19589 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
19592 predict_jump (REG_BR_PROB_BASE * 80 / 100);
19593 iter = ix86_zero_extend_to_Pmode (iter);
19594 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
19595 true, OPTAB_LIB_WIDEN);
19596 if (tmp != destptr)
19597 emit_move_insn (destptr, tmp);
19600 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
19601 true, OPTAB_LIB_WIDEN);
19603 emit_move_insn (srcptr, tmp);
19605 emit_label (out_label);
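/* Shape of the loop emitted above for the copy case, in C terms
   (sketch with MODE = SImode and UNROLL = 4, i.e. a 16-byte step):

     size = count & ~15;
     for (iter = 0; iter < size; iter += 16)
       copy four ints from src + iter to dest + iter;
     dest += iter; src += iter;

   leaving the remaining count & 15 tail bytes to the epilogue code.  */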
19608 /* Output a "rep; mov" instruction.
19609 The arguments have the same meaning as for the previous function. */
19611 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
19612 rtx destptr, rtx srcptr,
19614 enum machine_mode mode)
19620 /* If the size is known, it is shorter to use rep movs. */
19621 if (mode == QImode && CONST_INT_P (count)
19622 && !(INTVAL (count) & 3))
19623 mode = SImode;
19625 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
19626 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
19627 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
19628 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
19629 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
19630 if (mode != QImode)
19632 destexp = gen_rtx_ASHIFT (Pmode, countreg,
19633 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
19634 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
19635 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
19636 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
19637 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
19641 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
19642 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
19644 if (CONST_INT_P (count))
19646 count = GEN_INT (INTVAL (count)
19647 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
19648 destmem = shallow_copy_rtx (destmem);
19649 srcmem = shallow_copy_rtx (srcmem);
19650 set_mem_size (destmem, count);
19651 set_mem_size (srcmem, count);
19655 if (MEM_SIZE (destmem))
19656 set_mem_size (destmem, NULL_RTX);
19657 if (MEM_SIZE (srcmem))
19658 set_mem_size (srcmem, NULL_RTX);
19660 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
19664 /* Output a "rep; stos" instruction.
19665 The arguments have the same meaning as for the previous function. */
19667 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
19668 rtx count, enum machine_mode mode,
19674 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
19675 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
19676 value = force_reg (mode, gen_lowpart (mode, value));
19677 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
19678 if (mode != QImode)
19680 destexp = gen_rtx_ASHIFT (Pmode, countreg,
19681 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
19682 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
19685 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
19686 if (orig_value == const0_rtx && CONST_INT_P (count))
19688 count = GEN_INT (INTVAL (count)
19689 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
19690 destmem = shallow_copy_rtx (destmem);
19691 set_mem_size (destmem, count);
19693 else if (MEM_SIZE (destmem))
19694 set_mem_size (destmem, NULL_RTX);
19695 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
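/* E.g. clearing COUNT bytes with MODE = SImode yields (sketch):

	; %eax = 0 (the value), %edi = dest
	movl	count, %ecx
	shrl	$2, %ecx	; scale the byte count to dwords
	rep stosl

   with the low two bits of the count left for the epilogue; the
   destexp expression built above describes the final %edi to the RTL.  */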
19699 emit_strmov (rtx destmem, rtx srcmem,
19700 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
19702 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
19703 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
19704 emit_insn (gen_strmov (destptr, dest, srcptr, src));
19707 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
19709 expand_movmem_epilogue (rtx destmem, rtx srcmem,
19710 rtx destptr, rtx srcptr, rtx count, int max_size)
19713 if (CONST_INT_P (count))
19715 HOST_WIDE_INT countval = INTVAL (count);
19718 if ((countval & 0x10) && max_size > 16)
19722 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
19723 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
19726 gcc_unreachable ();
19729 if ((countval & 0x08) && max_size > 8)
19732 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
19735 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
19736 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
19740 if ((countval & 0x04) && max_size > 4)
19742 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
19745 if ((countval & 0x02) && max_size > 2)
19747 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
19750 if ((countval & 0x01) && max_size > 1)
19752 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
19759 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
19760 count, 1, OPTAB_DIRECT);
19761 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
19762 count, QImode, 1, 4);
19766 /* When there are stringops, we can cheaply increase dest and src pointers.
19767 Otherwise we save code size by maintaining offset (zero is readily
19768 available from the preceding rep operation) and using x86 addressing modes. */
19770 if (TARGET_SINGLE_STRINGOP)
19774 rtx label = ix86_expand_aligntest (count, 4, true);
19775 src = change_address (srcmem, SImode, srcptr);
19776 dest = change_address (destmem, SImode, destptr);
19777 emit_insn (gen_strmov (destptr, dest, srcptr, src));
19778 emit_label (label);
19779 LABEL_NUSES (label) = 1;
19783 rtx label = ix86_expand_aligntest (count, 2, true);
19784 src = change_address (srcmem, HImode, srcptr);
19785 dest = change_address (destmem, HImode, destptr);
19786 emit_insn (gen_strmov (destptr, dest, srcptr, src));
19787 emit_label (label);
19788 LABEL_NUSES (label) = 1;
19792 rtx label = ix86_expand_aligntest (count, 1, true);
19793 src = change_address (srcmem, QImode, srcptr);
19794 dest = change_address (destmem, QImode, destptr);
19795 emit_insn (gen_strmov (destptr, dest, srcptr, src));
19796 emit_label (label);
19797 LABEL_NUSES (label) = 1;
19802 rtx offset = force_reg (Pmode, const0_rtx);
19807 rtx label = ix86_expand_aligntest (count, 4, true);
19808 src = change_address (srcmem, SImode, srcptr);
19809 dest = change_address (destmem, SImode, destptr);
19810 emit_move_insn (dest, src);
19811 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
19812 true, OPTAB_LIB_WIDEN);
19814 emit_move_insn (offset, tmp);
19815 emit_label (label);
19816 LABEL_NUSES (label) = 1;
19820 rtx label = ix86_expand_aligntest (count, 2, true);
19821 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
19822 src = change_address (srcmem, HImode, tmp);
19823 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
19824 dest = change_address (destmem, HImode, tmp);
19825 emit_move_insn (dest, src);
19826 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
19827 true, OPTAB_LIB_WIDEN);
19829 emit_move_insn (offset, tmp);
19830 emit_label (label);
19831 LABEL_NUSES (label) = 1;
19835 rtx label = ix86_expand_aligntest (count, 1, true);
19836 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
19837 src = change_address (srcmem, QImode, tmp);
19838 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
19839 dest = change_address (destmem, QImode, tmp);
19840 emit_move_insn (dest, src);
19841 emit_label (label);
19842 LABEL_NUSES (label) = 1;
19847 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
19849 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
19850 rtx count, int max_size)
19853 expand_simple_binop (counter_mode (count), AND, count,
19854 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
19855 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
19856 gen_lowpart (QImode, value), count, QImode,
19860 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
19862 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
19866 if (CONST_INT_P (count))
19868 HOST_WIDE_INT countval = INTVAL (count);
19871 if ((countval & 0x10) && max_size > 16)
19875 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
19876 emit_insn (gen_strset (destptr, dest, value));
19877 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
19878 emit_insn (gen_strset (destptr, dest, value));
19881 gcc_unreachable ();
19884 if ((countval & 0x08) && max_size > 8)
19888 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
19889 emit_insn (gen_strset (destptr, dest, value));
19893 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
19894 emit_insn (gen_strset (destptr, dest, value));
19895 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
19896 emit_insn (gen_strset (destptr, dest, value));
19900 if ((countval & 0x04) && max_size > 4)
19902 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
19903 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
19906 if ((countval & 0x02) && max_size > 2)
19908 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
19909 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
19912 if ((countval & 0x01) && max_size > 1)
19914 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
19915 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
19922 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
19927 rtx label = ix86_expand_aligntest (count, 16, true);
19930 dest = change_address (destmem, DImode, destptr);
19931 emit_insn (gen_strset (destptr, dest, value));
19932 emit_insn (gen_strset (destptr, dest, value));
19936 dest = change_address (destmem, SImode, destptr);
19937 emit_insn (gen_strset (destptr, dest, value));
19938 emit_insn (gen_strset (destptr, dest, value));
19939 emit_insn (gen_strset (destptr, dest, value));
19940 emit_insn (gen_strset (destptr, dest, value));
19942 emit_label (label);
19943 LABEL_NUSES (label) = 1;
19947 rtx label = ix86_expand_aligntest (count, 8, true);
19950 dest = change_address (destmem, DImode, destptr);
19951 emit_insn (gen_strset (destptr, dest, value));
19955 dest = change_address (destmem, SImode, destptr);
19956 emit_insn (gen_strset (destptr, dest, value));
19957 emit_insn (gen_strset (destptr, dest, value));
19959 emit_label (label);
19960 LABEL_NUSES (label) = 1;
19964 rtx label = ix86_expand_aligntest (count, 4, true);
19965 dest = change_address (destmem, SImode, destptr);
19966 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
19967 emit_label (label);
19968 LABEL_NUSES (label) = 1;
19972 rtx label = ix86_expand_aligntest (count, 2, true);
19973 dest = change_address (destmem, HImode, destptr);
19974 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
19975 emit_label (label);
19976 LABEL_NUSES (label) = 1;
19980 rtx label = ix86_expand_aligntest (count, 1, true);
19981 dest = change_address (destmem, QImode, destptr);
19982 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
19983 emit_label (label);
19984 LABEL_NUSES (label) = 1;
19988 /* Copy enough from SRC to DEST to align DEST, known to be aligned by ALIGN,
19989 to DESIRED_ALIGNMENT. */
19991 expand_movmem_prologue (rtx destmem, rtx srcmem,
19992 rtx destptr, rtx srcptr, rtx count,
19993 int align, int desired_alignment)
19995 if (align <= 1 && desired_alignment > 1)
19997 rtx label = ix86_expand_aligntest (destptr, 1, false);
19998 srcmem = change_address (srcmem, QImode, srcptr);
19999 destmem = change_address (destmem, QImode, destptr);
20000 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
20001 ix86_adjust_counter (count, 1);
20002 emit_label (label);
20003 LABEL_NUSES (label) = 1;
20005 if (align <= 2 && desired_alignment > 2)
20007 rtx label = ix86_expand_aligntest (destptr, 2, false);
20008 srcmem = change_address (srcmem, HImode, srcptr);
20009 destmem = change_address (destmem, HImode, destptr);
20010 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
20011 ix86_adjust_counter (count, 2);
20012 emit_label (label);
20013 LABEL_NUSES (label) = 1;
20015 if (align <= 4 && desired_alignment > 4)
20017 rtx label = ix86_expand_aligntest (destptr, 4, false);
20018 srcmem = change_address (srcmem, SImode, srcptr);
20019 destmem = change_address (destmem, SImode, destptr);
20020 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
20021 ix86_adjust_counter (count, 4);
20022 emit_label (label);
20023 LABEL_NUSES (label) = 1;
20025 gcc_assert (desired_alignment <= 8);
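/* E.g. raising an unknown (1-byte) alignment to 8 bytes peels at most
   one byte, one word and one dword (sketch):

	testl	$1, %edi ; jz 1f ; copy 1 byte  ; 1:
	testl	$2, %edi ; jz 2f ; copy 2 bytes ; 2:
	testl	$4, %edi ; jz 3f ; copy 4 bytes ; 3:

   decrementing the remaining count after each copied piece, so at most
   desired_alignment - 1 bytes are moved here.  */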
20028 /* Copy enough from SRC to DST to align DST to DESIRED_ALIGN.
20029 ALIGN_BYTES is how many bytes need to be copied. */
20031 expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
20032 int desired_align, int align_bytes)
20035 rtx src_size, dst_size;
20037 int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
20038 if (src_align_bytes >= 0)
20039 src_align_bytes = desired_align - src_align_bytes;
20040 src_size = MEM_SIZE (src);
20041 dst_size = MEM_SIZE (dst);
20042 if (align_bytes & 1)
20044 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
20045 src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
20047 emit_insn (gen_strmov (destreg, dst, srcreg, src));
20049 if (align_bytes & 2)
20051 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
20052 src = adjust_automodify_address_nv (src, HImode, srcreg, off);
20053 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
20054 set_mem_align (dst, 2 * BITS_PER_UNIT);
20055 if (src_align_bytes >= 0
20056 && (src_align_bytes & 1) == (align_bytes & 1)
20057 && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
20058 set_mem_align (src, 2 * BITS_PER_UNIT);
20060 emit_insn (gen_strmov (destreg, dst, srcreg, src));
20062 if (align_bytes & 4)
20064 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
20065 src = adjust_automodify_address_nv (src, SImode, srcreg, off);
20066 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
20067 set_mem_align (dst, 4 * BITS_PER_UNIT);
20068 if (src_align_bytes >= 0)
20070 unsigned int src_align = 0;
20071 if ((src_align_bytes & 3) == (align_bytes & 3))
20073 else if ((src_align_bytes & 1) == (align_bytes & 1))
20075 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
20076 set_mem_align (src, src_align * BITS_PER_UNIT);
20079 emit_insn (gen_strmov (destreg, dst, srcreg, src));
20081 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
20082 src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
20083 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
20084 set_mem_align (dst, desired_align * BITS_PER_UNIT);
20085 if (src_align_bytes >= 0)
20087 unsigned int src_align = 0;
20088 if ((src_align_bytes & 7) == (align_bytes & 7))
20090 else if ((src_align_bytes & 3) == (align_bytes & 3))
20092 else if ((src_align_bytes & 1) == (align_bytes & 1))
20094 if (src_align > (unsigned int) desired_align)
20095 src_align = desired_align;
20096 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
20097 set_mem_align (src, src_align * BITS_PER_UNIT);
20100 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
20102 set_mem_size (src, GEN_INT (INTVAL (src_size) - align_bytes));
20107 /* Set enough bytes in DEST to align DEST, known to be aligned by ALIGN,
20108 to DESIRED_ALIGNMENT. */
20110 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
20111 int align, int desired_alignment)
20113 if (align <= 1 && desired_alignment > 1)
20115 rtx label = ix86_expand_aligntest (destptr, 1, false);
20116 destmem = change_address (destmem, QImode, destptr);
20117 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
20118 ix86_adjust_counter (count, 1);
20119 emit_label (label);
20120 LABEL_NUSES (label) = 1;
20122 if (align <= 2 && desired_alignment > 2)
20124 rtx label = ix86_expand_aligntest (destptr, 2, false);
20125 destmem = change_address (destmem, HImode, destptr);
20126 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
20127 ix86_adjust_counter (count, 2);
20128 emit_label (label);
20129 LABEL_NUSES (label) = 1;
20131 if (align <= 4 && desired_alignment > 4)
20133 rtx label = ix86_expand_aligntest (destptr, 4, false);
20134 destmem = change_address (destmem, SImode, destptr);
20135 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
20136 ix86_adjust_counter (count, 4);
20137 emit_label (label);
20138 LABEL_NUSES (label) = 1;
20140 gcc_assert (desired_alignment <= 8);
20143 /* Set enough bytes in DST to align DST, known to be aligned by ALIGN, to
20144 DESIRED_ALIGN. ALIGN_BYTES is how many bytes need to be stored. */
20146 expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
20147 int desired_align, int align_bytes)
20150 rtx dst_size = MEM_SIZE (dst);
20151 if (align_bytes & 1)
20153 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
20155 emit_insn (gen_strset (destreg, dst,
20156 gen_lowpart (QImode, value)));
20158 if (align_bytes & 2)
20160 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
20161 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
20162 set_mem_align (dst, 2 * BITS_PER_UNIT);
20164 emit_insn (gen_strset (destreg, dst,
20165 gen_lowpart (HImode, value)));
20167 if (align_bytes & 4)
20169 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
20170 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
20171 set_mem_align (dst, 4 * BITS_PER_UNIT);
20173 emit_insn (gen_strset (destreg, dst,
20174 gen_lowpart (SImode, value)));
20176 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
20177 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
20178 set_mem_align (dst, desired_align * BITS_PER_UNIT);
20180 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
20184 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
20185 static enum stringop_alg
20186 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
20187 int *dynamic_check)
20189 const struct stringop_algs * algs;
20190 bool optimize_for_speed;
20191 /* Algorithms using the rep prefix want at least edi and ecx;
20192 additionally, memset wants eax and memcpy wants esi. Don't
20193 consider such algorithms if the user has appropriated those
20194 registers for their own purposes. */
20195 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
20197 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
20199 #define ALG_USABLE_P(alg) (rep_prefix_usable \
20200 || (alg != rep_prefix_1_byte \
20201 && alg != rep_prefix_4_byte \
20202 && alg != rep_prefix_8_byte))
20203 const struct processor_costs *cost;
20205 /* Even if the string operation call is cold, we still might spend a lot
20206 of time processing large blocks. */
20207 if (optimize_function_for_size_p (cfun)
20208 || (optimize_insn_for_size_p ()
20209 && expected_size != -1 && expected_size < 256))
20210 optimize_for_speed = false;
20212 optimize_for_speed = true;
20214 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
20216 *dynamic_check = -1;
20218 algs = &cost->memset[TARGET_64BIT != 0];
20220 algs = &cost->memcpy[TARGET_64BIT != 0];
20221 if (ix86_stringop_alg != no_stringop && ALG_USABLE_P (ix86_stringop_alg))
20222 return ix86_stringop_alg;
20223 /* rep; movq or rep; movl is the smallest variant. */
20224 else if (!optimize_for_speed)
20226 if (!count || (count & 3))
20227 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
20229 return rep_prefix_usable ? rep_prefix_4_byte : loop;
20231 /* Very tiny blocks are best handled via the loop; REP is expensive to set up.
20233 else if (expected_size != -1 && expected_size < 4)
20234 return loop_1_byte;
20235 else if (expected_size != -1)
20238 enum stringop_alg alg = libcall;
20239 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
20241 /* We get here if the algorithms that were not libcall-based
20242 were rep-prefix based and we are unable to use rep prefixes
20243 based on global register usage. Break out of the loop and
20244 use the heuristic below. */
20245 if (algs->size[i].max == 0)
20247 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
20249 enum stringop_alg candidate = algs->size[i].alg;
20251 if (candidate != libcall && ALG_USABLE_P (candidate))
20253 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
20254 last non-libcall inline algorithm. */
20255 if (TARGET_INLINE_ALL_STRINGOPS)
20257 /* When the current size is best copied by a libcall, but we
20258 are still forced to inline, run the heuristic below that
20259 will pick code for medium-sized blocks. */
20260 if (alg != libcall)
20264 else if (ALG_USABLE_P (candidate))
20268 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
20270 /* When asked to inline the call anyway, try to pick a meaningful choice.
20271 We look for the maximal size of block that is faster to copy by hand
20272 and take blocks of at most that size, guessing that the average size
20273 will be roughly half of the block.
20275 If this turns out to be bad, we might simply specify the preferred
20276 choice in ix86_costs. */
20277 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
20278 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
20281 enum stringop_alg alg;
20283 bool any_alg_usable_p = true;
20285 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
20287 enum stringop_alg candidate = algs->size[i].alg;
20288 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
20290 if (candidate != libcall && candidate
20291 && ALG_USABLE_P (candidate))
20292 max = algs->size[i].max;
20294 /* If there aren't any usable algorithms, then recursing on
20295 smaller sizes isn't going to find anything. Just return the
20296 simple byte-at-a-time copy loop. */
20297 if (!any_alg_usable_p)
20299 /* Pick something reasonable. */
20300 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
20301 *dynamic_check = 128;
20302 return loop_1_byte;
20306 alg = decide_alg (count, max / 2, memset, dynamic_check);
20307 gcc_assert (*dynamic_check == -1);
20308 gcc_assert (alg != libcall);
20309 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
20310 *dynamic_check = max;
20313 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
20314 #undef ALG_USABLE_P
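/* Example walk, assuming a hypothetical cost table (not taken from
   ix86_cost): with size[] = {{256, loop}, {8192, rep_prefix_4_byte},
   {-1, libcall}} and EXPECTED_SIZE == 1000, the 256-byte entry is skipped,
   the 8192-byte entry matches, and rep_prefix_4_byte is returned when rep
   prefixes are usable; otherwise the TARGET_INLINE_ALL_STRINGOPS path
   recurses with max / 2 to pick a loop-based algorithm.  */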
20317 /* Decide on alignment. We know that the operand is already aligned to ALIGN
20318 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
20320 decide_alignment (int align,
20321 enum stringop_alg alg,
20324 int desired_align = 0;
20328 gcc_unreachable ();
20330 case unrolled_loop:
20331 desired_align = GET_MODE_SIZE (Pmode);
20333 case rep_prefix_8_byte:
20336 case rep_prefix_4_byte:
20337 /* PentiumPro has special logic triggering for 8 byte aligned blocks,
20338 copying whole cacheline at once. */
20339 if (TARGET_PENTIUMPRO)
20344 case rep_prefix_1_byte:
20345 /* PentiumPro has special logic triggering for 8 byte aligned blocks,
20346 copying whole cacheline at once. */
20347 if (TARGET_PENTIUMPRO)
20361 if (desired_align < align)
20362 desired_align = align;
20363 if (expected_size != -1 && expected_size < 4)
20364 desired_align = align;
20365 return desired_align;
20368 /* Return the smallest power of 2 greater than VAL. */
20370 smallest_pow2_greater_than (int val)
20378 /* Expand string move (memcpy) operation. Use i386 string operations
20379 when profitable. expand_setmem contains similar code. The code
20380 depends upon architecture, block size and alignment, but always has
20381 the same overall structure:
20383 1) Prologue guard: Conditional that jumps up to epilogues for small
20384 blocks that can be handled by epilogue alone. This is faster
20385 but also needed for correctness, since the prologue assumes the block
20386 is larger than the desired alignment.
20388 Optional dynamic check for size and libcall for large
20389 blocks is emitted here too, with -minline-stringops-dynamically.
20391 2) Prologue: copy first few bytes in order to get destination
20392 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
20393 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
20394 copied. We emit either a jump tree on power of two sized
20395 blocks, or a byte loop.
20397 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
20398 with specified algorithm.
20400 4) Epilogue: code copying tail of the block that is too small to be
20401 handled by main body (or up to size guarded by prologue guard). */
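/* Schematically (a sketch, not literal RTL), for a dynamic count the
   expansion is laid out as:

       if (count < epilogue_size_needed) goto epilogue;           -- 1)
       copy bytes until dest reaches desired_align;               -- 2)
     body:
       copy size_needed bytes per iteration;                      -- 3)
     epilogue:
       copy the remaining count & (epilogue_size_needed - 1) bytes;  -- 4)  */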
20404 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
20405 rtx expected_align_exp, rtx expected_size_exp)
20411 rtx jump_around_label = NULL;
20412 HOST_WIDE_INT align = 1;
20413 unsigned HOST_WIDE_INT count = 0;
20414 HOST_WIDE_INT expected_size = -1;
20415 int size_needed = 0, epilogue_size_needed;
20416 int desired_align = 0, align_bytes = 0;
20417 enum stringop_alg alg;
20419 bool need_zero_guard = false;
20421 if (CONST_INT_P (align_exp))
20422 align = INTVAL (align_exp);
20423 /* i386 can do misaligned access at a reasonably increased cost. */
20424 if (CONST_INT_P (expected_align_exp)
20425 && INTVAL (expected_align_exp) > align)
20426 align = INTVAL (expected_align_exp);
20427 /* ALIGN is the minimum of destination and source alignment, but we care here
20428 just about destination alignment. */
20429 else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
20430 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
20432 if (CONST_INT_P (count_exp))
20433 count = expected_size = INTVAL (count_exp);
20434 if (CONST_INT_P (expected_size_exp) && count == 0)
20435 expected_size = INTVAL (expected_size_exp);
20437 /* Make sure we don't need to care about overflow later on. */
20438 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
20441 /* Step 0: Decide on preferred algorithm, desired alignment and
20442 size of chunks to be copied by main loop. */
20444 alg = decide_alg (count, expected_size, false, &dynamic_check);
20445 desired_align = decide_alignment (align, alg, expected_size);
20447 if (!TARGET_ALIGN_STRINGOPS)
20448 align = desired_align;
20450 if (alg == libcall)
20452 gcc_assert (alg != no_stringop);
20454 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
20455 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
20456 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
20461 gcc_unreachable ();
20463 need_zero_guard = true;
20464 size_needed = GET_MODE_SIZE (Pmode);
20466 case unrolled_loop:
20467 need_zero_guard = true;
20468 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
20470 case rep_prefix_8_byte:
20473 case rep_prefix_4_byte:
20476 case rep_prefix_1_byte:
20480 need_zero_guard = true;
20485 epilogue_size_needed = size_needed;
20487 /* Step 1: Prologue guard. */
20489 /* Alignment code needs count to be in register. */
20490 if (CONST_INT_P (count_exp) && desired_align > align)
20492 if (INTVAL (count_exp) > desired_align
20493 && INTVAL (count_exp) > size_needed)
20496 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
20497 if (align_bytes <= 0)
20500 align_bytes = desired_align - align_bytes;
20502 if (align_bytes == 0)
20503 count_exp = force_reg (counter_mode (count_exp), count_exp);
20505 gcc_assert (desired_align >= 1 && align >= 1);
20507 /* Ensure that alignment prologue won't copy past end of block. */
20508 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
20510 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
20511 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
20512 Make sure it is a power of 2. */
20513 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
20517 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
20519 /* If main algorithm works on QImode, no epilogue is needed.
20520 For small sizes just don't align anything. */
20521 if (size_needed == 1)
20522 desired_align = align;
20529 label = gen_label_rtx ();
20530 emit_cmp_and_jump_insns (count_exp,
20531 GEN_INT (epilogue_size_needed),
20532 LTU, 0, counter_mode (count_exp), 1, label);
20533 if (expected_size == -1 || expected_size < epilogue_size_needed)
20534 predict_jump (REG_BR_PROB_BASE * 60 / 100);
20536 predict_jump (REG_BR_PROB_BASE * 20 / 100);
20540 /* Emit code to decide at runtime whether a library call or inline code should be
20542 if (dynamic_check != -1)
20544 if (CONST_INT_P (count_exp))
20546 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
20548 emit_block_move_via_libcall (dst, src, count_exp, false);
20549 count_exp = const0_rtx;
20555 rtx hot_label = gen_label_rtx ();
20556 jump_around_label = gen_label_rtx ();
20557 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
20558 LEU, 0, GET_MODE (count_exp), 1, hot_label);
20559 predict_jump (REG_BR_PROB_BASE * 90 / 100);
20560 emit_block_move_via_libcall (dst, src, count_exp, false);
20561 emit_jump (jump_around_label);
20562 emit_label (hot_label);
20566 /* Step 2: Alignment prologue. */
20568 if (desired_align > align)
20570 if (align_bytes == 0)
20572 /* Except for the first move in epilogue, we no longer know
20573 constant offset in aliasing info. It doesn't seem worth
20574 the pain to maintain it for the first move, so throw away
20576 src = change_address (src, BLKmode, srcreg);
20577 dst = change_address (dst, BLKmode, destreg);
20578 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
20583 /* If we know how many bytes need to be stored before dst is
20584 sufficiently aligned, maintain aliasing info accurately. */
20585 dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
20586 desired_align, align_bytes);
20587 count_exp = plus_constant (count_exp, -align_bytes);
20588 count -= align_bytes;
20590 if (need_zero_guard
20591 && (count < (unsigned HOST_WIDE_INT) size_needed
20592 || (align_bytes == 0
20593 && count < ((unsigned HOST_WIDE_INT) size_needed
20594 + desired_align - align))))
20596 /* It is possible that we copied enough so the main loop will not
20598 gcc_assert (size_needed > 1);
20599 if (label == NULL_RTX)
20600 label = gen_label_rtx ();
20601 emit_cmp_and_jump_insns (count_exp,
20602 GEN_INT (size_needed),
20603 LTU, 0, counter_mode (count_exp), 1, label);
20604 if (expected_size == -1
20605 || expected_size < (desired_align - align) / 2 + size_needed)
20606 predict_jump (REG_BR_PROB_BASE * 20 / 100);
20608 predict_jump (REG_BR_PROB_BASE * 60 / 100);
20611 if (label && size_needed == 1)
20613 emit_label (label);
20614 LABEL_NUSES (label) = 1;
20616 epilogue_size_needed = 1;
20618 else if (label == NULL_RTX)
20619 epilogue_size_needed = size_needed;
20621 /* Step 3: Main loop. */
20627 gcc_unreachable ();
20629 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
20630 count_exp, QImode, 1, expected_size);
20633 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
20634 count_exp, Pmode, 1, expected_size);
20636 case unrolled_loop:
20637 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
20638 registers for 4 temporaries anyway. */
20639 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
20640 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
20643 case rep_prefix_8_byte:
20644 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
20647 case rep_prefix_4_byte:
20648 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
20651 case rep_prefix_1_byte:
20652 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
20656 /* Properly adjust the offsets of the src and dest memory for aliasing. */
20657 if (CONST_INT_P (count_exp))
20659 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
20660 (count / size_needed) * size_needed);
20661 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
20662 (count / size_needed) * size_needed);
20666 src = change_address (src, BLKmode, srcreg);
20667 dst = change_address (dst, BLKmode, destreg);
20670 /* Step 4: Epilogue to copy the remaining bytes. */
20674 /* When the main loop is done, COUNT_EXP might hold the original count,
20675 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
20676 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
20677 bytes. Compensate if needed. */
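/* Illustrative numbers: with SIZE_NEEDED == 4 and EPILOGUE_SIZE_NEEDED == 8,
   21 bytes left means the main loop moved 20 of them; COUNT_EXP is masked
   here to 21 & 3 == 1, since the epilogue would otherwise look at
   21 & 7 == 5 and re-copy data.  */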
20679 if (size_needed < epilogue_size_needed)
20682 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
20683 GEN_INT (size_needed - 1), count_exp, 1,
20685 if (tmp != count_exp)
20686 emit_move_insn (count_exp, tmp);
20688 emit_label (label);
20689 LABEL_NUSES (label) = 1;
20692 if (count_exp != const0_rtx && epilogue_size_needed > 1)
20693 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
20694 epilogue_size_needed);
20695 if (jump_around_label)
20696 emit_label (jump_around_label);
20700 /* Helper function for memset. For QImode value 0xXY produce
20701 0xXYXYXYXY of the width specified by MODE. This is essentially
20702 a * 0x01010101, but we can do slightly better than
20703 synth_mult by unwinding the sequence by hand on CPUs with
20706 promote_duplicated_reg (enum machine_mode mode, rtx val)
20708 enum machine_mode valmode = GET_MODE (val);
20710 int nops = mode == DImode ? 3 : 2;
20712 gcc_assert (mode == SImode || mode == DImode);
20713 if (val == const0_rtx)
20714 return copy_to_mode_reg (mode, const0_rtx);
20715 if (CONST_INT_P (val))
20717 HOST_WIDE_INT v = INTVAL (val) & 255;
20721 if (mode == DImode)
20722 v |= (v << 16) << 16;
20723 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
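/* The constant path above, restated as a plain C sketch (illustrative only,
   not part of the build; uses C integers instead of rtx):  */
#if 0
static unsigned long long
duplicate_byte_sketch (unsigned char byte, int dimode_p)
{
  unsigned long long v = byte;	/* e.g. 0xAB */
  v |= v << 8;			/* 0xABAB */
  v |= v << 16;			/* 0xABABABAB */
  if (dimode_p)
    v |= (v << 16) << 16;	/* 0xABABABABABABABAB */
  return v;
}
#endif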
20726 if (valmode == VOIDmode)
20728 if (valmode != QImode)
20729 val = gen_lowpart (QImode, val);
20730 if (mode == QImode)
20732 if (!TARGET_PARTIAL_REG_STALL)
20734 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
20735 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
20736 <= (ix86_cost->shift_const + ix86_cost->add) * nops
20737 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
20739 rtx reg = convert_modes (mode, QImode, val, true);
20740 tmp = promote_duplicated_reg (mode, const1_rtx);
20741 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
20746 rtx reg = convert_modes (mode, QImode, val, true);
20748 if (!TARGET_PARTIAL_REG_STALL)
20749 if (mode == SImode)
20750 emit_insn (gen_movsi_insv_1 (reg, reg));
20752 emit_insn (gen_movdi_insv_1 (reg, reg));
20755 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
20756 NULL, 1, OPTAB_DIRECT);
20758 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
20760 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
20761 NULL, 1, OPTAB_DIRECT);
20762 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
20763 if (mode == SImode)
20765 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
20766 NULL, 1, OPTAB_DIRECT);
20767 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
20772 /* Duplicate value VAL using promote_duplicated_reg into the maximal size
20773 needed by the main loop copying SIZE_NEEDED chunks and by the prologue
20774 getting alignment from ALIGN to DESIRED_ALIGN. */
20776 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
20781 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
20782 promoted_val = promote_duplicated_reg (DImode, val);
20783 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
20784 promoted_val = promote_duplicated_reg (SImode, val);
20785 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
20786 promoted_val = promote_duplicated_reg (HImode, val);
20788 promoted_val = val;
20790 return promoted_val;
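/* E.g. on 64-bit targets SIZE_NEEDED == 8 promotes VAL to DImode;
   SIZE_NEEDED == 2 with no extra alignment work promotes only to HImode,
   and a pure byte loop leaves VAL unpromoted.  */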
20793 /* Expand string set operation (memset). Use i386 string operations when
20794 profitable. See the expand_movmem comment for an explanation of the
20795 individual steps performed. */
20797 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
20798 rtx expected_align_exp, rtx expected_size_exp)
20803 rtx jump_around_label = NULL;
20804 HOST_WIDE_INT align = 1;
20805 unsigned HOST_WIDE_INT count = 0;
20806 HOST_WIDE_INT expected_size = -1;
20807 int size_needed = 0, epilogue_size_needed;
20808 int desired_align = 0, align_bytes = 0;
20809 enum stringop_alg alg;
20810 rtx promoted_val = NULL;
20811 bool force_loopy_epilogue = false;
20813 bool need_zero_guard = false;
20815 if (CONST_INT_P (align_exp))
20816 align = INTVAL (align_exp);
20817 /* i386 can do misaligned access at a reasonably increased cost. */
20818 if (CONST_INT_P (expected_align_exp)
20819 && INTVAL (expected_align_exp) > align)
20820 align = INTVAL (expected_align_exp);
20821 if (CONST_INT_P (count_exp))
20822 count = expected_size = INTVAL (count_exp);
20823 if (CONST_INT_P (expected_size_exp) && count == 0)
20824 expected_size = INTVAL (expected_size_exp);
20826 /* Make sure we don't need to care about overflow later on. */
20827 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
20830 /* Step 0: Decide on preferred algorithm, desired alignment and
20831 size of chunks to be copied by main loop. */
20833 alg = decide_alg (count, expected_size, true, &dynamic_check);
20834 desired_align = decide_alignment (align, alg, expected_size);
20836 if (!TARGET_ALIGN_STRINGOPS)
20837 align = desired_align;
20839 if (alg == libcall)
20841 gcc_assert (alg != no_stringop);
20843 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
20844 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
20849 gcc_unreachable ();
20851 need_zero_guard = true;
20852 size_needed = GET_MODE_SIZE (Pmode);
20854 case unrolled_loop:
20855 need_zero_guard = true;
20856 size_needed = GET_MODE_SIZE (Pmode) * 4;
20858 case rep_prefix_8_byte:
20861 case rep_prefix_4_byte:
20864 case rep_prefix_1_byte:
20868 need_zero_guard = true;
20872 epilogue_size_needed = size_needed;
20874 /* Step 1: Prologue guard. */
20876 /* Alignment code needs count to be in register. */
20877 if (CONST_INT_P (count_exp) && desired_align > align)
20879 if (INTVAL (count_exp) > desired_align
20880 && INTVAL (count_exp) > size_needed)
20883 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
20884 if (align_bytes <= 0)
20887 align_bytes = desired_align - align_bytes;
20889 if (align_bytes == 0)
20891 enum machine_mode mode = SImode;
20892 if (TARGET_64BIT && (count & ~0xffffffff))
20894 count_exp = force_reg (mode, count_exp);
20897 /* Do the cheap promotion to allow better CSE across the
20898 main loop and epilogue (i.e. one load of the big constant in
20899 front of all code). */
20900 if (CONST_INT_P (val_exp))
20901 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
20902 desired_align, align);
20903 /* Ensure that alignment prologue won't copy past end of block. */
20904 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
20906 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
20907 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
20908 Make sure it is a power of 2. */
20909 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
20911 /* To improve performance of small blocks, we jump around the VAL
20912 promoting code. This means that if the promoted VAL is not constant,
20913 we might not use it in the epilogue and have to use byte
20915 if (epilogue_size_needed > 2 && !promoted_val)
20916 force_loopy_epilogue = true;
20919 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
20921 /* If main algorithm works on QImode, no epilogue is needed.
20922 For small sizes just don't align anything. */
20923 if (size_needed == 1)
20924 desired_align = align;
20931 label = gen_label_rtx ();
20932 emit_cmp_and_jump_insns (count_exp,
20933 GEN_INT (epilogue_size_needed),
20934 LTU, 0, counter_mode (count_exp), 1, label);
20935 if (expected_size == -1 || expected_size <= epilogue_size_needed)
20936 predict_jump (REG_BR_PROB_BASE * 60 / 100);
20938 predict_jump (REG_BR_PROB_BASE * 20 / 100);
20941 if (dynamic_check != -1)
20943 rtx hot_label = gen_label_rtx ();
20944 jump_around_label = gen_label_rtx ();
20945 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
20946 LEU, 0, counter_mode (count_exp), 1, hot_label);
20947 predict_jump (REG_BR_PROB_BASE * 90 / 100);
20948 set_storage_via_libcall (dst, count_exp, val_exp, false);
20949 emit_jump (jump_around_label);
20950 emit_label (hot_label);
20953 /* Step 2: Alignment prologue. */
20955 /* Do the expensive promotion once we have branched off the small blocks. */
20957 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
20958 desired_align, align);
20959 gcc_assert (desired_align >= 1 && align >= 1);
20961 if (desired_align > align)
20963 if (align_bytes == 0)
20965 /* Except for the first move in epilogue, we no longer know
20966 constant offset in aliasing info. It doesn't seem worth
20967 the pain to maintain it for the first move, so throw away
20969 dst = change_address (dst, BLKmode, destreg);
20970 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
20975 /* If we know how many bytes need to be stored before dst is
20976 sufficiently aligned, maintain aliasing info accurately. */
20977 dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
20978 desired_align, align_bytes);
20979 count_exp = plus_constant (count_exp, -align_bytes);
20980 count -= align_bytes;
20982 if (need_zero_guard
20983 && (count < (unsigned HOST_WIDE_INT) size_needed
20984 || (align_bytes == 0
20985 && count < ((unsigned HOST_WIDE_INT) size_needed
20986 + desired_align - align))))
20988 /* It is possible that we copied enough so the main loop will not
20990 gcc_assert (size_needed > 1);
20991 if (label == NULL_RTX)
20992 label = gen_label_rtx ();
20993 emit_cmp_and_jump_insns (count_exp,
20994 GEN_INT (size_needed),
20995 LTU, 0, counter_mode (count_exp), 1, label);
20996 if (expected_size == -1
20997 || expected_size < (desired_align - align) / 2 + size_needed)
20998 predict_jump (REG_BR_PROB_BASE * 20 / 100);
21000 predict_jump (REG_BR_PROB_BASE * 60 / 100);
21003 if (label && size_needed == 1)
21005 emit_label (label);
21006 LABEL_NUSES (label) = 1;
21008 promoted_val = val_exp;
21009 epilogue_size_needed = 1;
21011 else if (label == NULL_RTX)
21012 epilogue_size_needed = size_needed;
21014 /* Step 3: Main loop. */
21020 gcc_unreachable ();
21022 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
21023 count_exp, QImode, 1, expected_size);
21026 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
21027 count_exp, Pmode, 1, expected_size);
21029 case unrolled_loop:
21030 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
21031 count_exp, Pmode, 4, expected_size);
21033 case rep_prefix_8_byte:
21034 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
21037 case rep_prefix_4_byte:
21038 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
21041 case rep_prefix_1_byte:
21042 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
21046 /* Properly adjust the offset of the destination memory for aliasing. */
21047 if (CONST_INT_P (count_exp))
21048 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
21049 (count / size_needed) * size_needed);
21051 dst = change_address (dst, BLKmode, destreg);
21053 /* Step 4: Epilogue to copy the remaining bytes. */
21057 /* When the main loop is done, COUNT_EXP might hold the original count,
21058 while we want to set only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
21059 Epilogue code will actually set COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
21060 bytes. Compensate if needed. */
21062 if (size_needed < epilogue_size_needed)
21065 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
21066 GEN_INT (size_needed - 1), count_exp, 1,
21068 if (tmp != count_exp)
21069 emit_move_insn (count_exp, tmp);
21071 emit_label (label);
21072 LABEL_NUSES (label) = 1;
21075 if (count_exp != const0_rtx && epilogue_size_needed > 1)
21077 if (force_loopy_epilogue)
21078 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
21079 epilogue_size_needed);
21081 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
21082 epilogue_size_needed);
21084 if (jump_around_label)
21085 emit_label (jump_around_label);
21089 /* Expand the appropriate insns for doing strlen if not just doing
21092 out = result, initialized with the start address
21093 align_rtx = alignment of the address.
21094 scratch = scratch register, initialized with the start address when
21095 not aligned, otherwise undefined
21097 This is just the body. It needs the initializations mentioned above and
21098 some address computing at the end. These things are done in i386.md. */
21101 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
21105 rtx align_2_label = NULL_RTX;
21106 rtx align_3_label = NULL_RTX;
21107 rtx align_4_label = gen_label_rtx ();
21108 rtx end_0_label = gen_label_rtx ();
21110 rtx tmpreg = gen_reg_rtx (SImode);
21111 rtx scratch = gen_reg_rtx (SImode);
21115 if (CONST_INT_P (align_rtx))
21116 align = INTVAL (align_rtx);
21118 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
21120 /* Is there a known alignment and is it less than 4? */
21123 rtx scratch1 = gen_reg_rtx (Pmode);
21124 emit_move_insn (scratch1, out);
21125 /* Is there a known alignment and is it not 2? */
21128 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
21129 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
21131 /* Leave just the two low bits. */
21132 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
21133 NULL_RTX, 0, OPTAB_WIDEN);
21135 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
21136 Pmode, 1, align_4_label);
21137 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
21138 Pmode, 1, align_2_label);
21139 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
21140 Pmode, 1, align_3_label);
21144 /* Since the alignment is 2, we have to check 2 or 0 bytes;
21145 check if it is aligned to a 4-byte boundary. */
21147 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
21148 NULL_RTX, 0, OPTAB_WIDEN);
21150 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
21151 Pmode, 1, align_4_label);
21154 mem = change_address (src, QImode, out);
21156 /* Now compare the bytes. */
21158 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
21159 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
21160 QImode, 1, end_0_label);
21162 /* Increment the address. */
21163 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
21165 /* Not needed with an alignment of 2 */
21168 emit_label (align_2_label);
21170 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
21173 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
21175 emit_label (align_3_label);
21178 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
21181 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
21184 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
21185 align this loop. It only makes the program huge and does not help to
21187 emit_label (align_4_label);
21189 mem = change_address (src, SImode, out);
21190 emit_move_insn (scratch, mem);
21191 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
21193 /* This formula yields a nonzero result iff one of the bytes is zero.
21194 This saves three branches inside the loop and many cycles. */
21196 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
21197 emit_insn (gen_one_cmplsi2 (scratch, scratch));
21198 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
21199 emit_insn (gen_andsi3 (tmpreg, tmpreg,
21200 gen_int_mode (0x80808080, SImode)));
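/* Worked example: SCRATCH == 0x41410041 (a zero in byte 1) gives
   0x41410041 - 0x01010101 == 0x403FFF40, ~0x41410041 == 0xBEBEFFBE,
   their AND is 0x003EFF00, and masking with 0x80808080 leaves
   0x00008000 != 0.  A plain C restatement (illustrative sketch only):

       has_zero_byte (x) := ((x - 0x01010101U) & ~x & 0x80808080U) != 0

   The ~x term discards bytes whose top bit was already set, so only a
   borrow out of a zero byte survives the final mask.  */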
21201 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
21206 rtx reg = gen_reg_rtx (SImode);
21207 rtx reg2 = gen_reg_rtx (Pmode);
21208 emit_move_insn (reg, tmpreg);
21209 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
21211 /* If zero is not in the first two bytes, move two bytes forward. */
21212 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
21213 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
21214 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
21215 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
21216 gen_rtx_IF_THEN_ELSE (SImode, tmp,
21219 /* Emit lea manually to avoid clobbering of flags. */
21220 emit_insn (gen_rtx_SET (SImode, reg2,
21221 gen_rtx_PLUS (Pmode, out, const2_rtx)));
21223 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
21224 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
21225 emit_insn (gen_rtx_SET (VOIDmode, out,
21226 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
21232 rtx end_2_label = gen_label_rtx ();
21233 /* Is zero in the first two bytes? */
21235 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
21236 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
21237 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
21238 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
21239 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
21241 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
21242 JUMP_LABEL (tmp) = end_2_label;
21244 /* Not in the first two. Move two bytes forward. */
21245 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
21246 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
21248 emit_label (end_2_label);
21252 /* Avoid a branch in fixing the byte. */
21253 tmpreg = gen_lowpart (QImode, tmpreg);
21254 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
21255 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
21256 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
21257 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
21259 emit_label (end_0_label);
21262 /* Expand strlen. */
21265 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
21267 rtx addr, scratch1, scratch2, scratch3, scratch4;
21269 /* The generic case of the strlen expander is long. Avoid expanding
21270 it unless TARGET_INLINE_ALL_STRINGOPS. */
21272 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
21273 && !TARGET_INLINE_ALL_STRINGOPS
21274 && !optimize_insn_for_size_p ()
21275 && (!CONST_INT_P (align) || INTVAL (align) < 4))
21278 addr = force_reg (Pmode, XEXP (src, 0));
21279 scratch1 = gen_reg_rtx (Pmode);
21281 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
21282 && !optimize_insn_for_size_p ())
21284 /* Well it seems that some optimizer does not combine a call like
21285 foo(strlen(bar), strlen(bar));
21286 when the move and the subtraction are done here. It does calculate
21287 the length just once when these instructions are done inside of
21288 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
21289 often used and I use one fewer register for the lifetime of
21290 output_strlen_unroll(), this is better. */
21292 emit_move_insn (out, addr);
21294 ix86_expand_strlensi_unroll_1 (out, src, align);
21296 /* strlensi_unroll_1 returns the address of the zero at the end of
21297 the string, like memchr(), so compute the length by subtracting
21298 the start address. */
21299 emit_insn (ix86_gen_sub3 (out, out, addr));
21305 /* Can't use this if the user has appropriated eax, ecx, or edi. */
21306 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
21309 scratch2 = gen_reg_rtx (Pmode);
21310 scratch3 = gen_reg_rtx (Pmode);
21311 scratch4 = force_reg (Pmode, constm1_rtx);
21313 emit_move_insn (scratch3, addr);
21314 eoschar = force_reg (QImode, eoschar);
21316 src = replace_equiv_address_nv (src, scratch3);
21318 /* If .md starts supporting :P, this can be done in .md. */
21319 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
21320 scratch4), UNSPEC_SCAS);
21321 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
21322 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
21323 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
21328 /* For a given symbol (function), construct code to compute the address of
21329 its PLT entry in the large x86-64 PIC model. */
21331 construct_plt_address (rtx symbol)
21333 rtx tmp = gen_reg_rtx (Pmode);
21334 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
21336 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
21337 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
21339 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
21340 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
21345 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
21347 rtx pop, bool sibcall)
21349 rtx use = NULL, call;
21351 if (pop == const0_rtx)
21353 gcc_assert (!TARGET_64BIT || !pop);
21355 if (TARGET_MACHO && !TARGET_64BIT)
21358 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
21359 fnaddr = machopic_indirect_call_target (fnaddr);
21364 /* Static functions and indirect calls don't need the pic register. */
21365 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
21366 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
21367 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
21368 use_reg (&use, pic_offset_table_rtx);
21371 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
21373 rtx al = gen_rtx_REG (QImode, AX_REG);
21374 emit_move_insn (al, callarg2);
21375 use_reg (&use, al);
21378 if (ix86_cmodel == CM_LARGE_PIC
21380 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
21381 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
21382 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
21384 ? !sibcall_insn_operand (XEXP (fnaddr, 0), Pmode)
21385 : !call_insn_operand (XEXP (fnaddr, 0), Pmode))
21387 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
21388 fnaddr = gen_rtx_MEM (QImode, fnaddr);
21391 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
21393 call = gen_rtx_SET (VOIDmode, retval, call);
21396 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
21397 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
21398 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
21400 if (TARGET_64BIT_MS_ABI
21401 && (!callarg2 || INTVAL (callarg2) != -2))
21403 /* We need to represent that SI and DI registers are clobbered
21405 static int clobbered_registers[] = {
21406 XMM6_REG, XMM7_REG, XMM8_REG,
21407 XMM9_REG, XMM10_REG, XMM11_REG,
21408 XMM12_REG, XMM13_REG, XMM14_REG,
21409 XMM15_REG, SI_REG, DI_REG
21412 rtx vec[ARRAY_SIZE (clobbered_registers) + 2];
21413 rtx unspec = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
21414 UNSPEC_MS_TO_SYSV_CALL);
21418 for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
21419 vec[i + 2] = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
21422 (SSE_REGNO_P (clobbered_registers[i])
21424 clobbered_registers[i]));
21426 call = gen_rtx_PARALLEL (VOIDmode,
21427 gen_rtvec_v (ARRAY_SIZE (clobbered_registers)
21431 /* Add UNSPEC_CALL_NEEDS_VZEROUPPER decoration. */
21432 if (TARGET_VZEROUPPER)
21437 if (cfun->machine->callee_pass_avx256_p)
21439 if (cfun->machine->callee_return_avx256_p)
21440 avx256 = callee_return_pass_avx256;
21442 avx256 = callee_pass_avx256;
21444 else if (cfun->machine->callee_return_avx256_p)
21445 avx256 = callee_return_avx256;
21447 avx256 = call_no_avx256;
21449 if (reload_completed)
21450 emit_insn (gen_avx_vzeroupper (GEN_INT (avx256)));
21453 unspec = gen_rtx_UNSPEC (VOIDmode,
21454 gen_rtvec (1, GEN_INT (avx256)),
21455 UNSPEC_CALL_NEEDS_VZEROUPPER);
21456 call = gen_rtx_PARALLEL (VOIDmode,
21457 gen_rtvec (2, call, unspec));
21461 call = emit_call_insn (call);
21463 CALL_INSN_FUNCTION_USAGE (call) = use;
21469 ix86_split_call_vzeroupper (rtx insn, rtx vzeroupper)
21471 rtx call = XVECEXP (PATTERN (insn), 0, 0);
21472 emit_insn (gen_avx_vzeroupper (vzeroupper));
21473 emit_call_insn (call);
21476 /* Output the assembly for a call instruction. */
21479 ix86_output_call_insn (rtx insn, rtx call_op)
21481 bool direct_p = constant_call_address_operand (call_op, Pmode);
21482 bool seh_nop_p = false;
21485 if (SIBLING_CALL_P (insn))
21489 /* SEH epilogue detection requires the indirect branch case
21490 to include REX.W. */
21491 else if (TARGET_SEH)
21492 xasm = "rex.W jmp %A0";
21496 output_asm_insn (xasm, &call_op);
21500 /* SEH unwinding can require an extra nop to be emitted in several
21501 circumstances. Determine if we have one of those. */
21506 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
21508 /* If we get to another real insn, we don't need the nop. */
21512 /* If we get to the epilogue note, prevent a catch region from
21513 being adjacent to the standard epilogue sequence. If non-
21514 call-exceptions, we'll have done this during epilogue emission. */
21515 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
21516 && !flag_non_call_exceptions
21517 && !can_throw_internal (insn))
21524 /* If we didn't find a real insn following the call, prevent the
21525 unwinder from looking into the next function. */
21531 xasm = "call\t%P0";
21533 xasm = "call\t%A0";
21535 output_asm_insn (xasm, &call_op);
21543 /* Clear stack slot assignments remembered from previous functions.
21544 This is called from INIT_EXPANDERS once before RTL is emitted for each
21547 static struct machine_function *
21548 ix86_init_machine_status (void)
21550 struct machine_function *f;
21552 f = ggc_alloc_cleared_machine_function ();
21553 f->use_fast_prologue_epilogue_nregs = -1;
21554 f->tls_descriptor_call_expanded_p = 0;
21555 f->call_abi = ix86_abi;
21560 /* Return a MEM corresponding to a stack slot with mode MODE.
21561 Allocate a new slot if necessary.
21563 The RTL for a function can have several slots available: N is
21564 which slot to use. */
21567 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
21569 struct stack_local_entry *s;
21571 gcc_assert (n < MAX_386_STACK_LOCALS);
21573 /* Virtual slot is valid only before vregs are instantiated. */
21574 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
21576 for (s = ix86_stack_locals; s; s = s->next)
21577 if (s->mode == mode && s->n == n)
21578 return copy_rtx (s->rtl);
21580 s = ggc_alloc_stack_local_entry ();
21583 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
21585 s->next = ix86_stack_locals;
21586 ix86_stack_locals = s;
21590 /* Calculate the length of the memory address in the instruction
21591 encoding. Does not include the one-byte modrm, opcode, or prefix. */
21594 memory_address_length (rtx addr)
21596 struct ix86_address parts;
21597 rtx base, index, disp;
21601 if (GET_CODE (addr) == PRE_DEC
21602 || GET_CODE (addr) == POST_INC
21603 || GET_CODE (addr) == PRE_MODIFY
21604 || GET_CODE (addr) == POST_MODIFY)
21607 ok = ix86_decompose_address (addr, &parts);
21610 if (parts.base && GET_CODE (parts.base) == SUBREG)
21611 parts.base = SUBREG_REG (parts.base);
21612 if (parts.index && GET_CODE (parts.index) == SUBREG)
21613 parts.index = SUBREG_REG (parts.index);
21616 index = parts.index;
21621 - esp as the base always wants an index,
21622 - ebp as the base always wants a displacement,
21623 - r12 as the base always wants an index,
21624 - r13 as the base always wants a displacement. */
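/* Illustrative 32-bit encodings: "mov (%eax),%ecx" is 8b 08 (plain modrm);
   "mov (%esp),%ecx" is 8b 0c 24 (esp forces a SIB byte);
   "mov (%ebp),%ecx" is 8b 4d 00 (ebp forces a zero disp8).  Since the
   opcode and modrm bytes are not counted here, the function returns
   0, 1 and 1 for these addresses respectively.  */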
21626 /* Register Indirect. */
21627 if (base && !index && !disp)
21629 /* esp (for its index) and ebp (for its displacement) need
21630 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
21633 && (addr == arg_pointer_rtx
21634 || addr == frame_pointer_rtx
21635 || REGNO (addr) == SP_REG
21636 || REGNO (addr) == BP_REG
21637 || REGNO (addr) == R12_REG
21638 || REGNO (addr) == R13_REG))
21642 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
21643 is not disp32, but disp32(%rip), so for disp32
21644 a SIB byte is needed, unless print_operand_address
21645 optimizes it into disp32(%rip) or (%rip) is implied
21647 else if (disp && !base && !index)
21654 if (GET_CODE (disp) == CONST)
21655 symbol = XEXP (disp, 0);
21656 if (GET_CODE (symbol) == PLUS
21657 && CONST_INT_P (XEXP (symbol, 1)))
21658 symbol = XEXP (symbol, 0);
21660 if (GET_CODE (symbol) != LABEL_REF
21661 && (GET_CODE (symbol) != SYMBOL_REF
21662 || SYMBOL_REF_TLS_MODEL (symbol) != 0)
21663 && (GET_CODE (symbol) != UNSPEC
21664 || (XINT (symbol, 1) != UNSPEC_GOTPCREL
21665 && XINT (symbol, 1) != UNSPEC_PCREL
21666 && XINT (symbol, 1) != UNSPEC_GOTNTPOFF)))
21673 /* Find the length of the displacement constant. */
21676 if (base && satisfies_constraint_K (disp))
21681 /* ebp always wants a displacement. Similarly r13. */
21682 else if (base && REG_P (base)
21683 && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
21686 /* An index requires the two-byte modrm form.... */
21688 /* ...like esp (or r12), which always wants an index. */
21689 || base == arg_pointer_rtx
21690 || base == frame_pointer_rtx
21691 || (base && REG_P (base)
21692 && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
21709 /* Compute default value for "length_immediate" attribute. When SHORTFORM
21710 is set, expect that the insn has an 8-bit immediate alternative. */
21712 ix86_attr_length_immediate_default (rtx insn, bool shortform)
21716 extract_insn_cached (insn);
21717 for (i = recog_data.n_operands - 1; i >= 0; --i)
21718 if (CONSTANT_P (recog_data.operand[i]))
21720 enum attr_mode mode = get_attr_mode (insn);
21723 if (shortform && CONST_INT_P (recog_data.operand[i]))
21725 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
21732 ival = trunc_int_for_mode (ival, HImode);
21735 ival = trunc_int_for_mode (ival, SImode);
21740 if (IN_RANGE (ival, -128, 127))
21757 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
21762 fatal_insn ("unknown insn mode", insn);
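/* E.g. with SHORTFORM, "add $100, %eax" fits the sign-extended 8-bit
   immediate form (83 c0 64) and yields 1, while "add $1000, %eax" needs a
   full 32-bit immediate (05 e8 03 00 00) and yields 4.  */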
21767 /* Compute default value for "length_address" attribute. */
21769 ix86_attr_length_address_default (rtx insn)
21773 if (get_attr_type (insn) == TYPE_LEA)
21775 rtx set = PATTERN (insn), addr;
21777 if (GET_CODE (set) == PARALLEL)
21778 set = XVECEXP (set, 0, 0);
21780 gcc_assert (GET_CODE (set) == SET);
21782 addr = SET_SRC (set);
21783 if (TARGET_64BIT && get_attr_mode (insn) == MODE_SI)
21785 if (GET_CODE (addr) == ZERO_EXTEND)
21786 addr = XEXP (addr, 0);
21787 if (GET_CODE (addr) == SUBREG)
21788 addr = SUBREG_REG (addr);
21791 return memory_address_length (addr);
21794 extract_insn_cached (insn);
21795 for (i = recog_data.n_operands - 1; i >= 0; --i)
21796 if (MEM_P (recog_data.operand[i]))
21798 constrain_operands_cached (reload_completed);
21799 if (which_alternative != -1)
21801 const char *constraints = recog_data.constraints[i];
21802 int alt = which_alternative;
21804 while (*constraints == '=' || *constraints == '+')
21807 while (*constraints++ != ',')
21809 /* Skip ignored operands. */
21810 if (*constraints == 'X')
21813 return memory_address_length (XEXP (recog_data.operand[i], 0));
21818 /* Compute default value for "length_vex" attribute. It includes
21819 2 or 3 byte VEX prefix and 1 opcode byte. */
21822 ix86_attr_length_vex_default (rtx insn, bool has_0f_opcode, bool has_vex_w)
21826 /* Only the 0f opcode can use the 2-byte VEX prefix, and the VEX W bit
21827 requires the 3-byte VEX prefix. */
21828 if (!has_0f_opcode || has_vex_w)
21831 /* We can always use 2 byte VEX prefix in 32bit. */
21835 extract_insn_cached (insn);
21837 for (i = recog_data.n_operands - 1; i >= 0; --i)
21838 if (REG_P (recog_data.operand[i]))
21840 /* REX.W bit uses 3 byte VEX prefix. */
21841 if (GET_MODE (recog_data.operand[i]) == DImode
21842 && GENERAL_REG_P (recog_data.operand[i]))
21847 /* REX.X or REX.B bits use 3 byte VEX prefix. */
21848 if (MEM_P (recog_data.operand[i])
21849 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
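/* E.g. "vaddps %xmm1, %xmm2, %xmm3" fits the 2-byte (c5) prefix and yields
   2 + 1, while "vaddps (%r8), %xmm2, %xmm3" sets REX.B for the address and
   therefore needs the 3-byte (c4) form, yielding 3 + 1.  */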
21856 /* Return the maximum number of instructions a cpu can issue. */
21859 ix86_issue_rate (void)
21863 case PROCESSOR_PENTIUM:
21864 case PROCESSOR_ATOM:
21868 case PROCESSOR_PENTIUMPRO:
21869 case PROCESSOR_PENTIUM4:
21870 case PROCESSOR_CORE2_32:
21871 case PROCESSOR_CORE2_64:
21872 case PROCESSOR_COREI7_32:
21873 case PROCESSOR_COREI7_64:
21874 case PROCESSOR_ATHLON:
21876 case PROCESSOR_AMDFAM10:
21877 case PROCESSOR_NOCONA:
21878 case PROCESSOR_GENERIC32:
21879 case PROCESSOR_GENERIC64:
21880 case PROCESSOR_BDVER1:
21881 case PROCESSOR_BTVER1:
21889 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
21890 by DEP_INSN and nothing else set by DEP_INSN. */
21893 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
21897 /* Simplify the test for uninteresting insns. */
21898 if (insn_type != TYPE_SETCC
21899 && insn_type != TYPE_ICMOV
21900 && insn_type != TYPE_FCMOV
21901 && insn_type != TYPE_IBR)
21904 if ((set = single_set (dep_insn)) != 0)
21906 set = SET_DEST (set);
21909 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
21910 && XVECLEN (PATTERN (dep_insn), 0) == 2
21911 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
21912 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
21914 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
21915 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
21920 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
21923 /* This test is true if the dependent insn reads the flags but
21924 not any other potentially set register. */
21925 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
21928 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
21934 /* Return true iff USE_INSN has a memory address with operands set by
21938 ix86_agi_dependent (rtx set_insn, rtx use_insn)
21941 extract_insn_cached (use_insn);
21942 for (i = recog_data.n_operands - 1; i >= 0; --i)
21943 if (MEM_P (recog_data.operand[i]))
21945 rtx addr = XEXP (recog_data.operand[i], 0);
21946 return modified_in_p (addr, set_insn) != 0;
21952 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
21954 enum attr_type insn_type, dep_insn_type;
21955 enum attr_memory memory;
21957 int dep_insn_code_number;
21959 /* Anti and output dependencies have zero cost on all CPUs. */
21960 if (REG_NOTE_KIND (link) != 0)
21963 dep_insn_code_number = recog_memoized (dep_insn);
21965 /* If we can't recognize the insns, we can't really do anything. */
21966 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
21969 insn_type = get_attr_type (insn);
21970 dep_insn_type = get_attr_type (dep_insn);
21974 case PROCESSOR_PENTIUM:
21975 /* Address Generation Interlock adds a cycle of latency. */
21976 if (insn_type == TYPE_LEA)
21978 rtx addr = PATTERN (insn);
21980 if (GET_CODE (addr) == PARALLEL)
21981 addr = XVECEXP (addr, 0, 0);
21983 gcc_assert (GET_CODE (addr) == SET);
21985 addr = SET_SRC (addr);
21986 if (modified_in_p (addr, dep_insn))
21989 else if (ix86_agi_dependent (dep_insn, insn))
21992 /* ??? Compares pair with jump/setcc. */
21993 if (ix86_flags_dependent (insn, dep_insn, insn_type))
21996 /* Floating point stores require value to be ready one cycle earlier. */
21997 if (insn_type == TYPE_FMOV
21998 && get_attr_memory (insn) == MEMORY_STORE
21999 && !ix86_agi_dependent (dep_insn, insn))
22003 case PROCESSOR_PENTIUMPRO:
22004 memory = get_attr_memory (insn);
22006 /* INT->FP conversion is expensive. */
22007 if (get_attr_fp_int_src (dep_insn))
22010 /* There is one cycle extra latency between an FP op and a store. */
22011 if (insn_type == TYPE_FMOV
22012 && (set = single_set (dep_insn)) != NULL_RTX
22013 && (set2 = single_set (insn)) != NULL_RTX
22014 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
22015 && MEM_P (SET_DEST (set2)))
22018 /* Show the ability of the reorder buffer to hide the latency of a load
22019 by executing it in parallel with the previous instruction when the
22020 previous instruction is not needed to compute the address. */
22021 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
22022 && !ix86_agi_dependent (dep_insn, insn))
22024 /* Claim moves to take one cycle, as the core can issue one load
22025 at a time and the next load can start a cycle later. */
22026 if (dep_insn_type == TYPE_IMOV
22027 || dep_insn_type == TYPE_FMOV)
22035 memory = get_attr_memory (insn);
22037 /* The esp dependency is resolved before the instruction is really
22039 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
22040 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
22043 /* INT->FP conversion is expensive. */
22044 if (get_attr_fp_int_src (dep_insn))
22047 /* Show the ability of the reorder buffer to hide the latency of a load
22048 by executing it in parallel with the previous instruction when the
22049 previous instruction is not needed to compute the address. */
22050 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
22051 && !ix86_agi_dependent (dep_insn, insn))
22053 /* Claim moves to take one cycle, as the core can issue one load
22054 at a time and the next load can start a cycle later. */
22055 if (dep_insn_type == TYPE_IMOV
22056 || dep_insn_type == TYPE_FMOV)
22065 case PROCESSOR_ATHLON:
22067 case PROCESSOR_AMDFAM10:
22068 case PROCESSOR_BDVER1:
22069 case PROCESSOR_BTVER1:
22070 case PROCESSOR_ATOM:
22071 case PROCESSOR_GENERIC32:
22072 case PROCESSOR_GENERIC64:
22073 memory = get_attr_memory (insn);
22075 /* Show the ability of the reorder buffer to hide the latency of a load
22076 by executing it in parallel with the previous instruction when the
22077 previous instruction is not needed to compute the address. */
22078 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
22079 && !ix86_agi_dependent (dep_insn, insn))
22081 enum attr_unit unit = get_attr_unit (insn);
22084 /* Because of the difference between the length of integer and
22085 floating unit pipeline preparation stages, the memory operands
22086 for floating point are cheaper.
22088 ??? For Athlon the difference is most probably 2. */
22089 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
22092 loadcost = TARGET_ATHLON ? 2 : 0;
22094 if (cost >= loadcost)
22107 /* How many alternative schedules to try. This should be as wide as the
22108 scheduling freedom in the DFA, but no wider. Making this value too
22109 large results in extra work for the scheduler. */
22112 ia32_multipass_dfa_lookahead (void)
22116 case PROCESSOR_PENTIUM:
22119 case PROCESSOR_PENTIUMPRO:
22123 case PROCESSOR_CORE2_32:
22124 case PROCESSOR_CORE2_64:
22125 case PROCESSOR_COREI7_32:
22126 case PROCESSOR_COREI7_64:
22127 /* Generally, we want haifa-sched:max_issue() to look ahead as far
22128 as the number of instructions that can be executed in one cycle,
22129 i.e., issue_rate. I wonder why tuning for many CPUs does not do this. */
22130 return ix86_issue_rate ();
22139 /* Model the decoder of Core 2/i7.
22140 The hooks below for multipass scheduling (see haifa-sched.c:max_issue)
22141 track the instruction fetch block boundaries and make sure that long
22142 (9+ byte) instructions are assigned to D0. */
22144 /* Maximum length of an insn that can be handled by
22145 a secondary decoder unit. '8' for Core 2/i7. */
22146 static int core2i7_secondary_decoder_max_insn_size;
22148 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
22149 '16' for Core 2/i7. */
22150 static int core2i7_ifetch_block_size;
22152 /* Maximum number of instructions decoder can handle per cycle.
22153 '6' for Core 2/i7. */
22154 static int core2i7_ifetch_block_max_insns;
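/* Illustrative cycle with the Core 2/i7 parameters above: three 5-byte
   insns and a 1-byte insn (16 bytes total) can all be decoded in one
   cycle, while a 9-byte insn exceeds
   core2i7_secondary_decoder_max_insn_size and is filtered out below unless
   it is the first insn of the cycle (the D0 slot).  */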
22156 typedef struct ix86_first_cycle_multipass_data_ *
22157 ix86_first_cycle_multipass_data_t;
22158 typedef const struct ix86_first_cycle_multipass_data_ *
22159 const_ix86_first_cycle_multipass_data_t;
22161 /* A variable to store target state across calls to max_issue within
22162 one cycle. */
22163 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
22164 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
22166 /* Initialize DATA. */
22168 core2i7_first_cycle_multipass_init (void *_data)
22170 ix86_first_cycle_multipass_data_t data
22171 = (ix86_first_cycle_multipass_data_t) _data;
22173 data->ifetch_block_len = 0;
22174 data->ifetch_block_n_insns = 0;
22175 data->ready_try_change = NULL;
22176 data->ready_try_change_size = 0;
22179 /* Advancing the cycle; reset ifetch block counts. */
22181 core2i7_dfa_post_advance_cycle (void)
22183 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
22185 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
22187 data->ifetch_block_len = 0;
22188 data->ifetch_block_n_insns = 0;
22191 static int min_insn_size (rtx);
22193 /* Filter out insns from ready_try that the core will not be able to issue
22194 on the current cycle due to decoder restrictions. */
22196 core2i7_first_cycle_multipass_filter_ready_try
22197 (const_ix86_first_cycle_multipass_data_t data,
22198 char *ready_try, int n_ready, bool first_cycle_insn_p)
22205 if (ready_try[n_ready])
22208 insn = get_ready_element (n_ready);
22209 insn_size = min_insn_size (insn);
22211 if (/* If this insn is too long for a secondary decoder ... */
22212 (!first_cycle_insn_p
22213 && insn_size > core2i7_secondary_decoder_max_insn_size)
22214 /* ... or it would not fit into the ifetch block ... */
22215 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
22216 /* ... or the decoder is full already ... */
22217 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
22218 /* ... mask the insn out. */
22220 ready_try[n_ready] = 1;
22222 if (data->ready_try_change)
22223 SET_BIT (data->ready_try_change, n_ready);
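/* E.g. (illustrative): with ifetch_block_len == 12, a 5-byte insn fails
   the block-size check above (12 + 5 > 16) and gets masked out, while a
   4-byte insn remains issuable.  Each newly masked index is recorded in
   ready_try_change so that backtracking can undo exactly this set.  */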
22228 /* Prepare for a new round of multipass lookahead scheduling. */
22230 core2i7_first_cycle_multipass_begin (void *_data, char *ready_try, int n_ready,
22231 bool first_cycle_insn_p)
22233 ix86_first_cycle_multipass_data_t data
22234 = (ix86_first_cycle_multipass_data_t) _data;
22235 const_ix86_first_cycle_multipass_data_t prev_data
22236 = ix86_first_cycle_multipass_data;
22238 /* Restore the state from the end of the previous round. */
22239 data->ifetch_block_len = prev_data->ifetch_block_len;
22240 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
22242 /* Filter instructions that cannot be issued on current cycle due to
22243 decoder restrictions. */
22244 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
22245 first_cycle_insn_p);
22248 /* INSN is being issued in current solution. Account for its impact on
22249 the decoder model. */
22251 core2i7_first_cycle_multipass_issue (void *_data, char *ready_try, int n_ready,
22252 rtx insn, const void *_prev_data)
22254 ix86_first_cycle_multipass_data_t data
22255 = (ix86_first_cycle_multipass_data_t) _data;
22256 const_ix86_first_cycle_multipass_data_t prev_data
22257 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
22259 int insn_size = min_insn_size (insn);
22261 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
22262 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
22263 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
22264 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
22266 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
22267 if (!data->ready_try_change)
22269 data->ready_try_change = sbitmap_alloc (n_ready);
22270 data->ready_try_change_size = n_ready;
22272 else if (data->ready_try_change_size < n_ready)
22274 data->ready_try_change = sbitmap_resize (data->ready_try_change,
22276 data->ready_try_change_size = n_ready;
22278 sbitmap_zero (data->ready_try_change);
22280 /* Filter out insns from ready_try that the core will not be able to issue
22281 on the current cycle due to decoder restrictions. */
22282 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
22286 /* Revert the effect on ready_try. */
22288 core2i7_first_cycle_multipass_backtrack (const void *_data,
22290 int n_ready ATTRIBUTE_UNUSED)
22292 const_ix86_first_cycle_multipass_data_t data
22293 = (const_ix86_first_cycle_multipass_data_t) _data;
22294 unsigned int i = 0;
22295 sbitmap_iterator sbi;
22297 gcc_assert (sbitmap_last_set_bit (data->ready_try_change) < n_ready);
22298 EXECUTE_IF_SET_IN_SBITMAP (data->ready_try_change, 0, i, sbi)
22304 /* Save the result of multipass lookahead scheduling for the next round. */
22306 core2i7_first_cycle_multipass_end (const void *_data)
22308 const_ix86_first_cycle_multipass_data_t data
22309 = (const_ix86_first_cycle_multipass_data_t) _data;
22310 ix86_first_cycle_multipass_data_t next_data
22311 = ix86_first_cycle_multipass_data;
22315 next_data->ifetch_block_len = data->ifetch_block_len;
22316 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
22320 /* Deallocate target data. */
22322 core2i7_first_cycle_multipass_fini (void *_data)
22324 ix86_first_cycle_multipass_data_t data
22325 = (ix86_first_cycle_multipass_data_t) _data;
22327 if (data->ready_try_change)
22329 sbitmap_free (data->ready_try_change);
22330 data->ready_try_change = NULL;
22331 data->ready_try_change_size = 0;
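/* Rough lifecycle of the hooks above (a sketch; haifa-sched.c:max_issue
   is the authoritative driver):

     ..._init                          initialize a DATA object
     per lookahead round:
       ..._begin                       restore state saved by the last round
       { ..._issue | ..._backtrack }*  account for / revert candidate insns
       ..._end                         save state for the next round
     ..._fini                          release DATA resources

   with core2i7_dfa_post_advance_cycle resetting the ifetch counts each
   time the scheduler advances to a new cycle.  */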
22335 /* Prepare for a scheduling pass. */
22337 ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
22338 int verbose ATTRIBUTE_UNUSED,
22339 int max_uid ATTRIBUTE_UNUSED)
22341 /* Install scheduling hooks for current CPU. Some of these hooks are used
22342 in time-critical parts of the scheduler, so we only set them up when
22343 they are actually used. */
22346 case PROCESSOR_CORE2_32:
22347 case PROCESSOR_CORE2_64:
22348 case PROCESSOR_COREI7_32:
22349 case PROCESSOR_COREI7_64:
22350 targetm.sched.dfa_post_advance_cycle
22351 = core2i7_dfa_post_advance_cycle;
22352 targetm.sched.first_cycle_multipass_init
22353 = core2i7_first_cycle_multipass_init;
22354 targetm.sched.first_cycle_multipass_begin
22355 = core2i7_first_cycle_multipass_begin;
22356 targetm.sched.first_cycle_multipass_issue
22357 = core2i7_first_cycle_multipass_issue;
22358 targetm.sched.first_cycle_multipass_backtrack
22359 = core2i7_first_cycle_multipass_backtrack;
22360 targetm.sched.first_cycle_multipass_end
22361 = core2i7_first_cycle_multipass_end;
22362 targetm.sched.first_cycle_multipass_fini
22363 = core2i7_first_cycle_multipass_fini;
22365 /* Set decoder parameters. */
22366 core2i7_secondary_decoder_max_insn_size = 8;
22367 core2i7_ifetch_block_size = 16;
22368 core2i7_ifetch_block_max_insns = 6;
22372 targetm.sched.dfa_post_advance_cycle = NULL;
22373 targetm.sched.first_cycle_multipass_init = NULL;
22374 targetm.sched.first_cycle_multipass_begin = NULL;
22375 targetm.sched.first_cycle_multipass_issue = NULL;
22376 targetm.sched.first_cycle_multipass_backtrack = NULL;
22377 targetm.sched.first_cycle_multipass_end = NULL;
22378 targetm.sched.first_cycle_multipass_fini = NULL;
22384 /* Compute the alignment given to a constant that is being placed in memory.
22385 EXP is the constant and ALIGN is the alignment that the object would
22386 ordinarily have.
22387 The value of this function is used instead of that alignment to align
22388 the object. */
22391 ix86_constant_alignment (tree exp, int align)
22393 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
22394 || TREE_CODE (exp) == INTEGER_CST)
22396 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
22398 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
22401 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
22402 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
22403 return BITS_PER_WORD;
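/* Illustrative consequences of the rules above: a double constant is
   placed with 64-bit alignment even where the 32-bit ABI would only
   require 32 bits, and (unless optimizing for size) a string constant
   of 31 or more bytes is word-aligned so block operations on it can
   use full-word accesses.  */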
22408 /* Compute the alignment for a static variable.
22409 TYPE is the data type, and ALIGN is the alignment that
22410 the object would ordinarily have. The value of this function is used
22411 instead of that alignment to align the object. */
22414 ix86_data_alignment (tree type, int align)
22416 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
22418 if (AGGREGATE_TYPE_P (type)
22419 && TYPE_SIZE (type)
22420 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
22421 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
22422 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
22423 && align < max_align)
22426 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
22427 to a 16-byte boundary. */
22430 if (AGGREGATE_TYPE_P (type)
22431 && TYPE_SIZE (type)
22432 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
22433 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
22434 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
22438 if (TREE_CODE (type) == ARRAY_TYPE)
22440 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
22442 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
22445 else if (TREE_CODE (type) == COMPLEX_TYPE)
22448 if (TYPE_MODE (type) == DCmode && align < 64)
22450 if ((TYPE_MODE (type) == XCmode
22451 || TYPE_MODE (type) == TCmode) && align < 128)
22454 else if ((TREE_CODE (type) == RECORD_TYPE
22455 || TREE_CODE (type) == UNION_TYPE
22456 || TREE_CODE (type) == QUAL_UNION_TYPE)
22457 && TYPE_FIELDS (type))
22459 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
22461 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
22464 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
22465 || TREE_CODE (type) == INTEGER_TYPE)
22467 if (TYPE_MODE (type) == DFmode && align < 64)
22469 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
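/* Illustrative examples, assuming a 64-bit target and optimizing for
   speed: "static double d;" is aligned to 64 bits; an aggregate of 16
   bytes or more gets at least 128-bit alignment under the ABI rule
   above; and a large enough object can be raised as far as max_align
   (256 bits here) to help aligned vector access.  */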
22476 /* Compute the alignment for a local variable or a stack slot. EXP is
22477 the data type or decl itself, MODE is the widest mode available and
22478 ALIGN is the alignment that the object would ordinarily have. The
22479 value of this macro is used instead of that alignment to align the
22480 object. */
22483 ix86_local_alignment (tree exp, enum machine_mode mode,
22484 unsigned int align)
22488 if (exp && DECL_P (exp))
22490 type = TREE_TYPE (exp);
22499 /* Don't do dynamic stack realignment for long long objects with
22500 -mpreferred-stack-boundary=2. */
22503 && ix86_preferred_stack_boundary < 64
22504 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
22505 && (!type || !TYPE_USER_ALIGN (type))
22506 && (!decl || !DECL_USER_ALIGN (decl)))
22509 /* If TYPE is NULL, we are allocating a stack slot for caller-save
22510 register in MODE. We will return the largest alignment of XF
22511 and DF. */
22514 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
22515 align = GET_MODE_ALIGNMENT (DFmode);
22519 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
22520 to a 16-byte boundary. The exact wording is:
22522 An array uses the same alignment as its elements, except that a local or
22523 global array variable of length at least 16 bytes or
22524 a C99 variable-length array variable always has alignment of at least 16 bytes.
22526 This was added to allow use of aligned SSE instructions on arrays. The
22527 rule is meant for static storage (where the compiler cannot do the analysis
22528 by itself). We follow it for automatic variables only when convenient.
22529 We fully control everything in the function being compiled, and functions
22530 from other units cannot rely on the alignment.
22532 Exclude the va_list type. It is the common case of a local array where
22533 we cannot benefit from the alignment. */
22534 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
22537 if (AGGREGATE_TYPE_P (type)
22538 && (va_list_type_node == NULL_TREE
22539 || (TYPE_MAIN_VARIANT (type)
22540 != TYPE_MAIN_VARIANT (va_list_type_node)))
22541 && TYPE_SIZE (type)
22542 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
22543 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
22544 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
22547 if (TREE_CODE (type) == ARRAY_TYPE)
22549 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
22551 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
22554 else if (TREE_CODE (type) == COMPLEX_TYPE)
22556 if (TYPE_MODE (type) == DCmode && align < 64)
22558 if ((TYPE_MODE (type) == XCmode
22559 || TYPE_MODE (type) == TCmode) && align < 128)
22562 else if ((TREE_CODE (type) == RECORD_TYPE
22563 || TREE_CODE (type) == UNION_TYPE
22564 || TREE_CODE (type) == QUAL_UNION_TYPE)
22565 && TYPE_FIELDS (type))
22567 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
22569 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
22572 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
22573 || TREE_CODE (type) == INTEGER_TYPE)
22576 if (TYPE_MODE (type) == DFmode && align < 64)
22578 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
22584 /* Compute the minimum required alignment for dynamic stack realignment
22585 purposes for a local variable, parameter or a stack slot. EXP is
22586 the data type or decl itself, MODE is its mode and ALIGN is the
22587 alignment that the object would ordinarily have. */
22590 ix86_minimum_alignment (tree exp, enum machine_mode mode,
22591 unsigned int align)
22595 if (exp && DECL_P (exp))
22597 type = TREE_TYPE (exp);
22606 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
22609 /* Don't do dynamic stack realignment for long long objects with
22610 -mpreferred-stack-boundary=2. */
22611 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
22612 && (!type || !TYPE_USER_ALIGN (type))
22613 && (!decl || !DECL_USER_ALIGN (decl)))
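/* E.g. (illustrative): on IA-32 with -mpreferred-stack-boundary=2, a
   "long long" local reports a minimum alignment of 32 bits here, so
   such objects by themselves do not force dynamic stack realignment.  */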
22619 /* Find a location for the static chain incoming to a nested function.
22620 This is a register, unless all free registers are used by arguments. */
22623 ix86_static_chain (const_tree fndecl, bool incoming_p)
22627 if (!DECL_STATIC_CHAIN (fndecl))
22632 /* We always use R10 in 64-bit mode. */
22640 /* By default in 32-bit mode we use ECX to pass the static chain. */
22643 fntype = TREE_TYPE (fndecl);
22644 ccvt = ix86_get_callcvt (fntype);
22645 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) != 0)
22647 /* Fastcall functions use ecx/edx for arguments, which leaves
22648 us with EAX for the static chain.
22649 Thiscall functions use ecx for arguments, which also
22650 leaves us with EAX for the static chain. */
22653 else if (ix86_function_regparm (fntype, fndecl) == 3)
22655 /* For regparm 3, we have no free call-clobbered registers in
22656 which to store the static chain. In order to implement this,
22657 we have the trampoline push the static chain to the stack.
22658 However, we can't push a value below the return address when
22659 we call the nested function directly, so we have to use an
22660 alternate entry point. For this we use ESI, and have the
22661 alternate entry point push ESI, so that things appear the
22662 same once we're executing the nested function. */
22665 if (fndecl == current_function_decl)
22666 ix86_static_chain_on_stack = true;
22667 return gen_frame_mem (SImode,
22668 plus_constant (arg_pointer_rtx, -8));
22674 return gen_rtx_REG (Pmode, regno);
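/* Summary of the choices above, for illustration:
     64-bit                      R10
     32-bit fastcall/thiscall    EAX (ECX, and for fastcall EDX, carry
                                 arguments)
     32-bit regparm (3)          a stack slot, via the ESI alternate
                                 entry point
     anything else               ECX  */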
22677 /* Emit RTL insns to initialize the variable parts of a trampoline.
22678 FNDECL is the decl of the target address; M_TRAMP is a MEM for
22679 the trampoline, and CHAIN_VALUE is an RTX for the static chain
22680 to be passed to the target function. */
22683 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
22689 fnaddr = XEXP (DECL_RTL (fndecl), 0);
22695 /* Load the function address to r11. Try to load address using
22696 the shorter movl instead of movabs. We may want to support
22697 movq for kernel mode, but the kernel does not use trampolines at
22698 the moment. */
22699 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
22701 fnaddr = copy_to_mode_reg (DImode, fnaddr);
22703 mem = adjust_address (m_tramp, HImode, offset);
22704 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
22706 mem = adjust_address (m_tramp, SImode, offset + 2);
22707 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
22712 mem = adjust_address (m_tramp, HImode, offset);
22713 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
22715 mem = adjust_address (m_tramp, DImode, offset + 2);
22716 emit_move_insn (mem, fnaddr);
22720 /* Load static chain using movabs to r10. Use the
22721 shorter movl instead of movabs for x32. */
22733 mem = adjust_address (m_tramp, HImode, offset);
22734 emit_move_insn (mem, gen_int_mode (opcode, HImode));
22736 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
22737 emit_move_insn (mem, chain_value);
22740 /* Jump to r11; the last (unused) byte is a nop, only there to
22741 pad the write out to a single 32-bit store. */
22742 mem = adjust_address (m_tramp, SImode, offset);
22743 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
22750 /* Depending on the static chain location, either load a register
22751 with a constant, or push the constant to the stack. All of the
22752 instructions are the same size. */
22753 chain = ix86_static_chain (fndecl, true);
22756 switch (REGNO (chain))
22759 opcode = 0xb8; break;
22761 opcode = 0xb9; break;
22763 gcc_unreachable ();
22769 mem = adjust_address (m_tramp, QImode, offset);
22770 emit_move_insn (mem, gen_int_mode (opcode, QImode));
22772 mem = adjust_address (m_tramp, SImode, offset + 1);
22773 emit_move_insn (mem, chain_value);
22776 mem = adjust_address (m_tramp, QImode, offset);
22777 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
22779 mem = adjust_address (m_tramp, SImode, offset + 1);
22781 /* Compute offset from the end of the jmp to the target function.
22782 In the case in which the trampoline stores the static chain on
22783 the stack, we need to skip the first insn which pushes the
22784 (call-saved) register static chain; this push is 1 byte. */
22786 disp = expand_binop (SImode, sub_optab, fnaddr,
22787 plus_constant (XEXP (m_tramp, 0),
22788 offset - (MEM_P (chain) ? 1 : 0)),
22789 NULL_RTX, 1, OPTAB_DIRECT);
22790 emit_move_insn (mem, disp);
22793 gcc_assert (offset <= TRAMPOLINE_SIZE);
22795 #ifdef HAVE_ENABLE_EXECUTE_STACK
22796 #ifdef CHECK_EXECUTE_STACK_ENABLED
22797 if (CHECK_EXECUTE_STACK_ENABLED)
22799 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
22800 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
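/* Byte-layout sketch of the trampolines emitted above (illustrative;
   the exact encoding depends on the operands):

   64-bit, when the shorter movl forms are not usable:
       49 bb <fnaddr, 8 bytes>    movabs $fnaddr, %r11
       49 ba <chain, 8 bytes>     movabs $chain, %r10
       49 ff e3                   jmp *%r11
       90                         nop (pads the final 32-bit store)

   32-bit:
       b8|b9 <chain, 4 bytes>     movl $chain, %eax / %ecx
       e9 <disp, 4 bytes>         jmp <fnaddr>
   where for regparm (3) targets the first insn instead pushes the
   chain value onto the stack.  */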
22804 /* The following file contains several enumerations and data structures
22805 built from the definitions in i386-builtin-types.def. */
22807 #include "i386-builtin-types.inc"
22809 /* Table for the ix86 builtin non-function types. */
22810 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
22812 /* Retrieve an element from the above table, building some of
22813 the types lazily. */
22816 ix86_get_builtin_type (enum ix86_builtin_type tcode)
22818 unsigned int index;
22821 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
22823 type = ix86_builtin_type_tab[(int) tcode];
22827 gcc_assert (tcode > IX86_BT_LAST_PRIM);
22828 if (tcode <= IX86_BT_LAST_VECT)
22830 enum machine_mode mode;
22832 index = tcode - IX86_BT_LAST_PRIM - 1;
22833 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
22834 mode = ix86_builtin_type_vect_mode[index];
22836 type = build_vector_type_for_mode (itype, mode);
22842 index = tcode - IX86_BT_LAST_VECT - 1;
22843 if (tcode <= IX86_BT_LAST_PTR)
22844 quals = TYPE_UNQUALIFIED;
22846 quals = TYPE_QUAL_CONST;
22848 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
22849 if (quals != TYPE_UNQUALIFIED)
22850 itype = build_qualified_type (itype, quals);
22852 type = build_pointer_type (itype);
22855 ix86_builtin_type_tab[(int) tcode] = type;
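/* E.g. (illustrative): the first request for a vector code looks up its
   element type and machine mode in the generated
   ix86_builtin_type_vect_base/_mode tables and builds the vector type;
   a pointer code wraps its base type, adding a const qualifier for
   codes past IX86_BT_LAST_PTR.  Later requests hit the cache.  */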
22859 /* Table for the ix86 builtin function types. */
22860 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
22862 /* Retrieve an element from the above table, building some of
22863 the types lazily. */
22866 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
22870 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
22872 type = ix86_builtin_func_type_tab[(int) tcode];
22876 if (tcode <= IX86_BT_LAST_FUNC)
22878 unsigned start = ix86_builtin_func_start[(int) tcode];
22879 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
22880 tree rtype, atype, args = void_list_node;
22883 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
22884 for (i = after - 1; i > start; --i)
22886 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
22887 args = tree_cons (NULL, atype, args);
22890 type = build_function_type (rtype, args);
22894 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
22895 enum ix86_builtin_func_type icode;
22897 icode = ix86_builtin_func_alias_base[index];
22898 type = ix86_get_builtin_func_type (icode);
22901 ix86_builtin_func_type_tab[(int) tcode] = type;
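/* E.g. (illustrative): for a signature code such as
   V4SF_FTYPE_V4SF_V4SF, ix86_builtin_func_args holds { return type,
   arg 1, arg 2 }; the loop above conses the args back to front onto
   void_list_node, so build_function_type sees a fixed-arity prototype.
   Alias codes simply reuse the type of the code they alias.  */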
22906 /* Codes for all the SSE/MMX builtins. */
22909 IX86_BUILTIN_ADDPS,
22910 IX86_BUILTIN_ADDSS,
22911 IX86_BUILTIN_DIVPS,
22912 IX86_BUILTIN_DIVSS,
22913 IX86_BUILTIN_MULPS,
22914 IX86_BUILTIN_MULSS,
22915 IX86_BUILTIN_SUBPS,
22916 IX86_BUILTIN_SUBSS,
22918 IX86_BUILTIN_CMPEQPS,
22919 IX86_BUILTIN_CMPLTPS,
22920 IX86_BUILTIN_CMPLEPS,
22921 IX86_BUILTIN_CMPGTPS,
22922 IX86_BUILTIN_CMPGEPS,
22923 IX86_BUILTIN_CMPNEQPS,
22924 IX86_BUILTIN_CMPNLTPS,
22925 IX86_BUILTIN_CMPNLEPS,
22926 IX86_BUILTIN_CMPNGTPS,
22927 IX86_BUILTIN_CMPNGEPS,
22928 IX86_BUILTIN_CMPORDPS,
22929 IX86_BUILTIN_CMPUNORDPS,
22930 IX86_BUILTIN_CMPEQSS,
22931 IX86_BUILTIN_CMPLTSS,
22932 IX86_BUILTIN_CMPLESS,
22933 IX86_BUILTIN_CMPNEQSS,
22934 IX86_BUILTIN_CMPNLTSS,
22935 IX86_BUILTIN_CMPNLESS,
22936 IX86_BUILTIN_CMPNGTSS,
22937 IX86_BUILTIN_CMPNGESS,
22938 IX86_BUILTIN_CMPORDSS,
22939 IX86_BUILTIN_CMPUNORDSS,
22941 IX86_BUILTIN_COMIEQSS,
22942 IX86_BUILTIN_COMILTSS,
22943 IX86_BUILTIN_COMILESS,
22944 IX86_BUILTIN_COMIGTSS,
22945 IX86_BUILTIN_COMIGESS,
22946 IX86_BUILTIN_COMINEQSS,
22947 IX86_BUILTIN_UCOMIEQSS,
22948 IX86_BUILTIN_UCOMILTSS,
22949 IX86_BUILTIN_UCOMILESS,
22950 IX86_BUILTIN_UCOMIGTSS,
22951 IX86_BUILTIN_UCOMIGESS,
22952 IX86_BUILTIN_UCOMINEQSS,
22954 IX86_BUILTIN_CVTPI2PS,
22955 IX86_BUILTIN_CVTPS2PI,
22956 IX86_BUILTIN_CVTSI2SS,
22957 IX86_BUILTIN_CVTSI642SS,
22958 IX86_BUILTIN_CVTSS2SI,
22959 IX86_BUILTIN_CVTSS2SI64,
22960 IX86_BUILTIN_CVTTPS2PI,
22961 IX86_BUILTIN_CVTTSS2SI,
22962 IX86_BUILTIN_CVTTSS2SI64,
22964 IX86_BUILTIN_MAXPS,
22965 IX86_BUILTIN_MAXSS,
22966 IX86_BUILTIN_MINPS,
22967 IX86_BUILTIN_MINSS,
22969 IX86_BUILTIN_LOADUPS,
22970 IX86_BUILTIN_STOREUPS,
22971 IX86_BUILTIN_MOVSS,
22973 IX86_BUILTIN_MOVHLPS,
22974 IX86_BUILTIN_MOVLHPS,
22975 IX86_BUILTIN_LOADHPS,
22976 IX86_BUILTIN_LOADLPS,
22977 IX86_BUILTIN_STOREHPS,
22978 IX86_BUILTIN_STORELPS,
22980 IX86_BUILTIN_MASKMOVQ,
22981 IX86_BUILTIN_MOVMSKPS,
22982 IX86_BUILTIN_PMOVMSKB,
22984 IX86_BUILTIN_MOVNTPS,
22985 IX86_BUILTIN_MOVNTQ,
22987 IX86_BUILTIN_LOADDQU,
22988 IX86_BUILTIN_STOREDQU,
22990 IX86_BUILTIN_PACKSSWB,
22991 IX86_BUILTIN_PACKSSDW,
22992 IX86_BUILTIN_PACKUSWB,
22994 IX86_BUILTIN_PADDB,
22995 IX86_BUILTIN_PADDW,
22996 IX86_BUILTIN_PADDD,
22997 IX86_BUILTIN_PADDQ,
22998 IX86_BUILTIN_PADDSB,
22999 IX86_BUILTIN_PADDSW,
23000 IX86_BUILTIN_PADDUSB,
23001 IX86_BUILTIN_PADDUSW,
23002 IX86_BUILTIN_PSUBB,
23003 IX86_BUILTIN_PSUBW,
23004 IX86_BUILTIN_PSUBD,
23005 IX86_BUILTIN_PSUBQ,
23006 IX86_BUILTIN_PSUBSB,
23007 IX86_BUILTIN_PSUBSW,
23008 IX86_BUILTIN_PSUBUSB,
23009 IX86_BUILTIN_PSUBUSW,
23012 IX86_BUILTIN_PANDN,
23016 IX86_BUILTIN_PAVGB,
23017 IX86_BUILTIN_PAVGW,
23019 IX86_BUILTIN_PCMPEQB,
23020 IX86_BUILTIN_PCMPEQW,
23021 IX86_BUILTIN_PCMPEQD,
23022 IX86_BUILTIN_PCMPGTB,
23023 IX86_BUILTIN_PCMPGTW,
23024 IX86_BUILTIN_PCMPGTD,
23026 IX86_BUILTIN_PMADDWD,
23028 IX86_BUILTIN_PMAXSW,
23029 IX86_BUILTIN_PMAXUB,
23030 IX86_BUILTIN_PMINSW,
23031 IX86_BUILTIN_PMINUB,
23033 IX86_BUILTIN_PMULHUW,
23034 IX86_BUILTIN_PMULHW,
23035 IX86_BUILTIN_PMULLW,
23037 IX86_BUILTIN_PSADBW,
23038 IX86_BUILTIN_PSHUFW,
23040 IX86_BUILTIN_PSLLW,
23041 IX86_BUILTIN_PSLLD,
23042 IX86_BUILTIN_PSLLQ,
23043 IX86_BUILTIN_PSRAW,
23044 IX86_BUILTIN_PSRAD,
23045 IX86_BUILTIN_PSRLW,
23046 IX86_BUILTIN_PSRLD,
23047 IX86_BUILTIN_PSRLQ,
23048 IX86_BUILTIN_PSLLWI,
23049 IX86_BUILTIN_PSLLDI,
23050 IX86_BUILTIN_PSLLQI,
23051 IX86_BUILTIN_PSRAWI,
23052 IX86_BUILTIN_PSRADI,
23053 IX86_BUILTIN_PSRLWI,
23054 IX86_BUILTIN_PSRLDI,
23055 IX86_BUILTIN_PSRLQI,
23057 IX86_BUILTIN_PUNPCKHBW,
23058 IX86_BUILTIN_PUNPCKHWD,
23059 IX86_BUILTIN_PUNPCKHDQ,
23060 IX86_BUILTIN_PUNPCKLBW,
23061 IX86_BUILTIN_PUNPCKLWD,
23062 IX86_BUILTIN_PUNPCKLDQ,
23064 IX86_BUILTIN_SHUFPS,
23066 IX86_BUILTIN_RCPPS,
23067 IX86_BUILTIN_RCPSS,
23068 IX86_BUILTIN_RSQRTPS,
23069 IX86_BUILTIN_RSQRTPS_NR,
23070 IX86_BUILTIN_RSQRTSS,
23071 IX86_BUILTIN_RSQRTF,
23072 IX86_BUILTIN_SQRTPS,
23073 IX86_BUILTIN_SQRTPS_NR,
23074 IX86_BUILTIN_SQRTSS,
23076 IX86_BUILTIN_UNPCKHPS,
23077 IX86_BUILTIN_UNPCKLPS,
23079 IX86_BUILTIN_ANDPS,
23080 IX86_BUILTIN_ANDNPS,
23082 IX86_BUILTIN_XORPS,
23085 IX86_BUILTIN_LDMXCSR,
23086 IX86_BUILTIN_STMXCSR,
23087 IX86_BUILTIN_SFENCE,
23089 /* 3DNow! Original */
23090 IX86_BUILTIN_FEMMS,
23091 IX86_BUILTIN_PAVGUSB,
23092 IX86_BUILTIN_PF2ID,
23093 IX86_BUILTIN_PFACC,
23094 IX86_BUILTIN_PFADD,
23095 IX86_BUILTIN_PFCMPEQ,
23096 IX86_BUILTIN_PFCMPGE,
23097 IX86_BUILTIN_PFCMPGT,
23098 IX86_BUILTIN_PFMAX,
23099 IX86_BUILTIN_PFMIN,
23100 IX86_BUILTIN_PFMUL,
23101 IX86_BUILTIN_PFRCP,
23102 IX86_BUILTIN_PFRCPIT1,
23103 IX86_BUILTIN_PFRCPIT2,
23104 IX86_BUILTIN_PFRSQIT1,
23105 IX86_BUILTIN_PFRSQRT,
23106 IX86_BUILTIN_PFSUB,
23107 IX86_BUILTIN_PFSUBR,
23108 IX86_BUILTIN_PI2FD,
23109 IX86_BUILTIN_PMULHRW,
23111 /* 3DNow! Athlon Extensions */
23112 IX86_BUILTIN_PF2IW,
23113 IX86_BUILTIN_PFNACC,
23114 IX86_BUILTIN_PFPNACC,
23115 IX86_BUILTIN_PI2FW,
23116 IX86_BUILTIN_PSWAPDSI,
23117 IX86_BUILTIN_PSWAPDSF,
23120 IX86_BUILTIN_ADDPD,
23121 IX86_BUILTIN_ADDSD,
23122 IX86_BUILTIN_DIVPD,
23123 IX86_BUILTIN_DIVSD,
23124 IX86_BUILTIN_MULPD,
23125 IX86_BUILTIN_MULSD,
23126 IX86_BUILTIN_SUBPD,
23127 IX86_BUILTIN_SUBSD,
23129 IX86_BUILTIN_CMPEQPD,
23130 IX86_BUILTIN_CMPLTPD,
23131 IX86_BUILTIN_CMPLEPD,
23132 IX86_BUILTIN_CMPGTPD,
23133 IX86_BUILTIN_CMPGEPD,
23134 IX86_BUILTIN_CMPNEQPD,
23135 IX86_BUILTIN_CMPNLTPD,
23136 IX86_BUILTIN_CMPNLEPD,
23137 IX86_BUILTIN_CMPNGTPD,
23138 IX86_BUILTIN_CMPNGEPD,
23139 IX86_BUILTIN_CMPORDPD,
23140 IX86_BUILTIN_CMPUNORDPD,
23141 IX86_BUILTIN_CMPEQSD,
23142 IX86_BUILTIN_CMPLTSD,
23143 IX86_BUILTIN_CMPLESD,
23144 IX86_BUILTIN_CMPNEQSD,
23145 IX86_BUILTIN_CMPNLTSD,
23146 IX86_BUILTIN_CMPNLESD,
23147 IX86_BUILTIN_CMPORDSD,
23148 IX86_BUILTIN_CMPUNORDSD,
23150 IX86_BUILTIN_COMIEQSD,
23151 IX86_BUILTIN_COMILTSD,
23152 IX86_BUILTIN_COMILESD,
23153 IX86_BUILTIN_COMIGTSD,
23154 IX86_BUILTIN_COMIGESD,
23155 IX86_BUILTIN_COMINEQSD,
23156 IX86_BUILTIN_UCOMIEQSD,
23157 IX86_BUILTIN_UCOMILTSD,
23158 IX86_BUILTIN_UCOMILESD,
23159 IX86_BUILTIN_UCOMIGTSD,
23160 IX86_BUILTIN_UCOMIGESD,
23161 IX86_BUILTIN_UCOMINEQSD,
23163 IX86_BUILTIN_MAXPD,
23164 IX86_BUILTIN_MAXSD,
23165 IX86_BUILTIN_MINPD,
23166 IX86_BUILTIN_MINSD,
23168 IX86_BUILTIN_ANDPD,
23169 IX86_BUILTIN_ANDNPD,
23171 IX86_BUILTIN_XORPD,
23173 IX86_BUILTIN_SQRTPD,
23174 IX86_BUILTIN_SQRTSD,
23176 IX86_BUILTIN_UNPCKHPD,
23177 IX86_BUILTIN_UNPCKLPD,
23179 IX86_BUILTIN_SHUFPD,
23181 IX86_BUILTIN_LOADUPD,
23182 IX86_BUILTIN_STOREUPD,
23183 IX86_BUILTIN_MOVSD,
23185 IX86_BUILTIN_LOADHPD,
23186 IX86_BUILTIN_LOADLPD,
23188 IX86_BUILTIN_CVTDQ2PD,
23189 IX86_BUILTIN_CVTDQ2PS,
23191 IX86_BUILTIN_CVTPD2DQ,
23192 IX86_BUILTIN_CVTPD2PI,
23193 IX86_BUILTIN_CVTPD2PS,
23194 IX86_BUILTIN_CVTTPD2DQ,
23195 IX86_BUILTIN_CVTTPD2PI,
23197 IX86_BUILTIN_CVTPI2PD,
23198 IX86_BUILTIN_CVTSI2SD,
23199 IX86_BUILTIN_CVTSI642SD,
23201 IX86_BUILTIN_CVTSD2SI,
23202 IX86_BUILTIN_CVTSD2SI64,
23203 IX86_BUILTIN_CVTSD2SS,
23204 IX86_BUILTIN_CVTSS2SD,
23205 IX86_BUILTIN_CVTTSD2SI,
23206 IX86_BUILTIN_CVTTSD2SI64,
23208 IX86_BUILTIN_CVTPS2DQ,
23209 IX86_BUILTIN_CVTPS2PD,
23210 IX86_BUILTIN_CVTTPS2DQ,
23212 IX86_BUILTIN_MOVNTI,
23213 IX86_BUILTIN_MOVNTPD,
23214 IX86_BUILTIN_MOVNTDQ,
23216 IX86_BUILTIN_MOVQ128,
23219 IX86_BUILTIN_MASKMOVDQU,
23220 IX86_BUILTIN_MOVMSKPD,
23221 IX86_BUILTIN_PMOVMSKB128,
23223 IX86_BUILTIN_PACKSSWB128,
23224 IX86_BUILTIN_PACKSSDW128,
23225 IX86_BUILTIN_PACKUSWB128,
23227 IX86_BUILTIN_PADDB128,
23228 IX86_BUILTIN_PADDW128,
23229 IX86_BUILTIN_PADDD128,
23230 IX86_BUILTIN_PADDQ128,
23231 IX86_BUILTIN_PADDSB128,
23232 IX86_BUILTIN_PADDSW128,
23233 IX86_BUILTIN_PADDUSB128,
23234 IX86_BUILTIN_PADDUSW128,
23235 IX86_BUILTIN_PSUBB128,
23236 IX86_BUILTIN_PSUBW128,
23237 IX86_BUILTIN_PSUBD128,
23238 IX86_BUILTIN_PSUBQ128,
23239 IX86_BUILTIN_PSUBSB128,
23240 IX86_BUILTIN_PSUBSW128,
23241 IX86_BUILTIN_PSUBUSB128,
23242 IX86_BUILTIN_PSUBUSW128,
23244 IX86_BUILTIN_PAND128,
23245 IX86_BUILTIN_PANDN128,
23246 IX86_BUILTIN_POR128,
23247 IX86_BUILTIN_PXOR128,
23249 IX86_BUILTIN_PAVGB128,
23250 IX86_BUILTIN_PAVGW128,
23252 IX86_BUILTIN_PCMPEQB128,
23253 IX86_BUILTIN_PCMPEQW128,
23254 IX86_BUILTIN_PCMPEQD128,
23255 IX86_BUILTIN_PCMPGTB128,
23256 IX86_BUILTIN_PCMPGTW128,
23257 IX86_BUILTIN_PCMPGTD128,
23259 IX86_BUILTIN_PMADDWD128,
23261 IX86_BUILTIN_PMAXSW128,
23262 IX86_BUILTIN_PMAXUB128,
23263 IX86_BUILTIN_PMINSW128,
23264 IX86_BUILTIN_PMINUB128,
23266 IX86_BUILTIN_PMULUDQ,
23267 IX86_BUILTIN_PMULUDQ128,
23268 IX86_BUILTIN_PMULHUW128,
23269 IX86_BUILTIN_PMULHW128,
23270 IX86_BUILTIN_PMULLW128,
23272 IX86_BUILTIN_PSADBW128,
23273 IX86_BUILTIN_PSHUFHW,
23274 IX86_BUILTIN_PSHUFLW,
23275 IX86_BUILTIN_PSHUFD,
23277 IX86_BUILTIN_PSLLDQI128,
23278 IX86_BUILTIN_PSLLWI128,
23279 IX86_BUILTIN_PSLLDI128,
23280 IX86_BUILTIN_PSLLQI128,
23281 IX86_BUILTIN_PSRAWI128,
23282 IX86_BUILTIN_PSRADI128,
23283 IX86_BUILTIN_PSRLDQI128,
23284 IX86_BUILTIN_PSRLWI128,
23285 IX86_BUILTIN_PSRLDI128,
23286 IX86_BUILTIN_PSRLQI128,
23288 IX86_BUILTIN_PSLLDQ128,
23289 IX86_BUILTIN_PSLLW128,
23290 IX86_BUILTIN_PSLLD128,
23291 IX86_BUILTIN_PSLLQ128,
23292 IX86_BUILTIN_PSRAW128,
23293 IX86_BUILTIN_PSRAD128,
23294 IX86_BUILTIN_PSRLW128,
23295 IX86_BUILTIN_PSRLD128,
23296 IX86_BUILTIN_PSRLQ128,
23298 IX86_BUILTIN_PUNPCKHBW128,
23299 IX86_BUILTIN_PUNPCKHWD128,
23300 IX86_BUILTIN_PUNPCKHDQ128,
23301 IX86_BUILTIN_PUNPCKHQDQ128,
23302 IX86_BUILTIN_PUNPCKLBW128,
23303 IX86_BUILTIN_PUNPCKLWD128,
23304 IX86_BUILTIN_PUNPCKLDQ128,
23305 IX86_BUILTIN_PUNPCKLQDQ128,
23307 IX86_BUILTIN_CLFLUSH,
23308 IX86_BUILTIN_MFENCE,
23309 IX86_BUILTIN_LFENCE,
23310 IX86_BUILTIN_PAUSE,
23312 IX86_BUILTIN_BSRSI,
23313 IX86_BUILTIN_BSRDI,
23314 IX86_BUILTIN_RDPMC,
23315 IX86_BUILTIN_RDTSC,
23316 IX86_BUILTIN_RDTSCP,
23317 IX86_BUILTIN_ROLQI,
23318 IX86_BUILTIN_ROLHI,
23319 IX86_BUILTIN_RORQI,
23320 IX86_BUILTIN_RORHI,
23323 IX86_BUILTIN_ADDSUBPS,
23324 IX86_BUILTIN_HADDPS,
23325 IX86_BUILTIN_HSUBPS,
23326 IX86_BUILTIN_MOVSHDUP,
23327 IX86_BUILTIN_MOVSLDUP,
23328 IX86_BUILTIN_ADDSUBPD,
23329 IX86_BUILTIN_HADDPD,
23330 IX86_BUILTIN_HSUBPD,
23331 IX86_BUILTIN_LDDQU,
23333 IX86_BUILTIN_MONITOR,
23334 IX86_BUILTIN_MWAIT,
23337 IX86_BUILTIN_PHADDW,
23338 IX86_BUILTIN_PHADDD,
23339 IX86_BUILTIN_PHADDSW,
23340 IX86_BUILTIN_PHSUBW,
23341 IX86_BUILTIN_PHSUBD,
23342 IX86_BUILTIN_PHSUBSW,
23343 IX86_BUILTIN_PMADDUBSW,
23344 IX86_BUILTIN_PMULHRSW,
23345 IX86_BUILTIN_PSHUFB,
23346 IX86_BUILTIN_PSIGNB,
23347 IX86_BUILTIN_PSIGNW,
23348 IX86_BUILTIN_PSIGND,
23349 IX86_BUILTIN_PALIGNR,
23350 IX86_BUILTIN_PABSB,
23351 IX86_BUILTIN_PABSW,
23352 IX86_BUILTIN_PABSD,
23354 IX86_BUILTIN_PHADDW128,
23355 IX86_BUILTIN_PHADDD128,
23356 IX86_BUILTIN_PHADDSW128,
23357 IX86_BUILTIN_PHSUBW128,
23358 IX86_BUILTIN_PHSUBD128,
23359 IX86_BUILTIN_PHSUBSW128,
23360 IX86_BUILTIN_PMADDUBSW128,
23361 IX86_BUILTIN_PMULHRSW128,
23362 IX86_BUILTIN_PSHUFB128,
23363 IX86_BUILTIN_PSIGNB128,
23364 IX86_BUILTIN_PSIGNW128,
23365 IX86_BUILTIN_PSIGND128,
23366 IX86_BUILTIN_PALIGNR128,
23367 IX86_BUILTIN_PABSB128,
23368 IX86_BUILTIN_PABSW128,
23369 IX86_BUILTIN_PABSD128,
23371 /* AMDFAM10 - SSE4A New Instructions. */
23372 IX86_BUILTIN_MOVNTSD,
23373 IX86_BUILTIN_MOVNTSS,
23374 IX86_BUILTIN_EXTRQI,
23375 IX86_BUILTIN_EXTRQ,
23376 IX86_BUILTIN_INSERTQI,
23377 IX86_BUILTIN_INSERTQ,
23380 IX86_BUILTIN_BLENDPD,
23381 IX86_BUILTIN_BLENDPS,
23382 IX86_BUILTIN_BLENDVPD,
23383 IX86_BUILTIN_BLENDVPS,
23384 IX86_BUILTIN_PBLENDVB128,
23385 IX86_BUILTIN_PBLENDW128,
23390 IX86_BUILTIN_INSERTPS128,
23392 IX86_BUILTIN_MOVNTDQA,
23393 IX86_BUILTIN_MPSADBW128,
23394 IX86_BUILTIN_PACKUSDW128,
23395 IX86_BUILTIN_PCMPEQQ,
23396 IX86_BUILTIN_PHMINPOSUW128,
23398 IX86_BUILTIN_PMAXSB128,
23399 IX86_BUILTIN_PMAXSD128,
23400 IX86_BUILTIN_PMAXUD128,
23401 IX86_BUILTIN_PMAXUW128,
23403 IX86_BUILTIN_PMINSB128,
23404 IX86_BUILTIN_PMINSD128,
23405 IX86_BUILTIN_PMINUD128,
23406 IX86_BUILTIN_PMINUW128,
23408 IX86_BUILTIN_PMOVSXBW128,
23409 IX86_BUILTIN_PMOVSXBD128,
23410 IX86_BUILTIN_PMOVSXBQ128,
23411 IX86_BUILTIN_PMOVSXWD128,
23412 IX86_BUILTIN_PMOVSXWQ128,
23413 IX86_BUILTIN_PMOVSXDQ128,
23415 IX86_BUILTIN_PMOVZXBW128,
23416 IX86_BUILTIN_PMOVZXBD128,
23417 IX86_BUILTIN_PMOVZXBQ128,
23418 IX86_BUILTIN_PMOVZXWD128,
23419 IX86_BUILTIN_PMOVZXWQ128,
23420 IX86_BUILTIN_PMOVZXDQ128,
23422 IX86_BUILTIN_PMULDQ128,
23423 IX86_BUILTIN_PMULLD128,
23425 IX86_BUILTIN_ROUNDPD,
23426 IX86_BUILTIN_ROUNDPS,
23427 IX86_BUILTIN_ROUNDSD,
23428 IX86_BUILTIN_ROUNDSS,
23430 IX86_BUILTIN_FLOORPD,
23431 IX86_BUILTIN_CEILPD,
23432 IX86_BUILTIN_TRUNCPD,
23433 IX86_BUILTIN_RINTPD,
23434 IX86_BUILTIN_FLOORPS,
23435 IX86_BUILTIN_CEILPS,
23436 IX86_BUILTIN_TRUNCPS,
23437 IX86_BUILTIN_RINTPS,
23439 IX86_BUILTIN_PTESTZ,
23440 IX86_BUILTIN_PTESTC,
23441 IX86_BUILTIN_PTESTNZC,
23443 IX86_BUILTIN_VEC_INIT_V2SI,
23444 IX86_BUILTIN_VEC_INIT_V4HI,
23445 IX86_BUILTIN_VEC_INIT_V8QI,
23446 IX86_BUILTIN_VEC_EXT_V2DF,
23447 IX86_BUILTIN_VEC_EXT_V2DI,
23448 IX86_BUILTIN_VEC_EXT_V4SF,
23449 IX86_BUILTIN_VEC_EXT_V4SI,
23450 IX86_BUILTIN_VEC_EXT_V8HI,
23451 IX86_BUILTIN_VEC_EXT_V2SI,
23452 IX86_BUILTIN_VEC_EXT_V4HI,
23453 IX86_BUILTIN_VEC_EXT_V16QI,
23454 IX86_BUILTIN_VEC_SET_V2DI,
23455 IX86_BUILTIN_VEC_SET_V4SF,
23456 IX86_BUILTIN_VEC_SET_V4SI,
23457 IX86_BUILTIN_VEC_SET_V8HI,
23458 IX86_BUILTIN_VEC_SET_V4HI,
23459 IX86_BUILTIN_VEC_SET_V16QI,
23461 IX86_BUILTIN_VEC_PACK_SFIX,
23464 IX86_BUILTIN_CRC32QI,
23465 IX86_BUILTIN_CRC32HI,
23466 IX86_BUILTIN_CRC32SI,
23467 IX86_BUILTIN_CRC32DI,
23469 IX86_BUILTIN_PCMPESTRI128,
23470 IX86_BUILTIN_PCMPESTRM128,
23471 IX86_BUILTIN_PCMPESTRA128,
23472 IX86_BUILTIN_PCMPESTRC128,
23473 IX86_BUILTIN_PCMPESTRO128,
23474 IX86_BUILTIN_PCMPESTRS128,
23475 IX86_BUILTIN_PCMPESTRZ128,
23476 IX86_BUILTIN_PCMPISTRI128,
23477 IX86_BUILTIN_PCMPISTRM128,
23478 IX86_BUILTIN_PCMPISTRA128,
23479 IX86_BUILTIN_PCMPISTRC128,
23480 IX86_BUILTIN_PCMPISTRO128,
23481 IX86_BUILTIN_PCMPISTRS128,
23482 IX86_BUILTIN_PCMPISTRZ128,
23484 IX86_BUILTIN_PCMPGTQ,
23486 /* AES instructions */
23487 IX86_BUILTIN_AESENC128,
23488 IX86_BUILTIN_AESENCLAST128,
23489 IX86_BUILTIN_AESDEC128,
23490 IX86_BUILTIN_AESDECLAST128,
23491 IX86_BUILTIN_AESIMC128,
23492 IX86_BUILTIN_AESKEYGENASSIST128,
23494 /* PCLMUL instruction */
23495 IX86_BUILTIN_PCLMULQDQ128,
23498 IX86_BUILTIN_ADDPD256,
23499 IX86_BUILTIN_ADDPS256,
23500 IX86_BUILTIN_ADDSUBPD256,
23501 IX86_BUILTIN_ADDSUBPS256,
23502 IX86_BUILTIN_ANDPD256,
23503 IX86_BUILTIN_ANDPS256,
23504 IX86_BUILTIN_ANDNPD256,
23505 IX86_BUILTIN_ANDNPS256,
23506 IX86_BUILTIN_BLENDPD256,
23507 IX86_BUILTIN_BLENDPS256,
23508 IX86_BUILTIN_BLENDVPD256,
23509 IX86_BUILTIN_BLENDVPS256,
23510 IX86_BUILTIN_DIVPD256,
23511 IX86_BUILTIN_DIVPS256,
23512 IX86_BUILTIN_DPPS256,
23513 IX86_BUILTIN_HADDPD256,
23514 IX86_BUILTIN_HADDPS256,
23515 IX86_BUILTIN_HSUBPD256,
23516 IX86_BUILTIN_HSUBPS256,
23517 IX86_BUILTIN_MAXPD256,
23518 IX86_BUILTIN_MAXPS256,
23519 IX86_BUILTIN_MINPD256,
23520 IX86_BUILTIN_MINPS256,
23521 IX86_BUILTIN_MULPD256,
23522 IX86_BUILTIN_MULPS256,
23523 IX86_BUILTIN_ORPD256,
23524 IX86_BUILTIN_ORPS256,
23525 IX86_BUILTIN_SHUFPD256,
23526 IX86_BUILTIN_SHUFPS256,
23527 IX86_BUILTIN_SUBPD256,
23528 IX86_BUILTIN_SUBPS256,
23529 IX86_BUILTIN_XORPD256,
23530 IX86_BUILTIN_XORPS256,
23531 IX86_BUILTIN_CMPSD,
23532 IX86_BUILTIN_CMPSS,
23533 IX86_BUILTIN_CMPPD,
23534 IX86_BUILTIN_CMPPS,
23535 IX86_BUILTIN_CMPPD256,
23536 IX86_BUILTIN_CMPPS256,
23537 IX86_BUILTIN_CVTDQ2PD256,
23538 IX86_BUILTIN_CVTDQ2PS256,
23539 IX86_BUILTIN_CVTPD2PS256,
23540 IX86_BUILTIN_CVTPS2DQ256,
23541 IX86_BUILTIN_CVTPS2PD256,
23542 IX86_BUILTIN_CVTTPD2DQ256,
23543 IX86_BUILTIN_CVTPD2DQ256,
23544 IX86_BUILTIN_CVTTPS2DQ256,
23545 IX86_BUILTIN_EXTRACTF128PD256,
23546 IX86_BUILTIN_EXTRACTF128PS256,
23547 IX86_BUILTIN_EXTRACTF128SI256,
23548 IX86_BUILTIN_VZEROALL,
23549 IX86_BUILTIN_VZEROUPPER,
23550 IX86_BUILTIN_VPERMILVARPD,
23551 IX86_BUILTIN_VPERMILVARPS,
23552 IX86_BUILTIN_VPERMILVARPD256,
23553 IX86_BUILTIN_VPERMILVARPS256,
23554 IX86_BUILTIN_VPERMILPD,
23555 IX86_BUILTIN_VPERMILPS,
23556 IX86_BUILTIN_VPERMILPD256,
23557 IX86_BUILTIN_VPERMILPS256,
23558 IX86_BUILTIN_VPERMIL2PD,
23559 IX86_BUILTIN_VPERMIL2PS,
23560 IX86_BUILTIN_VPERMIL2PD256,
23561 IX86_BUILTIN_VPERMIL2PS256,
23562 IX86_BUILTIN_VPERM2F128PD256,
23563 IX86_BUILTIN_VPERM2F128PS256,
23564 IX86_BUILTIN_VPERM2F128SI256,
23565 IX86_BUILTIN_VBROADCASTSS,
23566 IX86_BUILTIN_VBROADCASTSD256,
23567 IX86_BUILTIN_VBROADCASTSS256,
23568 IX86_BUILTIN_VBROADCASTPD256,
23569 IX86_BUILTIN_VBROADCASTPS256,
23570 IX86_BUILTIN_VINSERTF128PD256,
23571 IX86_BUILTIN_VINSERTF128PS256,
23572 IX86_BUILTIN_VINSERTF128SI256,
23573 IX86_BUILTIN_LOADUPD256,
23574 IX86_BUILTIN_LOADUPS256,
23575 IX86_BUILTIN_STOREUPD256,
23576 IX86_BUILTIN_STOREUPS256,
23577 IX86_BUILTIN_LDDQU256,
23578 IX86_BUILTIN_MOVNTDQ256,
23579 IX86_BUILTIN_MOVNTPD256,
23580 IX86_BUILTIN_MOVNTPS256,
23581 IX86_BUILTIN_LOADDQU256,
23582 IX86_BUILTIN_STOREDQU256,
23583 IX86_BUILTIN_MASKLOADPD,
23584 IX86_BUILTIN_MASKLOADPS,
23585 IX86_BUILTIN_MASKSTOREPD,
23586 IX86_BUILTIN_MASKSTOREPS,
23587 IX86_BUILTIN_MASKLOADPD256,
23588 IX86_BUILTIN_MASKLOADPS256,
23589 IX86_BUILTIN_MASKSTOREPD256,
23590 IX86_BUILTIN_MASKSTOREPS256,
23591 IX86_BUILTIN_MOVSHDUP256,
23592 IX86_BUILTIN_MOVSLDUP256,
23593 IX86_BUILTIN_MOVDDUP256,
23595 IX86_BUILTIN_SQRTPD256,
23596 IX86_BUILTIN_SQRTPS256,
23597 IX86_BUILTIN_SQRTPS_NR256,
23598 IX86_BUILTIN_RSQRTPS256,
23599 IX86_BUILTIN_RSQRTPS_NR256,
23601 IX86_BUILTIN_RCPPS256,
23603 IX86_BUILTIN_ROUNDPD256,
23604 IX86_BUILTIN_ROUNDPS256,
23606 IX86_BUILTIN_FLOORPD256,
23607 IX86_BUILTIN_CEILPD256,
23608 IX86_BUILTIN_TRUNCPD256,
23609 IX86_BUILTIN_RINTPD256,
23610 IX86_BUILTIN_FLOORPS256,
23611 IX86_BUILTIN_CEILPS256,
23612 IX86_BUILTIN_TRUNCPS256,
23613 IX86_BUILTIN_RINTPS256,
23615 IX86_BUILTIN_UNPCKHPD256,
23616 IX86_BUILTIN_UNPCKLPD256,
23617 IX86_BUILTIN_UNPCKHPS256,
23618 IX86_BUILTIN_UNPCKLPS256,
23620 IX86_BUILTIN_SI256_SI,
23621 IX86_BUILTIN_PS256_PS,
23622 IX86_BUILTIN_PD256_PD,
23623 IX86_BUILTIN_SI_SI256,
23624 IX86_BUILTIN_PS_PS256,
23625 IX86_BUILTIN_PD_PD256,
23627 IX86_BUILTIN_VTESTZPD,
23628 IX86_BUILTIN_VTESTCPD,
23629 IX86_BUILTIN_VTESTNZCPD,
23630 IX86_BUILTIN_VTESTZPS,
23631 IX86_BUILTIN_VTESTCPS,
23632 IX86_BUILTIN_VTESTNZCPS,
23633 IX86_BUILTIN_VTESTZPD256,
23634 IX86_BUILTIN_VTESTCPD256,
23635 IX86_BUILTIN_VTESTNZCPD256,
23636 IX86_BUILTIN_VTESTZPS256,
23637 IX86_BUILTIN_VTESTCPS256,
23638 IX86_BUILTIN_VTESTNZCPS256,
23639 IX86_BUILTIN_PTESTZ256,
23640 IX86_BUILTIN_PTESTC256,
23641 IX86_BUILTIN_PTESTNZC256,
23643 IX86_BUILTIN_MOVMSKPD256,
23644 IX86_BUILTIN_MOVMSKPS256,
23646 /* TFmode support builtins. */
23648 IX86_BUILTIN_HUGE_VALQ,
23649 IX86_BUILTIN_FABSQ,
23650 IX86_BUILTIN_COPYSIGNQ,
23652 /* Vectorizer support builtins. */
23653 IX86_BUILTIN_CPYSGNPS,
23654 IX86_BUILTIN_CPYSGNPD,
23655 IX86_BUILTIN_CPYSGNPS256,
23656 IX86_BUILTIN_CPYSGNPD256,
23658 IX86_BUILTIN_CVTUDQ2PS,
23660 IX86_BUILTIN_VEC_PERM_V2DF,
23661 IX86_BUILTIN_VEC_PERM_V4SF,
23662 IX86_BUILTIN_VEC_PERM_V2DI,
23663 IX86_BUILTIN_VEC_PERM_V4SI,
23664 IX86_BUILTIN_VEC_PERM_V8HI,
23665 IX86_BUILTIN_VEC_PERM_V16QI,
23666 IX86_BUILTIN_VEC_PERM_V2DI_U,
23667 IX86_BUILTIN_VEC_PERM_V4SI_U,
23668 IX86_BUILTIN_VEC_PERM_V8HI_U,
23669 IX86_BUILTIN_VEC_PERM_V16QI_U,
23670 IX86_BUILTIN_VEC_PERM_V4DF,
23671 IX86_BUILTIN_VEC_PERM_V8SF,
23673 /* FMA4 and XOP instructions. */
23674 IX86_BUILTIN_VFMADDSS,
23675 IX86_BUILTIN_VFMADDSD,
23676 IX86_BUILTIN_VFMADDPS,
23677 IX86_BUILTIN_VFMADDPD,
23678 IX86_BUILTIN_VFMADDPS256,
23679 IX86_BUILTIN_VFMADDPD256,
23680 IX86_BUILTIN_VFMADDSUBPS,
23681 IX86_BUILTIN_VFMADDSUBPD,
23682 IX86_BUILTIN_VFMADDSUBPS256,
23683 IX86_BUILTIN_VFMADDSUBPD256,
23685 IX86_BUILTIN_VPCMOV,
23686 IX86_BUILTIN_VPCMOV_V2DI,
23687 IX86_BUILTIN_VPCMOV_V4SI,
23688 IX86_BUILTIN_VPCMOV_V8HI,
23689 IX86_BUILTIN_VPCMOV_V16QI,
23690 IX86_BUILTIN_VPCMOV_V4SF,
23691 IX86_BUILTIN_VPCMOV_V2DF,
23692 IX86_BUILTIN_VPCMOV256,
23693 IX86_BUILTIN_VPCMOV_V4DI256,
23694 IX86_BUILTIN_VPCMOV_V8SI256,
23695 IX86_BUILTIN_VPCMOV_V16HI256,
23696 IX86_BUILTIN_VPCMOV_V32QI256,
23697 IX86_BUILTIN_VPCMOV_V8SF256,
23698 IX86_BUILTIN_VPCMOV_V4DF256,
23700 IX86_BUILTIN_VPPERM,
23702 IX86_BUILTIN_VPMACSSWW,
23703 IX86_BUILTIN_VPMACSWW,
23704 IX86_BUILTIN_VPMACSSWD,
23705 IX86_BUILTIN_VPMACSWD,
23706 IX86_BUILTIN_VPMACSSDD,
23707 IX86_BUILTIN_VPMACSDD,
23708 IX86_BUILTIN_VPMACSSDQL,
23709 IX86_BUILTIN_VPMACSSDQH,
23710 IX86_BUILTIN_VPMACSDQL,
23711 IX86_BUILTIN_VPMACSDQH,
23712 IX86_BUILTIN_VPMADCSSWD,
23713 IX86_BUILTIN_VPMADCSWD,
23715 IX86_BUILTIN_VPHADDBW,
23716 IX86_BUILTIN_VPHADDBD,
23717 IX86_BUILTIN_VPHADDBQ,
23718 IX86_BUILTIN_VPHADDWD,
23719 IX86_BUILTIN_VPHADDWQ,
23720 IX86_BUILTIN_VPHADDDQ,
23721 IX86_BUILTIN_VPHADDUBW,
23722 IX86_BUILTIN_VPHADDUBD,
23723 IX86_BUILTIN_VPHADDUBQ,
23724 IX86_BUILTIN_VPHADDUWD,
23725 IX86_BUILTIN_VPHADDUWQ,
23726 IX86_BUILTIN_VPHADDUDQ,
23727 IX86_BUILTIN_VPHSUBBW,
23728 IX86_BUILTIN_VPHSUBWD,
23729 IX86_BUILTIN_VPHSUBDQ,
23731 IX86_BUILTIN_VPROTB,
23732 IX86_BUILTIN_VPROTW,
23733 IX86_BUILTIN_VPROTD,
23734 IX86_BUILTIN_VPROTQ,
23735 IX86_BUILTIN_VPROTB_IMM,
23736 IX86_BUILTIN_VPROTW_IMM,
23737 IX86_BUILTIN_VPROTD_IMM,
23738 IX86_BUILTIN_VPROTQ_IMM,
23740 IX86_BUILTIN_VPSHLB,
23741 IX86_BUILTIN_VPSHLW,
23742 IX86_BUILTIN_VPSHLD,
23743 IX86_BUILTIN_VPSHLQ,
23744 IX86_BUILTIN_VPSHAB,
23745 IX86_BUILTIN_VPSHAW,
23746 IX86_BUILTIN_VPSHAD,
23747 IX86_BUILTIN_VPSHAQ,
23749 IX86_BUILTIN_VFRCZSS,
23750 IX86_BUILTIN_VFRCZSD,
23751 IX86_BUILTIN_VFRCZPS,
23752 IX86_BUILTIN_VFRCZPD,
23753 IX86_BUILTIN_VFRCZPS256,
23754 IX86_BUILTIN_VFRCZPD256,
23756 IX86_BUILTIN_VPCOMEQUB,
23757 IX86_BUILTIN_VPCOMNEUB,
23758 IX86_BUILTIN_VPCOMLTUB,
23759 IX86_BUILTIN_VPCOMLEUB,
23760 IX86_BUILTIN_VPCOMGTUB,
23761 IX86_BUILTIN_VPCOMGEUB,
23762 IX86_BUILTIN_VPCOMFALSEUB,
23763 IX86_BUILTIN_VPCOMTRUEUB,
23765 IX86_BUILTIN_VPCOMEQUW,
23766 IX86_BUILTIN_VPCOMNEUW,
23767 IX86_BUILTIN_VPCOMLTUW,
23768 IX86_BUILTIN_VPCOMLEUW,
23769 IX86_BUILTIN_VPCOMGTUW,
23770 IX86_BUILTIN_VPCOMGEUW,
23771 IX86_BUILTIN_VPCOMFALSEUW,
23772 IX86_BUILTIN_VPCOMTRUEUW,
23774 IX86_BUILTIN_VPCOMEQUD,
23775 IX86_BUILTIN_VPCOMNEUD,
23776 IX86_BUILTIN_VPCOMLTUD,
23777 IX86_BUILTIN_VPCOMLEUD,
23778 IX86_BUILTIN_VPCOMGTUD,
23779 IX86_BUILTIN_VPCOMGEUD,
23780 IX86_BUILTIN_VPCOMFALSEUD,
23781 IX86_BUILTIN_VPCOMTRUEUD,
23783 IX86_BUILTIN_VPCOMEQUQ,
23784 IX86_BUILTIN_VPCOMNEUQ,
23785 IX86_BUILTIN_VPCOMLTUQ,
23786 IX86_BUILTIN_VPCOMLEUQ,
23787 IX86_BUILTIN_VPCOMGTUQ,
23788 IX86_BUILTIN_VPCOMGEUQ,
23789 IX86_BUILTIN_VPCOMFALSEUQ,
23790 IX86_BUILTIN_VPCOMTRUEUQ,
23792 IX86_BUILTIN_VPCOMEQB,
23793 IX86_BUILTIN_VPCOMNEB,
23794 IX86_BUILTIN_VPCOMLTB,
23795 IX86_BUILTIN_VPCOMLEB,
23796 IX86_BUILTIN_VPCOMGTB,
23797 IX86_BUILTIN_VPCOMGEB,
23798 IX86_BUILTIN_VPCOMFALSEB,
23799 IX86_BUILTIN_VPCOMTRUEB,
23801 IX86_BUILTIN_VPCOMEQW,
23802 IX86_BUILTIN_VPCOMNEW,
23803 IX86_BUILTIN_VPCOMLTW,
23804 IX86_BUILTIN_VPCOMLEW,
23805 IX86_BUILTIN_VPCOMGTW,
23806 IX86_BUILTIN_VPCOMGEW,
23807 IX86_BUILTIN_VPCOMFALSEW,
23808 IX86_BUILTIN_VPCOMTRUEW,
23810 IX86_BUILTIN_VPCOMEQD,
23811 IX86_BUILTIN_VPCOMNED,
23812 IX86_BUILTIN_VPCOMLTD,
23813 IX86_BUILTIN_VPCOMLED,
23814 IX86_BUILTIN_VPCOMGTD,
23815 IX86_BUILTIN_VPCOMGED,
23816 IX86_BUILTIN_VPCOMFALSED,
23817 IX86_BUILTIN_VPCOMTRUED,
23819 IX86_BUILTIN_VPCOMEQQ,
23820 IX86_BUILTIN_VPCOMNEQ,
23821 IX86_BUILTIN_VPCOMLTQ,
23822 IX86_BUILTIN_VPCOMLEQ,
23823 IX86_BUILTIN_VPCOMGTQ,
23824 IX86_BUILTIN_VPCOMGEQ,
23825 IX86_BUILTIN_VPCOMFALSEQ,
23826 IX86_BUILTIN_VPCOMTRUEQ,
23828 /* LWP instructions. */
23829 IX86_BUILTIN_LLWPCB,
23830 IX86_BUILTIN_SLWPCB,
23831 IX86_BUILTIN_LWPVAL32,
23832 IX86_BUILTIN_LWPVAL64,
23833 IX86_BUILTIN_LWPINS32,
23834 IX86_BUILTIN_LWPINS64,
23838 /* BMI instructions. */
23839 IX86_BUILTIN_BEXTR32,
23840 IX86_BUILTIN_BEXTR64,
23843 /* TBM instructions. */
23844 IX86_BUILTIN_BEXTRI32,
23845 IX86_BUILTIN_BEXTRI64,
23848 /* FSGSBASE instructions. */
23849 IX86_BUILTIN_RDFSBASE32,
23850 IX86_BUILTIN_RDFSBASE64,
23851 IX86_BUILTIN_RDGSBASE32,
23852 IX86_BUILTIN_RDGSBASE64,
23853 IX86_BUILTIN_WRFSBASE32,
23854 IX86_BUILTIN_WRFSBASE64,
23855 IX86_BUILTIN_WRGSBASE32,
23856 IX86_BUILTIN_WRGSBASE64,
23858 /* RDRND instructions. */
23859 IX86_BUILTIN_RDRAND16_STEP,
23860 IX86_BUILTIN_RDRAND32_STEP,
23861 IX86_BUILTIN_RDRAND64_STEP,
23863 /* F16C instructions. */
23864 IX86_BUILTIN_CVTPH2PS,
23865 IX86_BUILTIN_CVTPH2PS256,
23866 IX86_BUILTIN_CVTPS2PH,
23867 IX86_BUILTIN_CVTPS2PH256,
23869 /* CFString built-in for darwin */
23870 IX86_BUILTIN_CFSTRING,
23875 /* Table for the ix86 builtin decls. */
23876 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
23878 /* Table of all of the builtin functions that are possible with different ISA's
23879 but are waiting to be built until a function is declared to use that
23880 ISA. */
23881 struct builtin_isa {
23882 const char *name; /* function name */
23883 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
23884 int isa; /* isa_flags this builtin is defined for */
23885 bool const_p; /* true if the declaration is constant */
23886 bool set_and_not_built_p; /* true if the decl is recorded here but has not been built yet */
23889 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
23892 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
23893 of which isa_flags to use in the ix86_builtins_isa array. Stores the
23894 function decl in the ix86_builtins array. Returns the function decl or
23895 NULL_TREE if the builtin was not added.
23897 If the front end has a special hook for builtin functions, delay adding
23898 builtin functions that aren't in the current ISA until the ISA is changed
23899 with function specific optimization. Doing so can save about 300K for the
23900 default compiler. When the builtin is expanded, check at that time whether
23901 it is valid.
23903 If the front end doesn't have a special hook, record all builtins, even if
23904 they aren't in the current ISA, in case the user uses function specific
23905 options for a different ISA, so that we don't get scope errors if a builtin
23906 is added in the middle of a function scope. */
23909 def_builtin (int mask, const char *name, enum ix86_builtin_func_type tcode,
23910 enum ix86_builtins code)
23912 tree decl = NULL_TREE;
23914 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
23916 ix86_builtins_isa[(int) code].isa = mask;
23918 mask &= ~OPTION_MASK_ISA_64BIT;
23920 || (mask & ix86_isa_flags) != 0
23921 || (lang_hooks.builtin_function
23922 == lang_hooks.builtin_function_ext_scope))
23925 tree type = ix86_get_builtin_func_type (tcode);
23926 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
23928 ix86_builtins[(int) code] = decl;
23929 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
23933 ix86_builtins[(int) code] = NULL_TREE;
23934 ix86_builtins_isa[(int) code].tcode = tcode;
23935 ix86_builtins_isa[(int) code].name = name;
23936 ix86_builtins_isa[(int) code].const_p = false;
23937 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
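/* Usage sketch: the bdesc_* tables below supply calls such as

     def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadups",
                  V4SF_FTYPE_PCFLOAT, IX86_BUILTIN_LOADUPS);

   If SSE is not in ix86_isa_flags and the front end has its own
   builtin hook, only the name/type/code are recorded above, and
   ix86_add_new_builtins builds the decl if the ISA is enabled later.  */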
23944 /* Like def_builtin, but also marks the function decl "const". */
23947 def_builtin_const (int mask, const char *name,
23948 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
23950 tree decl = def_builtin (mask, name, tcode, code);
23952 TREE_READONLY (decl) = 1;
23954 ix86_builtins_isa[(int) code].const_p = true;
23959 /* Add any new builtin functions for a given ISA that may not have been
23960 declared. This saves a bit of space compared to adding all of the
23961 declarations to the tree, even if we didn't use them. */
23964 ix86_add_new_builtins (int isa)
23968 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
23970 if ((ix86_builtins_isa[i].isa & isa) != 0
23971 && ix86_builtins_isa[i].set_and_not_built_p)
23975 /* Don't define the builtin again. */
23976 ix86_builtins_isa[i].set_and_not_built_p = false;
23978 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
23979 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
23980 type, i, BUILT_IN_MD, NULL,
23983 ix86_builtins[i] = decl;
23984 if (ix86_builtins_isa[i].const_p)
23985 TREE_READONLY (decl) = 1;
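/* Presumably reached when function-specific target options (e.g. a
   target ("...") attribute) enable additional ISA flags: any builtins
   deferred by def_builtin for those flags get real decls here, built
   at external scope so later function scope changes cannot hide them.  */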
23990 /* Bits for builtin_description.flag. */
23992 /* Set when we don't support the comparison natively, and should
23993 swap the comparison operands in order to support it. */
23994 #define BUILTIN_DESC_SWAP_OPERANDS 1
23996 struct builtin_description
23998 const unsigned int mask;
23999 const enum insn_code icode;
24000 const char *const name;
24001 const enum ix86_builtins code;
24002 const enum rtx_code comparison;
24006 static const struct builtin_description bdesc_comi[] =
24008 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
24009 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
24010 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
24011 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
24012 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
24013 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
24014 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
24015 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
24016 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
24017 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
24018 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
24019 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
24020 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
24021 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
24022 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
24023 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
24024 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
24025 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
24026 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
24027 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
24028 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
24029 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
24030 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
24031 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
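/* Reading an entry, using the first row above as an example:
   OPTION_MASK_ISA_SSE gates availability, CODE_FOR_sse_comi is the
   insn pattern used for expansion, "__builtin_ia32_comieq" is the
   user-visible name, IX86_BUILTIN_COMIEQSS is the code def_builtin
   records it under, and UNEQ is the rtx comparison the expander plugs
   into the pattern.  */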
24034 static const struct builtin_description bdesc_pcmpestr[] =
24037 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
24038 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
24039 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
24040 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
24041 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
24042 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
24043 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
24046 static const struct builtin_description bdesc_pcmpistr[] =
24049 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
24050 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
24051 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
24052 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
24053 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
24054 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
24055 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
24058 /* Special builtins with a variable number of arguments. */
24059 static const struct builtin_description bdesc_special_args[] =
24061 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtsc, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
24062 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtscp, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
24063 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
24066 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
24069 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
24072 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
24073 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
24074 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
24076 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
24077 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
24078 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
24079 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
24081 /* SSE or 3DNow!A */
24082 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
24083 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
24086 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
24087 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
24088 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
24089 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
24090 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
24091 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
24092 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
24093 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
24094 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
24096 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
24097 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
24100 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
24103 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
24106 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
24107 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
24110 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
24111 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
24113 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
24114 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
24115 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
24116 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
24117 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
24119 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
24120 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
24121 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
24122 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
24123 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
24124 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
24125 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
24127 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
24128 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
24129 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
24131 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
24132 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
24133 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
24134 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
24135 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
24136 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
24137 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
24138 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
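  /* In the maskload/maskstore entries above, the mask argument is the
     integer vector of matching width; only the top bit of each
     element participates, so (roughly) maskloadpd yields element I of
     the source when element I of the mask is negative and 0.0
     otherwise, per the vmaskmov semantics.  */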

  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },

  /* FSGSBASE */
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
};
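/* Neither table is consumed directly; at initialization the port
   walks them and registers each named entry.  A minimal sketch of
   that loop, assuming the usual def_builtin helper and
   ix86_get_builtin_func_type:

     for (i = 0, d = bdesc_special_args;
          i < ARRAY_SIZE (bdesc_special_args);
          i++, d++)
       {
         ftype = ix86_get_builtin_func_type
                   ((enum ix86_builtin_func_type) d->flag);
         def_builtin (d->mask, d->name, ftype, d->code);
       }

   Entries whose name field is 0 (e.g. IX86_BUILTIN_MFENCE above) are
   skipped by the loop and registered by hand elsewhere.  */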
/* Builtins with variable number of arguments. */
static const struct builtin_description bdesc_args[] =
{
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
  { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdpmc, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
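  /* The *_FTYPE_* selector encodes the user-level prototype; e.g. the
     UINT8_FTYPE_UINT8_INT entries above correspond to (illustrative
     sketch, not generated code)

       unsigned char r = __builtin_ia32_rolqi (x, 3);   -- rolb $3

     an 8-bit rotate-left by the given count.  */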

  /* MMX */
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },

  /* 3DNow! */
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },

  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  /* 3DNow!A */
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },

  /* SSE */
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
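  /* Note the encoding of the ordered compares above: there are no
     separate GT/GE patterns, so cmpgtps/cmpgeps reuse the LT/LE
     comparison with the _SWAP ftype, which exchanges the two operands
     at expansion time, and the negated forms map "not less than" to
     UNGE and "not less or equal" to UNGT.  */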

  { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },

  { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },

  /* SSE MMX or 3DNow!A */
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },

  /* SSE2 */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2df", IX86_BUILTIN_VEC_PERM_V2DF, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4sf", IX86_BUILTIN_VEC_PERM_V4SF, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di", IX86_BUILTIN_VEC_PERM_V2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si", IX86_BUILTIN_VEC_PERM_V4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi", IX86_BUILTIN_VEC_PERM_V8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi", IX86_BUILTIN_VEC_PERM_V16QI, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di_u", IX86_BUILTIN_VEC_PERM_V2DI_U, UNKNOWN, (int) V2UDI_FTYPE_V2UDI_V2UDI_V2UDI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si_u", IX86_BUILTIN_VEC_PERM_V4SI_U, UNKNOWN, (int) V4USI_FTYPE_V4USI_V4USI_V4USI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi_u", IX86_BUILTIN_VEC_PERM_V8HI_U, UNKNOWN, (int) V8UHI_FTYPE_V8UHI_V8UHI_V8UHI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi_u", IX86_BUILTIN_VEC_PERM_V16QI_U, UNKNOWN, (int) V16UQI_FTYPE_V16UQI_V16UQI_V16UQI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4df", IX86_BUILTIN_VEC_PERM_V4DF, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8sf", IX86_BUILTIN_VEC_PERM_V8SF, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI },
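  /* CODE_FOR_nothing above marks builtins with no single insn
     pattern; the vec_perm family is instead lowered specially by the
     target's permutation support, which selects a shuffle sequence
     from the (possibly constant) selector vector.  */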

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtudq2ps, "__builtin_ia32_cvtudq2ps", IX86_BUILTIN_CVTUDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
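  /* Each shift above comes in two flavors: the *_SI_COUNT forms take
     the count as a scalar int, the *_Vn_COUNT forms take it in the
     low part of a vector operand, matching the immediate and register
     forms of the hardware shifts.  The *_INT_CONVERT entries
     (pslldqi128/psrldqi128) shift the full 128-bit value, expanded in
     the wide V1TI mode with the count converted accordingly.  */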
24530 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
24531 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
24532 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
24534 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
24536 { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
24537 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
24539 { OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
24542 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
24543 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
24546 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF},
24547 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
24549 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24550 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24551 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24552 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24553 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24554 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },

  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
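/* A hedged usage sketch (user code, not part of this file): with -mssse3
   the pabs rows above surface as directly callable builtins.  The vector
   typedef is an assumption for the example; real callers normally reach
   these through tmmintrin.h.

       typedef char v16qi __attribute__ ((vector_size (16)));

       v16qi
       abs_bytes (v16qi x)
       {
         return __builtin_ia32_pabsb128 (x);
       }

   Since the row supplies CODE_FOR_absv16qi2, the call above should expand
   to a single pabsb instruction.  */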

  /* SSE4.1 */
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
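/* The three ptest rows share a single insn pattern; the EQ/LTU/GTU field
   picks which condition-code bit the expander turns into the integer
   result (ZF for ptestz, CF for ptestc, "neither set" for ptestnzc).
   A minimal sketch of the ptestz case - user code, with the typedef
   assumed here for the example:

       typedef long long v2di __attribute__ ((vector_size (16)));

       int
       all_masked_bits_zero (v2di mask, v2di val)
       {
         return __builtin_ia32_ptestz128 (mask, val);
       }
*/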

  /* SSE4.2 */
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },

  /* SSE4A */
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  /* AES */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  /* PCLMUL */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },

  /* AVX */
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
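/* One expander, several builtins: the floor/ceil/trunc/rint rows above
   reuse CODE_FOR_avx_roundps256 (and its pd sibling) and carry the
   rounding selector in the otherwise-unused comparison-code slot, so no
   extra insn patterns are needed.  A hedged user-level equivalent, on the
   assumption that 0x01 is the round-toward-negative-infinity immediate
   (immintrin.h provides __m256):

       #include <immintrin.h>

       __m256
       floor8 (__m256 x)
       {
         return __builtin_ia32_roundps256 (x, 0x01);
       }
*/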

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },

  { OPTION_MASK_ISA_ABM, CODE_FOR_clzhi2_abm, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },

  /* BMI */
  { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
  { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },

  /* TBM */
  { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },

  /* F16C */
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
};

/* FMA4 and XOP.  */
#define MULTI_ARG_4_DF2_DI_I	V2DF_FTYPE_V2DF_V2DF_V2DI_INT
#define MULTI_ARG_4_DF2_DI_I1	V4DF_FTYPE_V4DF_V4DF_V4DI_INT
#define MULTI_ARG_4_SF2_SI_I	V4SF_FTYPE_V4SF_V4SF_V4SI_INT
#define MULTI_ARG_4_SF2_SI_I1	V8SF_FTYPE_V8SF_V8SF_V8SI_INT
#define MULTI_ARG_3_SF		V4SF_FTYPE_V4SF_V4SF_V4SF
#define MULTI_ARG_3_DF		V2DF_FTYPE_V2DF_V2DF_V2DF
#define MULTI_ARG_3_SF2		V8SF_FTYPE_V8SF_V8SF_V8SF
#define MULTI_ARG_3_DF2		V4DF_FTYPE_V4DF_V4DF_V4DF
#define MULTI_ARG_3_DI		V2DI_FTYPE_V2DI_V2DI_V2DI
#define MULTI_ARG_3_SI		V4SI_FTYPE_V4SI_V4SI_V4SI
#define MULTI_ARG_3_SI_DI	V4SI_FTYPE_V4SI_V4SI_V2DI
#define MULTI_ARG_3_HI		V8HI_FTYPE_V8HI_V8HI_V8HI
#define MULTI_ARG_3_HI_SI	V8HI_FTYPE_V8HI_V8HI_V4SI
#define MULTI_ARG_3_QI		V16QI_FTYPE_V16QI_V16QI_V16QI
#define MULTI_ARG_3_DI2		V4DI_FTYPE_V4DI_V4DI_V4DI
#define MULTI_ARG_3_SI2		V8SI_FTYPE_V8SI_V8SI_V8SI
#define MULTI_ARG_3_HI2		V16HI_FTYPE_V16HI_V16HI_V16HI
#define MULTI_ARG_3_QI2		V32QI_FTYPE_V32QI_V32QI_V32QI
#define MULTI_ARG_2_SF		V4SF_FTYPE_V4SF_V4SF
#define MULTI_ARG_2_DF		V2DF_FTYPE_V2DF_V2DF
#define MULTI_ARG_2_DI		V2DI_FTYPE_V2DI_V2DI
#define MULTI_ARG_2_SI		V4SI_FTYPE_V4SI_V4SI
#define MULTI_ARG_2_HI		V8HI_FTYPE_V8HI_V8HI
#define MULTI_ARG_2_QI		V16QI_FTYPE_V16QI_V16QI
#define MULTI_ARG_2_DI_IMM	V2DI_FTYPE_V2DI_SI
#define MULTI_ARG_2_SI_IMM	V4SI_FTYPE_V4SI_SI
#define MULTI_ARG_2_HI_IMM	V8HI_FTYPE_V8HI_SI
#define MULTI_ARG_2_QI_IMM	V16QI_FTYPE_V16QI_SI
#define MULTI_ARG_2_DI_CMP	V2DI_FTYPE_V2DI_V2DI_CMP
#define MULTI_ARG_2_SI_CMP	V4SI_FTYPE_V4SI_V4SI_CMP
#define MULTI_ARG_2_HI_CMP	V8HI_FTYPE_V8HI_V8HI_CMP
#define MULTI_ARG_2_QI_CMP	V16QI_FTYPE_V16QI_V16QI_CMP
#define MULTI_ARG_2_SF_TF	V4SF_FTYPE_V4SF_V4SF_TF
#define MULTI_ARG_2_DF_TF	V2DF_FTYPE_V2DF_V2DF_TF
#define MULTI_ARG_2_DI_TF	V2DI_FTYPE_V2DI_V2DI_TF
#define MULTI_ARG_2_SI_TF	V4SI_FTYPE_V4SI_V4SI_TF
#define MULTI_ARG_2_HI_TF	V8HI_FTYPE_V8HI_V8HI_TF
#define MULTI_ARG_2_QI_TF	V16QI_FTYPE_V16QI_V16QI_TF
#define MULTI_ARG_1_SF		V4SF_FTYPE_V4SF
#define MULTI_ARG_1_DF		V2DF_FTYPE_V2DF
#define MULTI_ARG_1_SF2		V8SF_FTYPE_V8SF
#define MULTI_ARG_1_DF2		V4DF_FTYPE_V4DF
#define MULTI_ARG_1_DI		V2DI_FTYPE_V2DI
#define MULTI_ARG_1_SI		V4SI_FTYPE_V4SI
#define MULTI_ARG_1_HI		V8HI_FTYPE_V8HI
#define MULTI_ARG_1_QI		V16QI_FTYPE_V16QI
#define MULTI_ARG_1_SI_DI	V2DI_FTYPE_V4SI
#define MULTI_ARG_1_HI_DI	V2DI_FTYPE_V8HI
#define MULTI_ARG_1_HI_SI	V4SI_FTYPE_V8HI
#define MULTI_ARG_1_QI_DI	V2DI_FTYPE_V16QI
#define MULTI_ARG_1_QI_SI	V4SI_FTYPE_V16QI
#define MULTI_ARG_1_QI_HI	V8HI_FTYPE_V16QI
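/* Reading guide (an annotation, not original text): MULTI_ARG_<n>_<mode>
   encodes the operand count and element mode, a trailing 2 (SF2, DI2, ...)
   marks the 256-bit AVX widening of the same shape, _IMM an immediate
   count operand, _CMP an extra comparison-code operand and _TF the pcom
   true/false forms.  So MULTI_ARG_3_SF, i.e. V4SF_FTYPE_V4SF_V4SF_V4SF,
   corresponds roughly to this C prototype (typedef assumed):

       typedef float v4sf __attribute__ ((vector_size (16)));
       v4sf fn (v4sf, v4sf, v4sf);
*/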

static const struct builtin_description bdesc_multi_arg[] =
{
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
    "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
    UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
    "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
    UNKNOWN, (int)MULTI_ARG_3_DF },

  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
    "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
    UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
    "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
    UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
    "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
    UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
    "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
    UNKNOWN, (int)MULTI_ARG_3_DF2 },

  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
    "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
    UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
    "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
    UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
    "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
    UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
    "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
    UNKNOWN, (int)MULTI_ARG_3_DF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
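/* The XOP shift/rotate rows come in pairs: the xop_vrotl* patterns take a
   per-element count vector (MULTI_ARG_2_DI and friends), while xop_rotl*
   is the immediate-count form (the _IMM signatures, whose second operand
   is a plain int).  A hedged sketch of the immediate form, assuming -mxop
   and the typedef below:

       typedef int v4si __attribute__ ((vector_size (16)));

       v4si
       rotate_left_8 (v4si x)
       {
         return __builtin_ia32_vprotdi (x, 8);
       }
*/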

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub", IX86_BUILTIN_VPCOMFALSEUB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw", IX86_BUILTIN_VPCOMFALSEUW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud", IX86_BUILTIN_VPCOMFALSEUD, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq", IX86_BUILTIN_VPCOMFALSEUQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
};

/* Set up all the MMX/SSE builtins, even builtins for instructions that are
   not in the current target ISA, to allow the user to compile particular
   modules with different target-specific options that differ from the
   command line options.  */
static void
ix86_init_mmx_sse_builtins (void)
{
  const struct builtin_description * d;
  enum ix86_builtin_func_type ftype;
  size_t i;

  /* Add all special builtins with variable number of operands.  */
  for (i = 0, d = bdesc_special_args;
       i < ARRAY_SIZE (bdesc_special_args);
       i++, d++)
    {
      if (d->name == 0)
	continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin (d->mask, d->name, ftype, d->code);
    }

  /* Add all builtins with variable number of operands.  */
  for (i = 0, d = bdesc_args;
       i < ARRAY_SIZE (bdesc_args);
       i++, d++)
    {
      if (d->name == 0)
	continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* pcmpestr[im] insns.  */
  for (i = 0, d = bdesc_pcmpestr;
       i < ARRAY_SIZE (bdesc_pcmpestr);
       i++, d++)
    {
      if (d->code == IX86_BUILTIN_PCMPESTRM128)
	ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
      else
	ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* pcmpistr[im] insns.  */
  for (i = 0, d = bdesc_pcmpistr;
       i < ARRAY_SIZE (bdesc_pcmpistr);
       i++, d++)
    {
      if (d->code == IX86_BUILTIN_PCMPISTRM128)
	ftype = V16QI_FTYPE_V16QI_V16QI_INT;
      else
	ftype = INT_FTYPE_V16QI_V16QI_INT;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* comi/ucomi insns.  */
  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    {
      if (d->mask == OPTION_MASK_ISA_SSE2)
	ftype = INT_FTYPE_V2DF_V2DF;
      else
	ftype = INT_FTYPE_V4SF_V4SF;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* SSE */
  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
	       VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
	       UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);

  /* SSE or 3DNow!A */
  def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
	       "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
	       IX86_BUILTIN_MASKMOVQ);

  /* SSE2 */
  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
	       VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);

  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
	       VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
  x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
			    VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);

  /* SSE3.  */
  def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
	       VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
  def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
	       VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);

  /* AES */
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
		     V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
		     V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
25181 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
25182 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
25185 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
25186 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
25187 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
25188 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
25189 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
25190 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
25191 IX86_BUILTIN_RDRAND64_STEP);
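
  /* Illustrative usage (not part of GCC; consume () is a placeholder
     for arbitrary user code): immintrin.h wraps the step builtins
     defined above, e.g.

       unsigned int val;
       if (__builtin_ia32_rdrand32_step (&val))
         consume (val);

     The int result is nonzero exactly when the hardware produced a
     random value; rdrand signals success by setting CF.  */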

  /* MMX access to the vec_init patterns.  */
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
                     V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
                     V4HI_FTYPE_HI_HI_HI_HI,
                     IX86_BUILTIN_VEC_INIT_V4HI);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
                     V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
                     IX86_BUILTIN_VEC_INIT_V8QI);
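
  /* Illustrative usage (not part of GCC): mmintrin.h builds MMX values
     through these builtins, e.g. _mm_set_pi32 (i1, i0) expands to
     __builtin_ia32_vec_init_v2si (i0, i1), which is routed to
     ix86_expand_vec_init_builtin below instead of a vec_init pattern
     in mmx.md.  */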

  /* Access to the vec_extract patterns.  */
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
                     DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
                     DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
  def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
                     FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
                     SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
                     HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);

  def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
                     "__builtin_ia32_vec_ext_v4hi",
                     HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
                     SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);

  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
                     QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
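
  /* Illustrative usage (not part of GCC): emmintrin.h's
     _mm_extract_epi16 (v, 3) becomes
     __builtin_ia32_vec_ext_v8hi ((__v8hi) v, 3); the selector must be
     a constant in range, which get_element_number below enforces.  */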

  /* Access to the vec_set patterns.  */
  def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
                     "__builtin_ia32_vec_set_v2di",
                     V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);

  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
                     V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);

  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
                     V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);

  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
                     V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);

  def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
                     "__builtin_ia32_vec_set_v4hi",
                     V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);

  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
                     V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
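
  /* Illustrative usage (not part of GCC): likewise
     _mm_insert_epi16 (v, x, 3) becomes
     __builtin_ia32_vec_set_v8hi ((__v8hi) v, x, 3), which is expanded
     by ix86_expand_vec_set_builtin below.  */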

  /* Add FMA4 and XOP multi-argument builtins.  */
  for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
    {
      if (d->name == 0)
        continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }
}

/* Internal method for ix86_init_builtins.  */

static void
ix86_init_builtins_va_builtins_abi (void)
{
  tree ms_va_ref, sysv_va_ref;
  tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
  tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
  tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
  tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;

  if (!TARGET_64BIT)
    return;
  fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
  fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
  ms_va_ref = build_reference_type (ms_va_list_type_node);
  sysv_va_ref =
    build_pointer_type (TREE_TYPE (sysv_va_list_type_node));

  fnvoid_va_end_ms =
    build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
  fnvoid_va_start_ms =
    build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
  fnvoid_va_end_sysv =
    build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
  fnvoid_va_start_sysv =
    build_varargs_function_type_list (void_type_node, sysv_va_ref,
                                      NULL_TREE);
  fnvoid_va_copy_ms =
    build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
                              NULL_TREE);
  fnvoid_va_copy_sysv =
    build_function_type_list (void_type_node, sysv_va_ref,
                              sysv_va_ref, NULL_TREE);

  add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
                        BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
                        BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
                        BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
                        BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
  add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
                        BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
  add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
                        BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
}
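
/* Illustrative usage (not part of GCC): these declarations let 64-bit
   code process varargs of the foreign ABI explicitly, e.g.

     void __attribute__ ((sysv_abi)) f (int n, ...)
     {
       __builtin_sysv_va_list ap;
       __builtin_sysv_va_start (ap, n);
       ... __builtin_va_arg as usual ...
       __builtin_sysv_va_end (ap);
     }

   even when the enclosing translation unit targets the MS ABI.  */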

static void
ix86_init_builtin_types (void)
{
  tree float128_type_node, float80_type_node;

  /* The __float80 type.  */
  float80_type_node = long_double_type_node;
  if (TYPE_MODE (float80_type_node) != XFmode)
    {
      /* The __float80 type.  */
      float80_type_node = make_node (REAL_TYPE);

      TYPE_PRECISION (float80_type_node) = 80;
      layout_type (float80_type_node);
    }
  lang_hooks.types.register_builtin_type (float80_type_node, "__float80");

  /* The __float128 type.  */
  float128_type_node = make_node (REAL_TYPE);
  TYPE_PRECISION (float128_type_node) = 128;
  layout_type (float128_type_node);
  lang_hooks.types.register_builtin_type (float128_type_node, "__float128");

  /* This macro is built by i386-builtin-types.awk.  */
  DEFINE_BUILTIN_PRIMITIVE_TYPES;
}
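
/* Illustrative usage (not part of GCC): after the registrations above,
   x86 user code may write

     __float80  ext = 1.0L;    // XFmode, 80-bit extended precision
     __float128 quad;          // TFmode, 128-bit

   even on targets where long double itself is narrower.  */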

static void
ix86_init_builtins (void)
{
  tree t;

  ix86_init_builtin_types ();

  /* TFmode support builtins.  */
  def_builtin_const (0, "__builtin_infq",
                     FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
  def_builtin_const (0, "__builtin_huge_valq",
                     FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);

  /* We will expand them to normal call if SSE2 isn't available since
     they are used by libgcc.  */
  t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
  t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
                            BUILT_IN_MD, "__fabstf2", NULL_TREE);
  TREE_READONLY (t) = 1;
  ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;

  t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
  t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
                            BUILT_IN_MD, "__copysigntf3", NULL_TREE);
  TREE_READONLY (t) = 1;
  ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;

  ix86_init_mmx_sse_builtins ();

  if (TARGET_64BIT)
    ix86_init_builtins_va_builtins_abi ();

#ifdef SUBTARGET_INIT_BUILTINS
  SUBTARGET_INIT_BUILTINS;
#endif
}

/* Return the ix86 builtin for CODE.  */

static tree
ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
{
  if (code >= IX86_BUILTIN_MAX)
    return error_mark_node;

  return ix86_builtins[code];
}

/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */

static rtx
safe_vector_operand (rtx x, enum machine_mode mode)
{
  if (x == const0_rtx)
    x = CONST0_RTX (mode);
  return x;
}

/* Subroutine of ix86_expand_builtin to take care of binop insns.  */

static rtx
ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if (GET_MODE (op1) == SImode && mode1 == TImode)
    {
      rtx x = gen_reg_rtx (V4SImode);
      emit_insn (gen_sse2_loadd (x, op1));
      op1 = gen_lowpart (TImode, x);
    }

  if (!insn_data[icode].operand[1].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (!insn_data[icode].operand[2].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;

  emit_insn (pat);

  return target;
}
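
/* For illustration (not part of GCC): a typical two-operand builtin
   such as __builtin_ia32_paddw128 reaches this helper with
   icode == CODE_FOR_addv8hi3, so the GEN_FCN call above amounts to

     pat = gen_addv8hi3 (target, op0, op1);

   i.e. the named expander from sse.md is invoked once both operands
   satisfy its predicates.  */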

/* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns.  */

static rtx
ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
                               enum ix86_builtin_func_type m_type,
                               enum rtx_code sub_code)
{
  rtx pat;
  int i;
  int nargs;
  bool comparison_p = false;
  bool tf_p = false;
  bool last_arg_constant = false;
  int num_memory = 0;
  struct {
    rtx op;
    enum machine_mode mode;
  } args[4];

  enum machine_mode tmode = insn_data[icode].operand[0].mode;

  switch (m_type)
    {
    case MULTI_ARG_4_DF2_DI_I:
    case MULTI_ARG_4_DF2_DI_I1:
    case MULTI_ARG_4_SF2_SI_I:
    case MULTI_ARG_4_SF2_SI_I1:
      nargs = 4;
      last_arg_constant = true;
      break;

    case MULTI_ARG_3_SF:
    case MULTI_ARG_3_DF:
    case MULTI_ARG_3_SF2:
    case MULTI_ARG_3_DF2:
    case MULTI_ARG_3_DI:
    case MULTI_ARG_3_SI:
    case MULTI_ARG_3_SI_DI:
    case MULTI_ARG_3_HI:
    case MULTI_ARG_3_HI_SI:
    case MULTI_ARG_3_QI:
    case MULTI_ARG_3_DI2:
    case MULTI_ARG_3_SI2:
    case MULTI_ARG_3_HI2:
    case MULTI_ARG_3_QI2:
      nargs = 3;
      break;

    case MULTI_ARG_2_SF:
    case MULTI_ARG_2_DF:
    case MULTI_ARG_2_DI:
    case MULTI_ARG_2_SI:
    case MULTI_ARG_2_HI:
    case MULTI_ARG_2_QI:
      nargs = 2;
      break;

    case MULTI_ARG_2_DI_IMM:
    case MULTI_ARG_2_SI_IMM:
    case MULTI_ARG_2_HI_IMM:
    case MULTI_ARG_2_QI_IMM:
      nargs = 2;
      last_arg_constant = true;
      break;

    case MULTI_ARG_1_SF:
    case MULTI_ARG_1_DF:
    case MULTI_ARG_1_SF2:
    case MULTI_ARG_1_DF2:
    case MULTI_ARG_1_DI:
    case MULTI_ARG_1_SI:
    case MULTI_ARG_1_HI:
    case MULTI_ARG_1_QI:
    case MULTI_ARG_1_SI_DI:
    case MULTI_ARG_1_HI_DI:
    case MULTI_ARG_1_HI_SI:
    case MULTI_ARG_1_QI_DI:
    case MULTI_ARG_1_QI_SI:
    case MULTI_ARG_1_QI_HI:
      nargs = 1;
      break;

    case MULTI_ARG_2_DI_CMP:
    case MULTI_ARG_2_SI_CMP:
    case MULTI_ARG_2_HI_CMP:
    case MULTI_ARG_2_QI_CMP:
      nargs = 2;
      comparison_p = true;
      break;

    case MULTI_ARG_2_SF_TF:
    case MULTI_ARG_2_DF_TF:
    case MULTI_ARG_2_DI_TF:
    case MULTI_ARG_2_SI_TF:
    case MULTI_ARG_2_HI_TF:
    case MULTI_ARG_2_QI_TF:
      nargs = 2;
      tf_p = true;
      break;

    default:
      gcc_unreachable ();
    }

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  gcc_assert (nargs <= 4);

  for (i = 0; i < nargs; i++)
    {
      tree arg = CALL_EXPR_ARG (exp, i);
      rtx op = expand_normal (arg);
      int adjust = (comparison_p) ? 1 : 0;
      enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;

      if (last_arg_constant && i == nargs - 1)
        {
          if (!insn_data[icode].operand[i + 1].predicate (op, mode))
            {
              enum insn_code new_icode = icode;
              switch (icode)
                {
                case CODE_FOR_xop_vpermil2v2df3:
                case CODE_FOR_xop_vpermil2v4sf3:
                case CODE_FOR_xop_vpermil2v4df3:
                case CODE_FOR_xop_vpermil2v8sf3:
                  error ("the last argument must be a 2-bit immediate");
                  return gen_reg_rtx (tmode);
                case CODE_FOR_xop_rotlv2di3:
                  new_icode = CODE_FOR_rotlv2di3;
                  goto xop_rotl;
                case CODE_FOR_xop_rotlv4si3:
                  new_icode = CODE_FOR_rotlv4si3;
                  goto xop_rotl;
                case CODE_FOR_xop_rotlv8hi3:
                  new_icode = CODE_FOR_rotlv8hi3;
                  goto xop_rotl;
                case CODE_FOR_xop_rotlv16qi3:
                  new_icode = CODE_FOR_rotlv16qi3;
                xop_rotl:
                  if (CONST_INT_P (op))
                    {
                      int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
                      op = GEN_INT (INTVAL (op) & mask);
                      gcc_checking_assert
                        (insn_data[icode].operand[i + 1].predicate (op, mode));
                    }
                  else
                    {
                      gcc_checking_assert
                        (nargs == 2
                         && insn_data[new_icode].operand[0].mode == tmode
                         && insn_data[new_icode].operand[1].mode == tmode
                         && insn_data[new_icode].operand[2].mode == mode
                         && insn_data[new_icode].operand[0].predicate
                            == insn_data[icode].operand[0].predicate
                         && insn_data[new_icode].operand[1].predicate
                            == insn_data[icode].operand[1].predicate);
                      icode = new_icode;
                      goto non_constant;
                    }
                  break;
                default:
                  gcc_unreachable ();
                }
            }
        }
      else
        {
        non_constant:
          if (VECTOR_MODE_P (mode))
            op = safe_vector_operand (op, mode);

          /* If we aren't optimizing, only allow one memory operand to be
             generated.  */
          if (memory_operand (op, mode))
            num_memory++;

          gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);

          if (optimize
              || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
              || num_memory > 1)
            op = force_reg (mode, op);
        }

      args[i].op = op;
      args[i].mode = mode;
    }

  switch (nargs)
    {
    case 1:
      pat = GEN_FCN (icode) (target, args[0].op);
      break;

    case 2:
      if (tf_p)
        pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
                               GEN_INT ((int)sub_code));
      else if (! comparison_p)
        pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
      else
        {
          rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
                                       args[0].op,
                                       args[1].op);

          pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
        }
      break;

    case 3:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
      break;

    case 4:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
      break;

    default:
      gcc_unreachable ();
    }

  if (! pat)
    return 0;

  emit_insn (pat);
  return target;
}
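
/* For illustration (not part of GCC): the constant-rotate path above
   masks the count to the element width, so an XOP rotate builtin such
   as __builtin_ia32_vprotwi with a count of 19 on 16-bit elements is
   rewritten as 19 & (16 - 1) = 3, matching the modulo behavior of the
   instruction; non-constant counts are retargeted to the generic
   rotlv* expanders instead.  */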

/* Subroutine of ix86_expand_args_builtin to take care of scalar unop
   insns with vec_merge.  */

static rtx
ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
                                    rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op1, op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[icode].operand[1].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  op1 = op0;
  if (!insn_data[icode].operand[2].predicate (op1, mode0))
    op1 = copy_to_mode_reg (mode0, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of comparison insns.  */

static rtx
ix86_expand_sse_compare (const struct builtin_description *d,
                         tree exp, rtx target, bool swap)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (swap)
    {
      rtx tmp = gen_reg_rtx (mode1);
      emit_move_insn (tmp, op1);
      op1 = op0;
      op0 = tmp;
    }

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[d->icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[1].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[2].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
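
/* For illustration (not part of GCC): SSE only encodes the "less"
   style comparisons, so __builtin_ia32_cmpgtps is described in
   bdesc_comi-style tables with comparison LT plus the SWAP flag and
   ends up here emitting cmpltps with its two operands exchanged.  */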

/* Subroutine of ix86_expand_builtin to take care of comi insns.  */

static rtx
ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
                      rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
    {
      rtx tmp = op1;
      op1 = op0;
      op0 = tmp;
    }

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[1].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
                          gen_rtx_STRICT_LOW_PART (VOIDmode, target),
                          gen_rtx_fmt_ee (comparison, QImode,
                                          SET_DEST (pat),
                                          const0_rtx)));

  return SUBREG_REG (target);
}

/* Subroutine of ix86_expand_args_builtin to take care of round insns.  */

static rtx
ix86_expand_sse_round (const struct builtin_description *d, tree exp,
                       rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op1, op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;

  if (optimize || target == 0
      || GET_MODE (target) != tmode
      || !insn_data[d->icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  op1 = GEN_INT (d->comparison);

  pat = GEN_FCN (d->icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of ptest insns.  */

static rtx
ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
                       rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[1].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
                          gen_rtx_STRICT_LOW_PART (VOIDmode, target),
                          gen_rtx_fmt_ee (comparison, QImode,
                                          SET_DEST (pat),
                                          const0_rtx)));

  return SUBREG_REG (target);
}
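
/* For illustration (not part of GCC): for __builtin_ia32_ptestz128
   (comparison EQ), the code above emits the ptest insn, which only
   sets the flags, and then materializes the truth value roughly as

     ptest %xmm1, %xmm0
     sete  %al

   with the upper bits of the SImode result pre-zeroed by the
   const0_rtx move, so the builtin yields exactly 0 or 1.  */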

/* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns.  */

static rtx
ix86_expand_sse_pcmpestr (const struct builtin_description *d,
                          tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  tree arg2 = CALL_EXPR_ARG (exp, 2);
  tree arg3 = CALL_EXPR_ARG (exp, 3);
  tree arg4 = CALL_EXPR_ARG (exp, 4);
  rtx scratch0, scratch1;
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2 = expand_normal (arg2);
  rtx op3 = expand_normal (arg3);
  rtx op4 = expand_normal (arg4);
  enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;

  tmode0 = insn_data[d->icode].operand[0].mode;
  tmode1 = insn_data[d->icode].operand[1].mode;
  modev2 = insn_data[d->icode].operand[2].mode;
  modei3 = insn_data[d->icode].operand[3].mode;
  modev4 = insn_data[d->icode].operand[4].mode;
  modei5 = insn_data[d->icode].operand[5].mode;
  modeimm = insn_data[d->icode].operand[6].mode;

  if (VECTOR_MODE_P (modev2))
    op0 = safe_vector_operand (op0, modev2);
  if (VECTOR_MODE_P (modev4))
    op2 = safe_vector_operand (op2, modev4);

  if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
    op0 = copy_to_mode_reg (modev2, op0);
  if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
    op1 = copy_to_mode_reg (modei3, op1);
  if ((optimize && !register_operand (op2, modev4))
      || !insn_data[d->icode].operand[4].predicate (op2, modev4))
    op2 = copy_to_mode_reg (modev4, op2);
  if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
    op3 = copy_to_mode_reg (modei5, op3);

  if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
    {
      error ("the fifth argument must be an 8-bit immediate");
      return const0_rtx;
    }

  if (d->code == IX86_BUILTIN_PCMPESTRI128)
    {
      if (optimize || !target
          || GET_MODE (target) != tmode0
          || !insn_data[d->icode].operand[0].predicate (target, tmode0))
        target = gen_reg_rtx (tmode0);

      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
    }
  else if (d->code == IX86_BUILTIN_PCMPESTRM128)
    {
      if (optimize || !target
          || GET_MODE (target) != tmode1
          || !insn_data[d->icode].operand[1].predicate (target, tmode1))
        target = gen_reg_rtx (tmode1);

      scratch0 = gen_reg_rtx (tmode0);

      pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
    }
  else
    {
      gcc_assert (d->flag);

      scratch0 = gen_reg_rtx (tmode0);
      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
    }

  if (! pat)
    return 0;

  emit_insn (pat);

  if (d->flag)
    {
      target = gen_reg_rtx (SImode);
      emit_move_insn (target, const0_rtx);
      target = gen_rtx_SUBREG (QImode, target, 0);

      emit_insn
        (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
                      gen_rtx_fmt_ee (EQ, QImode,
                                      gen_rtx_REG ((enum machine_mode) d->flag,
                                                   FLAGS_REG),
                                      const0_rtx)));
      return SUBREG_REG (target);
    }
  else
    return target;
}

/* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns.  */

static rtx
ix86_expand_sse_pcmpistr (const struct builtin_description *d,
                          tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  tree arg2 = CALL_EXPR_ARG (exp, 2);
  rtx scratch0, scratch1;
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2 = expand_normal (arg2);
  enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;

  tmode0 = insn_data[d->icode].operand[0].mode;
  tmode1 = insn_data[d->icode].operand[1].mode;
  modev2 = insn_data[d->icode].operand[2].mode;
  modev3 = insn_data[d->icode].operand[3].mode;
  modeimm = insn_data[d->icode].operand[4].mode;

  if (VECTOR_MODE_P (modev2))
    op0 = safe_vector_operand (op0, modev2);
  if (VECTOR_MODE_P (modev3))
    op1 = safe_vector_operand (op1, modev3);

  if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
    op0 = copy_to_mode_reg (modev2, op0);
  if ((optimize && !register_operand (op1, modev3))
      || !insn_data[d->icode].operand[3].predicate (op1, modev3))
    op1 = copy_to_mode_reg (modev3, op1);

  if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
    {
      error ("the third argument must be an 8-bit immediate");
      return const0_rtx;
    }

  if (d->code == IX86_BUILTIN_PCMPISTRI128)
    {
      if (optimize || !target
          || GET_MODE (target) != tmode0
          || !insn_data[d->icode].operand[0].predicate (target, tmode0))
        target = gen_reg_rtx (tmode0);

      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
    }
  else if (d->code == IX86_BUILTIN_PCMPISTRM128)
    {
      if (optimize || !target
          || GET_MODE (target) != tmode1
          || !insn_data[d->icode].operand[1].predicate (target, tmode1))
        target = gen_reg_rtx (tmode1);

      scratch0 = gen_reg_rtx (tmode0);

      pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
    }
  else
    {
      gcc_assert (d->flag);

      scratch0 = gen_reg_rtx (tmode0);
      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
    }

  if (! pat)
    return 0;

  emit_insn (pat);

  if (d->flag)
    {
      target = gen_reg_rtx (SImode);
      emit_move_insn (target, const0_rtx);
      target = gen_rtx_SUBREG (QImode, target, 0);

      emit_insn
        (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
                      gen_rtx_fmt_ee (EQ, QImode,
                                      gen_rtx_REG ((enum machine_mode) d->flag,
                                                   FLAGS_REG),
                                      const0_rtx)));
      return SUBREG_REG (target);
    }
  else
    return target;
}

/* Subroutine of ix86_expand_builtin to take care of insns with
   variable number of operands.  */

static rtx
ix86_expand_args_builtin (const struct builtin_description *d,
                          tree exp, rtx target)
{
  rtx pat, real_target;
  unsigned int i, nargs;
  unsigned int nargs_constant = 0;
  int num_memory = 0;
  struct
    {
      rtx op;
      enum machine_mode mode;
    } args[4];
  bool last_arg_count = false;
  enum insn_code icode = d->icode;
  const struct insn_data_d *insn_p = &insn_data[icode];
  enum machine_mode tmode = insn_p->operand[0].mode;
  enum machine_mode rmode = VOIDmode;
  bool swap = false;
  enum rtx_code comparison = d->comparison;

  switch ((enum ix86_builtin_func_type) d->flag)
    {
    case V2DF_FTYPE_V2DF_ROUND:
    case V4DF_FTYPE_V4DF_ROUND:
    case V4SF_FTYPE_V4SF_ROUND:
    case V8SF_FTYPE_V8SF_ROUND:
      return ix86_expand_sse_round (d, exp, target);
    case INT_FTYPE_V8SF_V8SF_PTEST:
    case INT_FTYPE_V4DI_V4DI_PTEST:
    case INT_FTYPE_V4DF_V4DF_PTEST:
    case INT_FTYPE_V4SF_V4SF_PTEST:
    case INT_FTYPE_V2DI_V2DI_PTEST:
    case INT_FTYPE_V2DF_V2DF_PTEST:
      return ix86_expand_sse_ptest (d, exp, target);
    case FLOAT128_FTYPE_FLOAT128:
    case FLOAT_FTYPE_FLOAT:
    case INT_FTYPE_INT:
    case UINT64_FTYPE_INT:
    case UINT16_FTYPE_UINT16:
    case INT64_FTYPE_INT64:
    case INT64_FTYPE_V4SF:
    case INT64_FTYPE_V2DF:
    case INT_FTYPE_V16QI:
    case INT_FTYPE_V8QI:
    case INT_FTYPE_V8SF:
    case INT_FTYPE_V4DF:
    case INT_FTYPE_V4SF:
    case INT_FTYPE_V2DF:
    case V16QI_FTYPE_V16QI:
    case V8SI_FTYPE_V8SF:
    case V8SI_FTYPE_V4SI:
    case V8HI_FTYPE_V8HI:
    case V8HI_FTYPE_V16QI:
    case V8QI_FTYPE_V8QI:
    case V8SF_FTYPE_V8SF:
    case V8SF_FTYPE_V8SI:
    case V8SF_FTYPE_V4SF:
    case V8SF_FTYPE_V8HI:
    case V4SI_FTYPE_V4SI:
    case V4SI_FTYPE_V16QI:
    case V4SI_FTYPE_V4SF:
    case V4SI_FTYPE_V8SI:
    case V4SI_FTYPE_V8HI:
    case V4SI_FTYPE_V4DF:
    case V4SI_FTYPE_V2DF:
    case V4HI_FTYPE_V4HI:
    case V4DF_FTYPE_V4DF:
    case V4DF_FTYPE_V4SI:
    case V4DF_FTYPE_V4SF:
    case V4DF_FTYPE_V2DF:
    case V4SF_FTYPE_V4SF:
    case V4SF_FTYPE_V4SI:
    case V4SF_FTYPE_V8SF:
    case V4SF_FTYPE_V4DF:
    case V4SF_FTYPE_V8HI:
    case V4SF_FTYPE_V2DF:
    case V2DI_FTYPE_V2DI:
    case V2DI_FTYPE_V16QI:
    case V2DI_FTYPE_V8HI:
    case V2DI_FTYPE_V4SI:
    case V2DF_FTYPE_V2DF:
    case V2DF_FTYPE_V4SI:
    case V2DF_FTYPE_V4DF:
    case V2DF_FTYPE_V4SF:
    case V2DF_FTYPE_V2SI:
    case V2SI_FTYPE_V2SI:
    case V2SI_FTYPE_V4SF:
    case V2SI_FTYPE_V2SF:
    case V2SI_FTYPE_V2DF:
    case V2SF_FTYPE_V2SF:
    case V2SF_FTYPE_V2SI:
      nargs = 1;
      break;
    case V4SF_FTYPE_V4SF_VEC_MERGE:
    case V2DF_FTYPE_V2DF_VEC_MERGE:
      return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
    case FLOAT128_FTYPE_FLOAT128_FLOAT128:
    case V16QI_FTYPE_V16QI_V16QI:
    case V16QI_FTYPE_V8HI_V8HI:
    case V8QI_FTYPE_V8QI_V8QI:
    case V8QI_FTYPE_V4HI_V4HI:
    case V8HI_FTYPE_V8HI_V8HI:
    case V8HI_FTYPE_V16QI_V16QI:
    case V8HI_FTYPE_V4SI_V4SI:
    case V8SF_FTYPE_V8SF_V8SF:
    case V8SF_FTYPE_V8SF_V8SI:
    case V4SI_FTYPE_V4SI_V4SI:
    case V4SI_FTYPE_V8HI_V8HI:
    case V4SI_FTYPE_V4SF_V4SF:
    case V4SI_FTYPE_V2DF_V2DF:
    case V4HI_FTYPE_V4HI_V4HI:
    case V4HI_FTYPE_V8QI_V8QI:
    case V4HI_FTYPE_V2SI_V2SI:
    case V4DF_FTYPE_V4DF_V4DF:
    case V4DF_FTYPE_V4DF_V4DI:
    case V4SF_FTYPE_V4SF_V4SF:
    case V4SF_FTYPE_V4SF_V4SI:
    case V4SF_FTYPE_V4SF_V2SI:
    case V4SF_FTYPE_V4SF_V2DF:
    case V4SF_FTYPE_V4SF_DI:
    case V4SF_FTYPE_V4SF_SI:
    case V2DI_FTYPE_V2DI_V2DI:
    case V2DI_FTYPE_V16QI_V16QI:
    case V2DI_FTYPE_V4SI_V4SI:
    case V2DI_FTYPE_V2DI_V16QI:
    case V2DI_FTYPE_V2DF_V2DF:
    case V2SI_FTYPE_V2SI_V2SI:
    case V2SI_FTYPE_V4HI_V4HI:
    case V2SI_FTYPE_V2SF_V2SF:
    case V2DF_FTYPE_V2DF_V2DF:
    case V2DF_FTYPE_V2DF_V4SF:
    case V2DF_FTYPE_V2DF_V2DI:
    case V2DF_FTYPE_V2DF_DI:
    case V2DF_FTYPE_V2DF_SI:
    case V2SF_FTYPE_V2SF_V2SF:
    case V1DI_FTYPE_V1DI_V1DI:
    case V1DI_FTYPE_V8QI_V8QI:
    case V1DI_FTYPE_V2SI_V2SI:
      if (comparison == UNKNOWN)
        return ix86_expand_binop_builtin (icode, exp, target);
      nargs = 2;
      break;
    case V4SF_FTYPE_V4SF_V4SF_SWAP:
    case V2DF_FTYPE_V2DF_V2DF_SWAP:
      gcc_assert (comparison != UNKNOWN);
      nargs = 2;
      swap = true;
      break;
    case V8HI_FTYPE_V8HI_V8HI_COUNT:
    case V8HI_FTYPE_V8HI_SI_COUNT:
    case V4SI_FTYPE_V4SI_V4SI_COUNT:
    case V4SI_FTYPE_V4SI_SI_COUNT:
    case V4HI_FTYPE_V4HI_V4HI_COUNT:
    case V4HI_FTYPE_V4HI_SI_COUNT:
    case V2DI_FTYPE_V2DI_V2DI_COUNT:
    case V2DI_FTYPE_V2DI_SI_COUNT:
    case V2SI_FTYPE_V2SI_V2SI_COUNT:
    case V2SI_FTYPE_V2SI_SI_COUNT:
    case V1DI_FTYPE_V1DI_V1DI_COUNT:
    case V1DI_FTYPE_V1DI_SI_COUNT:
      nargs = 2;
      last_arg_count = true;
      break;
    case UINT64_FTYPE_UINT64_UINT64:
    case UINT_FTYPE_UINT_UINT:
    case UINT_FTYPE_UINT_USHORT:
    case UINT_FTYPE_UINT_UCHAR:
    case UINT16_FTYPE_UINT16_INT:
    case UINT8_FTYPE_UINT8_INT:
      nargs = 2;
      break;
    case V2DI_FTYPE_V2DI_INT_CONVERT:
      nargs = 2;
      rmode = V1TImode;
      nargs_constant = 1;
      break;
    case V8HI_FTYPE_V8HI_INT:
    case V8HI_FTYPE_V8SF_INT:
    case V8HI_FTYPE_V4SF_INT:
    case V8SF_FTYPE_V8SF_INT:
    case V4SI_FTYPE_V4SI_INT:
    case V4SI_FTYPE_V8SI_INT:
    case V4HI_FTYPE_V4HI_INT:
    case V4DF_FTYPE_V4DF_INT:
    case V4SF_FTYPE_V4SF_INT:
    case V4SF_FTYPE_V8SF_INT:
    case V2DI_FTYPE_V2DI_INT:
    case V2DF_FTYPE_V2DF_INT:
    case V2DF_FTYPE_V4DF_INT:
      nargs = 2;
      nargs_constant = 1;
      break;
    case V16QI_FTYPE_V16QI_V16QI_V16QI:
    case V8SF_FTYPE_V8SF_V8SF_V8SF:
    case V4DF_FTYPE_V4DF_V4DF_V4DF:
    case V4SF_FTYPE_V4SF_V4SF_V4SF:
    case V2DF_FTYPE_V2DF_V2DF_V2DF:
      nargs = 3;
      break;
    case V16QI_FTYPE_V16QI_V16QI_INT:
    case V8HI_FTYPE_V8HI_V8HI_INT:
    case V8SI_FTYPE_V8SI_V8SI_INT:
    case V8SI_FTYPE_V8SI_V4SI_INT:
    case V8SF_FTYPE_V8SF_V8SF_INT:
    case V8SF_FTYPE_V8SF_V4SF_INT:
    case V4SI_FTYPE_V4SI_V4SI_INT:
    case V4DF_FTYPE_V4DF_V4DF_INT:
    case V4DF_FTYPE_V4DF_V2DF_INT:
    case V4SF_FTYPE_V4SF_V4SF_INT:
    case V2DI_FTYPE_V2DI_V2DI_INT:
    case V2DF_FTYPE_V2DF_V2DF_INT:
      nargs = 3;
      nargs_constant = 1;
      break;
    case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
      nargs = 3;
      rmode = V2DImode;
      nargs_constant = 1;
      break;
    case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
      nargs = 3;
      rmode = DImode;
      nargs_constant = 1;
      break;
    case V2DI_FTYPE_V2DI_UINT_UINT:
      nargs = 3;
      nargs_constant = 2;
      break;
    case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
    case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
    case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
    case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
      nargs = 4;
      nargs_constant = 1;
      break;
    case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
      nargs = 4;
      nargs_constant = 2;
      break;
    default:
      gcc_unreachable ();
    }

  gcc_assert (nargs <= ARRAY_SIZE (args));

  if (comparison != UNKNOWN)
    {
      gcc_assert (nargs == 2);
      return ix86_expand_sse_compare (d, exp, target, swap);
    }

  if (rmode == VOIDmode || rmode == tmode)
    {
      if (optimize
          || target == 0
          || GET_MODE (target) != tmode
          || !insn_p->operand[0].predicate (target, tmode))
        target = gen_reg_rtx (tmode);
      real_target = target;
    }
  else
    {
      target = gen_reg_rtx (rmode);
      real_target = simplify_gen_subreg (tmode, target, rmode, 0);
    }

  for (i = 0; i < nargs; i++)
    {
      tree arg = CALL_EXPR_ARG (exp, i);
      rtx op = expand_normal (arg);
      enum machine_mode mode = insn_p->operand[i + 1].mode;
      bool match = insn_p->operand[i + 1].predicate (op, mode);

      if (last_arg_count && (i + 1) == nargs)
        {
          /* SIMD shift insns take either an 8-bit immediate or
             register as count.  But builtin functions take int as
             count.  If count doesn't match, we put it in register.  */
          if (!match)
            {
              op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
              if (!insn_p->operand[i + 1].predicate (op, mode))
                op = copy_to_reg (op);
            }
        }
      else if ((nargs - i) <= nargs_constant)
        {
          if (!match)
            switch (icode)
              {
              case CODE_FOR_sse4_1_roundpd:
              case CODE_FOR_sse4_1_roundps:
              case CODE_FOR_sse4_1_roundsd:
              case CODE_FOR_sse4_1_roundss:
              case CODE_FOR_sse4_1_blendps:
              case CODE_FOR_avx_blendpd256:
              case CODE_FOR_avx_vpermilv4df:
              case CODE_FOR_avx_roundpd256:
              case CODE_FOR_avx_roundps256:
                error ("the last argument must be a 4-bit immediate");
                return const0_rtx;

              case CODE_FOR_sse4_1_blendpd:
              case CODE_FOR_avx_vpermilv2df:
              case CODE_FOR_xop_vpermil2v2df3:
              case CODE_FOR_xop_vpermil2v4sf3:
              case CODE_FOR_xop_vpermil2v4df3:
              case CODE_FOR_xop_vpermil2v8sf3:
                error ("the last argument must be a 2-bit immediate");
                return const0_rtx;

              case CODE_FOR_avx_vextractf128v4df:
              case CODE_FOR_avx_vextractf128v8sf:
              case CODE_FOR_avx_vextractf128v8si:
              case CODE_FOR_avx_vinsertf128v4df:
              case CODE_FOR_avx_vinsertf128v8sf:
              case CODE_FOR_avx_vinsertf128v8si:
                error ("the last argument must be a 1-bit immediate");
                return const0_rtx;

              case CODE_FOR_avx_vmcmpv2df3:
              case CODE_FOR_avx_vmcmpv4sf3:
              case CODE_FOR_avx_cmpv2df3:
              case CODE_FOR_avx_cmpv4sf3:
              case CODE_FOR_avx_cmpv4df3:
              case CODE_FOR_avx_cmpv8sf3:
                error ("the last argument must be a 5-bit immediate");
                return const0_rtx;

              default:
                switch (nargs_constant)
                  {
                  case 2:
                    if ((nargs - i) == nargs_constant)
                      {
                        error ("the next to last argument must be an 8-bit immediate");
                        break;
                      }
                  case 1:
                    error ("the last argument must be an 8-bit immediate");
                    break;
                  default:
                    gcc_unreachable ();
                  }
                return const0_rtx;
              }
        }
      else
        {
          if (VECTOR_MODE_P (mode))
            op = safe_vector_operand (op, mode);

          /* If we aren't optimizing, only allow one memory operand to
             be generated.  */
          if (memory_operand (op, mode))
            num_memory++;

          if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
            {
              if (optimize || !match || num_memory > 1)
                op = copy_to_mode_reg (mode, op);
            }
          else
            {
              op = copy_to_reg (op);
              op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
            }
        }

      args[i].op = op;
      args[i].mode = mode;
    }

  switch (nargs)
    {
    case 1:
      pat = GEN_FCN (icode) (real_target, args[0].op);
      break;
    case 2:
      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
      break;
    case 3:
      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
                             args[2].op);
      break;
    case 4:
      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
                             args[2].op, args[3].op);
      break;
    default:
      gcc_unreachable ();
    }

  if (! pat)
    return 0;

  emit_insn (pat);
  return target;
}
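
/* For illustration (not part of GCC): the immediate checks above are
   what turn a non-constant selector such as

     __m128d r = __builtin_ia32_blendpd (x, y, k);    (k not constant)

   into the compile-time diagnostic "the last argument must be a 2-bit
   immediate" instead of an unencodable blendpd instruction.  */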

/* Subroutine of ix86_expand_builtin to take care of special insns
   with variable number of operands.  */

static rtx
ix86_expand_special_args_builtin (const struct builtin_description *d,
                                  tree exp, rtx target)
{
  tree arg;
  rtx pat, op;
  unsigned int i, nargs, arg_adjust, memory;
  struct
    {
      rtx op;
      enum machine_mode mode;
    } args[3];
  enum insn_code icode = d->icode;
  bool last_arg_constant = false;
  const struct insn_data_d *insn_p = &insn_data[icode];
  enum machine_mode tmode = insn_p->operand[0].mode;
  enum { load, store } klass;

  switch ((enum ix86_builtin_func_type) d->flag)
    {
    case VOID_FTYPE_VOID:
      if (icode == CODE_FOR_avx_vzeroupper)
        target = GEN_INT (vzeroupper_intrinsic);
      emit_insn (GEN_FCN (icode) (target));
      return 0;
    case VOID_FTYPE_UINT64:
    case VOID_FTYPE_UNSIGNED:
      nargs = 0;
      klass = store;
      memory = 0;
      break;
    case UINT64_FTYPE_VOID:
    case UNSIGNED_FTYPE_VOID:
      nargs = 0;
      klass = load;
      memory = 0;
      break;
    case UINT64_FTYPE_PUNSIGNED:
    case V2DI_FTYPE_PV2DI:
    case V32QI_FTYPE_PCCHAR:
    case V16QI_FTYPE_PCCHAR:
    case V8SF_FTYPE_PCV4SF:
    case V8SF_FTYPE_PCFLOAT:
    case V4SF_FTYPE_PCFLOAT:
    case V4DF_FTYPE_PCV2DF:
    case V4DF_FTYPE_PCDOUBLE:
    case V2DF_FTYPE_PCDOUBLE:
    case VOID_FTYPE_PVOID:
      nargs = 1;
      klass = load;
      memory = 0;
      break;
    case VOID_FTYPE_PV2SF_V4SF:
    case VOID_FTYPE_PV4DI_V4DI:
    case VOID_FTYPE_PV2DI_V2DI:
    case VOID_FTYPE_PCHAR_V32QI:
    case VOID_FTYPE_PCHAR_V16QI:
    case VOID_FTYPE_PFLOAT_V8SF:
    case VOID_FTYPE_PFLOAT_V4SF:
    case VOID_FTYPE_PDOUBLE_V4DF:
    case VOID_FTYPE_PDOUBLE_V2DF:
    case VOID_FTYPE_PULONGLONG_ULONGLONG:
    case VOID_FTYPE_PINT_INT:
      nargs = 1;
      klass = store;
      /* Reserve memory operand for target.  */
      memory = ARRAY_SIZE (args);
      break;
    case V4SF_FTYPE_V4SF_PCV2SF:
    case V2DF_FTYPE_V2DF_PCDOUBLE:
      nargs = 2;
      klass = load;
      memory = 1;
      break;
    case V8SF_FTYPE_PCV8SF_V8SI:
    case V4DF_FTYPE_PCV4DF_V4DI:
    case V4SF_FTYPE_PCV4SF_V4SI:
    case V2DF_FTYPE_PCV2DF_V2DI:
      nargs = 2;
      klass = load;
      memory = 0;
      break;
    case VOID_FTYPE_PV8SF_V8SI_V8SF:
    case VOID_FTYPE_PV4DF_V4DI_V4DF:
    case VOID_FTYPE_PV4SF_V4SI_V4SF:
    case VOID_FTYPE_PV2DF_V2DI_V2DF:
      nargs = 2;
      klass = store;
      /* Reserve memory operand for target.  */
      memory = ARRAY_SIZE (args);
      break;
    case VOID_FTYPE_UINT_UINT_UINT:
    case VOID_FTYPE_UINT64_UINT_UINT:
    case UCHAR_FTYPE_UINT_UINT_UINT:
    case UCHAR_FTYPE_UINT64_UINT_UINT:
      nargs = 3;
      klass = load;
      memory = ARRAY_SIZE (args);
      last_arg_constant = true;
      break;
    default:
      gcc_unreachable ();
    }

  gcc_assert (nargs <= ARRAY_SIZE (args));

  if (klass == store)
    {
      arg = CALL_EXPR_ARG (exp, 0);
      op = expand_normal (arg);
      gcc_assert (target == 0);
      if (memory)
        target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
      else
        target = force_reg (tmode, op);
      arg_adjust = 1;
    }
  else
    {
      arg_adjust = 0;
      if (optimize
          || target == 0
          || GET_MODE (target) != tmode
          || !insn_p->operand[0].predicate (target, tmode))
        target = gen_reg_rtx (tmode);
    }

  for (i = 0; i < nargs; i++)
    {
      enum machine_mode mode = insn_p->operand[i + 1].mode;
      bool match;

      arg = CALL_EXPR_ARG (exp, i + arg_adjust);
      op = expand_normal (arg);
      match = insn_p->operand[i + 1].predicate (op, mode);

      if (last_arg_constant && (i + 1) == nargs)
        {
          if (!match)
            {
              if (icode == CODE_FOR_lwp_lwpvalsi3
                  || icode == CODE_FOR_lwp_lwpinssi3
                  || icode == CODE_FOR_lwp_lwpvaldi3
                  || icode == CODE_FOR_lwp_lwpinsdi3)
                error ("the last argument must be a 32-bit immediate");
              else
                error ("the last argument must be an 8-bit immediate");
              return const0_rtx;
            }
        }
      else
        {
          if (i == memory)
            {
              /* This must be the memory operand.  */
              op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
              gcc_assert (GET_MODE (op) == mode
                          || GET_MODE (op) == VOIDmode);
            }
          else
            {
              /* This must be a register.  */
              if (VECTOR_MODE_P (mode))
                op = safe_vector_operand (op, mode);

              gcc_assert (GET_MODE (op) == mode
                          || GET_MODE (op) == VOIDmode);
              op = copy_to_mode_reg (mode, op);
            }
        }

      args[i].op = op;
      args[i].mode = mode;
    }

  switch (nargs)
    {
    case 0:
      pat = GEN_FCN (icode) (target);
      break;
    case 1:
      pat = GEN_FCN (icode) (target, args[0].op);
      break;
    case 2:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
      break;
    case 3:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
      break;
    default:
      gcc_unreachable ();
    }

  if (! pat)
    return 0;
  emit_insn (pat);
  return klass == store ? 0 : target;
}
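
/* For illustration (not part of GCC): a store-class builtin such as
   __builtin_ia32_movntps (VOID_FTYPE_PFLOAT_V4SF) is handled above by
   turning its first argument into the MEM target and forcing the
   vector argument into a register, so the sse_movntv4sf pattern sees
   a (set (mem:V4SF ...) ...) and there is nothing to return.  */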

/* Return the integer constant in ARG.  Constrain it to be in the range
   of the subparts of VEC_TYPE; issue an error if not.  */

static unsigned HOST_WIDE_INT
get_element_number (tree vec_type, tree arg)
{
  unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;

  if (!host_integerp (arg, 1)
      || (elt = tree_low_cst (arg, 1), elt > max))
    {
      error ("selector must be an integer constant in the range 0..%wi", max);
      return 0;
    }

  return elt;
}
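
/* For illustration (not part of GCC): for a V4SF argument
   TYPE_VECTOR_SUBPARTS is 4, so MAX is 3 and a call like
   __builtin_ia32_vec_ext_v4sf (v, 7) is rejected with the range error
   above rather than being reduced modulo the vector length.  */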

/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_init.  We DO have language-level syntax for this, in
   the form of (type){ init-list }.  Except that since we can't place emms
   instructions from inside the compiler, we can't allow the use of MMX
   registers unless the user explicitly asks for it.  So we do *not* define
   vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md.  Instead
   we have builtins invoked by mmintrin.h that give us license to emit
   these sorts of instructions.  */

static rtx
ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
{
  enum machine_mode tmode = TYPE_MODE (type);
  enum machine_mode inner_mode = GET_MODE_INNER (tmode);
  int i, n_elt = GET_MODE_NUNITS (tmode);
  rtvec v = rtvec_alloc (n_elt);

  gcc_assert (VECTOR_MODE_P (tmode));
  gcc_assert (call_expr_nargs (exp) == n_elt);

  for (i = 0; i < n_elt; ++i)
    {
      rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
      RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
    }

  if (!target || !register_operand (target, tmode))
    target = gen_reg_rtx (tmode);

  ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
  return target;
}

/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_extract.  They would be redundant (for non-MMX) if we
   had a language-level syntax for referencing vector elements.  */

static rtx
ix86_expand_vec_ext_builtin (tree exp, rtx target)
{
  enum machine_mode tmode, mode0;
  tree arg0, arg1;
  int elt;
  rtx op0;

  arg0 = CALL_EXPR_ARG (exp, 0);
  arg1 = CALL_EXPR_ARG (exp, 1);

  op0 = expand_normal (arg0);
  elt = get_element_number (TREE_TYPE (arg0), arg1);

  tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
  mode0 = TYPE_MODE (TREE_TYPE (arg0));
  gcc_assert (VECTOR_MODE_P (mode0));

  op0 = force_reg (mode0, op0);

  if (optimize || !target || !register_operand (target, tmode))
    target = gen_reg_rtx (tmode);

  ix86_expand_vector_extract (true, target, op0, elt);

  return target;
}

/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_set.  They would be redundant (for non-MMX) if we had
   a language-level syntax for referencing vector elements.  */

static rtx
ix86_expand_vec_set_builtin (tree exp)
{
  enum machine_mode tmode, mode1;
  tree arg0, arg1, arg2;
  int elt;
  rtx op0, op1, target;

  arg0 = CALL_EXPR_ARG (exp, 0);
  arg1 = CALL_EXPR_ARG (exp, 1);
  arg2 = CALL_EXPR_ARG (exp, 2);

  tmode = TYPE_MODE (TREE_TYPE (arg0));
  mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
  gcc_assert (VECTOR_MODE_P (tmode));

  op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
  op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
  elt = get_element_number (TREE_TYPE (arg0), arg2);

  if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
    op1 = convert_modes (mode1, GET_MODE (op1), op1, true);

  op0 = force_reg (tmode, op0);
  op1 = force_reg (mode1, op1);

  /* OP0 is the source of these builtin functions and shouldn't be
     modified.  Create a copy, use it and return it as target.  */
  target = gen_reg_rtx (tmode);
  emit_move_insn (target, op0);
  ix86_expand_vector_set (true, target, op1, elt);

  return target;
}

/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

static rtx
ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
                     enum machine_mode mode ATTRIBUTE_UNUSED,
                     int ignore ATTRIBUTE_UNUSED)
{
  const struct builtin_description *d;
  size_t i;
  enum insn_code icode;
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  tree arg0, arg1, arg2;
  rtx op0, op1, op2, pat;
  enum machine_mode mode0, mode1, mode2;
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  /* Determine whether the builtin function is available under the current ISA.
     Originally the builtin was not created if it wasn't applicable to the
     current ISA based on the command line switches.  With function specific
     options, we need to check in the context of the function making the call
     whether it is supported.  */
  if (ix86_builtins_isa[fcode].isa
      && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
    {
      char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
                                       NULL, (enum fpmath_unit) 0, false);

      if (!opts)
        error ("%qE needs unknown isa option", fndecl);
      else
        {
          gcc_assert (opts != NULL);
          error ("%qE needs isa option %s", fndecl, opts);
          free (opts);
        }
      return const0_rtx;
    }
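
/* For illustration (not part of GCC): this is the check that turns a
   call to, say, an SSE4.2 builtin inside a function compiled without
   -msse4.2 into the diagnostic "needs isa option -msse4.2";
   ix86_target_string reconstructs the option spelling from the
   missing ISA mask.  */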

  switch (fcode)
    {
    case IX86_BUILTIN_MASKMOVQ:
    case IX86_BUILTIN_MASKMOVDQU:
      icode = (fcode == IX86_BUILTIN_MASKMOVQ
               ? CODE_FOR_mmx_maskmovq
               : CODE_FOR_sse2_maskmovdqu);
      /* Note the arg order is different from the operand order.  */
      arg1 = CALL_EXPR_ARG (exp, 0);
      arg2 = CALL_EXPR_ARG (exp, 1);
      arg0 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      op0 = force_reg (Pmode, op0);
      op0 = gen_rtx_MEM (mode1, op0);

      if (!insn_data[icode].operand[0].predicate (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      if (!insn_data[icode].operand[1].predicate (op1, mode1))
        op1 = copy_to_mode_reg (mode1, op1);
      if (!insn_data[icode].operand[2].predicate (op2, mode2))
        op2 = copy_to_mode_reg (mode2, op2);
      pat = GEN_FCN (icode) (op0, op1, op2);
      if (! pat)
        return 0;
      emit_insn (pat);
      return 0;

    case IX86_BUILTIN_LDMXCSR:
      op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
      target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
      emit_move_insn (target, op0);
      emit_insn (gen_sse_ldmxcsr (target));
      return 0;

    case IX86_BUILTIN_STMXCSR:
      target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
      emit_insn (gen_sse_stmxcsr (target));
      return copy_to_mode_reg (SImode, target);

    case IX86_BUILTIN_CLFLUSH:
      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = expand_normal (arg0);
      icode = CODE_FOR_sse2_clflush;
      if (!insn_data[icode].operand[0].predicate (op0, Pmode))
        op0 = copy_to_mode_reg (Pmode, op0);

      emit_insn (gen_sse2_clflush (op0));
      return 0;

    case IX86_BUILTIN_MONITOR:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      if (!REG_P (op0))
        op0 = copy_to_mode_reg (Pmode, op0);
      if (!REG_P (op1))
        op1 = copy_to_mode_reg (SImode, op1);
      if (!REG_P (op2))
        op2 = copy_to_mode_reg (SImode, op2);
      emit_insn (ix86_gen_monitor (op0, op1, op2));
      return 0;

    case IX86_BUILTIN_MWAIT:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      if (!REG_P (op0))
        op0 = copy_to_mode_reg (SImode, op0);
      if (!REG_P (op1))
        op1 = copy_to_mode_reg (SImode, op1);
      emit_insn (gen_sse3_mwait (op0, op1));
      return 0;

    case IX86_BUILTIN_VEC_INIT_V2SI:
    case IX86_BUILTIN_VEC_INIT_V4HI:
    case IX86_BUILTIN_VEC_INIT_V8QI:
      return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);

    case IX86_BUILTIN_VEC_EXT_V2DF:
    case IX86_BUILTIN_VEC_EXT_V2DI:
    case IX86_BUILTIN_VEC_EXT_V4SF:
    case IX86_BUILTIN_VEC_EXT_V4SI:
    case IX86_BUILTIN_VEC_EXT_V8HI:
    case IX86_BUILTIN_VEC_EXT_V2SI:
    case IX86_BUILTIN_VEC_EXT_V4HI:
    case IX86_BUILTIN_VEC_EXT_V16QI:
      return ix86_expand_vec_ext_builtin (exp, target);

    case IX86_BUILTIN_VEC_SET_V2DI:
    case IX86_BUILTIN_VEC_SET_V4SF:
    case IX86_BUILTIN_VEC_SET_V4SI:
    case IX86_BUILTIN_VEC_SET_V8HI:
    case IX86_BUILTIN_VEC_SET_V4HI:
    case IX86_BUILTIN_VEC_SET_V16QI:
      return ix86_expand_vec_set_builtin (exp);

    case IX86_BUILTIN_VEC_PERM_V2DF:
    case IX86_BUILTIN_VEC_PERM_V4SF:
    case IX86_BUILTIN_VEC_PERM_V2DI:
    case IX86_BUILTIN_VEC_PERM_V4SI:
    case IX86_BUILTIN_VEC_PERM_V8HI:
    case IX86_BUILTIN_VEC_PERM_V16QI:
    case IX86_BUILTIN_VEC_PERM_V2DI_U:
    case IX86_BUILTIN_VEC_PERM_V4SI_U:
    case IX86_BUILTIN_VEC_PERM_V8HI_U:
    case IX86_BUILTIN_VEC_PERM_V16QI_U:
    case IX86_BUILTIN_VEC_PERM_V4DF:
    case IX86_BUILTIN_VEC_PERM_V8SF:
      return ix86_expand_vec_perm_builtin (exp);

    case IX86_BUILTIN_INFQ:
    case IX86_BUILTIN_HUGE_VALQ:
      {
        REAL_VALUE_TYPE inf;
        rtx tmp;

        real_inf (&inf);
        tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);

        tmp = validize_mem (force_const_mem (mode, tmp));

        if (target == 0)
          target = gen_reg_rtx (mode);

        emit_move_insn (target, tmp);
        return target;
      }

    case IX86_BUILTIN_LLWPCB:
      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = expand_normal (arg0);
      icode = CODE_FOR_lwp_llwpcb;
      if (!insn_data[icode].operand[0].predicate (op0, Pmode))
        op0 = copy_to_mode_reg (Pmode, op0);
      emit_insn (gen_lwp_llwpcb (op0));
      return 0;

    case IX86_BUILTIN_SLWPCB:
      icode = CODE_FOR_lwp_slwpcb;
      if (!target
          || !insn_data[icode].operand[0].predicate (target, Pmode))
        target = gen_reg_rtx (Pmode);
      emit_insn (gen_lwp_slwpcb (target));
      return target;

    case IX86_BUILTIN_BEXTRI32:
    case IX86_BUILTIN_BEXTRI64:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      icode = (fcode == IX86_BUILTIN_BEXTRI32
               ? CODE_FOR_tbm_bextri_si
               : CODE_FOR_tbm_bextri_di);
      if (!CONST_INT_P (op1))
        {
          error ("last argument must be an immediate");
          return const0_rtx;
        }
      else
        {
          unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
          unsigned char lsb_index = INTVAL (op1) & 0xFF;
          op1 = GEN_INT (length);
          op2 = GEN_INT (lsb_index);
          pat = GEN_FCN (icode) (target, op0, op1, op2);
          if (pat)
            emit_insn (pat);
          return target;
        }
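
    /* For illustration (not part of GCC): the TBM bextri control word
       is (length << 8) | lsb_index, so __builtin_ia32_bextri_u32 with
       the constant 0x0404 is split above into length 4 and lsb_index
       4, i.e. it extracts bits 7:4 of the first operand.  */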

    case IX86_BUILTIN_RDRAND16_STEP:
      icode = CODE_FOR_rdrandhi_1;
      mode0 = HImode;
      goto rdrand_step;

    case IX86_BUILTIN_RDRAND32_STEP:
      icode = CODE_FOR_rdrandsi_1;
      mode0 = SImode;
      goto rdrand_step;

    case IX86_BUILTIN_RDRAND64_STEP:
      icode = CODE_FOR_rdranddi_1;
      mode0 = DImode;

rdrand_step:
      op0 = gen_reg_rtx (mode0);
      emit_insn (GEN_FCN (icode) (op0));

      arg0 = CALL_EXPR_ARG (exp, 0);
      op1 = expand_normal (arg0);
      if (!address_operand (op1, VOIDmode))
        op1 = copy_addr_to_reg (op1);
      emit_move_insn (gen_rtx_MEM (mode0, op1), op0);

      op1 = gen_reg_rtx (SImode);
      emit_move_insn (op1, CONST1_RTX (SImode));

      /* Emit SImode conditional move.  */
      if (mode0 == HImode)
        {
          op2 = gen_reg_rtx (SImode);
          emit_insn (gen_zero_extendhisi2 (op2, op0));
        }
      else if (mode0 == SImode)
        op2 = op0;
      else
        op2 = gen_rtx_SUBREG (SImode, op0, 0);

      if (target == 0)
        target = gen_reg_rtx (SImode);

      pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
                         const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, target,
                              gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
      return target;
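
    /* For illustration (not part of GCC): rdrand leaves CF clear and
       its destination zeroed when no random data is available, so the
       IF_THEN_ELSE above (GEU on CCCmode tests "carry clear") selects
       the zeroed result on failure and the constant 1 on success,
       which is exactly the 0/1 status the *_step builtins return; the
       random value itself was already stored through the pointer
       argument.  */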

    default:
      break;
    }

  for (i = 0, d = bdesc_special_args;
       i < ARRAY_SIZE (bdesc_special_args);
       i++, d++)
    if (d->code == fcode)
      return ix86_expand_special_args_builtin (d, exp, target);

  for (i = 0, d = bdesc_args;
       i < ARRAY_SIZE (bdesc_args);
       i++, d++)
    if (d->code == fcode)
      switch (fcode)
        {
        case IX86_BUILTIN_FABSQ:
        case IX86_BUILTIN_COPYSIGNQ:
          if (!TARGET_SSE2)
            /* Emit a normal call if SSE2 isn't available.  */
            return expand_call (exp, target, ignore);
        default:
          return ix86_expand_args_builtin (d, exp, target);
        }

  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_comi (d, exp, target);

  for (i = 0, d = bdesc_pcmpestr;
       i < ARRAY_SIZE (bdesc_pcmpestr);
       i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_pcmpestr (d, exp, target);

  for (i = 0, d = bdesc_pcmpistr;
       i < ARRAY_SIZE (bdesc_pcmpistr);
       i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_pcmpistr (d, exp, target);

  for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
    if (d->code == fcode)
      return ix86_expand_multi_arg_builtin (d->icode, exp, target,
                                            (enum ix86_builtin_func_type)
                                            d->flag, d->comparison);

  gcc_unreachable ();
}

/* Returns a function decl for a vectorized version of the builtin function
   with builtin function code FN and the result vector type TYPE, or NULL_TREE
   if it is not available.  */

static tree
ix86_builtin_vectorized_function (tree fndecl, tree type_out,
                                  tree type_in)
{
  enum machine_mode in_mode, out_mode;
  int in_n, out_n;
  enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);

  if (TREE_CODE (type_out) != VECTOR_TYPE
      || TREE_CODE (type_in) != VECTOR_TYPE
      || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
    return NULL_TREE;

  out_mode = TYPE_MODE (TREE_TYPE (type_out));
  out_n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);

  switch (fn)
    {
27163 case BUILT_IN_SQRT:
27164 if (out_mode == DFmode && in_mode == DFmode)
27166 if (out_n == 2 && in_n == 2)
27167 return ix86_builtins[IX86_BUILTIN_SQRTPD];
27168 else if (out_n == 4 && in_n == 4)
27169 return ix86_builtins[IX86_BUILTIN_SQRTPD256];
27173 case BUILT_IN_SQRTF:
27174 if (out_mode == SFmode && in_mode == SFmode)
27176 if (out_n == 4 && in_n == 4)
27177 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
27178 else if (out_n == 8 && in_n == 8)
27179 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR256];
27183 case BUILT_IN_LRINT:
27184 if (out_mode == SImode && out_n == 4
27185 && in_mode == DFmode && in_n == 2)
27186 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
27189 case BUILT_IN_LRINTF:
27190 if (out_mode == SImode && in_mode == SFmode)
27192 if (out_n == 4 && in_n == 4)
27193 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
27194 else if (out_n == 8 && in_n == 8)
27195 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ256];
27199 case BUILT_IN_COPYSIGN:
27200 if (out_mode == DFmode && in_mode == DFmode)
27202 if (out_n == 2 && in_n == 2)
27203 return ix86_builtins[IX86_BUILTIN_CPYSGNPD];
27204 else if (out_n == 4 && in_n == 4)
27205 return ix86_builtins[IX86_BUILTIN_CPYSGNPD256];
27209 case BUILT_IN_COPYSIGNF:
27210 if (out_mode == SFmode && in_mode == SFmode)
27212 if (out_n == 4 && in_n == 4)
27213 return ix86_builtins[IX86_BUILTIN_CPYSGNPS];
27214 else if (out_n == 8 && in_n == 8)
27215 return ix86_builtins[IX86_BUILTIN_CPYSGNPS256];
27219 case BUILT_IN_FLOOR:
27220 /* The round insn does not trap on denormals. */
27221 if (flag_trapping_math || !TARGET_ROUND)
27224 if (out_mode == DFmode && in_mode == DFmode)
27226 if (out_n == 2 && in_n == 2)
27227 return ix86_builtins[IX86_BUILTIN_FLOORPD];
27228 else if (out_n == 4 && in_n == 4)
27229 return ix86_builtins[IX86_BUILTIN_FLOORPD256];
27233 case BUILT_IN_FLOORF:
27234 /* The round insn does not trap on denormals. */
27235 if (flag_trapping_math || !TARGET_ROUND)
27238 if (out_mode == SFmode && in_mode == SFmode)
27240 if (out_n == 4 && in_n == 4)
27241 return ix86_builtins[IX86_BUILTIN_FLOORPS];
27242 else if (out_n == 8 && in_n == 8)
27243 return ix86_builtins[IX86_BUILTIN_FLOORPS256];
27247 case BUILT_IN_CEIL:
27248 /* The round insn does not trap on denormals. */
27249 if (flag_trapping_math || !TARGET_ROUND)
27252 if (out_mode == DFmode && in_mode == DFmode)
27254 if (out_n == 2 && in_n == 2)
27255 return ix86_builtins[IX86_BUILTIN_CEILPD];
27256 else if (out_n == 4 && in_n == 4)
27257 return ix86_builtins[IX86_BUILTIN_CEILPD256];
27261 case BUILT_IN_CEILF:
27262 /* The round insn does not trap on denormals. */
27263 if (flag_trapping_math || !TARGET_ROUND)
27266 if (out_mode == SFmode && in_mode == SFmode)
27268 if (out_n == 4 && in_n == 4)
27269 return ix86_builtins[IX86_BUILTIN_CEILPS];
27270 else if (out_n == 8 && in_n == 8)
27271 return ix86_builtins[IX86_BUILTIN_CEILPS256];
27275 case BUILT_IN_TRUNC:
27276 /* The round insn does not trap on denormals. */
27277 if (flag_trapping_math || !TARGET_ROUND)
27280 if (out_mode == DFmode && in_mode == DFmode)
27282 if (out_n == 2 && in_n == 2)
27283 return ix86_builtins[IX86_BUILTIN_TRUNCPD];
27284 else if (out_n == 4 && in_n == 4)
27285 return ix86_builtins[IX86_BUILTIN_TRUNCPD256];
27289 case BUILT_IN_TRUNCF:
27290 /* The round insn does not trap on denormals. */
27291 if (flag_trapping_math || !TARGET_ROUND)
27294 if (out_mode == SFmode && in_mode == SFmode)
27296 if (out_n == 4 && in_n == 4)
27297 return ix86_builtins[IX86_BUILTIN_TRUNCPS];
27298 else if (out_n == 8 && in_n == 8)
27299 return ix86_builtins[IX86_BUILTIN_TRUNCPS256];
27303 case BUILT_IN_RINT:
27304 /* The round insn does not trap on denormals. */
27305 if (flag_trapping_math || !TARGET_ROUND)
27308 if (out_mode == DFmode && in_mode == DFmode)
27310 if (out_n == 2 && in_n == 2)
27311 return ix86_builtins[IX86_BUILTIN_RINTPD];
27312 else if (out_n == 4 && in_n == 4)
27313 return ix86_builtins[IX86_BUILTIN_RINTPD256];
27317 case BUILT_IN_RINTF:
27318 /* The round insn does not trap on denormals. */
27319 if (flag_trapping_math || !TARGET_ROUND)
27322 if (out_mode == SFmode && in_mode == SFmode)
27324 if (out_n == 4 && in_n == 4)
27325 return ix86_builtins[IX86_BUILTIN_RINTPS];
27326 else if (out_n == 8 && in_n == 8)
27327 return ix86_builtins[IX86_BUILTIN_RINTPS256];
27331 case BUILT_IN_FMA:
27332 if (out_mode == DFmode && in_mode == DFmode)
27334 if (out_n == 2 && in_n == 2)
27335 return ix86_builtins[IX86_BUILTIN_VFMADDPD];
27336 if (out_n == 4 && in_n == 4)
27337 return ix86_builtins[IX86_BUILTIN_VFMADDPD256];
27341 case BUILT_IN_FMAF:
27342 if (out_mode == SFmode && in_mode == SFmode)
27344 if (out_n == 4 && in_n == 4)
27345 return ix86_builtins[IX86_BUILTIN_VFMADDPS];
27346 if (out_n == 8 && in_n == 8)
27347 return ix86_builtins[IX86_BUILTIN_VFMADDPS256];
27355 /* Dispatch to a handler for a vectorization library. */
27356 if (ix86_veclib_handler)
27357 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
27363 /* Handler for an SVML-style interface to
27364 a library with vectorized intrinsics. */
27367 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
27370 tree fntype, new_fndecl, args;
27373 enum machine_mode el_mode, in_mode;
27376 /* The SVML library is suitable only for unsafe math. */
27377 if (!flag_unsafe_math_optimizations)
27380 el_mode = TYPE_MODE (TREE_TYPE (type_out));
27381 n = TYPE_VECTOR_SUBPARTS (type_out);
27382 in_mode = TYPE_MODE (TREE_TYPE (type_in));
27383 in_n = TYPE_VECTOR_SUBPARTS (type_in);
27384 if (el_mode != in_mode
27392 case BUILT_IN_LOG10:
27394 case BUILT_IN_TANH:
27396 case BUILT_IN_ATAN:
27397 case BUILT_IN_ATAN2:
27398 case BUILT_IN_ATANH:
27399 case BUILT_IN_CBRT:
27400 case BUILT_IN_SINH:
27402 case BUILT_IN_ASINH:
27403 case BUILT_IN_ASIN:
27404 case BUILT_IN_COSH:
27406 case BUILT_IN_ACOSH:
27407 case BUILT_IN_ACOS:
27408 if (el_mode != DFmode || n != 2)
27412 case BUILT_IN_EXPF:
27413 case BUILT_IN_LOGF:
27414 case BUILT_IN_LOG10F:
27415 case BUILT_IN_POWF:
27416 case BUILT_IN_TANHF:
27417 case BUILT_IN_TANF:
27418 case BUILT_IN_ATANF:
27419 case BUILT_IN_ATAN2F:
27420 case BUILT_IN_ATANHF:
27421 case BUILT_IN_CBRTF:
27422 case BUILT_IN_SINHF:
27423 case BUILT_IN_SINF:
27424 case BUILT_IN_ASINHF:
27425 case BUILT_IN_ASINF:
27426 case BUILT_IN_COSHF:
27427 case BUILT_IN_COSF:
27428 case BUILT_IN_ACOSHF:
27429 case BUILT_IN_ACOSF:
27430 if (el_mode != SFmode || n != 4)
27438 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
27440 if (fn == BUILT_IN_LOGF)
27441 strcpy (name, "vmlsLn4");
27442 else if (fn == BUILT_IN_LOG)
27443 strcpy (name, "vmldLn2");
27446 sprintf (name, "vmls%s", bname+10);
27447 name[strlen (name)-1] = '4';
27450 sprintf (name, "vmld%s2", bname+10);
27452 /* Convert to uppercase. */
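/* For example (illustrative; part of the mangling code is elided here):
   BUILT_IN_SINF has DECL_NAME "__builtin_sinf", so bname+10 is "sinf".
   The SFmode path builds "vmlssinf", replaces the trailing character
   with '4' and uppercases the fifth character, yielding the SVML entry
   point "vmlsSin4"; the DFmode path would similarly produce "vmldSin2". */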
27456 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
27457 args = TREE_CHAIN (args))
27461 fntype = build_function_type_list (type_out, type_in, NULL);
27463 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
27465 /* Build a function declaration for the vectorized function. */
27466 new_fndecl = build_decl (BUILTINS_LOCATION,
27467 FUNCTION_DECL, get_identifier (name), fntype);
27468 TREE_PUBLIC (new_fndecl) = 1;
27469 DECL_EXTERNAL (new_fndecl) = 1;
27470 DECL_IS_NOVOPS (new_fndecl) = 1;
27471 TREE_READONLY (new_fndecl) = 1;
27476 /* Handler for an ACML-style interface to
27477 a library with vectorized intrinsics. */
27480 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
27482 char name[20] = "__vr.._";
27483 tree fntype, new_fndecl, args;
27486 enum machine_mode el_mode, in_mode;
27489 /* The ACML is 64-bit only and suitable only for unsafe math, as it
27490 does not correctly support parts of IEEE (such as denormals) with
27491 the required precision. */
27493 || !flag_unsafe_math_optimizations)
27496 el_mode = TYPE_MODE (TREE_TYPE (type_out));
27497 n = TYPE_VECTOR_SUBPARTS (type_out);
27498 in_mode = TYPE_MODE (TREE_TYPE (type_in));
27499 in_n = TYPE_VECTOR_SUBPARTS (type_in);
27500 if (el_mode != in_mode
27510 case BUILT_IN_LOG2:
27511 case BUILT_IN_LOG10:
27514 if (el_mode != DFmode
27519 case BUILT_IN_SINF:
27520 case BUILT_IN_COSF:
27521 case BUILT_IN_EXPF:
27522 case BUILT_IN_POWF:
27523 case BUILT_IN_LOGF:
27524 case BUILT_IN_LOG2F:
27525 case BUILT_IN_LOG10F:
27528 if (el_mode != SFmode
27537 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
27538 sprintf (name + 7, "%s", bname+10);
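/* For example (illustrative; the code that fills in the two dots of the
   "__vr.._" template with the element mode and width is elided here):
   BUILT_IN_SINF yields "__vrs4_" plus bname+10 ("sinf"), i.e. the ACML
   entry point "__vrs4_sinf", while BUILT_IN_SIN would yield "__vrd2_sin". */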
27541 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
27542 args = TREE_CHAIN (args))
27546 fntype = build_function_type_list (type_out, type_in, NULL);
27548 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
27550 /* Build a function declaration for the vectorized function. */
27551 new_fndecl = build_decl (BUILTINS_LOCATION,
27552 FUNCTION_DECL, get_identifier (name), fntype);
27553 TREE_PUBLIC (new_fndecl) = 1;
27554 DECL_EXTERNAL (new_fndecl) = 1;
27555 DECL_IS_NOVOPS (new_fndecl) = 1;
27556 TREE_READONLY (new_fndecl) = 1;
27562 /* Returns a decl of a function that implements conversion of an integer vector
27563 into a floating-point vector, or vice-versa. DEST_TYPE and SRC_TYPE
27564 are the types involved when converting according to CODE.
27565 Return NULL_TREE if it is not available. */
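/* For example (illustrative): a FLOAT_EXPR converting V4SI to V4SF maps
   to IX86_BUILTIN_CVTDQ2PS (IX86_BUILTIN_CVTUDQ2PS for an unsigned
   source), and a FIX_TRUNC_EXPR from V4SF to V4SI maps to
   IX86_BUILTIN_CVTTPS2DQ, as the nested switches below show. */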
27568 ix86_vectorize_builtin_conversion (unsigned int code,
27569 tree dest_type, tree src_type)
27577 switch (TYPE_MODE (src_type))
27580 switch (TYPE_MODE (dest_type))
27583 return (TYPE_UNSIGNED (src_type)
27584 ? ix86_builtins[IX86_BUILTIN_CVTUDQ2PS]
27585 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS]);
27587 return (TYPE_UNSIGNED (src_type)
27589 : ix86_builtins[IX86_BUILTIN_CVTDQ2PD256]);
27595 switch (TYPE_MODE (dest_type))
27598 return (TYPE_UNSIGNED (src_type)
27600 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS256]);
27609 case FIX_TRUNC_EXPR:
27610 switch (TYPE_MODE (dest_type))
27613 switch (TYPE_MODE (src_type))
27616 return (TYPE_UNSIGNED (dest_type)
27618 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ]);
27620 return (TYPE_UNSIGNED (dest_type)
27622 : ix86_builtins[IX86_BUILTIN_CVTTPD2DQ256]);
27629 switch (TYPE_MODE (src_type))
27632 return (TYPE_UNSIGNED (dest_type)
27634 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ256]);
27651 /* Returns the decl of a target-specific builtin that implements
27652 the reciprocal of the function, or NULL_TREE if not available. */
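/* For example (illustrative): with -ffast-math, which implies the flags
   tested below, a division such as 1.0f / sqrtf (x) can be rewritten in
   terms of IX86_BUILTIN_RSQRTF, the rsqrtss-based approximation that
   the expander then refines with a Newton-Raphson step. */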
27655 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
27656 bool sqrt ATTRIBUTE_UNUSED)
27658 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
27659 && flag_finite_math_only && !flag_trapping_math
27660 && flag_unsafe_math_optimizations))
27664 /* Machine dependent builtins. */
27667 /* Vectorized version of sqrt to rsqrt conversion. */
27668 case IX86_BUILTIN_SQRTPS_NR:
27669 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
27671 case IX86_BUILTIN_SQRTPS_NR256:
27672 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR256];
27678 /* Normal builtins. */
27681 /* Sqrt to rsqrt conversion. */
27682 case BUILT_IN_SQRTF:
27683 return ix86_builtins[IX86_BUILTIN_RSQRTF];
27690 /* Helper for avx_vpermilps256_operand et al. This is also used by
27691 the expansion functions to turn the parallel back into a mask.
27692 The return value is 0 for no match and the imm8+1 for a match. */
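/* For example (an illustrative V4SF case): the parallel [1 0 3 2]
   selects element 1 into slot 0, element 0 into slot 1, and so on.
   With nelt == 4 each index takes two bits in the immediate, so the
   loop below builds mask = 1<<0 | 0<<2 | 3<<4 | 2<<6 = 0xb1, and the
   function returns 0xb2. */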
27695 avx_vpermilp_parallel (rtx par, enum machine_mode mode)
27697 unsigned i, nelt = GET_MODE_NUNITS (mode);
27699 unsigned char ipar[8];
27701 if (XVECLEN (par, 0) != (int) nelt)
27704 /* Validate that all of the elements are constants, and not totally
27705 out of range. Copy the data into an integral array to make the
27706 subsequent checks easier. */
27707 for (i = 0; i < nelt; ++i)
27709 rtx er = XVECEXP (par, 0, i);
27710 unsigned HOST_WIDE_INT ei;
27712 if (!CONST_INT_P (er))
27723 /* In the 256-bit DFmode case, we can only move elements within a 128-bit lane. */
27725 for (i = 0; i < 2; ++i)
27729 mask |= ipar[i] << i;
27731 for (i = 2; i < 4; ++i)
27735 mask |= (ipar[i] - 2) << i;
27740 /* In the 256-bit SFmode case, we have full freedom of movement
27741 within the low 128-bit lane, but the high 128-bit lane must
27742 mirror the exact same pattern. */
27743 for (i = 0; i < 4; ++i)
27744 if (ipar[i] + 4 != ipar[i + 4])
27751 /* In the 128-bit case, we have full freedom in the placement of
27752 the elements from the source operand. */
27753 for (i = 0; i < nelt; ++i)
27754 mask |= ipar[i] << (i * (nelt / 2));
27758 gcc_unreachable ();
27761 /* Make sure success has a non-zero value by adding one. */
27765 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
27766 the expansion functions to turn the parallel back into a mask.
27767 The return value is 0 for no match and the imm8+1 for a match. */
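/* For example (an illustrative V8SF case): the parallel
   [4 5 6 7 8 9 10 11] selects the high 128-bit lane of operand 1 and
   the low lane of operand 2 (indices 8..15 denote the second operand).
   Each half reduces to its lane number, so the mask becomes
   1 | 2<<4 = 0x21 and the function returns 0x22. */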
27770 avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
27772 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
27774 unsigned char ipar[8];
27776 if (XVECLEN (par, 0) != (int) nelt)
27779 /* Validate that all of the elements are constants, and not totally
27780 out of range. Copy the data into an integral array to make the
27781 subsequent checks easier. */
27782 for (i = 0; i < nelt; ++i)
27784 rtx er = XVECEXP (par, 0, i);
27785 unsigned HOST_WIDE_INT ei;
27787 if (!CONST_INT_P (er))
27790 if (ei >= 2 * nelt)
27795 /* Validate that each half of the permute selects a contiguous run of elements. */
27796 for (i = 0; i < nelt2 - 1; ++i)
27797 if (ipar[i] + 1 != ipar[i + 1])
27799 for (i = nelt2; i < nelt - 1; ++i)
27800 if (ipar[i] + 1 != ipar[i + 1])
27803 /* Reconstruct the mask. */
27804 for (i = 0; i < 2; ++i)
27806 unsigned e = ipar[i * nelt2];
27810 mask |= e << (i * 4);
27813 /* Make sure success has a non-zero value by adding one. */
27818 /* Store OPERAND to memory after reload is completed. This means
27819 that we can't easily use assign_stack_local. */
27821 ix86_force_to_memory (enum machine_mode mode, rtx operand)
27825 gcc_assert (reload_completed);
27826 if (ix86_using_red_zone ())
27828 result = gen_rtx_MEM (mode,
27829 gen_rtx_PLUS (Pmode,
27831 GEN_INT (-RED_ZONE_SIZE)));
27832 emit_move_insn (result, operand);
27834 else if (TARGET_64BIT)
27840 operand = gen_lowpart (DImode, operand);
27844 gen_rtx_SET (VOIDmode,
27845 gen_rtx_MEM (DImode,
27846 gen_rtx_PRE_DEC (DImode,
27847 stack_pointer_rtx)),
27851 gcc_unreachable ();
27853 result = gen_rtx_MEM (mode, stack_pointer_rtx);
27862 split_double_mode (mode, &operand, 1, operands, operands + 1);
27864 gen_rtx_SET (VOIDmode,
27865 gen_rtx_MEM (SImode,
27866 gen_rtx_PRE_DEC (Pmode,
27867 stack_pointer_rtx)),
27870 gen_rtx_SET (VOIDmode,
27871 gen_rtx_MEM (SImode,
27872 gen_rtx_PRE_DEC (Pmode,
27873 stack_pointer_rtx)),
27878 /* Store HImode values as SImode. */
27879 operand = gen_lowpart (SImode, operand);
27883 gen_rtx_SET (VOIDmode,
27884 gen_rtx_MEM (GET_MODE (operand),
27885 gen_rtx_PRE_DEC (SImode,
27886 stack_pointer_rtx)),
27890 gcc_unreachable ();
27892 result = gen_rtx_MEM (mode, stack_pointer_rtx);
27897 /* Free the operand from memory. */
27899 ix86_free_from_memory (enum machine_mode mode)
27901 if (!ix86_using_red_zone ())
27905 if (mode == DImode || TARGET_64BIT)
27909 /* Use LEA to deallocate stack space. In peephole2 it will be converted
27910 to a pop or add instruction if registers are available. */
27911 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
27912 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
27917 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
27919 Put float CONST_DOUBLE in the constant pool instead of fp regs.
27920 QImode must go into class Q_REGS.
27921 Narrow ALL_REGS to GENERAL_REGS. This allows movsf and
27922 movdf to do mem-to-mem moves through integer regs. */
27925 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
27927 enum machine_mode mode = GET_MODE (x);
27929 /* We're only allowed to return a subclass of CLASS. Many of the
27930 following checks fail for NO_REGS, so eliminate that early. */
27931 if (regclass == NO_REGS)
27934 /* All classes can load zeros. */
27935 if (x == CONST0_RTX (mode))
27938 /* Force constants into memory if we are loading a (nonzero) constant into
27939 an MMX or SSE register. This is because there are no MMX/SSE instructions
27940 to load from a constant. */
27942 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
27945 /* Prefer SSE regs only if we can use them for math. */
27946 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
27947 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
27949 /* Floating-point constants need more complex checks. */
27950 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
27952 /* General regs can load everything. */
27953 if (reg_class_subset_p (regclass, GENERAL_REGS))
27956 /* Floats can load 0 and 1 plus some others. Note that we eliminated
27957 zero above. We only want to wind up preferring 80387 registers if
27958 we plan on doing computation with them. */
27960 && standard_80387_constant_p (x) > 0)
27962 /* Limit class to non-SSE. */
27963 if (regclass == FLOAT_SSE_REGS)
27965 if (regclass == FP_TOP_SSE_REGS)
27967 if (regclass == FP_SECOND_SSE_REGS)
27968 return FP_SECOND_REG;
27969 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
27976 /* Generally when we see PLUS here, it's the function invariant
27977 (plus soft-fp const_int), which can only be computed into general registers. */
27979 if (GET_CODE (x) == PLUS)
27980 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
27982 /* QImode constants are easy to load, but non-constant QImode data
27983 must go into Q_REGS. */
27984 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
27986 if (reg_class_subset_p (regclass, Q_REGS))
27988 if (reg_class_subset_p (Q_REGS, regclass))
27996 /* Discourage putting floating-point values in SSE registers unless
27997 SSE math is being used, and likewise for the 387 registers. */
27999 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
28001 enum machine_mode mode = GET_MODE (x);
28003 /* Restrict the output reload class to the register bank that we are doing
28004 math on. If we would have to return a class that is not a subset of
28005 CLASS, reject this alternative: reload will then use its own choice. */
28007 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
28008 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
28010 if (X87_FLOAT_MODE_P (mode))
28012 if (regclass == FP_TOP_SSE_REGS)
28014 else if (regclass == FP_SECOND_SSE_REGS)
28015 return FP_SECOND_REG;
28017 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
28024 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
28025 enum machine_mode mode,
28026 secondary_reload_info *sri ATTRIBUTE_UNUSED)
28028 /* QImode spills from non-QI registers require
28029 an intermediate register on 32-bit targets. */
28031 && !in_p && mode == QImode
28032 && (rclass == GENERAL_REGS
28033 || rclass == LEGACY_REGS
28034 || rclass == INDEX_REGS))
28043 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
28044 regno = true_regnum (x);
28046 /* Return Q_REGS if the operand is in memory. */
28051 /* This condition handles the corner case where an expression involving
28052 pointers gets vectorized. We're trying to use the address of a
28053 stack slot as a vector initializer.
28055 (set (reg:V2DI 74 [ vect_cst_.2 ])
28056 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
28058 Eventually frame gets turned into sp+offset like this:
28060 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
28061 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
28062 (const_int 392 [0x188]))))
28064 That later gets turned into:
28066 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
28067 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
28068 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
28070 We'll have the following reload recorded:
28072 Reload 0: reload_in (DI) =
28073 (plus:DI (reg/f:DI 7 sp)
28074 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
28075 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
28076 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
28077 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
28078 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
28079 reload_reg_rtx: (reg:V2DI 22 xmm1)
28081 This isn't going to work since SSE instructions can't handle scalar
28082 additions. Returning GENERAL_REGS forces the addition into an integer
28083 register, and reload can handle subsequent reloads without problems. */
28085 if (in_p && GET_CODE (x) == PLUS
28086 && SSE_CLASS_P (rclass)
28087 && SCALAR_INT_MODE_P (mode))
28088 return GENERAL_REGS;
28093 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
28096 ix86_class_likely_spilled_p (reg_class_t rclass)
28107 case SSE_FIRST_REG:
28109 case FP_SECOND_REG:
28119 /* If we are copying between general and FP registers, we need a memory
28120 location. The same is true for SSE and MMX registers.
28122 To optimize register_move_cost performance, allow inline variant.
28124 The macro can't work reliably when one of the CLASSES is a class containing
28125 registers from multiple units (SSE, MMX, integer). We avoid this by never
28126 combining those units in a single alternative in the machine description.
28127 Ensure that this constraint holds to avoid unexpected surprises.
28129 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
28130 enforce these sanity checks. */
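/* For example (illustrative): a DFmode copy between FLOAT_REGS and
   SSE_REGS has no direct instruction and is reported below as needing
   a secondary memory location; MMX versus general registers is treated
   the same way, conservatively, as the comment below notes. */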
28133 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
28134 enum machine_mode mode, int strict)
28136 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
28137 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
28138 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
28139 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
28140 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
28141 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
28143 gcc_assert (!strict);
28147 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
28150 /* ??? This is a lie. We do have moves between mmx/general and between
28151 mmx/sse2. But by saying we need secondary memory we discourage the
28152 register allocator from using the mmx registers unless needed. */
28153 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
28156 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
28158 /* SSE1 doesn't have any direct moves from other classes. */
28162 /* If the target says that inter-unit moves are more expensive
28163 than moving through memory, then don't generate them. */
28164 if (!TARGET_INTER_UNIT_MOVES)
28167 /* Between SSE and general, we have moves no larger than word size. */
28168 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
28176 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
28177 enum machine_mode mode, int strict)
28179 return inline_secondary_memory_needed (class1, class2, mode, strict);
28182 /* Return true if the registers in CLASS cannot represent the change from
28183 modes FROM to TO. */
28186 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
28187 enum reg_class regclass)
28192 /* x87 registers can't do subreg at all, as all values are reformatted
28193 to extended precision. */
28194 if (MAYBE_FLOAT_CLASS_P (regclass))
28197 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
28199 /* Vector registers do not support QI or HImode loads. If we don't
28200 disallow a change to these modes, reload will assume it's ok to
28201 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
28202 the vec_dupv4hi pattern. */
28203 if (GET_MODE_SIZE (from) < 4)
28206 /* Vector registers do not support subreg with nonzero offsets, which
28207 are otherwise valid for integer registers. Since we can't see
28208 whether we have a nonzero offset from here, prohibit all
28209 nonparadoxical subregs changing size. */
28210 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
28217 /* Return the cost of moving data of mode M between a
28218 register and memory. A value of 2 is the default; this cost is
28219 relative to those in `REGISTER_MOVE_COST'.
28221 This function is used extensively by register_move_cost, which is used
28222 to build tables at startup. Make it inline in this case.
28223 When IN is 2, return the maximum of the in and out move costs.
28225 If moving between registers and memory is more expensive than
28226 between two registers, you should define this macro to express the relative cost.
28229 Model also the increased cost of moving QImode registers in non Q_REGS classes. */
28233 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
28237 if (FLOAT_CLASS_P (regclass))
28255 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
28256 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
28258 if (SSE_CLASS_P (regclass))
28261 switch (GET_MODE_SIZE (mode))
28276 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
28277 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
28279 if (MMX_CLASS_P (regclass))
28282 switch (GET_MODE_SIZE (mode))
28294 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
28295 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
28297 switch (GET_MODE_SIZE (mode))
28300 if (Q_CLASS_P (regclass) || TARGET_64BIT)
28303 return ix86_cost->int_store[0];
28304 if (TARGET_PARTIAL_REG_DEPENDENCY
28305 && optimize_function_for_speed_p (cfun))
28306 cost = ix86_cost->movzbl_load;
28308 cost = ix86_cost->int_load[0];
28310 return MAX (cost, ix86_cost->int_store[0]);
28316 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
28318 return ix86_cost->movzbl_load;
28320 return ix86_cost->int_store[0] + 4;
28325 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
28326 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
28328 /* Compute the number of 32-bit moves needed. TFmode is moved as XFmode. */
28329 if (mode == TFmode)
28332 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
28334 cost = ix86_cost->int_load[2];
28336 cost = ix86_cost->int_store[2];
28337 return (cost * (((int) GET_MODE_SIZE (mode)
28338 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
28343 ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
28346 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
28350 /* Return the cost of moving data from a register in class CLASS1 to
28351 one in class CLASS2.
28353 It is not required that the cost always equal 2 when CLASS1 is the same as
28354 CLASS2; on some machines it is expensive to move between registers if they
28355 are not general registers. */
28358 ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
28359 reg_class_t class2_i)
28361 enum reg_class class1 = (enum reg_class) class1_i;
28362 enum reg_class class2 = (enum reg_class) class2_i;
28364 /* In case we require secondary memory, compute the cost of the store
28365 followed by the load. In order to avoid bad register allocation choices,
28366 we need this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
28368 if (inline_secondary_memory_needed (class1, class2, mode, 0))
28372 cost += inline_memory_move_cost (mode, class1, 2);
28373 cost += inline_memory_move_cost (mode, class2, 2);
28375 /* In the case of copying from a general-purpose register we may emit
28376 multiple stores followed by a single load, causing a memory size
28377 mismatch stall. Count this as an arbitrarily high cost of 20. */
28378 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
28381 /* In the case of FP/MMX moves, the registers actually overlap, and we
28382 have to switch modes in order to treat them differently. */
28383 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
28384 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
28390 /* Moves between the SSE/MMX units and the integer unit are expensive. */
28391 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
28392 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
28394 /* ??? By keeping the returned value relatively high, we limit the number
28395 of moves between integer and MMX/SSE registers for all targets.
28396 Additionally, a high value prevents problems with x86_modes_tieable_p(),
28397 where integer modes in MMX/SSE registers are not tieable
28398 because of missing QImode and HImode moves to, from or between
28399 MMX/SSE registers. */
28400 return MAX (8, ix86_cost->mmxsse_to_integer);
28402 if (MAYBE_FLOAT_CLASS_P (class1))
28403 return ix86_cost->fp_move;
28404 if (MAYBE_SSE_CLASS_P (class1))
28405 return ix86_cost->sse_move;
28406 if (MAYBE_MMX_CLASS_P (class1))
28407 return ix86_cost->mmx_move;
28411 /* Return TRUE if hard register REGNO can hold a value of machine-mode MODE. */
28415 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
28417 /* The flags register can hold only CCmode values, and only it can hold them. */
28418 if (CC_REGNO_P (regno))
28419 return GET_MODE_CLASS (mode) == MODE_CC;
28420 if (GET_MODE_CLASS (mode) == MODE_CC
28421 || GET_MODE_CLASS (mode) == MODE_RANDOM
28422 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
28424 if (FP_REGNO_P (regno))
28425 return VALID_FP_MODE_P (mode);
28426 if (SSE_REGNO_P (regno))
28428 /* We implement the move patterns for all vector modes into and
28429 out of SSE registers, even when no operation instructions
28430 are available. OImode move is available only when AVX is enabled. */
28432 return ((TARGET_AVX && mode == OImode)
28433 || VALID_AVX256_REG_MODE (mode)
28434 || VALID_SSE_REG_MODE (mode)
28435 || VALID_SSE2_REG_MODE (mode)
28436 || VALID_MMX_REG_MODE (mode)
28437 || VALID_MMX_REG_MODE_3DNOW (mode));
28439 if (MMX_REGNO_P (regno))
28441 /* We implement the move patterns for 3DNOW modes even in MMX mode,
28442 so if the register is available at all, then we can move data of
28443 the given mode into or out of it. */
28444 return (VALID_MMX_REG_MODE (mode)
28445 || VALID_MMX_REG_MODE_3DNOW (mode));
28448 if (mode == QImode)
28450 /* Take care with QImode values - they can be in non-QI regs,
28451 but then they do cause partial register stalls. */
28452 if (regno <= BX_REG || TARGET_64BIT)
28454 if (!TARGET_PARTIAL_REG_STALL)
28456 return !can_create_pseudo_p ();
28458 /* We handle both integers and floats in the general purpose registers. */
28459 else if (VALID_INT_MODE_P (mode))
28461 else if (VALID_FP_MODE_P (mode))
28463 else if (VALID_DFP_MODE_P (mode))
28465 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
28466 on to use that value in smaller contexts, this can easily force a
28467 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
28468 supporting DImode, allow it. */
28469 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
28475 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
28476 tieable integer mode. */
28479 ix86_tieable_integer_mode_p (enum machine_mode mode)
28488 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
28491 return TARGET_64BIT;
28498 /* Return true if MODE1 is accessible in a register that can hold MODE2
28499 without copying. That is, all register classes that can hold MODE2
28500 can also hold MODE1. */
28503 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
28505 if (mode1 == mode2)
28508 if (ix86_tieable_integer_mode_p (mode1)
28509 && ix86_tieable_integer_mode_p (mode2))
28512 /* MODE2 being XFmode implies fp stack or general regs, which means we
28513 can tie any smaller floating point modes to it. Note that we do not
28514 tie this with TFmode. */
28515 if (mode2 == XFmode)
28516 return mode1 == SFmode || mode1 == DFmode;
28518 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
28519 that we can tie it with SFmode. */
28520 if (mode2 == DFmode)
28521 return mode1 == SFmode;
28523 /* If MODE2 is only appropriate for an SSE register, then tie with
28524 any other mode acceptable to SSE registers. */
28525 if (GET_MODE_SIZE (mode2) == 16
28526 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
28527 return (GET_MODE_SIZE (mode1) == 16
28528 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
28530 /* If MODE2 is appropriate for an MMX register, then tie
28531 with any other mode acceptable to MMX registers. */
28532 if (GET_MODE_SIZE (mode2) == 8
28533 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
28534 return (GET_MODE_SIZE (mode1) == 8
28535 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
28540 /* Compute a (partial) cost for rtx X. Return true if the complete
28541 cost has been computed, and false if subexpressions should be
28542 scanned. In either case, *TOTAL contains the cost result. */
28545 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed)
28547 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
28548 enum machine_mode mode = GET_MODE (x);
28549 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
28557 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
28559 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
28561 else if (flag_pic && SYMBOLIC_CONST (x)
28563 || (GET_CODE (x) != LABEL_REF
28564 && (GET_CODE (x) != SYMBOL_REF
28565 || !SYMBOL_REF_LOCAL_P (x)))))
28572 if (mode == VOIDmode)
28575 switch (standard_80387_constant_p (x))
28580 default: /* Other constants */
28585 /* Start with (MEM (SYMBOL_REF)), since that's where
28586 it'll probably end up. Add a penalty for size. */
28587 *total = (COSTS_N_INSNS (1)
28588 + (flag_pic != 0 && !TARGET_64BIT)
28589 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
28595 /* Zero extension is often completely free on x86_64, so make
28596 it as cheap as possible. */
28597 if (TARGET_64BIT && mode == DImode
28598 && GET_MODE (XEXP (x, 0)) == SImode)
28600 else if (TARGET_ZERO_EXTEND_WITH_AND)
28601 *total = cost->add;
28603 *total = cost->movzx;
28607 *total = cost->movsx;
28611 if (CONST_INT_P (XEXP (x, 1))
28612 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
28614 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
28617 *total = cost->add;
28620 if ((value == 2 || value == 3)
28621 && cost->lea <= cost->shift_const)
28623 *total = cost->lea;
28633 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
28635 if (CONST_INT_P (XEXP (x, 1)))
28637 if (INTVAL (XEXP (x, 1)) > 32)
28638 *total = cost->shift_const + COSTS_N_INSNS (2);
28640 *total = cost->shift_const * 2;
28644 if (GET_CODE (XEXP (x, 1)) == AND)
28645 *total = cost->shift_var * 2;
28647 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
28652 if (CONST_INT_P (XEXP (x, 1)))
28653 *total = cost->shift_const;
28655 *total = cost->shift_var;
28663 gcc_assert (FLOAT_MODE_P (mode));
28664 gcc_assert (TARGET_FMA || TARGET_FMA4);
28666 /* ??? SSE scalar/vector cost should be used here. */
28667 /* ??? Bald assumption that fma has the same cost as fmul. */
28668 *total = cost->fmul;
28669 *total += rtx_cost (XEXP (x, 1), FMA, speed);
28671 /* Negation in op0 or op2 is free: FMS, FNMA, FNMS. */
28673 if (GET_CODE (sub) == NEG)
28674 sub = XEXP (sub, 0);
28675 *total += rtx_cost (sub, FMA, speed);
28678 if (GET_CODE (sub) == NEG)
28679 sub = XEXP (sub, 0);
28680 *total += rtx_cost (sub, FMA, speed);
28685 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
28687 /* ??? SSE scalar cost should be used here. */
28688 *total = cost->fmul;
28691 else if (X87_FLOAT_MODE_P (mode))
28693 *total = cost->fmul;
28696 else if (FLOAT_MODE_P (mode))
28698 /* ??? SSE vector cost should be used here. */
28699 *total = cost->fmul;
28704 rtx op0 = XEXP (x, 0);
28705 rtx op1 = XEXP (x, 1);
28707 if (CONST_INT_P (XEXP (x, 1)))
28709 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
28710 for (nbits = 0; value != 0; value &= value - 1)
28714 /* This is arbitrary. */
28717 /* Compute costs correctly for widening multiplication. */
28718 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
28719 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
28720 == GET_MODE_SIZE (mode))
28722 int is_mulwiden = 0;
28723 enum machine_mode inner_mode = GET_MODE (op0);
28725 if (GET_CODE (op0) == GET_CODE (op1))
28726 is_mulwiden = 1, op1 = XEXP (op1, 0);
28727 else if (CONST_INT_P (op1))
28729 if (GET_CODE (op0) == SIGN_EXTEND)
28730 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
28733 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
28737 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
28740 *total = (cost->mult_init[MODE_INDEX (mode)]
28741 + nbits * cost->mult_bit
28742 + rtx_cost (op0, outer_code, speed) + rtx_cost (op1, outer_code, speed));
28751 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
28752 /* ??? SSE cost should be used here. */
28753 *total = cost->fdiv;
28754 else if (X87_FLOAT_MODE_P (mode))
28755 *total = cost->fdiv;
28756 else if (FLOAT_MODE_P (mode))
28757 /* ??? SSE vector cost should be used here. */
28758 *total = cost->fdiv;
28760 *total = cost->divide[MODE_INDEX (mode)];
28764 if (GET_MODE_CLASS (mode) == MODE_INT
28765 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
28767 if (GET_CODE (XEXP (x, 0)) == PLUS
28768 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
28769 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
28770 && CONSTANT_P (XEXP (x, 1)))
28772 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
28773 if (val == 2 || val == 4 || val == 8)
28775 *total = cost->lea;
28776 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
28777 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
28778 outer_code, speed);
28779 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
28783 else if (GET_CODE (XEXP (x, 0)) == MULT
28784 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
28786 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
28787 if (val == 2 || val == 4 || val == 8)
28789 *total = cost->lea;
28790 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
28791 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
28795 else if (GET_CODE (XEXP (x, 0)) == PLUS)
28797 *total = cost->lea;
28798 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
28799 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
28800 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
28807 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
28809 /* ??? SSE cost should be used here. */
28810 *total = cost->fadd;
28813 else if (X87_FLOAT_MODE_P (mode))
28815 *total = cost->fadd;
28818 else if (FLOAT_MODE_P (mode))
28820 /* ??? SSE vector cost should be used here. */
28821 *total = cost->fadd;
28829 if (!TARGET_64BIT && mode == DImode)
28831 *total = (cost->add * 2
28832 + (rtx_cost (XEXP (x, 0), outer_code, speed)
28833 << (GET_MODE (XEXP (x, 0)) != DImode))
28834 + (rtx_cost (XEXP (x, 1), outer_code, speed)
28835 << (GET_MODE (XEXP (x, 1)) != DImode)));
28841 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
28843 /* ??? SSE cost should be used here. */
28844 *total = cost->fchs;
28847 else if (X87_FLOAT_MODE_P (mode))
28849 *total = cost->fchs;
28852 else if (FLOAT_MODE_P (mode))
28854 /* ??? SSE vector cost should be used here. */
28855 *total = cost->fchs;
28861 if (!TARGET_64BIT && mode == DImode)
28862 *total = cost->add * 2;
28864 *total = cost->add;
28868 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
28869 && XEXP (XEXP (x, 0), 1) == const1_rtx
28870 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
28871 && XEXP (x, 1) == const0_rtx)
28873 /* This kind of construct is implemented using test[bwl].
28874 Treat it as if we had an AND. */
28875 *total = (cost->add
28876 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed)
28877 + rtx_cost (const1_rtx, outer_code, speed));
28883 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
28888 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
28889 /* ??? SSE cost should be used here. */
28890 *total = cost->fabs;
28891 else if (X87_FLOAT_MODE_P (mode))
28892 *total = cost->fabs;
28893 else if (FLOAT_MODE_P (mode))
28894 /* ??? SSE vector cost should be used here. */
28895 *total = cost->fabs;
28899 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
28900 /* ??? SSE cost should be used here. */
28901 *total = cost->fsqrt;
28902 else if (X87_FLOAT_MODE_P (mode))
28903 *total = cost->fsqrt;
28904 else if (FLOAT_MODE_P (mode))
28905 /* ??? SSE vector cost should be used here. */
28906 *total = cost->fsqrt;
28910 if (XINT (x, 1) == UNSPEC_TP)
28917 case VEC_DUPLICATE:
28918 /* ??? Assume all of these vector manipulation patterns are
28919 recognizable, in which case they all pretty much have the
28920 same cost. */
28921 *total = COSTS_N_INSNS (1);
28931 static int current_machopic_label_num;
28933 /* Given a symbol name and its associated stub, write out the
28934 definition of the stub. */
28937 machopic_output_stub (FILE *file, const char *symb, const char *stub)
28939 unsigned int length;
28940 char *binder_name, *symbol_name, lazy_ptr_name[32];
28941 int label = ++current_machopic_label_num;
28943 /* For 64-bit we shouldn't get here. */
28944 gcc_assert (!TARGET_64BIT);
28946 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
28947 symb = targetm.strip_name_encoding (symb);
28949 length = strlen (stub);
28950 binder_name = XALLOCAVEC (char, length + 32);
28951 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
28953 length = strlen (symb);
28954 symbol_name = XALLOCAVEC (char, length + 32);
28955 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
28957 sprintf (lazy_ptr_name, "L%d$lz", label);
28959 if (MACHOPIC_ATT_STUB)
28960 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
28961 else if (MACHOPIC_PURE)
28962 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
28964 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
28966 fprintf (file, "%s:\n", stub);
28967 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
28969 if (MACHOPIC_ATT_STUB)
28971 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
28973 else if (MACHOPIC_PURE)
28976 /* 25-byte PIC stub using "CALL get_pc_thunk". */
28977 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
28978 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
28979 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
28980 label, lazy_ptr_name, label);
28981 fprintf (file, "\tjmp\t*%%ecx\n");
28984 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
28986 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
28987 it needs no stub-binding-helper. */
28988 if (MACHOPIC_ATT_STUB)
28991 fprintf (file, "%s:\n", binder_name);
28995 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
28996 fprintf (file, "\tpushl\t%%ecx\n");
28999 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
29001 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
29003 /* N.B. Keep the correspondence of these
29004 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
29005 old-pic/new-pic/non-pic stubs; altering this will break
29006 compatibility with existing dylibs. */
29009 /* 25-byte PIC stub using "CALL get_pc_thunk". */
29010 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
29013 /* 16-byte -mdynamic-no-pic stub. */
29014 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
29016 fprintf (file, "%s:\n", lazy_ptr_name);
29017 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
29018 fprintf (file, ASM_LONG "%s\n", binder_name);
29020 #endif /* TARGET_MACHO */
29022 /* Order the registers for the register allocator. */
29025 x86_order_regs_for_local_alloc (void)
29030 /* First allocate the local general purpose registers. */
29031 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
29032 if (GENERAL_REGNO_P (i) && call_used_regs[i])
29033 reg_alloc_order [pos++] = i;
29035 /* Global general purpose registers. */
29036 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
29037 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
29038 reg_alloc_order [pos++] = i;
29040 /* x87 registers come first in case we are doing FP math using them. */
29042 if (!TARGET_SSE_MATH)
29043 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
29044 reg_alloc_order [pos++] = i;
29046 /* SSE registers. */
29047 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
29048 reg_alloc_order [pos++] = i;
29049 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
29050 reg_alloc_order [pos++] = i;
29052 /* x87 registers. */
29053 if (TARGET_SSE_MATH)
29054 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
29055 reg_alloc_order [pos++] = i;
29057 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
29058 reg_alloc_order [pos++] = i;
29060 /* Initialize the rest of the array as we do not allocate some registers at all. */
29062 while (pos < FIRST_PSEUDO_REGISTER)
29063 reg_alloc_order [pos++] = 0;
29066 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
29067 in struct attribute_spec.handler. */
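/* An illustrative use (32-bit only):

     struct S f (void) __attribute__ ((callee_pop_aggregate_return (1)));

   An argument of one asks that the callee pop the hidden pointer to
   the returned aggregate; zero asks that the caller pop it. The checks
   below reject anything else. */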
29069 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
29071 int flags ATTRIBUTE_UNUSED,
29072 bool *no_add_attrs)
29074 if (TREE_CODE (*node) != FUNCTION_TYPE
29075 && TREE_CODE (*node) != METHOD_TYPE
29076 && TREE_CODE (*node) != FIELD_DECL
29077 && TREE_CODE (*node) != TYPE_DECL)
29079 warning (OPT_Wattributes, "%qE attribute only applies to functions",
29081 *no_add_attrs = true;
29086 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
29088 *no_add_attrs = true;
29091 if (is_attribute_p ("callee_pop_aggregate_return", name))
29095 cst = TREE_VALUE (args);
29096 if (TREE_CODE (cst) != INTEGER_CST)
29098 warning (OPT_Wattributes,
29099 "%qE attribute requires an integer constant argument",
29101 *no_add_attrs = true;
29103 else if (compare_tree_int (cst, 0) != 0
29104 && compare_tree_int (cst, 1) != 0)
29106 warning (OPT_Wattributes,
29107 "argument to %qE attribute is neither zero, nor one",
29109 *no_add_attrs = true;
29118 /* Handle a "ms_abi" or "sysv_abi" attribute; arguments as in
29119 struct attribute_spec.handler. */
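/* An illustrative use (64-bit only):

     int f (int, int) __attribute__ ((ms_abi));

   makes f follow the Microsoft x64 calling convention even when the
   default is the SysV ABI; ms_abi and sysv_abi are mutually exclusive,
   as the checks below enforce. */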
29121 ix86_handle_abi_attribute (tree *node, tree name,
29122 tree args ATTRIBUTE_UNUSED,
29123 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
29125 if (TREE_CODE (*node) != FUNCTION_TYPE
29126 && TREE_CODE (*node) != METHOD_TYPE
29127 && TREE_CODE (*node) != FIELD_DECL
29128 && TREE_CODE (*node) != TYPE_DECL)
29130 warning (OPT_Wattributes, "%qE attribute only applies to functions",
29132 *no_add_attrs = true;
29137 warning (OPT_Wattributes, "%qE attribute only available for 64-bit",
29139 *no_add_attrs = true;
29143 /* Can combine regparm with all attributes but fastcall. */
29144 if (is_attribute_p ("ms_abi", name))
29146 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
29148 error ("ms_abi and sysv_abi attributes are not compatible");
29153 else if (is_attribute_p ("sysv_abi", name))
29155 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
29157 error ("ms_abi and sysv_abi attributes are not compatible");
29166 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
29167 struct attribute_spec.handler. */
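/* An illustrative use:

     struct __attribute__ ((ms_struct)) S { char c; int i : 4; };

   lays S out with the Microsoft bitfield rules, while gcc_struct
   requests the native GCC layout; combining the two on one type is
   rejected below. */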
29169 ix86_handle_struct_attribute (tree *node, tree name,
29170 tree args ATTRIBUTE_UNUSED,
29171 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
29174 if (DECL_P (*node))
29176 if (TREE_CODE (*node) == TYPE_DECL)
29177 type = &TREE_TYPE (*node);
29182 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
29183 || TREE_CODE (*type) == UNION_TYPE)))
29185 warning (OPT_Wattributes, "%qE attribute ignored",
29187 *no_add_attrs = true;
29190 else if ((is_attribute_p ("ms_struct", name)
29191 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
29192 || ((is_attribute_p ("gcc_struct", name)
29193 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
29195 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
29197 *no_add_attrs = true;
29204 ix86_handle_fndecl_attribute (tree *node, tree name,
29205 tree args ATTRIBUTE_UNUSED,
29206 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
29208 if (TREE_CODE (*node) != FUNCTION_DECL)
29210 warning (OPT_Wattributes, "%qE attribute only applies to functions",
29212 *no_add_attrs = true;
29218 ix86_ms_bitfield_layout_p (const_tree record_type)
29220 return ((TARGET_MS_BITFIELD_LAYOUT
29221 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
29222 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
29225 /* Returns an expression indicating where the this parameter is
29226 located on entry to the FUNCTION. */
29229 x86_this_parameter (tree function)
29231 tree type = TREE_TYPE (function);
29232 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
29237 const int *parm_regs;
29239 if (ix86_function_type_abi (type) == MS_ABI)
29240 parm_regs = x86_64_ms_abi_int_parameter_registers;
29242 parm_regs = x86_64_int_parameter_registers;
29243 return gen_rtx_REG (DImode, parm_regs[aggr]);
29246 nregs = ix86_function_regparm (type, function);
29248 if (nregs > 0 && !stdarg_p (type))
29251 unsigned int ccvt = ix86_get_callcvt (type);
29253 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
29254 regno = aggr ? DX_REG : CX_REG;
29255 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
29259 return gen_rtx_MEM (SImode,
29260 plus_constant (stack_pointer_rtx, 4));
29269 return gen_rtx_MEM (SImode,
29270 plus_constant (stack_pointer_rtx, 4));
29273 return gen_rtx_REG (SImode, regno);
29276 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
29279 /* Determine whether x86_output_mi_thunk can succeed. */
29282 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
29283 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
29284 HOST_WIDE_INT vcall_offset, const_tree function)
29286 /* 64-bit can handle anything. */
29290 /* For 32-bit, everything's fine if we have one free register. */
29291 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
29294 /* Need a free register for vcall_offset. */
29298 /* Need a free register for GOT references. */
29299 if (flag_pic && !targetm.binds_local_p (function))
29302 /* Otherwise ok. */
29306 /* Output the assembler code for a thunk function. THUNK_DECL is the
29307 declaration for the thunk function itself, FUNCTION is the decl for
29308 the target function. DELTA is an immediate constant offset to be
29309 added to THIS. If VCALL_OFFSET is nonzero, the word at
29310 *(*this + vcall_offset) should be added to THIS. */
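/* In C-like pseudo code (illustrative only), the emitted thunk is

     this += delta;
     if (vcall_offset)
       this += *(ptrdiff_t *) (*(char **) this + vcall_offset);
     goto function;

   i.e. a fixed adjustment, an optional vtable-based adjustment, and a
   tail jump to FUNCTION. */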
29313 x86_output_mi_thunk (FILE *file,
29314 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
29315 HOST_WIDE_INT vcall_offset, tree function)
29317 rtx this_param = x86_this_parameter (function);
29318 rtx this_reg, tmp, fnaddr;
29320 emit_note (NOTE_INSN_PROLOGUE_END);
29322 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
29323 pull it in now and let DELTA benefit. */
29324 if (REG_P (this_param))
29325 this_reg = this_param;
29326 else if (vcall_offset)
29328 /* Put the this parameter into %eax. */
29329 this_reg = gen_rtx_REG (Pmode, AX_REG);
29330 emit_move_insn (this_reg, this_param);
29333 this_reg = NULL_RTX;
29335 /* Adjust the this parameter by a fixed constant. */
29338 rtx delta_rtx = GEN_INT (delta);
29339 rtx delta_dst = this_reg ? this_reg : this_param;
29343 if (!x86_64_general_operand (delta_rtx, Pmode))
29345 tmp = gen_rtx_REG (Pmode, R10_REG);
29346 emit_move_insn (tmp, delta_rtx);
29351 emit_insn (ix86_gen_add3 (delta_dst, delta_dst, delta_rtx));
29354 /* Adjust the this parameter by a value stored in the vtable. */
29357 rtx vcall_addr, vcall_mem;
29358 unsigned int tmp_regno;
29361 tmp_regno = R10_REG;
29364 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
29365 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) != 0)
29366 tmp_regno = AX_REG;
29368 tmp_regno = CX_REG;
29370 tmp = gen_rtx_REG (Pmode, tmp_regno);
29372 emit_move_insn (tmp, gen_rtx_MEM (ptr_mode, this_reg));
29374 /* Adjust the this parameter. */
29375 vcall_addr = plus_constant (tmp, vcall_offset);
29377 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
29379 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
29380 emit_move_insn (tmp2, GEN_INT (vcall_offset));
29381 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
29384 vcall_mem = gen_rtx_MEM (Pmode, vcall_addr);
29385 emit_insn (ix86_gen_add3 (this_reg, this_reg, vcall_mem));
29388 /* If necessary, drop THIS back to its stack slot. */
29389 if (this_reg && this_reg != this_param)
29390 emit_move_insn (this_param, this_reg);
29392 fnaddr = XEXP (DECL_RTL (function), 0);
29395 if (!flag_pic || targetm.binds_local_p (function)
29396 || cfun->machine->call_abi == MS_ABI)
29400 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
29401 tmp = gen_rtx_CONST (Pmode, tmp);
29402 fnaddr = gen_rtx_MEM (Pmode, tmp);
29407 if (!flag_pic || targetm.binds_local_p (function))
29410 else if (TARGET_MACHO)
29412 rtx sym_ref = XEXP (DECL_RTL (function), 0);
29413 if (TARGET_MACHO_BRANCH_ISLANDS)
29414 sym_ref = (gen_rtx_SYMBOL_REF
29416 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
29417 fnaddr = gen_rtx_MEM (Pmode, sym_ref);
29419 #endif /* TARGET_MACHO */
29422 tmp = gen_rtx_REG (Pmode, CX_REG);
29423 output_set_got (tmp, NULL_RTX);
29425 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
29426 fnaddr = gen_rtx_PLUS (Pmode, fnaddr, tmp);
29427 fnaddr = gen_rtx_MEM (Pmode, fnaddr);
29431 /* Our sibling call patterns do not allow memories, because we have no
29432 predicate that can distinguish between frame and non-frame memory.
29433 For our purposes here, we can get away with (ab)using a jump pattern,
29434 because we're going to do no optimization. */
29435 if (MEM_P (fnaddr))
29436 emit_jump_insn (gen_indirect_jump (fnaddr));
29439 tmp = gen_rtx_MEM (QImode, fnaddr);
29440 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
29441 tmp = emit_call_insn (tmp);
29442 SIBLING_CALL_P (tmp) = 1;
29446 /* Emit just enough of rest_of_compilation to get the insns emitted.
29447 Note that use_thunk calls assemble_start_function et al. */
29448 tmp = get_insns ();
29449 insn_locators_alloc ();
29450 shorten_branches (tmp);
29451 final_start_function (tmp, file, 1);
29452 final (tmp, file, 1);
29453 final_end_function ();
29457 x86_file_start (void)
29459 default_file_start ();
29461 darwin_file_start ();
29463 if (X86_FILE_START_VERSION_DIRECTIVE)
29464 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
29465 if (X86_FILE_START_FLTUSED)
29466 fputs ("\t.global\t__fltused\n", asm_out_file);
29467 if (ix86_asm_dialect == ASM_INTEL)
29468 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
29472 x86_field_alignment (tree field, int computed)
29474 enum machine_mode mode;
29475 tree type = TREE_TYPE (field);
29477 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
29479 mode = TYPE_MODE (strip_array_types (type));
29480 if (mode == DFmode || mode == DCmode
29481 || GET_MODE_CLASS (mode) == MODE_INT
29482 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
29483 return MIN (32, computed);
29487 /* Output assembler code to FILE to increment profiler label # LABELNO
29488 for profiling a function entry. */
29490 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
29492 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
29497 #ifndef NO_PROFILE_COUNTERS
29498 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
29501 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
29502 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
29504 fprintf (file, "\tcall\t%s\n", mcount_name);
29508 #ifndef NO_PROFILE_COUNTERS
29509 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
29512 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
29516 #ifndef NO_PROFILE_COUNTERS
29517 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
29520 fprintf (file, "\tcall\t%s\n", mcount_name);
29524 /* We don't have exact information about the insn sizes, but we may assume
29525 quite safely that we are informed about all 1 byte insns and memory
29526 address sizes. This is enough to eliminate unnecessary padding in 99% of cases. */
29530 min_insn_size (rtx insn)
29534 if (!INSN_P (insn) || !active_insn_p (insn))
29537 /* Discard alignments we've emitted and jump table data. */
29538 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
29539 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
29541 if (JUMP_TABLE_DATA_P (insn))
29544 /* Important case - calls are always 5 bytes.
29545 It is common to have many calls in a row. */
29547 && symbolic_reference_mentioned_p (PATTERN (insn))
29548 && !SIBLING_CALL_P (insn))
29550 len = get_attr_length (insn);
29554 /* For normal instructions we rely on get_attr_length being exact,
29555 with a few exceptions. */
29556 if (!JUMP_P (insn))
29558 enum attr_type type = get_attr_type (insn);
29563 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
29564 || asm_noperands (PATTERN (insn)) >= 0)
29571 /* Otherwise trust get_attr_length. */
29575 l = get_attr_length_address (insn);
29576 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
29585 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
29587 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in a 16-byte window. */
29591 ix86_avoid_jump_mispredicts (void)
29593 rtx insn, start = get_insns ();
29594 int nbytes = 0, njumps = 0;
29597 /* Look for all minimal intervals of instructions containing 4 jumps.
29598 The intervals are bounded by START and INSN. NBYTES is the total
29599 size of instructions in the interval including INSN and not including
29600 START. When NBYTES is smaller than 16 bytes, it is possible
29601 that the end of START and INSN ends up in the same 16-byte page.
29603 The smallest offset in the page at which INSN can start is the case where
29604 START ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
29605 We add a p2align to the 16-byte window with maxskip 15 - NBYTES + sizeof (INSN). */
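/* For example (illustrative): if four jumps would fall within one
   16-byte page and the interval ending in a 2-byte jump measures
   NBYTES == 12, the code below pads by 15 - 12 + 2 = 5 bytes so that
   the final jump starts in the next page. */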
29607 for (insn = start; insn; insn = NEXT_INSN (insn))
29611 if (LABEL_P (insn))
29613 int align = label_to_alignment (insn);
29614 int max_skip = label_to_max_skip (insn);
29618 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
29619 already in the current 16 byte page, because otherwise
29620 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
29621 bytes to reach 16 byte boundary. */
29623 || (align <= 3 && max_skip != (1 << align) - 1))
29626 fprintf (dump_file, "Label %i with max_skip %i\n",
29627 INSN_UID (insn), max_skip);
29630 while (nbytes + max_skip >= 16)
29632 start = NEXT_INSN (start);
29633 if ((JUMP_P (start)
29634 && GET_CODE (PATTERN (start)) != ADDR_VEC
29635 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
29637 njumps--, isjump = 1;
29640 nbytes -= min_insn_size (start);
29646 min_size = min_insn_size (insn);
29647 nbytes += min_size;
29649 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
29650 INSN_UID (insn), min_size);
29652 && GET_CODE (PATTERN (insn)) != ADDR_VEC
29653 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
29661 start = NEXT_INSN (start);
29662 if ((JUMP_P (start)
29663 && GET_CODE (PATTERN (start)) != ADDR_VEC
29664 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
29666 njumps--, isjump = 1;
29669 nbytes -= min_insn_size (start);
29671 gcc_assert (njumps >= 0);
29673 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
29674 INSN_UID (start), INSN_UID (insn), nbytes);
29676 if (njumps == 3 && isjump && nbytes < 16)
29678 int padsize = 15 - nbytes + min_insn_size (insn);
29681 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
29682 INSN_UID (insn), padsize);
29683 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
29689 /* AMD Athlon works faster
29690 when RET is not the destination of a conditional jump or directly preceded
29691 by another jump instruction. We avoid the penalty by inserting a NOP just
29692 before such RET instructions. */
29694 ix86_pad_returns (void)
29699 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
29701 basic_block bb = e->src;
29702 rtx ret = BB_END (bb);
29704 bool replace = false;
29706 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
29707 || optimize_bb_for_size_p (bb))
29709 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
29710 if (active_insn_p (prev) || LABEL_P (prev))
29712 if (prev && LABEL_P (prev))
29717 FOR_EACH_EDGE (e, ei, bb->preds)
29718 if (EDGE_FREQUENCY (e) && e->src->index >= 0
29719 && !(e->flags & EDGE_FALLTHRU))
29724 prev = prev_active_insn (ret);
29726 && ((JUMP_P (prev) && any_condjump_p (prev))
29729 /* Empty functions get a branch mispredict even when
29730 the jump destination is not visible to us. */
29731 if (!prev && !optimize_function_for_size_p (cfun))
29736 emit_jump_insn_before (gen_return_internal_long (), ret);
29742 /* Count the minimum number of instructions in BB. Return 4 if the
29743 number of instructions >= 4. */
29746 ix86_count_insn_bb (basic_block bb)
29749 int insn_count = 0;
29751   /* Count the number of instructions in this block.  Return 4 if the number
29752 of instructions >= 4. */
29753 FOR_BB_INSNS (bb, insn)
29755       /* This only happens in exit blocks.  */
29757 && GET_CODE (PATTERN (insn)) == RETURN)
29760 if (NONDEBUG_INSN_P (insn)
29761 && GET_CODE (PATTERN (insn)) != USE
29762 && GET_CODE (PATTERN (insn)) != CLOBBER)
29765 if (insn_count >= 4)
29774 /* Count the minimum number of instructions in the code path in BB.
29775    Return 4 if the number of instructions >= 4.  */
29778 ix86_count_insn (basic_block bb)
29782 int min_prev_count;
29784 /* Only bother counting instructions along paths with no
29785 more than 2 basic blocks between entry and exit. Given
29786 that BB has an edge to exit, determine if a predecessor
29787 of BB has an edge from entry. If so, compute the number
29788 of instructions in the predecessor block. If there
29789 happen to be multiple such blocks, compute the minimum. */
29790 min_prev_count = 4;
29791 FOR_EACH_EDGE (e, ei, bb->preds)
29794 edge_iterator prev_ei;
29796 if (e->src == ENTRY_BLOCK_PTR)
29798 min_prev_count = 0;
29801 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
29803 if (prev_e->src == ENTRY_BLOCK_PTR)
29805 int count = ix86_count_insn_bb (e->src);
29806 if (count < min_prev_count)
29807 min_prev_count = count;
29813 if (min_prev_count < 4)
29814 min_prev_count += ix86_count_insn_bb (bb);
29816 return min_prev_count;
29819 /* Pad a short function to 4 instructions.   */
29822 ix86_pad_short_function (void)
29827 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
29829 rtx ret = BB_END (e->src);
29830 if (JUMP_P (ret) && GET_CODE (PATTERN (ret)) == RETURN)
29832 int insn_count = ix86_count_insn (e->src);
29834 /* Pad short function. */
29835 if (insn_count < 4)
29839 /* Find epilogue. */
29842 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
29843 insn = PREV_INSN (insn);
29848 /* Two NOPs count as one instruction. */
29849 insn_count = 2 * (4 - insn_count);
29850 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
29856 /* Implement machine specific optimizations.  We implement padding of returns
29857    for K8 CPUs and a pass to avoid 4 jumps in a single 16 byte window.  */
29861 /* We are freeing block_for_insn in the toplev to keep compatibility
29862 with old MDEP_REORGS that are not CFG based. Recompute it now. */
29863 compute_bb_for_insn ();
29865 /* Run the vzeroupper optimization if needed. */
29866 if (TARGET_VZEROUPPER)
29867 move_or_delete_vzeroupper ();
29869 if (optimize && optimize_function_for_speed_p (cfun))
29871 if (TARGET_PAD_SHORT_FUNCTION)
29872 ix86_pad_short_function ();
29873 else if (TARGET_PAD_RETURNS)
29874 ix86_pad_returns ();
29875 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
29876 if (TARGET_FOUR_JUMP_LIMIT)
29877 ix86_avoid_jump_mispredicts ();
29882 /* Return nonzero when a QImode register that must be represented via a REX prefix is used.  */
29885 x86_extended_QIreg_mentioned_p (rtx insn)
29888 extract_insn_cached (insn);
29889 for (i = 0; i < recog_data.n_operands; i++)
29890 if (REG_P (recog_data.operand[i])
29891 && REGNO (recog_data.operand[i]) > BX_REG)
29896 /* Return nonzero when P points to a register encoded via a REX prefix.
29897 Called via for_each_rtx. */
29899 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
29901 unsigned int regno;
29904 regno = REGNO (*p);
29905 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
29908 /* Return true when INSN mentions a register that must be encoded using a REX prefix.  */
29911 x86_extended_reg_mentioned_p (rtx insn)
29913 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
29914 extended_reg_mentioned_1, NULL);
29917 /* If profitable, negate (without causing overflow) the integer constant
29918    of mode MODE at location LOC.  Return true in this case.  */
29920 x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
29924 if (!CONST_INT_P (*loc))
29930 /* DImode x86_64 constants must fit in 32 bits. */
29931 gcc_assert (x86_64_immediate_operand (*loc, mode));
29942 gcc_unreachable ();
29945 /* Avoid overflows. */
29946 if (mode_signbit_p (mode, *loc))
29949 val = INTVAL (*loc);
29951 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
29952 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
29953 if ((val < 0 && val != -128)
29956 *loc = GEN_INT (-val);
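/* A hedged standalone sketch (hypothetical helper, not GCC code) of the
   decision above.  x86 sign-extends 8-bit immediates, so -128 fits in an
   imm8 while +128 needs a full 32-bit immediate; hence -128 is kept as-is
   and, by the same token, +128 is worth flipping to -128 with the
   opposite operation.  */

static int
should_negate_imm (long val)
{
  /* Prefer `subl $4, %eax' over `addl $-4, %eax', but never trade the
     one-byte -128 encoding for a four-byte +128.  */
  return (val < 0 && val != -128) || val == 128;
}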
29963 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
29964 optabs would emit if we didn't have TFmode patterns. */
29967 x86_emit_floatuns (rtx operands[2])
29969 rtx neglab, donelab, i0, i1, f0, in, out;
29970 enum machine_mode mode, inmode;
29972 inmode = GET_MODE (operands[1]);
29973 gcc_assert (inmode == SImode || inmode == DImode);
29976 in = force_reg (inmode, operands[1]);
29977 mode = GET_MODE (out);
29978 neglab = gen_label_rtx ();
29979 donelab = gen_label_rtx ();
29980 f0 = gen_reg_rtx (mode);
29982 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
29984 expand_float (out, in, 0);
29986 emit_jump_insn (gen_jump (donelab));
29989 emit_label (neglab);
29991 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
29993 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
29995 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
29997 expand_float (f0, i0, 0);
29999 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
30001 emit_label (donelab);
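/* The same halving trick as a hedged standalone C sketch for
   uint64_t -> double (hypothetical helper, not GCC code).  Hardware only
   has signed conversions, so values with the top bit set are halved,
   converted, and doubled; ORing the low bit back in keeps the final
   doubling correctly rounded.  */

#include <stdint.h>

static double
u64_to_double (uint64_t u)
{
  uint64_t half;
  double d;
  if ((int64_t) u >= 0)
    return (double) (int64_t) u;	/* fits the signed range */
  half = (u >> 1) | (u & 1);		/* i0 = (in >> 1) | (in & 1) */
  d = (double) (int64_t) half;
  return d + d;				/* out = f0 + f0 */
}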
30004 /* AVX does not support 32-byte integer vector operations,
30005 thus the longest vector we are faced with is V16QImode. */
30006 #define MAX_VECT_LEN 16
30008 struct expand_vec_perm_d
30010 rtx target, op0, op1;
30011 unsigned char perm[MAX_VECT_LEN];
30012 enum machine_mode vmode;
30013 unsigned char nelt;
30017 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
30018 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
30020 /* Get a vector mode of the same size as the original but with elements
30021 twice as wide. This is only guaranteed to apply to integral vectors. */
30023 static inline enum machine_mode
30024 get_mode_wider_vector (enum machine_mode o)
30026 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
30027 enum machine_mode n = GET_MODE_WIDER_MODE (o);
30028 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
30029 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
30033 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
30034 with all elements equal to VAR. Return true if successful. */
30037 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
30038 rtx target, rtx val)
30061 /* First attempt to recognize VAL as-is. */
30062 dup = gen_rtx_VEC_DUPLICATE (mode, val);
30063 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
30064 if (recog_memoized (insn) < 0)
30067 /* If that fails, force VAL into a register. */
30070 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
30071 seq = get_insns ();
30074 emit_insn_before (seq, insn);
30076 ok = recog_memoized (insn) >= 0;
30085 if (TARGET_SSE || TARGET_3DNOW_A)
30089 val = gen_lowpart (SImode, val);
30090 x = gen_rtx_TRUNCATE (HImode, val);
30091 x = gen_rtx_VEC_DUPLICATE (mode, x);
30092 emit_insn (gen_rtx_SET (VOIDmode, target, x));
30105 struct expand_vec_perm_d dperm;
30109 memset (&dperm, 0, sizeof (dperm));
30110 dperm.target = target;
30111 dperm.vmode = mode;
30112 dperm.nelt = GET_MODE_NUNITS (mode);
30113 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
30115 /* Extend to SImode using a paradoxical SUBREG. */
30116 tmp1 = gen_reg_rtx (SImode);
30117 emit_move_insn (tmp1, gen_lowpart (SImode, val));
30119 /* Insert the SImode value as low element of a V4SImode vector. */
30120 tmp2 = gen_lowpart (V4SImode, dperm.op0);
30121 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
30123 ok = (expand_vec_perm_1 (&dperm)
30124 || expand_vec_perm_broadcast_1 (&dperm));
30136 /* Replicate the value once into the next wider mode and recurse. */
30138 enum machine_mode smode, wsmode, wvmode;
30141 smode = GET_MODE_INNER (mode);
30142 wvmode = get_mode_wider_vector (mode);
30143 wsmode = GET_MODE_INNER (wvmode);
30145 val = convert_modes (wsmode, smode, val, true);
30146 x = expand_simple_binop (wsmode, ASHIFT, val,
30147 GEN_INT (GET_MODE_BITSIZE (smode)),
30148 NULL_RTX, 1, OPTAB_LIB_WIDEN);
30149 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
30151 x = gen_lowpart (wvmode, target);
30152 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
30160 enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
30161 rtx x = gen_reg_rtx (hvmode);
30163 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
30166 x = gen_rtx_VEC_CONCAT (mode, x, x);
30167 emit_insn (gen_rtx_SET (VOIDmode, target, x));
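/* The widening shift/IOR recursion above, as a hedged scalar sketch
   (hypothetical helper, not GCC code): broadcasting one byte through a
   64-bit word by doubling the replicated width at each step.  */

#include <stdint.h>

static uint64_t
broadcast_byte (uint8_t b)
{
  uint64_t v = b;
  v |= v << 8;    /* QImode pair -> HImode */
  v |= v << 16;   /* HImode pair -> SImode */
  v |= v << 32;   /* SImode pair -> DImode */
  return v;
}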
30176 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
30177    whose ONE_VAR element is VAR, and other elements are zero.  Return true if successful.  */
30181 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
30182 rtx target, rtx var, int one_var)
30184 enum machine_mode vsimode;
30187 bool use_vector_set = false;
30192 /* For SSE4.1, we normally use vector set. But if the second
30193	 element is zero and inter-unit moves are OK, we use movq instead.  */
30195 use_vector_set = (TARGET_64BIT
30197 && !(TARGET_INTER_UNIT_MOVES
30203 use_vector_set = TARGET_SSE4_1;
30206 use_vector_set = TARGET_SSE2;
30209 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
30216 use_vector_set = TARGET_AVX;
30219 /* Use ix86_expand_vector_set in 64bit mode only. */
30220 use_vector_set = TARGET_AVX && TARGET_64BIT;
30226 if (use_vector_set)
30228 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
30229 var = force_reg (GET_MODE_INNER (mode), var);
30230 ix86_expand_vector_set (mmx_ok, target, var, one_var);
30246 var = force_reg (GET_MODE_INNER (mode), var);
30247 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
30248 emit_insn (gen_rtx_SET (VOIDmode, target, x));
30253 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
30254 new_target = gen_reg_rtx (mode);
30256 new_target = target;
30257 var = force_reg (GET_MODE_INNER (mode), var);
30258 x = gen_rtx_VEC_DUPLICATE (mode, var);
30259 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
30260 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
30263 /* We need to shuffle the value to the correct position, so
30264 create a new pseudo to store the intermediate result. */
30266 /* With SSE2, we can use the integer shuffle insns. */
30267 if (mode != V4SFmode && TARGET_SSE2)
30269 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
30271 GEN_INT (one_var == 1 ? 0 : 1),
30272 GEN_INT (one_var == 2 ? 0 : 1),
30273 GEN_INT (one_var == 3 ? 0 : 1)));
30274 if (target != new_target)
30275 emit_move_insn (target, new_target);
30279 /* Otherwise convert the intermediate result to V4SFmode and
30280 use the SSE1 shuffle instructions. */
30281 if (mode != V4SFmode)
30283 tmp = gen_reg_rtx (V4SFmode);
30284 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
30289 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
30291 GEN_INT (one_var == 1 ? 0 : 1),
30292 GEN_INT (one_var == 2 ? 0+4 : 1+4),
30293 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
30295 if (mode != V4SFmode)
30296 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
30297 else if (tmp != target)
30298 emit_move_insn (target, tmp);
30300 else if (target != new_target)
30301 emit_move_insn (target, new_target);
30306 vsimode = V4SImode;
30312 vsimode = V2SImode;
30318 /* Zero extend the variable element to SImode and recurse. */
30319 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
30321 x = gen_reg_rtx (vsimode);
30322 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
30324 gcc_unreachable ();
30326 emit_move_insn (target, gen_lowpart (mode, x));
30334 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
30335 consisting of the values in VALS. It is known that all elements
30336 except ONE_VAR are constants. Return true if successful. */
30339 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
30340 rtx target, rtx vals, int one_var)
30342 rtx var = XVECEXP (vals, 0, one_var);
30343 enum machine_mode wmode;
30346 const_vec = copy_rtx (vals);
30347 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
30348 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
30356 /* For the two element vectors, it's just as easy to use
30357 the general case. */
30361 /* Use ix86_expand_vector_set in 64bit mode only. */
30384 /* There's no way to set one QImode entry easily. Combine
30385 the variable value with its adjacent constant value, and
30386 promote to an HImode set. */
30387 x = XVECEXP (vals, 0, one_var ^ 1);
30390 var = convert_modes (HImode, QImode, var, true);
30391 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
30392 NULL_RTX, 1, OPTAB_LIB_WIDEN);
30393 x = GEN_INT (INTVAL (x) & 0xff);
30397 var = convert_modes (HImode, QImode, var, true);
30398 x = gen_int_mode (INTVAL (x) << 8, HImode);
30400 if (x != const0_rtx)
30401 var = expand_simple_binop (HImode, IOR, var, x, var,
30402 1, OPTAB_LIB_WIDEN);
30404 x = gen_reg_rtx (wmode);
30405 emit_move_insn (x, gen_lowpart (wmode, const_vec));
30406 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
30408 emit_move_insn (target, gen_lowpart (mode, x));
30415 emit_move_insn (target, const_vec);
30416 ix86_expand_vector_set (mmx_ok, target, var, one_var);
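/* The QImode trick above, sketched standalone (hypothetical helper, not
   GCC code): merge the variable byte with its constant neighbour into
   one HImode value that the vector-set patterns can handle.  Element 0
   lands in the low byte on little-endian x86.  */

#include <stdint.h>

static uint16_t
combine_qi_pair (uint8_t var, uint8_t neighbour, int var_is_odd)
{
  return var_is_odd
	 ? (uint16_t) ((var << 8) | neighbour)   /* odd index: high byte */
	 : (uint16_t) ((neighbour << 8) | var);  /* even index: low byte */
}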
30420 /* A subroutine of ix86_expand_vector_init_general. Use vector
30421 concatenate to handle the most general case: all values variable,
30422 and none identical. */
30425 ix86_expand_vector_init_concat (enum machine_mode mode,
30426 rtx target, rtx *ops, int n)
30428 enum machine_mode cmode, hmode = VOIDmode;
30429 rtx first[8], second[4];
30469 gcc_unreachable ();
30472 if (!register_operand (ops[1], cmode))
30473 ops[1] = force_reg (cmode, ops[1]);
30474 if (!register_operand (ops[0], cmode))
30475 ops[0] = force_reg (cmode, ops[0]);
30476 emit_insn (gen_rtx_SET (VOIDmode, target,
30477 gen_rtx_VEC_CONCAT (mode, ops[0],
30497 gcc_unreachable ();
30513 gcc_unreachable ();
30518 /* FIXME: We process inputs backward to help RA. PR 36222. */
30521 for (; i > 0; i -= 2, j--)
30523 first[j] = gen_reg_rtx (cmode);
30524 v = gen_rtvec (2, ops[i - 1], ops[i]);
30525 ix86_expand_vector_init (false, first[j],
30526 gen_rtx_PARALLEL (cmode, v));
30532 gcc_assert (hmode != VOIDmode);
30533 for (i = j = 0; i < n; i += 2, j++)
30535 second[j] = gen_reg_rtx (hmode);
30536 ix86_expand_vector_init_concat (hmode, second [j],
30540 ix86_expand_vector_init_concat (mode, target, second, n);
30543 ix86_expand_vector_init_concat (mode, target, first, n);
30547 gcc_unreachable ();
30551 /* A subroutine of ix86_expand_vector_init_general. Use vector
30552 interleave to handle the most general case: all values variable,
30553 and none identical. */
30556 ix86_expand_vector_init_interleave (enum machine_mode mode,
30557 rtx target, rtx *ops, int n)
30559 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
30562 rtx (*gen_load_even) (rtx, rtx, rtx);
30563 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
30564 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
30569 gen_load_even = gen_vec_setv8hi;
30570 gen_interleave_first_low = gen_vec_interleave_lowv4si;
30571 gen_interleave_second_low = gen_vec_interleave_lowv2di;
30572 inner_mode = HImode;
30573 first_imode = V4SImode;
30574 second_imode = V2DImode;
30575 third_imode = VOIDmode;
30578 gen_load_even = gen_vec_setv16qi;
30579 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
30580 gen_interleave_second_low = gen_vec_interleave_lowv4si;
30581 inner_mode = QImode;
30582 first_imode = V8HImode;
30583 second_imode = V4SImode;
30584 third_imode = V2DImode;
30587 gcc_unreachable ();
30590 for (i = 0; i < n; i++)
30592       /* Extend the odd element to SImode using a paradoxical SUBREG.  */
30593 op0 = gen_reg_rtx (SImode);
30594 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
30596 /* Insert the SImode value as low element of V4SImode vector. */
30597 op1 = gen_reg_rtx (V4SImode);
30598 op0 = gen_rtx_VEC_MERGE (V4SImode,
30599 gen_rtx_VEC_DUPLICATE (V4SImode,
30601 CONST0_RTX (V4SImode),
30603 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
30605       /* Cast the V4SImode vector back to a vector in the original mode.  */
30606 op0 = gen_reg_rtx (mode);
30607 emit_move_insn (op0, gen_lowpart (mode, op1));
30609       /* Load even elements into the second position.  */
30610 emit_insn (gen_load_even (op0,
30611 force_reg (inner_mode,
30615 /* Cast vector to FIRST_IMODE vector. */
30616 ops[i] = gen_reg_rtx (first_imode);
30617 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
30620 /* Interleave low FIRST_IMODE vectors. */
30621 for (i = j = 0; i < n; i += 2, j++)
30623 op0 = gen_reg_rtx (first_imode);
30624 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
30626 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
30627 ops[j] = gen_reg_rtx (second_imode);
30628 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
30631 /* Interleave low SECOND_IMODE vectors. */
30632 switch (second_imode)
30635 for (i = j = 0; i < n / 2; i += 2, j++)
30637 op0 = gen_reg_rtx (second_imode);
30638 emit_insn (gen_interleave_second_low (op0, ops[i],
30641	  /* Cast the SECOND_IMODE vector to the THIRD_IMODE vector.  */
30643 ops[j] = gen_reg_rtx (third_imode);
30644 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
30646 second_imode = V2DImode;
30647 gen_interleave_second_low = gen_vec_interleave_lowv2di;
30651 op0 = gen_reg_rtx (second_imode);
30652 emit_insn (gen_interleave_second_low (op0, ops[0],
30655       /* Cast the SECOND_IMODE vector back to a vector of the original mode.  */
30657 emit_insn (gen_rtx_SET (VOIDmode, target,
30658 gen_lowpart (mode, op0)));
30662 gcc_unreachable ();
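/* One interleave-low step as a hedged scalar sketch (hypothetical
   helper, not GCC code): the punpckl-style merge of the low halves of
   two vectors that each pass of the loops above applies at a doubled
   element width.  */

#include <stdint.h>

static void
interleave_low (const uint16_t *a, const uint16_t *b, uint16_t *out, int nelt)
{
  int i;
  for (i = 0; i < nelt / 2; i++)
    {
      out[2 * i]     = a[i];   /* low half of the first operand */
      out[2 * i + 1] = b[i];   /* low half of the second operand */
    }
}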
30666 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
30667 all values variable, and none identical. */
30670 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
30671 rtx target, rtx vals)
30673 rtx ops[32], op0, op1;
30674 enum machine_mode half_mode = VOIDmode;
30681 if (!mmx_ok && !TARGET_SSE)
30693 n = GET_MODE_NUNITS (mode);
30694 for (i = 0; i < n; i++)
30695 ops[i] = XVECEXP (vals, 0, i);
30696 ix86_expand_vector_init_concat (mode, target, ops, n);
30700 half_mode = V16QImode;
30704 half_mode = V8HImode;
30708 n = GET_MODE_NUNITS (mode);
30709 for (i = 0; i < n; i++)
30710 ops[i] = XVECEXP (vals, 0, i);
30711 op0 = gen_reg_rtx (half_mode);
30712 op1 = gen_reg_rtx (half_mode);
30713 ix86_expand_vector_init_interleave (half_mode, op0, ops,
30715 ix86_expand_vector_init_interleave (half_mode, op1,
30716 &ops [n >> 1], n >> 2);
30717 emit_insn (gen_rtx_SET (VOIDmode, target,
30718 gen_rtx_VEC_CONCAT (mode, op0, op1)));
30722 if (!TARGET_SSE4_1)
30730 /* Don't use ix86_expand_vector_init_interleave if we can't
30731 move from GPR to SSE register directly. */
30732 if (!TARGET_INTER_UNIT_MOVES)
30735 n = GET_MODE_NUNITS (mode);
30736 for (i = 0; i < n; i++)
30737 ops[i] = XVECEXP (vals, 0, i);
30738 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
30746 gcc_unreachable ();
30750 int i, j, n_elts, n_words, n_elt_per_word;
30751 enum machine_mode inner_mode;
30752 rtx words[4], shift;
30754 inner_mode = GET_MODE_INNER (mode);
30755 n_elts = GET_MODE_NUNITS (mode);
30756 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
30757 n_elt_per_word = n_elts / n_words;
30758 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
30760 for (i = 0; i < n_words; ++i)
30762 rtx word = NULL_RTX;
30764 for (j = 0; j < n_elt_per_word; ++j)
30766 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
30767 elt = convert_modes (word_mode, inner_mode, elt, true);
30773 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
30774 word, 1, OPTAB_LIB_WIDEN);
30775 word = expand_simple_binop (word_mode, IOR, word, elt,
30776 word, 1, OPTAB_LIB_WIDEN);
30784 emit_move_insn (target, gen_lowpart (mode, words[0]));
30785 else if (n_words == 2)
30787 rtx tmp = gen_reg_rtx (mode);
30788 emit_clobber (tmp);
30789 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
30790 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
30791 emit_move_insn (target, tmp);
30793 else if (n_words == 4)
30795 rtx tmp = gen_reg_rtx (V4SImode);
30796 gcc_assert (word_mode == SImode);
30797 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
30798 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
30799 emit_move_insn (target, gen_lowpart (mode, tmp));
30802 gcc_unreachable ();
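/* The shift/IOR word-building loop above, as a hedged standalone sketch
   (hypothetical helper, not GCC code): pack two HImode elements into one
   SImode word, highest element index first, so element 0 ends up in the
   low bits as the little-endian layout requires.  */

#include <stdint.h>

static uint32_t
pack_word (const uint16_t elt[2])
{
  uint32_t word = elt[1];          /* start with the highest index */
  word = (word << 16) | elt[0];    /* shift, then merge the next one */
  return word;
}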
30806 /* Initialize vector TARGET via VALS. Suppress the use of MMX
30807 instructions unless MMX_OK is true. */
30810 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
30812 enum machine_mode mode = GET_MODE (target);
30813 enum machine_mode inner_mode = GET_MODE_INNER (mode);
30814 int n_elts = GET_MODE_NUNITS (mode);
30815 int n_var = 0, one_var = -1;
30816 bool all_same = true, all_const_zero = true;
30820 for (i = 0; i < n_elts; ++i)
30822 x = XVECEXP (vals, 0, i);
30823 if (!(CONST_INT_P (x)
30824 || GET_CODE (x) == CONST_DOUBLE
30825 || GET_CODE (x) == CONST_FIXED))
30826 n_var++, one_var = i;
30827 else if (x != CONST0_RTX (inner_mode))
30828 all_const_zero = false;
30829 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
30833 /* Constants are best loaded from the constant pool. */
30836 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
30840 /* If all values are identical, broadcast the value. */
30842 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
30843 XVECEXP (vals, 0, 0)))
30846 /* Values where only one field is non-constant are best loaded from
30847 the pool and overwritten via move later. */
30851 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
30852 XVECEXP (vals, 0, one_var),
30856 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
30860 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
30864 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
30866 enum machine_mode mode = GET_MODE (target);
30867 enum machine_mode inner_mode = GET_MODE_INNER (mode);
30868 enum machine_mode half_mode;
30869 bool use_vec_merge = false;
30871 static rtx (*gen_extract[6][2]) (rtx, rtx)
30873 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
30874 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
30875 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
30876 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
30877 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
30878 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
30880 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
30882 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
30883 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
30884 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
30885 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
30886 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
30887 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
30897 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
30898 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
30900 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
30902 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
30903 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
30909 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
30913 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
30914 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
30916 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
30918 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
30919 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
30926 /* For the two element vectors, we implement a VEC_CONCAT with
30927 the extraction of the other element. */
30929 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
30930 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
30933 op0 = val, op1 = tmp;
30935 op0 = tmp, op1 = val;
30937 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
30938 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
30943 use_vec_merge = TARGET_SSE4_1;
30950 use_vec_merge = true;
30954 /* tmp = target = A B C D */
30955 tmp = copy_to_reg (target);
30956 /* target = A A B B */
30957 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
30958 /* target = X A B B */
30959 ix86_expand_vector_set (false, target, val, 0);
30960 /* target = A X C D */
30961 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
30962 const1_rtx, const0_rtx,
30963 GEN_INT (2+4), GEN_INT (3+4)));
30967 /* tmp = target = A B C D */
30968 tmp = copy_to_reg (target);
30969 /* tmp = X B C D */
30970 ix86_expand_vector_set (false, tmp, val, 0);
30971 /* target = A B X D */
30972 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
30973 const0_rtx, const1_rtx,
30974 GEN_INT (0+4), GEN_INT (3+4)));
30978 /* tmp = target = A B C D */
30979 tmp = copy_to_reg (target);
30980 /* tmp = X B C D */
30981 ix86_expand_vector_set (false, tmp, val, 0);
30982	  /* target = A B C X */
30983 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
30984 const0_rtx, const1_rtx,
30985 GEN_INT (2+4), GEN_INT (0+4)));
30989 gcc_unreachable ();
30994 use_vec_merge = TARGET_SSE4_1;
30998 /* Element 0 handled by vec_merge below. */
31001 use_vec_merge = true;
31007 /* With SSE2, use integer shuffles to swap element 0 and ELT,
31008 store into element 0, then shuffle them back. */
31012 order[0] = GEN_INT (elt);
31013 order[1] = const1_rtx;
31014 order[2] = const2_rtx;
31015 order[3] = GEN_INT (3);
31016 order[elt] = const0_rtx;
31018 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
31019 order[1], order[2], order[3]));
31021 ix86_expand_vector_set (false, target, val, 0);
31023 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
31024 order[1], order[2], order[3]));
31028 /* For SSE1, we have to reuse the V4SF code. */
31029 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
31030 gen_lowpart (SFmode, val), elt);
31035 use_vec_merge = TARGET_SSE2;
31038 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
31042 use_vec_merge = TARGET_SSE4_1;
31049 half_mode = V16QImode;
31055 half_mode = V8HImode;
31061 half_mode = V4SImode;
31067 half_mode = V2DImode;
31073 half_mode = V4SFmode;
31079 half_mode = V2DFmode;
31085 /* Compute offset. */
31089 gcc_assert (i <= 1);
31091 /* Extract the half. */
31092 tmp = gen_reg_rtx (half_mode);
31093 emit_insn (gen_extract[j][i] (tmp, target));
31095 /* Put val in tmp at elt. */
31096 ix86_expand_vector_set (false, tmp, val, elt);
31099 emit_insn (gen_insert[j][i] (target, target, tmp));
31108 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
31109 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
31110 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
31114 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
31116 emit_move_insn (mem, target);
31118 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
31119 emit_move_insn (tmp, val);
31121 emit_move_insn (target, mem);
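/* The V4SF elt == 1 shuffle dance above, as a hedged SSE-intrinsics
   sketch (hypothetical helper, not GCC code; compile with -msse).  */

#include <xmmintrin.h>

static __m128
set_lane1 (__m128 v, float x)
{
  __m128 tmp = v;                       /* tmp = target = A B C D */
  v = _mm_unpacklo_ps (v, v);           /* v = A A B B */
  v = _mm_move_ss (v, _mm_set_ss (x));  /* v = X A B B */
  /* Lanes 0,1 from v (A, X); lanes 2,3 from tmp (C, D) -> A X C D.  */
  return _mm_shuffle_ps (v, tmp, _MM_SHUFFLE (3, 2, 0, 1));
}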
31126 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
31128 enum machine_mode mode = GET_MODE (vec);
31129 enum machine_mode inner_mode = GET_MODE_INNER (mode);
31130 bool use_vec_extr = false;
31143 use_vec_extr = true;
31147 use_vec_extr = TARGET_SSE4_1;
31159 tmp = gen_reg_rtx (mode);
31160 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
31161 GEN_INT (elt), GEN_INT (elt),
31162 GEN_INT (elt+4), GEN_INT (elt+4)));
31166 tmp = gen_reg_rtx (mode);
31167 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
31171 gcc_unreachable ();
31174 use_vec_extr = true;
31179 use_vec_extr = TARGET_SSE4_1;
31193 tmp = gen_reg_rtx (mode);
31194 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
31195 GEN_INT (elt), GEN_INT (elt),
31196 GEN_INT (elt), GEN_INT (elt)));
31200 tmp = gen_reg_rtx (mode);
31201 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
31205 gcc_unreachable ();
31208 use_vec_extr = true;
31213 /* For SSE1, we have to reuse the V4SF code. */
31214 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
31215 gen_lowpart (V4SFmode, vec), elt);
31221 use_vec_extr = TARGET_SSE2;
31224 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
31228 use_vec_extr = TARGET_SSE4_1;
31232 /* ??? Could extract the appropriate HImode element and shift. */
31239 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
31240 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
31242 /* Let the rtl optimizers know about the zero extension performed. */
31243 if (inner_mode == QImode || inner_mode == HImode)
31245 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
31246 target = gen_lowpart (SImode, target);
31249 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
31253 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
31255 emit_move_insn (mem, vec);
31257 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
31258 emit_move_insn (target, tmp);
31262 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
31263 pattern to reduce; DEST is the destination; IN is the input vector. */
31266 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
31268 rtx tmp1, tmp2, tmp3;
31270 tmp1 = gen_reg_rtx (V4SFmode);
31271 tmp2 = gen_reg_rtx (V4SFmode);
31272 tmp3 = gen_reg_rtx (V4SFmode);
31274 emit_insn (gen_sse_movhlps (tmp1, in, in));
31275 emit_insn (fn (tmp2, tmp1, in));
31277 emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
31278 const1_rtx, const1_rtx,
31279 GEN_INT (1+4), GEN_INT (1+4)));
31280 emit_insn (fn (dest, tmp2, tmp3));
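/* The same reduction shape in hedged SSE-intrinsics form (hypothetical
   helper, not GCC code): movhlps folds the high pair onto the low pair,
   one shuffle broadcasts lane 1, and lane 0 ends up holding
   op (op (in0, in2), op (in1, in3)) -- here with addition as the FN.  */

#include <xmmintrin.h>

static float
reduce_add_v4sf (__m128 in)
{
  __m128 hi = _mm_movehl_ps (in, in);   /* [in2 in3 in2 in3] */
  __m128 s  = _mm_add_ps (hi, in);      /* lanes 0,1 hold the pair sums */
  __m128 b  = _mm_shuffle_ps (s, s, _MM_SHUFFLE (1, 1, 1, 1));
  return _mm_cvtss_f32 (_mm_add_ss (s, b));   /* lane 0 = total */
}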
31283 /* Target hook for scalar_mode_supported_p. */
31285 ix86_scalar_mode_supported_p (enum machine_mode mode)
31287 if (DECIMAL_FLOAT_MODE_P (mode))
31288 return default_decimal_float_supported_p ();
31289 else if (mode == TFmode)
31292 return default_scalar_mode_supported_p (mode);
31295 /* Implements target hook vector_mode_supported_p. */
31297 ix86_vector_mode_supported_p (enum machine_mode mode)
31299 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
31301 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
31303 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
31305 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
31307 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
31312 /* Target hook for c_mode_for_suffix. */
31313 static enum machine_mode
31314 ix86_c_mode_for_suffix (char suffix)
31324 /* Worker function for TARGET_MD_ASM_CLOBBERS.
31326 We do this in the new i386 backend to maintain source compatibility
31327 with the old cc0-based compiler. */
31330 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
31331 tree inputs ATTRIBUTE_UNUSED,
31334 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
31336 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
31341 /* Implements the target hook targetm.asm.encode_section_info.  This
31342    is not used by NetWare.  */
31344 static void ATTRIBUTE_UNUSED
31345 ix86_encode_section_info (tree decl, rtx rtl, int first)
31347 default_encode_section_info (decl, rtl, first);
31349 if (TREE_CODE (decl) == VAR_DECL
31350 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
31351 && ix86_in_large_data_p (decl))
31352 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
31355 /* Worker function for REVERSE_CONDITION. */
31358 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
31360 return (mode != CCFPmode && mode != CCFPUmode
31361 ? reverse_condition (code)
31362 : reverse_condition_maybe_unordered (code));
31365 /* Output code to perform an x87 FP register move, from OPERANDS[1] to OPERANDS[0].  */
31369 output_387_reg_move (rtx insn, rtx *operands)
31371 if (REG_P (operands[0]))
31373 if (REG_P (operands[1])
31374 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
31376 if (REGNO (operands[0]) == FIRST_STACK_REG)
31377 return output_387_ffreep (operands, 0);
31378 return "fstp\t%y0";
31380 if (STACK_TOP_P (operands[0]))
31381 return "fld%Z1\t%y1";
31384 else if (MEM_P (operands[0]))
31386 gcc_assert (REG_P (operands[1]));
31387 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
31388 return "fstp%Z0\t%y0";
31391 /* There is no non-popping store to memory for XFmode.
31392 So if we need one, follow the store with a load. */
31393 if (GET_MODE (operands[0]) == XFmode)
31394 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
31396 return "fst%Z0\t%y0";
31403 /* Output code to perform a conditional jump to LABEL, if the C2 flag in
31404    the FP status register is set.  */
31407 ix86_emit_fp_unordered_jump (rtx label)
31409 rtx reg = gen_reg_rtx (HImode);
31412 emit_insn (gen_x86_fnstsw_1 (reg));
31414 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
31416 emit_insn (gen_x86_sahf_1 (reg));
31418 temp = gen_rtx_REG (CCmode, FLAGS_REG);
31419 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
31423 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
31425 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
31426 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
31429 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
31430 gen_rtx_LABEL_REF (VOIDmode, label),
31432 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
31434 emit_jump_insn (temp);
31435 predict_jump (REG_BR_PROB_BASE * 10 / 100);
31438 /* Output code to perform a log1p XFmode calculation. */
31440 void ix86_emit_i387_log1p (rtx op0, rtx op1)
31442 rtx label1 = gen_label_rtx ();
31443 rtx label2 = gen_label_rtx ();
31445 rtx tmp = gen_reg_rtx (XFmode);
31446 rtx tmp2 = gen_reg_rtx (XFmode);
31449 emit_insn (gen_absxf2 (tmp, op1));
31450 test = gen_rtx_GE (VOIDmode, tmp,
31451 CONST_DOUBLE_FROM_REAL_VALUE (
31452 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
31454 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
31456 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
31457 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
31458 emit_jump (label2);
31460 emit_label (label1);
31461 emit_move_insn (tmp, CONST1_RTX (XFmode));
31462 emit_insn (gen_addxf3 (tmp, op1, tmp));
31463 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
31464 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
31466 emit_label (label2);
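/* A hedged C sketch of the range split above (hypothetical helper, not
   the emitted code).  fyl2xp1 evaluates y * log2 (x + 1) from x itself,
   so it stays accurate when forming 1 + x would lose low-order bits; the
   crossover 0.29289... is 1 - sqrt(2)/2.  */

#include <math.h>

static double
log1p_sketch (double x)
{
  const double ln2 = 0.69314718055994530942;   /* the fldln2 constant */
  if (fabs (x) < 0.29289321881345247561810596348408353)
    return ln2 * log2 (x + 1.0);  /* stands in for fyl2xp1 */
  return ln2 * log2 (1.0 + x);    /* stands in for fyl2x on 1 + x */
}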
31469 /* Output code to perform a Newton-Raphson approximation of a single precision
31470 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
31472 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
31474 rtx x0, x1, e0, e1;
31476 x0 = gen_reg_rtx (mode);
31477 e0 = gen_reg_rtx (mode);
31478 e1 = gen_reg_rtx (mode);
31479 x1 = gen_reg_rtx (mode);
31481 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
31483 /* x0 = rcp(b) estimate */
31484 emit_insn (gen_rtx_SET (VOIDmode, x0,
31485 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
31488 emit_insn (gen_rtx_SET (VOIDmode, e0,
31489 gen_rtx_MULT (mode, x0, b)));
31492 emit_insn (gen_rtx_SET (VOIDmode, e0,
31493 gen_rtx_MULT (mode, x0, e0)));
31496 emit_insn (gen_rtx_SET (VOIDmode, e1,
31497 gen_rtx_PLUS (mode, x0, x0)));
31500 emit_insn (gen_rtx_SET (VOIDmode, x1,
31501 gen_rtx_MINUS (mode, e1, e0)));
31504 emit_insn (gen_rtx_SET (VOIDmode, res,
31505 gen_rtx_MULT (mode, a, x1)));
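/* The same refinement in hedged SSE-intrinsics form (hypothetical
   helper, not GCC code; compile with -msse): rcpss gives roughly 12 good
   bits, and the one Newton-Raphson step
   x1 = x0 * (2 - b*x0) = (x0 + x0) - b*x0*x0 roughly doubles them.  */

#include <xmmintrin.h>

static float
fast_divf (float a, float b)
{
  float x0 = _mm_cvtss_f32 (_mm_rcp_ss (_mm_set_ss (b)));
  float x1 = (x0 + x0) - (b * x0) * x0;   /* e1 - e0 in the code above */
  return a * x1;
}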
31508 /* Output code to perform a Newton-Raphson approximation of a
31509 single precision floating point [reciprocal] square root. */
31511 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
31514 rtx x0, e0, e1, e2, e3, mthree, mhalf;
31517 x0 = gen_reg_rtx (mode);
31518 e0 = gen_reg_rtx (mode);
31519 e1 = gen_reg_rtx (mode);
31520 e2 = gen_reg_rtx (mode);
31521 e3 = gen_reg_rtx (mode);
31523 real_from_integer (&r, VOIDmode, -3, -1, 0);
31524 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
31526 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
31527 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
31529 if (VECTOR_MODE_P (mode))
31531 mthree = ix86_build_const_vector (mode, true, mthree);
31532 mhalf = ix86_build_const_vector (mode, true, mhalf);
31535 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
31536 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
31538 /* x0 = rsqrt(a) estimate */
31539 emit_insn (gen_rtx_SET (VOIDmode, x0,
31540 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
31543   /* If a == 0.0, filter the infinity out of the rsqrt estimate to prevent a NaN for sqrt (0.0).  */
31548 zero = gen_reg_rtx (mode);
31549 mask = gen_reg_rtx (mode);
31551 zero = force_reg (mode, CONST0_RTX(mode));
31552 emit_insn (gen_rtx_SET (VOIDmode, mask,
31553 gen_rtx_NE (mode, zero, a)));
31555 emit_insn (gen_rtx_SET (VOIDmode, x0,
31556 gen_rtx_AND (mode, x0, mask)));
31560 emit_insn (gen_rtx_SET (VOIDmode, e0,
31561 gen_rtx_MULT (mode, x0, a)));
31563 emit_insn (gen_rtx_SET (VOIDmode, e1,
31564 gen_rtx_MULT (mode, e0, x0)));
31567 mthree = force_reg (mode, mthree);
31568 emit_insn (gen_rtx_SET (VOIDmode, e2,
31569 gen_rtx_PLUS (mode, e1, mthree)));
31571 mhalf = force_reg (mode, mhalf);
31573 /* e3 = -.5 * x0 */
31574 emit_insn (gen_rtx_SET (VOIDmode, e3,
31575 gen_rtx_MULT (mode, x0, mhalf)));
31577 /* e3 = -.5 * e0 */
31578 emit_insn (gen_rtx_SET (VOIDmode, e3,
31579 gen_rtx_MULT (mode, e0, mhalf)));
31580 /* ret = e2 * e3 */
31581 emit_insn (gen_rtx_SET (VOIDmode, res,
31582 gen_rtx_MULT (mode, e2, e3)));
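/* Hedged intrinsics sketch of the rsqrt variant (hypothetical helper,
   not GCC code; compile with -msse): one Newton-Raphson step on the
   rsqrtss estimate, following the formula in the comment above.
   Multiply the result by A to get sqrt (A) instead.  */

#include <xmmintrin.h>

static float
fast_rsqrtf (float a)
{
  float x0 = _mm_cvtss_f32 (_mm_rsqrt_ss (_mm_set_ss (a)));
  return -0.5f * x0 * (a * x0 * x0 - 3.0f);
}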
31585 #ifdef TARGET_SOLARIS
31586 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
31589 i386_solaris_elf_named_section (const char *name, unsigned int flags,
31592 /* With Binutils 2.15, the "@unwind" marker must be specified on
31593      every occurrence of the ".eh_frame" section, not just the first one.  */
31596 && strcmp (name, ".eh_frame") == 0)
31598 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
31599 flags & SECTION_WRITE ? "aw" : "a");
31604 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
31606 solaris_elf_asm_comdat_section (name, flags, decl);
31611 default_elf_asm_named_section (name, flags, decl);
31613 #endif /* TARGET_SOLARIS */
31615 /* Return the mangling of TYPE if it is an extended fundamental type. */
31617 static const char *
31618 ix86_mangle_type (const_tree type)
31620 type = TYPE_MAIN_VARIANT (type);
31622 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
31623 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
31626 switch (TYPE_MODE (type))
31629 /* __float128 is "g". */
31632 /* "long double" or __float80 is "e". */
31639 /* For 32-bit code we can save PIC register setup by using
31640 __stack_chk_fail_local hidden function instead of calling
31641 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
31642 register, so it is better to call __stack_chk_fail directly. */
31644 static tree ATTRIBUTE_UNUSED
31645 ix86_stack_protect_fail (void)
31647 return TARGET_64BIT
31648 ? default_external_stack_protect_fail ()
31649 : default_hidden_stack_protect_fail ();
31652 /* Select a format to encode pointers in exception handling data. CODE
31653 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
31654 true if the symbol may be affected by dynamic relocations.
31656 ??? All x86 object file formats are capable of representing this.
31657 After all, the relocation needed is the same as for the call insn.
31658 Whether or not a particular assembler allows us to enter such, I
31659 guess we'll have to see. */
31661 asm_preferred_eh_data_format (int code, int global)
31665 int type = DW_EH_PE_sdata8;
31667 || ix86_cmodel == CM_SMALL_PIC
31668 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
31669 type = DW_EH_PE_sdata4;
31670 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
31672 if (ix86_cmodel == CM_SMALL
31673 || (ix86_cmodel == CM_MEDIUM && code))
31674 return DW_EH_PE_udata4;
31675 return DW_EH_PE_absptr;
31678 /* Expand copysign from SIGN to the positive value ABS_VALUE
31679    storing in RESULT.  If MASK is non-null, it shall be a mask to mask out the sign bit.  */
31682 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
31684 enum machine_mode mode = GET_MODE (sign);
31685 rtx sgn = gen_reg_rtx (mode);
31686 if (mask == NULL_RTX)
31688 enum machine_mode vmode;
31690 if (mode == SFmode)
31692 else if (mode == DFmode)
31697 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
31698 if (!VECTOR_MODE_P (mode))
31700 /* We need to generate a scalar mode mask in this case. */
31701 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
31702 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
31703 mask = gen_reg_rtx (mode);
31704 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
31708 mask = gen_rtx_NOT (mode, mask);
31709 emit_insn (gen_rtx_SET (VOIDmode, sgn,
31710 gen_rtx_AND (mode, mask, sign)));
31711 emit_insn (gen_rtx_SET (VOIDmode, result,
31712 gen_rtx_IOR (mode, abs_value, sgn)));
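/* Bit-level view of the two instructions above, as a hedged sketch
   (hypothetical helper, not GCC code): with ABS_VALUE known
   non-negative, ORing in the masked-out sign bit of SIGN is exactly
   copysign.  */

#include <stdint.h>
#include <string.h>

static double
copysign_sketch (double abs_value, double sign)
{
  uint64_t a, s, signbit = 0x8000000000000000ull;
  double r;
  memcpy (&a, &abs_value, sizeof a);
  memcpy (&s, &sign, sizeof s);
  a |= s & signbit;          /* sgn = mask & sign; result = abs | sgn */
  memcpy (&r, &a, sizeof r);
  return r;
}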
31715 /* Expand fabs (OP0) and return a new rtx that holds the result. The
31716    mask for masking out the sign-bit is stored in *SMASK, if that is non-null.  */
31719 ix86_expand_sse_fabs (rtx op0, rtx *smask)
31721 enum machine_mode vmode, mode = GET_MODE (op0);
31724 xa = gen_reg_rtx (mode);
31725 if (mode == SFmode)
31727 else if (mode == DFmode)
31731 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
31732 if (!VECTOR_MODE_P (mode))
31734 /* We need to generate a scalar mode mask in this case. */
31735 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
31736 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
31737 mask = gen_reg_rtx (mode);
31738 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
31740 emit_insn (gen_rtx_SET (VOIDmode, xa,
31741 gen_rtx_AND (mode, op0, mask)));
31749 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
31750 swapping the operands if SWAP_OPERANDS is true. The expanded
31751 code is a forward jump to a newly created label in case the
31752 comparison is true. The generated label rtx is returned. */
31754 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
31755 bool swap_operands)
31766 label = gen_label_rtx ();
31767 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
31768 emit_insn (gen_rtx_SET (VOIDmode, tmp,
31769 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
31770 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
31771 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
31772 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
31773 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
31774 JUMP_LABEL (tmp) = label;
31779 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
31780 using comparison code CODE. Operands are swapped for the comparison if
31781 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
31783 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
31784 bool swap_operands)
31786 rtx (*insn)(rtx, rtx, rtx, rtx);
31787 enum machine_mode mode = GET_MODE (op0);
31788 rtx mask = gen_reg_rtx (mode);
31797 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
31799 emit_insn (insn (mask, op0, op1,
31800 gen_rtx_fmt_ee (code, mode, op0, op1)));
31804 /* Generate and return a rtx of mode MODE for 2**n where n is the number
31805 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
31807 ix86_gen_TWO52 (enum machine_mode mode)
31809 REAL_VALUE_TYPE TWO52r;
31812 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
31813 TWO52 = const_double_from_real_value (TWO52r, mode);
31814 TWO52 = force_reg (mode, TWO52);
31819 /* Expand SSE sequence for computing lround from OP1 storing into OP0.  */
31822 ix86_expand_lround (rtx op0, rtx op1)
31824 /* C code for the stuff we're doing below:
31825         tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
31826         return (long)tmp;  */
31828 enum machine_mode mode = GET_MODE (op1);
31829 const struct real_format *fmt;
31830 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
31833 /* load nextafter (0.5, 0.0) */
31834 fmt = REAL_MODE_FORMAT (mode);
31835 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
31836 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
31838 /* adj = copysign (0.5, op1) */
31839 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
31840 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
31842 /* adj = op1 + adj */
31843 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
31845 /* op0 = (imode)adj */
31846 expand_fix (op0, adj, 0);
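/* The whole lround expansion as a hedged C sketch (hypothetical helper,
   not GCC code): nextafter (0.5, 0.0) is the largest double below 0.5,
   so adding it rounds every exact half-way case away from zero without
   ever pushing an almost-half value over the next integer.  */

#include <math.h>

static long
lround_sketch (double x)
{
  double adj = copysign (nextafter (0.5, 0.0), x);
  return (long) (x + adj);   /* truncation finishes the rounding */
}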
31849 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1 storing into OPERAND0.  */
31852 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
31854   /* C code for the stuff we're doing below (for do_floor):
31855         xi = (long)op1;
31856	 xi -= (double)xi > op1 ? 1 : 0;
31857         return xi;  */
31859 enum machine_mode fmode = GET_MODE (op1);
31860 enum machine_mode imode = GET_MODE (op0);
31861 rtx ireg, freg, label, tmp;
31863 /* reg = (long)op1 */
31864 ireg = gen_reg_rtx (imode);
31865 expand_fix (ireg, op1, 0);
31867 /* freg = (double)reg */
31868 freg = gen_reg_rtx (fmode);
31869 expand_float (freg, ireg, 0);
31871 /* ireg = (freg > op1) ? ireg - 1 : ireg */
31872 label = ix86_expand_sse_compare_and_jump (UNLE,
31873 freg, op1, !do_floor);
31874 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
31875 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
31876 emit_move_insn (ireg, tmp);
31878 emit_label (label);
31879 LABEL_NUSES (label) = 1;
31881 emit_move_insn (op0, ireg);
31884 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
31885 result in OPERAND0. */
31887 ix86_expand_rint (rtx operand0, rtx operand1)
31889 /* C code for the stuff we're doing below:
31890 xa = fabs (operand1);
31891	if (!isless (xa, 2**52))
31892	  return operand1;
31893 xa = xa + 2**52 - 2**52;
31894 return copysign (xa, operand1);
31896 enum machine_mode mode = GET_MODE (operand0);
31897 rtx res, xa, label, TWO52, mask;
31899 res = gen_reg_rtx (mode);
31900 emit_move_insn (res, operand1);
31902 /* xa = abs (operand1) */
31903 xa = ix86_expand_sse_fabs (res, &mask);
31905 /* if (!isless (xa, TWO52)) goto label; */
31906 TWO52 = ix86_gen_TWO52 (mode);
31907 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
31909 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
31910 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
31912 ix86_sse_copysign_to_positive (res, xa, res, mask);
31914 emit_label (label);
31915 LABEL_NUSES (label) = 1;
31917 emit_move_insn (operand0, res);
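/* The TWO52 trick as a hedged C sketch (hypothetical helper, not GCC
   code; assumes round-to-nearest and strict double evaluation): adding
   2^52 pushes every bit below the units place out of the significand,
   so the FPU's own rounding produces the nearest integer.  */

#include <math.h>

static double
rint_sketch (double x)
{
  double xa = fabs (x);
  if (!(xa < 0x1p52))             /* already integral, or NaN/Inf */
    return x;
  xa = (xa + 0x1p52) - 0x1p52;    /* round to nearest, ties to even */
  return copysign (xa, x);        /* restore the sign, -0.0 included */
}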
31920 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing into OPERAND0.
31923 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
31925 /* C code for the stuff we expand below.
31926 double xa = fabs (x), x2;
31927         if (!isless (xa, TWO52))
31928           return x;
31929         xa = xa + TWO52 - TWO52;
31930 x2 = copysign (xa, x);
31939 enum machine_mode mode = GET_MODE (operand0);
31940 rtx xa, TWO52, tmp, label, one, res, mask;
31942 TWO52 = ix86_gen_TWO52 (mode);
31944 /* Temporary for holding the result, initialized to the input
31945 operand to ease control flow. */
31946 res = gen_reg_rtx (mode);
31947 emit_move_insn (res, operand1);
31949 /* xa = abs (operand1) */
31950 xa = ix86_expand_sse_fabs (res, &mask);
31952 /* if (!isless (xa, TWO52)) goto label; */
31953 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
31955 /* xa = xa + TWO52 - TWO52; */
31956 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
31957 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
31959 /* xa = copysign (xa, operand1) */
31960 ix86_sse_copysign_to_positive (xa, xa, res, mask);
31962 /* generate 1.0 or -1.0 */
31963 one = force_reg (mode,
31964 const_double_from_real_value (do_floor
31965 ? dconst1 : dconstm1, mode));
31967 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
31968 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
31969 emit_insn (gen_rtx_SET (VOIDmode, tmp,
31970 gen_rtx_AND (mode, one, tmp)));
31971 /* We always need to subtract here to preserve signed zero. */
31972 tmp = expand_simple_binop (mode, MINUS,
31973 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
31974 emit_move_insn (res, tmp);
31976 emit_label (label);
31977 LABEL_NUSES (label) = 1;
31979 emit_move_insn (operand0, res);
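/* The compensation above written out as a hedged C sketch of the floor
   case (hypothetical helper, not GCC code; same rounding-mode
   assumptions as the expansion): round to nearest via TWO52, then
   subtract the 1.0 that the UNGT mask selects whenever rounding went up
   past x.  */

#include <math.h>

static double
floor_df32_sketch (double x)
{
  double xa = fabs (x), x2;
  if (!(xa < 0x1p52))
    return x;
  xa = (xa + 0x1p52) - 0x1p52;
  x2 = copysign (xa, x);
  return x2 - (x2 > x ? 1.0 : 0.0);  /* subtracting keeps -0.0 intact */
}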
31982 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing into OPERAND0.
31985 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
31987 /* C code for the stuff we expand below.
31988 double xa = fabs (x), x2;
31989         if (!isless (xa, TWO52))
31990           return x;
31991         x2 = (double)(long)x;
31992      Compensate.  Floor:
31993         if (x2 > x)
31994           x2 -= 1;
31995      Compensate.  Ceil:
31996         if (x2 < x)
31997           x2 += 1;
31998 if (HONOR_SIGNED_ZEROS (mode))
31999 return copysign (x2, x);
32002 enum machine_mode mode = GET_MODE (operand0);
32003 rtx xa, xi, TWO52, tmp, label, one, res, mask;
32005 TWO52 = ix86_gen_TWO52 (mode);
32007 /* Temporary for holding the result, initialized to the input
32008 operand to ease control flow. */
32009 res = gen_reg_rtx (mode);
32010 emit_move_insn (res, operand1);
32012 /* xa = abs (operand1) */
32013 xa = ix86_expand_sse_fabs (res, &mask);
32015 /* if (!isless (xa, TWO52)) goto label; */
32016 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32018 /* xa = (double)(long)x */
32019 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
32020 expand_fix (xi, res, 0);
32021 expand_float (xa, xi, 0);
32024 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
32026 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
32027 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
32028 emit_insn (gen_rtx_SET (VOIDmode, tmp,
32029 gen_rtx_AND (mode, one, tmp)));
32030 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
32031 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
32032 emit_move_insn (res, tmp);
32034 if (HONOR_SIGNED_ZEROS (mode))
32035 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
32037 emit_label (label);
32038 LABEL_NUSES (label) = 1;
32040 emit_move_insn (operand0, res);
32043 /* Expand SSE sequence for computing round from OPERAND1 storing
32044    into OPERAND0, a sequence that works without relying on DImode truncation
32045    via cvttsd2siq, which is only available on 64-bit targets.  */
32047 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
32049 /* C code for the stuff we expand below.
32050 double xa = fabs (x), xa2, x2;
32051         if (!isless (xa, TWO52))
32052           return x;
32053 Using the absolute value and copying back sign makes
32054 -0.0 -> -0.0 correct.
32055         xa2 = xa + TWO52 - TWO52;
32057         dxa = xa2 - xa;
32058         if (dxa <= -0.5)
32059           xa2 += 1;
32060         else if (dxa > 0.5)
32061           xa2 -= 1;
32062 x2 = copysign (xa2, x);
32065 enum machine_mode mode = GET_MODE (operand0);
32066 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
32068 TWO52 = ix86_gen_TWO52 (mode);
32070 /* Temporary for holding the result, initialized to the input
32071 operand to ease control flow. */
32072 res = gen_reg_rtx (mode);
32073 emit_move_insn (res, operand1);
32075 /* xa = abs (operand1) */
32076 xa = ix86_expand_sse_fabs (res, &mask);
32078 /* if (!isless (xa, TWO52)) goto label; */
32079 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32081 /* xa2 = xa + TWO52 - TWO52; */
32082 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
32083 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
32085 /* dxa = xa2 - xa; */
32086 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
32088 /* generate 0.5, 1.0 and -0.5 */
32089 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
32090 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
32091 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
32095 tmp = gen_reg_rtx (mode);
32096 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
32097 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
32098 emit_insn (gen_rtx_SET (VOIDmode, tmp,
32099 gen_rtx_AND (mode, one, tmp)));
32100 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
32101 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
32102 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
32103 emit_insn (gen_rtx_SET (VOIDmode, tmp,
32104 gen_rtx_AND (mode, one, tmp)));
32105 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
32107 /* res = copysign (xa2, operand1) */
32108 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
32110 emit_label (label);
32111 LABEL_NUSES (label) = 1;
32113 emit_move_insn (operand0, res);
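/* Hedged C sketch of this round variant (hypothetical helper, not GCC
   code): DXA measures how far TWO52-rounding moved |x|, and the two
   half-point tests convert round-to-nearest-even into
   round-half-away-from-zero without any 64-bit integer conversion.  */

#include <math.h>

static double
round_df32_sketch (double x)
{
  double xa = fabs (x), xa2, dxa;
  if (!(xa < 0x1p52))
    return x;
  xa2 = (xa + 0x1p52) - 0x1p52;
  dxa = xa2 - xa;
  if (dxa > 0.5)          /* rounded up past the midpoint: undo */
    xa2 -= 1.0;
  else if (dxa <= -0.5)   /* rounded down to/past it: go away from zero */
    xa2 += 1.0;
  return copysign (xa2, x);
}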
32116 /* Expand SSE sequence for computing trunc from OPERAND1 storing into OPERAND0.  */
32119 ix86_expand_trunc (rtx operand0, rtx operand1)
32121 /* C code for SSE variant we expand below.
32122 double xa = fabs (x), x2;
32123         if (!isless (xa, TWO52))
32124           return x;
32125 x2 = (double)(long)x;
32126 if (HONOR_SIGNED_ZEROS (mode))
32127 return copysign (x2, x);
32130 enum machine_mode mode = GET_MODE (operand0);
32131 rtx xa, xi, TWO52, label, res, mask;
32133 TWO52 = ix86_gen_TWO52 (mode);
32135 /* Temporary for holding the result, initialized to the input
32136 operand to ease control flow. */
32137 res = gen_reg_rtx (mode);
32138 emit_move_insn (res, operand1);
32140 /* xa = abs (operand1) */
32141 xa = ix86_expand_sse_fabs (res, &mask);
32143 /* if (!isless (xa, TWO52)) goto label; */
32144 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32146 /* x = (double)(long)x */
32147 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
32148 expand_fix (xi, res, 0);
32149 expand_float (res, xi, 0);
32151 if (HONOR_SIGNED_ZEROS (mode))
32152 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
32154 emit_label (label);
32155 LABEL_NUSES (label) = 1;
32157 emit_move_insn (operand0, res);
32160 /* Expand SSE sequence for computing trunc from OPERAND1 storing into OPERAND0.  */
32163 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
32165 enum machine_mode mode = GET_MODE (operand0);
32166 rtx xa, mask, TWO52, label, one, res, smask, tmp;
32168 /* C code for SSE variant we expand below.
32169         double xa = fabs (x), xa2, x2;
32170         if (!isless (xa, TWO52))
32171           return x;
32172         xa2 = xa + TWO52 - TWO52;
32173      Compensate:
32174         if (xa2 > xa)
32175           xa2 -= 1.0;
32176         x2 = copysign (xa2, x);
32180 TWO52 = ix86_gen_TWO52 (mode);
32182 /* Temporary for holding the result, initialized to the input
32183 operand to ease control flow. */
32184 res = gen_reg_rtx (mode);
32185 emit_move_insn (res, operand1);
32187 /* xa = abs (operand1) */
32188 xa = ix86_expand_sse_fabs (res, &smask);
32190 /* if (!isless (xa, TWO52)) goto label; */
32191 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32193 /* res = xa + TWO52 - TWO52; */
32194 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
32195 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
32196 emit_move_insn (res, tmp);
32199 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
32201 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
32202 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
32203 emit_insn (gen_rtx_SET (VOIDmode, mask,
32204 gen_rtx_AND (mode, mask, one)));
32205 tmp = expand_simple_binop (mode, MINUS,
32206 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
32207 emit_move_insn (res, tmp);
32209 /* res = copysign (res, operand1) */
32210 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
32212 emit_label (label);
32213 LABEL_NUSES (label) = 1;
32215 emit_move_insn (operand0, res);
32218 /* Expand SSE sequence for computing round from OPERAND1 storing into OPERAND0.  */
32221 ix86_expand_round (rtx operand0, rtx operand1)
32223 /* C code for the stuff we're doing below:
32224 double xa = fabs (x);
32225         if (!isless (xa, TWO52))
32226           return x;
32227 xa = (double)(long)(xa + nextafter (0.5, 0.0));
32228 return copysign (xa, x);
32230 enum machine_mode mode = GET_MODE (operand0);
32231 rtx res, TWO52, xa, label, xi, half, mask;
32232 const struct real_format *fmt;
32233 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
32235 /* Temporary for holding the result, initialized to the input
32236 operand to ease control flow. */
32237 res = gen_reg_rtx (mode);
32238 emit_move_insn (res, operand1);
32240 TWO52 = ix86_gen_TWO52 (mode);
32241 xa = ix86_expand_sse_fabs (res, &mask);
32242 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32244 /* load nextafter (0.5, 0.0) */
32245 fmt = REAL_MODE_FORMAT (mode);
32246 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
32247 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
32249 /* xa = xa + 0.5 */
32250 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
32251 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
32253 /* xa = (double)(int64_t)xa */
32254 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
32255 expand_fix (xi, xa, 0);
32256 expand_float (xa, xi, 0);
32258 /* res = copysign (xa, operand1) */
32259 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
32261 emit_label (label);
32262 LABEL_NUSES (label) = 1;
32264 emit_move_insn (operand0, res);
32268 /* Table of valid machine attributes. */
32269 static const struct attribute_spec ix86_attribute_table[] =
32271 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
32272 affects_type_identity } */
32273 /* Stdcall attribute says callee is responsible for popping arguments
32274 if they are not variable. */
32275 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
32277 /* Fastcall attribute says callee is responsible for popping arguments
32278 if they are not variable. */
32279 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
32281 /* Thiscall attribute says callee is responsible for popping arguments
32282 if they are not variable. */
32283 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
/* Cdecl attribute says the callee is a normal C declaration.  */
32286 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
32288 /* Regparm attribute specifies how many integer arguments are to be
32289 passed in registers. */
32290 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
32292 /* Sseregparm attribute says we are using x86_64 calling conventions
32293 for FP arguments. */
32294 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
32296 /* force_align_arg_pointer says this function realigns the stack at entry. */
32297 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
32298 false, true, true, ix86_handle_cconv_attribute, false },
32299 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
32300 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
32301 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
32302 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
32305 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
32307 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
32309 #ifdef SUBTARGET_ATTRIBUTE_TABLE
32310 SUBTARGET_ATTRIBUTE_TABLE,
32312 /* ms_abi and sysv_abi calling convention function attributes. */
32313 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
32314 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
32315 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
32317 { "callee_pop_aggregate_return", 1, 1, false, true, true,
32318 ix86_handle_callee_pop_aggregate_return, true },
32320 { NULL, 0, 0, false, false, false, NULL, false }
32323 /* Implement targetm.vectorize.builtin_vectorization_cost. */
32325 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
32326 tree vectype ATTRIBUTE_UNUSED,
32327 int misalign ATTRIBUTE_UNUSED)
32329 switch (type_of_cost)
32332 return ix86_cost->scalar_stmt_cost;
32335 return ix86_cost->scalar_load_cost;
32338 return ix86_cost->scalar_store_cost;
32341 return ix86_cost->vec_stmt_cost;
32344 return ix86_cost->vec_align_load_cost;
32347 return ix86_cost->vec_store_cost;
32349 case vec_to_scalar:
32350 return ix86_cost->vec_to_scalar_cost;
32352 case scalar_to_vec:
32353 return ix86_cost->scalar_to_vec_cost;
32355 case unaligned_load:
32356 case unaligned_store:
32357 return ix86_cost->vec_unalign_load_cost;
32359 case cond_branch_taken:
32360 return ix86_cost->cond_taken_branch_cost;
32362 case cond_branch_not_taken:
32363 return ix86_cost->cond_not_taken_branch_cost;
32369 gcc_unreachable ();
32374 /* Implement targetm.vectorize.builtin_vec_perm. */
32377 ix86_vectorize_builtin_vec_perm (tree vec_type, tree *mask_type)
32379 tree itype = TREE_TYPE (vec_type);
32380 bool u = TYPE_UNSIGNED (itype);
32381 enum machine_mode vmode = TYPE_MODE (vec_type);
32382 enum ix86_builtins fcode;
32383 bool ok = TARGET_SSE2;
32389 fcode = IX86_BUILTIN_VEC_PERM_V4DF;
32392 fcode = IX86_BUILTIN_VEC_PERM_V2DF;
32394 itype = ix86_get_builtin_type (IX86_BT_DI);
32399 fcode = IX86_BUILTIN_VEC_PERM_V8SF;
32403 fcode = IX86_BUILTIN_VEC_PERM_V4SF;
32405 itype = ix86_get_builtin_type (IX86_BT_SI);
32409 fcode = u ? IX86_BUILTIN_VEC_PERM_V2DI_U : IX86_BUILTIN_VEC_PERM_V2DI;
32412 fcode = u ? IX86_BUILTIN_VEC_PERM_V4SI_U : IX86_BUILTIN_VEC_PERM_V4SI;
32415 fcode = u ? IX86_BUILTIN_VEC_PERM_V8HI_U : IX86_BUILTIN_VEC_PERM_V8HI;
32418 fcode = u ? IX86_BUILTIN_VEC_PERM_V16QI_U : IX86_BUILTIN_VEC_PERM_V16QI;
32428 *mask_type = itype;
32429 return ix86_builtins[(int) fcode];
32432 /* Return a vector mode with twice as many elements as VMODE. */
32433 /* ??? Consider moving this to a table generated by genmodes.c. */
32435 static enum machine_mode
32436 doublesize_vector_mode (enum machine_mode vmode)
32440 case V2SFmode: return V4SFmode;
32441 case V1DImode: return V2DImode;
32442 case V2SImode: return V4SImode;
32443 case V4HImode: return V8HImode;
32444 case V8QImode: return V16QImode;
32446 case V2DFmode: return V4DFmode;
32447 case V4SFmode: return V8SFmode;
32448 case V2DImode: return V4DImode;
32449 case V4SImode: return V8SImode;
32450 case V8HImode: return V16HImode;
32451 case V16QImode: return V32QImode;
32453 case V4DFmode: return V8DFmode;
32454 case V8SFmode: return V16SFmode;
32455 case V4DImode: return V8DImode;
32456 case V8SImode: return V16SImode;
32457 case V16HImode: return V32HImode;
32458 case V32QImode: return V64QImode;
32461 gcc_unreachable ();
32465 /* Construct (set target (vec_select op0 (parallel perm))) and
32466 return true if that's a valid instruction in the active ISA. */
32469 expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
32471 rtx rperm[MAX_VECT_LEN], x;
32474 for (i = 0; i < nelt; ++i)
32475 rperm[i] = GEN_INT (perm[i]);
32477 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
32478 x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
32479 x = gen_rtx_SET (VOIDmode, target, x);
32482 if (recog_memoized (x) < 0)
32490 /* Similar, but generate a vec_concat from op0 and op1 as well. */
32493 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
32494 const unsigned char *perm, unsigned nelt)
32496 enum machine_mode v2mode;
32499 v2mode = doublesize_vector_mode (GET_MODE (op0));
32500 x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
32501 return expand_vselect (target, x, perm, nelt);
32504 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
32505 in terms of blendp[sd] / pblendw / pblendvb. */
32508 expand_vec_perm_blend (struct expand_vec_perm_d *d)
32510 enum machine_mode vmode = d->vmode;
32511 unsigned i, mask, nelt = d->nelt;
32512 rtx target, op0, op1, x;
32514 if (!TARGET_SSE4_1 || d->op0 == d->op1)
32516 if (!(GET_MODE_SIZE (vmode) == 16 || vmode == V4DFmode || vmode == V8SFmode))
32519 /* This is a blend, not a permute. Elements must stay in their
32520 respective lanes. */
32521 for (i = 0; i < nelt; ++i)
32523 unsigned e = d->perm[i];
32524 if (!(e == i || e == i + nelt))
32531 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
32532 decision should be extracted elsewhere, so that we only try that
32533 sequence once all budget==3 options have been tried. */
32535 /* For bytes, see if bytes move in pairs so we can use pblendw with
32536 an immediate argument, rather than pblendvb with a vector argument. */
32537 if (vmode == V16QImode)
32539 bool pblendw_ok = true;
32540 for (i = 0; i < 16 && pblendw_ok; i += 2)
32541 pblendw_ok = (d->perm[i] + 1 == d->perm[i + 1]);
32545 rtx rperm[16], vperm;
32547 for (i = 0; i < nelt; ++i)
32548 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
32550 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
32551 vperm = force_reg (V16QImode, vperm);
32553 emit_insn (gen_sse4_1_pblendvb (d->target, d->op0, d->op1, vperm));
32558 target = d->target;
32570 for (i = 0; i < nelt; ++i)
32571 mask |= (d->perm[i] >= nelt) << i;
32575 for (i = 0; i < 2; ++i)
32576 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
32580 for (i = 0; i < 4; ++i)
32581 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
32585 for (i = 0; i < 8; ++i)
32586 mask |= (d->perm[i * 2] >= 16) << i;
32590 target = gen_lowpart (vmode, target);
32591 op0 = gen_lowpart (vmode, op0);
32592 op1 = gen_lowpart (vmode, op1);
32596 gcc_unreachable ();
/* This matches five different patterns across the different modes.  */
32600 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
32601 x = gen_rtx_SET (VOIDmode, target, x);
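/* Illustrative sketch (added for exposition, not part of the original
   source; the helper name is hypothetical): how the immediate operand
   of blendp[sd]/pblendw above is formed in the element-per-bit case.
   Bit I selects OP1's element when perm[I] >= nelt, else OP0's.  For
   a V4SF permutation {0, 5, 2, 7} the blendps immediate is 0xa.  */

static unsigned int
model_blend_mask (const unsigned char *perm, unsigned int nelt)
{
  unsigned int i, mask = 0;

  for (i = 0; i < nelt; ++i)
    mask |= (unsigned int) (perm[i] >= nelt) << i;
  return mask;
}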
32607 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
32608 in terms of the variable form of vpermilps.
32610 Note that we will have already failed the immediate input vpermilps,
32611 which requires that the high and low part shuffle be identical; the
32612 variable form doesn't require that. */
32615 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
32617 rtx rperm[8], vperm;
32620 if (!TARGET_AVX || d->vmode != V8SFmode || d->op0 != d->op1)
32623 /* We can only permute within the 128-bit lane. */
32624 for (i = 0; i < 8; ++i)
32626 unsigned e = d->perm[i];
32627 if (i < 4 ? e >= 4 : e < 4)
32634 for (i = 0; i < 8; ++i)
32636 unsigned e = d->perm[i];
32638 /* Within each 128-bit lane, the elements of op0 are numbered
32639 from 0 and the elements of op1 are numbered from 4. */
32645 rperm[i] = GEN_INT (e);
32648 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
32649 vperm = force_reg (V8SImode, vperm);
32650 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
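/* Illustrative sketch (added for exposition, not part of the original
   source; the helper name is hypothetical): the variable vpermilps
   selector built above is per-lane, so once the permutation has passed
   the lane check each 32-bit selector element only needs the index
   0..3 within its own 128-bit lane.  E.g. the V8SF permutation
   {1,0,3,2, 5,4,7,6} becomes the selector {1,0,3,2, 1,0,3,2}.  */

static void
model_vpermilps_selector (const unsigned char *perm, unsigned int *sel)
{
  unsigned int i;

  /* PERM[i] is assumed lane-local (< 8, staying in the lane of slot I),
     so the low two bits are exactly the in-lane index the insn uses.  */
  for (i = 0; i < 8; ++i)
    sel[i] = perm[i] & 3;
}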
32655 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
32656 in terms of pshufb or vpperm. */
32659 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
32661 unsigned i, nelt, eltsz;
32662 rtx rperm[16], vperm, target, op0, op1;
32664 if (!(d->op0 == d->op1 ? TARGET_SSSE3 : TARGET_XOP))
32666 if (GET_MODE_SIZE (d->vmode) != 16)
32673 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
32675 for (i = 0; i < nelt; ++i)
32677 unsigned j, e = d->perm[i];
32678 for (j = 0; j < eltsz; ++j)
32679 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
32682 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
32683 vperm = force_reg (V16QImode, vperm);
32685 target = gen_lowpart (V16QImode, d->target);
32686 op0 = gen_lowpart (V16QImode, d->op0);
32687 if (d->op0 == d->op1)
32688 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
32691 op1 = gen_lowpart (V16QImode, d->op1);
32692 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
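/* Illustrative sketch (added for exposition, not part of the original
   source; the helper name is hypothetical): expanding an element-level
   permutation into the byte-level control vector pshufb/vpperm expect,
   as done above.  Element index E becomes ELTSZ consecutive byte
   indices E*ELTSZ .. E*ELTSZ+ELTSZ-1.  E.g. the V8HI reversal
   {7,6,5,4,3,2,1,0} becomes {14,15, 12,13, 10,11, 8,9, ...}.  */

static void
model_pshufb_control (const unsigned char *perm, unsigned int nelt,
		      unsigned int eltsz, unsigned char *bytes)
{
  unsigned int i, j;

  for (i = 0; i < nelt; ++i)
    for (j = 0; j < eltsz; ++j)
      bytes[i * eltsz + j] = perm[i] * eltsz + j;
}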
32698 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
32699 in a single instruction. */
32702 expand_vec_perm_1 (struct expand_vec_perm_d *d)
32704 unsigned i, nelt = d->nelt;
32705 unsigned char perm2[MAX_VECT_LEN];
32707 /* Check plain VEC_SELECT first, because AVX has instructions that could
32708 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
32709 input where SEL+CONCAT may not. */
32710 if (d->op0 == d->op1)
32712 int mask = nelt - 1;
32714 for (i = 0; i < nelt; i++)
32715 perm2[i] = d->perm[i] & mask;
32717 if (expand_vselect (d->target, d->op0, perm2, nelt))
32720 /* There are plenty of patterns in sse.md that are written for
32721 SEL+CONCAT and are not replicated for a single op. Perhaps
32722 that should be changed, to avoid the nastiness here. */
32724 /* Recognize interleave style patterns, which means incrementing
32725 every other permutation operand. */
32726 for (i = 0; i < nelt; i += 2)
32728 perm2[i] = d->perm[i] & mask;
32729 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
32731 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
32734 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
32737 for (i = 0; i < nelt; i += 4)
32739 perm2[i + 0] = d->perm[i + 0] & mask;
32740 perm2[i + 1] = d->perm[i + 1] & mask;
32741 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
32742 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
32745 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
32750 /* Finally, try the fully general two operand permute. */
32751 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
32754 /* Recognize interleave style patterns with reversed operands. */
32755 if (d->op0 != d->op1)
32757 for (i = 0; i < nelt; ++i)
32759 unsigned e = d->perm[i];
32767 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
32771 /* Try the SSE4.1 blend variable merge instructions. */
32772 if (expand_vec_perm_blend (d))
32775 /* Try one of the AVX vpermil variable permutations. */
32776 if (expand_vec_perm_vpermil (d))
32779 /* Try the SSSE3 pshufb or XOP vpperm variable permutation. */
32780 if (expand_vec_perm_pshufb (d))
32786 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
32787 in terms of a pair of pshuflw + pshufhw instructions. */
32790 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
32792 unsigned char perm2[MAX_VECT_LEN];
32796 if (d->vmode != V8HImode || d->op0 != d->op1)
32799 /* The two permutations only operate in 64-bit lanes. */
32800 for (i = 0; i < 4; ++i)
32801 if (d->perm[i] >= 4)
32803 for (i = 4; i < 8; ++i)
32804 if (d->perm[i] < 4)
32810 /* Emit the pshuflw. */
32811 memcpy (perm2, d->perm, 4);
32812 for (i = 4; i < 8; ++i)
32814 ok = expand_vselect (d->target, d->op0, perm2, 8);
32817 /* Emit the pshufhw. */
32818 memcpy (perm2 + 4, d->perm + 4, 4);
32819 for (i = 0; i < 4; ++i)
32821 ok = expand_vselect (d->target, d->target, perm2, 8);
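/* Illustrative sketch (added for exposition, not part of the original
   source; the helper name is hypothetical): how the permutation is
   split above.  pshuflw shuffles words 0-3 and copies 4-7; pshufhw
   does the opposite.  Filling the untouched half with the identity
   turns each into a full 8-element vec_select.  */

static void
model_split_pshuflw_pshufhw (const unsigned char *perm,
			     unsigned char *lw, unsigned char *hw)
{
  unsigned int i;

  for (i = 0; i < 4; ++i)
    {
      lw[i] = perm[i];		/* requested low half */
      lw[i + 4] = i + 4;	/* identity high half */
      hw[i] = i;		/* identity low half */
      hw[i + 4] = perm[i + 4];	/* requested high half */
    }
}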
32827 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
32828 the permutation using the SSSE3 palignr instruction. This succeeds
32829 when all of the elements in PERM fit within one vector and we merely
32830 need to shift them down so that a single vector permutation has a
32831 chance to succeed. */
32834 expand_vec_perm_palignr (struct expand_vec_perm_d *d)
32836 unsigned i, nelt = d->nelt;
32841 /* Even with AVX, palignr only operates on 128-bit vectors. */
32842 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
32845 min = nelt, max = 0;
32846 for (i = 0; i < nelt; ++i)
32848 unsigned e = d->perm[i];
32854 if (min == 0 || max - min >= nelt)
32857 /* Given that we have SSSE3, we know we'll be able to implement the
32858 single operand permutation after the palignr with pshufb. */
32862 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
32863 emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode, d->target),
32864 gen_lowpart (TImode, d->op1),
32865 gen_lowpart (TImode, d->op0), shift));
32867 d->op0 = d->op1 = d->target;
32870 for (i = 0; i < nelt; ++i)
32872 unsigned e = d->perm[i] - min;
32878 /* Test for the degenerate case where the alignment by itself
32879 produces the desired permutation. */
32883 ok = expand_vec_perm_1 (d);
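/* Illustrative sketch (added for exposition, not part of the original
   source; the helper name is hypothetical): the applicability test
   used above.  If every selected index falls inside a window
   [min, min + nelt) of the double-wide concatenation, rotating by MIN
   elements with palignr rebases the permutation to perm[i] - min,
   which is then single-operand.  MIN == 0 needs no rotate, and a
   window wider than NELT cannot be rebased.  */

static int
model_palignr_applicable (const unsigned char *perm, unsigned int nelt,
			  unsigned int *pmin)
{
  unsigned int i, min = 2 * nelt, max = 0;

  for (i = 0; i < nelt; ++i)
    {
      if (perm[i] < min)
	min = perm[i];
      if (perm[i] > max)
	max = perm[i];
    }
  if (min == 0 || max - min >= nelt)
    return 0;
  *pmin = min;
  return 1;
}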
32889 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
32890 a two vector permutation into a single vector permutation by using
32891 an interleave operation to merge the vectors. */
32894 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
32896 struct expand_vec_perm_d dremap, dfinal;
32897 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
32898 unsigned contents, h1, h2, h3, h4;
32899 unsigned char remap[2 * MAX_VECT_LEN];
32903 if (d->op0 == d->op1)
32906 /* The 256-bit unpck[lh]p[sd] instructions only operate within the 128-bit
32907 lanes. We can use similar techniques with the vperm2f128 instruction,
32908 but it requires slightly different logic. */
32909 if (GET_MODE_SIZE (d->vmode) != 16)
32912 /* Examine from whence the elements come. */
32914 for (i = 0; i < nelt; ++i)
32915 contents |= 1u << d->perm[i];
32917 /* Split the two input vectors into 4 halves. */
32918 h1 = (1u << nelt2) - 1;
32923 memset (remap, 0xff, sizeof (remap));
/* If all the elements come from the low halves, use interleave low;
   similarly use interleave high when all come from the high halves.
   If the elements come from mis-matched halves, we can use shufps for
   V4SF/V4SI or do a DImode shuffle.  */
32929 if ((contents & (h1 | h3)) == contents)
32931 for (i = 0; i < nelt2; ++i)
32934 remap[i + nelt] = i * 2 + 1;
32935 dremap.perm[i * 2] = i;
32936 dremap.perm[i * 2 + 1] = i + nelt;
32939 else if ((contents & (h2 | h4)) == contents)
32941 for (i = 0; i < nelt2; ++i)
32943 remap[i + nelt2] = i * 2;
32944 remap[i + nelt + nelt2] = i * 2 + 1;
32945 dremap.perm[i * 2] = i + nelt2;
32946 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
32949 else if ((contents & (h1 | h4)) == contents)
32951 for (i = 0; i < nelt2; ++i)
32954 remap[i + nelt + nelt2] = i + nelt2;
32955 dremap.perm[i] = i;
32956 dremap.perm[i + nelt2] = i + nelt + nelt2;
32960 dremap.vmode = V2DImode;
32962 dremap.perm[0] = 0;
32963 dremap.perm[1] = 3;
32966 else if ((contents & (h2 | h3)) == contents)
32968 for (i = 0; i < nelt2; ++i)
32970 remap[i + nelt2] = i;
32971 remap[i + nelt] = i + nelt2;
32972 dremap.perm[i] = i + nelt2;
32973 dremap.perm[i + nelt2] = i + nelt;
32977 dremap.vmode = V2DImode;
32979 dremap.perm[0] = 1;
32980 dremap.perm[1] = 2;
32986 /* Use the remapping array set up above to move the elements from their
32987 swizzled locations into their final destinations. */
32989 for (i = 0; i < nelt; ++i)
32991 unsigned e = remap[d->perm[i]];
32992 gcc_assert (e < nelt);
32993 dfinal.perm[i] = e;
32995 dfinal.op0 = gen_reg_rtx (dfinal.vmode);
32996 dfinal.op1 = dfinal.op0;
32997 dremap.target = dfinal.op0;
32999 /* Test if the final remap can be done with a single insn. For V4SFmode or
33000 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
33002 ok = expand_vec_perm_1 (&dfinal);
33003 seq = get_insns ();
33009 if (dremap.vmode != dfinal.vmode)
33011 dremap.target = gen_lowpart (dremap.vmode, dremap.target);
33012 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
33013 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
33016 ok = expand_vec_perm_1 (&dremap);
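/* Illustrative sketch (added for exposition, not part of the original
   source; the helper name is hypothetical): the CONTENTS bitmask
   computed above.  Each referenced element index sets one bit; for
   nelt == 4 the half-masks are h1 = 0x03 (low half of op0),
   h2 = 0x0c, h3 = 0x30 (low half of op1) and h4 = 0xc0, so e.g.
   (contents & (h1 | h3)) == contents means only the two low halves
   are used and interleave-low gathers everything into one vector.  */

static unsigned int
model_perm_contents (const unsigned char *perm, unsigned int nelt)
{
  unsigned int i, contents = 0;

  for (i = 0; i < nelt; ++i)
    contents |= 1u << perm[i];
  return contents;
}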
33023 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
33024 permutation with two pshufb insns and an ior. We should have already
33025 failed all two instruction sequences. */
33028 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
33030 rtx rperm[2][16], vperm, l, h, op, m128;
33031 unsigned int i, nelt, eltsz;
33033 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
33035 gcc_assert (d->op0 != d->op1);
33038 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
33040 /* Generate two permutation masks. If the required element is within
33041 the given vector it is shuffled into the proper lane. If the required
33042 element is in the other vector, force a zero into the lane by setting
33043 bit 7 in the permutation mask. */
33044 m128 = GEN_INT (-128);
33045 for (i = 0; i < nelt; ++i)
33047 unsigned j, e = d->perm[i];
33048 unsigned which = (e >= nelt);
33052 for (j = 0; j < eltsz; ++j)
33054 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
33055 rperm[1-which][i*eltsz + j] = m128;
33059 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
33060 vperm = force_reg (V16QImode, vperm);
33062 l = gen_reg_rtx (V16QImode);
33063 op = gen_lowpart (V16QImode, d->op0);
33064 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
33066 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
33067 vperm = force_reg (V16QImode, vperm);
33069 h = gen_reg_rtx (V16QImode);
33070 op = gen_lowpart (V16QImode, d->op1);
33071 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
33073 op = gen_lowpart (V16QImode, d->target);
33074 emit_insn (gen_iorv16qi3 (op, l, h));
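/* Illustrative sketch (added for exposition, not part of the original
   source; the helper name is hypothetical): the pair of pshufb control
   vectors built above.  A byte slot receives its source index in the
   mask for the operand the element comes from and -128 (bit 7 set,
   which makes pshufb write zero) in the other mask, so the final ior
   merges the two shuffled halves.  */

static void
model_pshufb2_masks (const unsigned char *perm, unsigned int nelt,
		     unsigned int eltsz,
		     signed char mask0[16], signed char mask1[16])
{
  unsigned int i, j;

  for (i = 0; i < nelt; ++i)
    {
      unsigned int e = perm[i];
      unsigned int from_op1 = (e >= nelt);

      if (from_op1)
	e -= nelt;
      for (j = 0; j < eltsz; ++j)
	{
	  signed char byte = e * eltsz + j;

	  mask0[i * eltsz + j] = from_op1 ? -128 : byte;
	  mask1[i * eltsz + j] = from_op1 ? byte : -128;
	}
    }
}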
33079 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
33080 and extract-odd permutations. */
33083 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
33090 t1 = gen_reg_rtx (V4DFmode);
33091 t2 = gen_reg_rtx (V4DFmode);
33093 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
33094 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
33095 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
33097 /* Now an unpck[lh]pd will produce the result required. */
33099 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
33101 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
33107 int mask = odd ? 0xdd : 0x88;
33109 t1 = gen_reg_rtx (V8SFmode);
33110 t2 = gen_reg_rtx (V8SFmode);
33111 t3 = gen_reg_rtx (V8SFmode);
33113 /* Shuffle within the 128-bit lanes to produce:
33114 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
33115 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
33118 /* Shuffle the lanes around to produce:
33119 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
33120 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
33123 /* Shuffle within the 128-bit lanes to produce:
33124 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
33125 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
33127 /* Shuffle within the 128-bit lanes to produce:
33128 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
33129 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
33131 /* Shuffle the lanes around to produce:
33132 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
33133 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
33142 /* These are always directly implementable by expand_vec_perm_1. */
33143 gcc_unreachable ();
33147 return expand_vec_perm_pshufb2 (d);
33150 /* We need 2*log2(N)-1 operations to achieve odd/even
33151 with interleave. */
33152 t1 = gen_reg_rtx (V8HImode);
33153 t2 = gen_reg_rtx (V8HImode);
33154 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
33155 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
33156 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
33157 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
33159 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
33161 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
33168 return expand_vec_perm_pshufb2 (d);
33171 t1 = gen_reg_rtx (V16QImode);
33172 t2 = gen_reg_rtx (V16QImode);
33173 t3 = gen_reg_rtx (V16QImode);
33174 emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
33175 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
33176 emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
33177 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
33178 emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
33179 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
33181 t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
33183 t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
33189 gcc_unreachable ();
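/* Illustrative sketch (added for exposition, not part of the original
   source; the helper names are hypothetical): the V8HI interleave
   network emitted above, modelled on plain arrays.  Five interleaves
   (2*log2(8) - 1) leave all even (or odd) elements of the
   concatenation of A and B in one register.  */

static void
model_interleave_v8hi (const unsigned short *x, const unsigned short *y,
		       unsigned short *out, int high)
{
  /* punpck[lh]wd: pairwise merge of one half of X with that of Y.  */
  unsigned int i, base = high ? 4 : 0;

  for (i = 0; i < 4; ++i)
    {
      out[2 * i] = x[base + i];
      out[2 * i + 1] = y[base + i];
    }
}

static void
model_even_odd_v8hi (const unsigned short *a, const unsigned short *b,
		     unsigned short *tgt, int odd)
{
  unsigned short t1[8], t2[8], t3[8];

  model_interleave_v8hi (a, b, t1, 1);
  model_interleave_v8hi (a, b, tgt, 0);
  model_interleave_v8hi (tgt, t1, t2, 1);
  model_interleave_v8hi (tgt, t1, t3, 0);	/* T3 models the in-place
						   update of TGT above.  */
  model_interleave_v8hi (t3, t2, tgt, odd);
}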
33195 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
33196 extract-even and extract-odd permutations. */
33199 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
33201 unsigned i, odd, nelt = d->nelt;
33204 if (odd != 0 && odd != 1)
33207 for (i = 1; i < nelt; ++i)
33208 if (d->perm[i] != 2 * i + odd)
33211 return expand_vec_perm_even_odd_1 (d, odd);
33214 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
33215 permutations. We assume that expand_vec_perm_1 has already failed. */
33218 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
33220 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
33221 enum machine_mode vmode = d->vmode;
33222 unsigned char perm2[4];
33230 /* These are special-cased in sse.md so that we can optionally
33231 use the vbroadcast instruction. They expand to two insns
33232 if the input happens to be in a register. */
33233 gcc_unreachable ();
33239 /* These are always implementable using standard shuffle patterns. */
33240 gcc_unreachable ();
33244 /* These can be implemented via interleave. We save one insn by
33245 stopping once we have promoted to V4SImode and then use pshufd. */
33248 optab otab = vec_interleave_low_optab;
33252 otab = vec_interleave_high_optab;
33257 op0 = expand_binop (vmode, otab, op0, op0, NULL, 0, OPTAB_DIRECT);
33258 vmode = get_mode_wider_vector (vmode);
33259 op0 = gen_lowpart (vmode, op0);
33261 while (vmode != V4SImode);
33263 memset (perm2, elt, 4);
33264 ok = expand_vselect (gen_lowpart (V4SImode, d->target), op0, perm2, 4);
33269 gcc_unreachable ();
33273 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
33274 broadcast permutations. */
33277 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
33279 unsigned i, elt, nelt = d->nelt;
33281 if (d->op0 != d->op1)
33285 for (i = 1; i < nelt; ++i)
33286 if (d->perm[i] != elt)
33289 return expand_vec_perm_broadcast_1 (d);
33292 /* The guts of ix86_expand_vec_perm_builtin, also used by the ok hook.
33293 With all of the interface bits taken care of, perform the expansion
33294 in D and return true on success. */
33297 ix86_expand_vec_perm_builtin_1 (struct expand_vec_perm_d *d)
33299 /* Try a single instruction expansion. */
33300 if (expand_vec_perm_1 (d))
33303 /* Try sequences of two instructions. */
33305 if (expand_vec_perm_pshuflw_pshufhw (d))
33308 if (expand_vec_perm_palignr (d))
33311 if (expand_vec_perm_interleave2 (d))
33314 if (expand_vec_perm_broadcast (d))
33317 /* Try sequences of three instructions. */
33319 if (expand_vec_perm_pshufb2 (d))
33322 /* ??? Look for narrow permutations whose element orderings would
33323 allow the promotion to a wider mode. */
33325 /* ??? Look for sequences of interleave or a wider permute that place
33326 the data into the correct lanes for a half-vector shuffle like
33327 pshuf[lh]w or vpermilps. */
33329 /* ??? Look for sequences of interleave that produce the desired results.
33330 The combinatorics of punpck[lh] get pretty ugly... */
33332 if (expand_vec_perm_even_odd (d))
33338 /* Extract the values from the vector CST into the permutation array in D.
33339 Return 0 on error, 1 if all values from the permutation come from the
33340 first vector, 2 if all values from the second vector, and 3 otherwise. */
33343 extract_vec_perm_cst (struct expand_vec_perm_d *d, tree cst)
33345 tree list = TREE_VECTOR_CST_ELTS (cst);
33346 unsigned i, nelt = d->nelt;
33349 for (i = 0; i < nelt; ++i, list = TREE_CHAIN (list))
33351 unsigned HOST_WIDE_INT e;
33353 if (!host_integerp (TREE_VALUE (list), 1))
33355 e = tree_low_cst (TREE_VALUE (list), 1);
33359 ret |= (e < nelt ? 1 : 2);
33362 gcc_assert (list == NULL);
33364 /* For all elements from second vector, fold the elements to first. */
33366 for (i = 0; i < nelt; ++i)
33367 d->perm[i] -= nelt;
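/* Illustrative sketch (added for exposition, not part of the original
   source; the helper name is hypothetical): the return-value
   convention used above.  RET ors in 1 for every index referencing the
   first vector and 2 for the second; a pure second-vector permutation
   (ret == 2) is rebased by -NELT so later code can treat it as
   single-operand.  */

static int
model_classify_perm (unsigned char *perm, unsigned int nelt)
{
  unsigned int i;
  int ret = 0;

  for (i = 0; i < nelt; ++i)
    ret |= (perm[i] < nelt ? 1 : 2);
  if (ret == 2)
    for (i = 0; i < nelt; ++i)
      perm[i] -= nelt;
  return ret;
}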
33373 ix86_expand_vec_perm_builtin (tree exp)
33375 struct expand_vec_perm_d d;
33376 tree arg0, arg1, arg2;
33378 arg0 = CALL_EXPR_ARG (exp, 0);
33379 arg1 = CALL_EXPR_ARG (exp, 1);
33380 arg2 = CALL_EXPR_ARG (exp, 2);
33382 d.vmode = TYPE_MODE (TREE_TYPE (arg0));
33383 d.nelt = GET_MODE_NUNITS (d.vmode);
33384 d.testing_p = false;
33385 gcc_assert (VECTOR_MODE_P (d.vmode));
33387 if (TREE_CODE (arg2) != VECTOR_CST)
33389 error_at (EXPR_LOCATION (exp),
33390 "vector permutation requires vector constant");
33394 switch (extract_vec_perm_cst (&d, arg2))
33400 error_at (EXPR_LOCATION (exp), "invalid vector permutation constant");
33404 if (!operand_equal_p (arg0, arg1, 0))
33406 d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
33407 d.op0 = force_reg (d.vmode, d.op0);
33408 d.op1 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
33409 d.op1 = force_reg (d.vmode, d.op1);
33413 /* The elements of PERM do not suggest that only the first operand
33414 is used, but both operands are identical. Allow easier matching
of the permutation by folding the permutation into the single
input vector.  */
33418 unsigned i, nelt = d.nelt;
33419 for (i = 0; i < nelt; ++i)
33420 if (d.perm[i] >= nelt)
33426 d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
33427 d.op0 = force_reg (d.vmode, d.op0);
33432 d.op0 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
33433 d.op0 = force_reg (d.vmode, d.op0);
33438 d.target = gen_reg_rtx (d.vmode);
33439 if (ix86_expand_vec_perm_builtin_1 (&d))
/* For compiler generated permutations, we should never get here, because
   the compiler should also be checking the ok hook.  But since this is a
   builtin the user has access to, don't abort.  */
33448 sorry ("vector permutation (%d %d)", d.perm[0], d.perm[1]);
33451 sorry ("vector permutation (%d %d %d %d)",
33452 d.perm[0], d.perm[1], d.perm[2], d.perm[3]);
33455 sorry ("vector permutation (%d %d %d %d %d %d %d %d)",
33456 d.perm[0], d.perm[1], d.perm[2], d.perm[3],
33457 d.perm[4], d.perm[5], d.perm[6], d.perm[7]);
33460 sorry ("vector permutation "
33461 "(%d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d)",
33462 d.perm[0], d.perm[1], d.perm[2], d.perm[3],
33463 d.perm[4], d.perm[5], d.perm[6], d.perm[7],
33464 d.perm[8], d.perm[9], d.perm[10], d.perm[11],
33465 d.perm[12], d.perm[13], d.perm[14], d.perm[15]);
33468 gcc_unreachable ();
33471 return CONST0_RTX (d.vmode);
33474 /* Implement targetm.vectorize.builtin_vec_perm_ok. */
33477 ix86_vectorize_builtin_vec_perm_ok (tree vec_type, tree mask)
33479 struct expand_vec_perm_d d;
33483 d.vmode = TYPE_MODE (vec_type);
33484 d.nelt = GET_MODE_NUNITS (d.vmode);
33485 d.testing_p = true;
33487 /* Given sufficient ISA support we can just return true here
33488 for selected vector modes. */
33489 if (GET_MODE_SIZE (d.vmode) == 16)
33491 /* All implementable with a single vpperm insn. */
33494 /* All implementable with 2 pshufb + 1 ior. */
33497 /* All implementable with shufpd or unpck[lh]pd. */
33502 vec_mask = extract_vec_perm_cst (&d, mask);
/* This hook cannot be called in response to something that the
   user does (unlike the builtin expander) so we shouldn't ever see
   an error generated from the extract.  */
33507 gcc_assert (vec_mask > 0 && vec_mask <= 3);
33508 one_vec = (vec_mask != 3);
33510 /* Implementable with shufps or pshufd. */
33511 if (one_vec && (d.vmode == V4SFmode || d.vmode == V4SImode))
33514 /* Otherwise we have to go through the motions and see if we can
33515 figure out how to generate the requested permutation. */
33516 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
33517 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
33519 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
33522 ret = ix86_expand_vec_perm_builtin_1 (&d);
33529 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
33531 struct expand_vec_perm_d d;
33537 d.vmode = GET_MODE (targ);
33538 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
33539 d.testing_p = false;
33541 for (i = 0; i < nelt; ++i)
33542 d.perm[i] = i * 2 + odd;
33544 /* We'll either be able to implement the permutation directly... */
33545 if (expand_vec_perm_1 (&d))
33548 /* ... or we use the special-case patterns. */
33549 expand_vec_perm_even_odd_1 (&d, odd);
33552 /* Expand an insert into a vector register through pinsr insn.
33553 Return true if successful. */
33556 ix86_expand_pinsr (rtx *operands)
33558 rtx dst = operands[0];
33559 rtx src = operands[3];
33561 unsigned int size = INTVAL (operands[1]);
33562 unsigned int pos = INTVAL (operands[2]);
33564 if (GET_CODE (dst) == SUBREG)
33566 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
33567 dst = SUBREG_REG (dst);
33570 if (GET_CODE (src) == SUBREG)
33571 src = SUBREG_REG (src);
33573 switch (GET_MODE (dst))
33580 enum machine_mode srcmode, dstmode;
33581 rtx (*pinsr)(rtx, rtx, rtx, rtx);
33583 srcmode = mode_for_size (size, MODE_INT, 0);
33588 if (!TARGET_SSE4_1)
33590 dstmode = V16QImode;
33591 pinsr = gen_sse4_1_pinsrb;
33597 dstmode = V8HImode;
33598 pinsr = gen_sse2_pinsrw;
33602 if (!TARGET_SSE4_1)
33604 dstmode = V4SImode;
33605 pinsr = gen_sse4_1_pinsrd;
33609 gcc_assert (TARGET_64BIT);
33610 if (!TARGET_SSE4_1)
33612 dstmode = V2DImode;
33613 pinsr = gen_sse4_1_pinsrq;
33620 dst = gen_lowpart (dstmode, dst);
33621 src = gen_lowpart (srcmode, src);
33625 emit_insn (pinsr (dst, dst, src, GEN_INT (1 << pos)));
/* This function returns the calling-ABI-specific va_list type node.
   It returns the FNDECL-specific va_list type.  */
33638 ix86_fn_abi_va_list (tree fndecl)
33641 return va_list_type_node;
33642 gcc_assert (fndecl != NULL_TREE);
33644 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
33645 return ms_va_list_type_node;
33647 return sysv_va_list_type_node;
/* Returns the canonical va_list type specified by TYPE.  If there
   is no valid TYPE provided, it returns NULL_TREE.  */
33654 ix86_canonical_va_list_type (tree type)
33658 /* Resolve references and pointers to va_list type. */
33659 if (TREE_CODE (type) == MEM_REF)
33660 type = TREE_TYPE (type);
33661 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
33662 type = TREE_TYPE (type);
33663 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
33664 type = TREE_TYPE (type);
33666 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
33668 wtype = va_list_type_node;
33669 gcc_assert (wtype != NULL_TREE);
33671 if (TREE_CODE (wtype) == ARRAY_TYPE)
33673 /* If va_list is an array type, the argument may have decayed
33674 to a pointer type, e.g. by being passed to another function.
33675 In that case, unwrap both types so that we can compare the
33676 underlying records. */
33677 if (TREE_CODE (htype) == ARRAY_TYPE
33678 || POINTER_TYPE_P (htype))
33680 wtype = TREE_TYPE (wtype);
33681 htype = TREE_TYPE (htype);
33684 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
33685 return va_list_type_node;
33686 wtype = sysv_va_list_type_node;
33687 gcc_assert (wtype != NULL_TREE);
33689 if (TREE_CODE (wtype) == ARRAY_TYPE)
33691 /* If va_list is an array type, the argument may have decayed
33692 to a pointer type, e.g. by being passed to another function.
33693 In that case, unwrap both types so that we can compare the
33694 underlying records. */
33695 if (TREE_CODE (htype) == ARRAY_TYPE
33696 || POINTER_TYPE_P (htype))
33698 wtype = TREE_TYPE (wtype);
33699 htype = TREE_TYPE (htype);
33702 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
33703 return sysv_va_list_type_node;
33704 wtype = ms_va_list_type_node;
33705 gcc_assert (wtype != NULL_TREE);
33707 if (TREE_CODE (wtype) == ARRAY_TYPE)
33709 /* If va_list is an array type, the argument may have decayed
33710 to a pointer type, e.g. by being passed to another function.
33711 In that case, unwrap both types so that we can compare the
33712 underlying records. */
33713 if (TREE_CODE (htype) == ARRAY_TYPE
33714 || POINTER_TYPE_P (htype))
33716 wtype = TREE_TYPE (wtype);
33717 htype = TREE_TYPE (htype);
33720 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
33721 return ms_va_list_type_node;
33724 return std_canonical_va_list_type (type);
33727 /* Iterate through the target-specific builtin types for va_list.
33728 IDX denotes the iterator, *PTREE is set to the result type of
33729 the va_list builtin, and *PNAME to its internal type.
33730 Returns zero if there is no element for this index, otherwise
33731 IDX should be increased upon the next call.
33732 Note, do not iterate a base builtin's name like __builtin_va_list.
33733 Used from c_common_nodes_and_builtins. */
33736 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
33746 *ptree = ms_va_list_type_node;
33747 *pname = "__builtin_ms_va_list";
33751 *ptree = sysv_va_list_type_node;
33752 *pname = "__builtin_sysv_va_list";
33760 #undef TARGET_SCHED_DISPATCH
33761 #define TARGET_SCHED_DISPATCH has_dispatch
33762 #undef TARGET_SCHED_DISPATCH_DO
33763 #define TARGET_SCHED_DISPATCH_DO do_dispatch
33765 /* The size of the dispatch window is the total number of bytes of
33766 object code allowed in a window. */
33767 #define DISPATCH_WINDOW_SIZE 16
33769 /* Number of dispatch windows considered for scheduling. */
33770 #define MAX_DISPATCH_WINDOWS 3
33772 /* Maximum number of instructions in a window. */
33775 /* Maximum number of immediate operands in a window. */
33778 /* Maximum number of immediate bits allowed in a window. */
33779 #define MAX_IMM_SIZE 128
33781 /* Maximum number of 32 bit immediates allowed in a window. */
33782 #define MAX_IMM_32 4
33784 /* Maximum number of 64 bit immediates allowed in a window. */
33785 #define MAX_IMM_64 2
33787 /* Maximum total of loads or prefetches allowed in a window. */
33790 /* Maximum total of stores allowed in a window. */
33791 #define MAX_STORE 1
/* Dispatch groups.  Instructions that affect the mix in a dispatch window.  */
33798 enum dispatch_group {
/* Number of allowable groups in a dispatch window.  It is an array
   indexed by the dispatch_group enum.  100 is used as a big number
   because the count of these kinds of operations does not affect the
   dispatch window, but we still need to track them for other reasons.  */
33818 static unsigned int num_allowable_groups[disp_last] = {
33819 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
33822 char group_name[disp_last + 1][16] = {
33823 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
33824 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
33825 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
33828 /* Instruction path. */
33831 path_single, /* Single micro op. */
33832 path_double, /* Double micro op. */
path_multi,  /* Instructions with more than 2 micro ops.  */
33837 /* sched_insn_info defines a window to the instructions scheduled in
33838 the basic block. It contains a pointer to the insn_info table and
33839 the instruction scheduled.
Windows are allocated for each basic block and are linked
together.  */
33843 typedef struct sched_insn_info_s {
33845 enum dispatch_group group;
33846 enum insn_path path;
/* Linked list of dispatch windows.  This is a two-way list of
   dispatch windows of a basic block.  It contains information about
   the number of uops in the window and the total number of
   instructions and of bytes in the object code for this dispatch
   window.  */
33856 typedef struct dispatch_windows_s {
33857 int num_insn; /* Number of insn in the window. */
33858 int num_uops; /* Number of uops in the window. */
33859 int window_size; /* Number of bytes in the window. */
int window_num;    /* Window number, 0 or 1.  */
33861 int num_imm; /* Number of immediates in an insn. */
33862 int num_imm_32; /* Number of 32 bit immediates in an insn. */
33863 int num_imm_64; /* Number of 64 bit immediates in an insn. */
33864 int imm_size; /* Total immediates in the window. */
33865 int num_loads; /* Total memory loads in the window. */
33866 int num_stores; /* Total memory stores in the window. */
33867 int violation; /* Violation exists in window. */
33868 sched_insn_info *window; /* Pointer to the window. */
33869 struct dispatch_windows_s *next;
33870 struct dispatch_windows_s *prev;
33871 } dispatch_windows;
/* Immediate values used in an insn.  */
33874 typedef struct imm_info_s
33881 static dispatch_windows *dispatch_window_list;
33882 static dispatch_windows *dispatch_window_list1;
33884 /* Get dispatch group of insn. */
33886 static enum dispatch_group
33887 get_mem_group (rtx insn)
33889 enum attr_memory memory;
33891 if (INSN_CODE (insn) < 0)
33892 return disp_no_group;
33893 memory = get_attr_memory (insn);
33894 if (memory == MEMORY_STORE)
33897 if (memory == MEMORY_LOAD)
33900 if (memory == MEMORY_BOTH)
33901 return disp_load_store;
33903 return disp_no_group;
33906 /* Return true if insn is a compare instruction. */
33911 enum attr_type type;
33913 type = get_attr_type (insn);
33914 return (type == TYPE_TEST
33915 || type == TYPE_ICMP
33916 || type == TYPE_FCMP
33917 || GET_CODE (PATTERN (insn)) == COMPARE);
/* Return true if a dispatch violation was encountered.  */
33923 dispatch_violation (void)
33925 if (dispatch_window_list->next)
33926 return dispatch_window_list->next->violation;
33927 return dispatch_window_list->violation;
33930 /* Return true if insn is a branch instruction. */
33933 is_branch (rtx insn)
33935 return (CALL_P (insn) || JUMP_P (insn));
33938 /* Return true if insn is a prefetch instruction. */
33941 is_prefetch (rtx insn)
33943 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
33946 /* This function initializes a dispatch window and the list container holding a
33947 pointer to the window. */
33950 init_window (int window_num)
33953 dispatch_windows *new_list;
33955 if (window_num == 0)
33956 new_list = dispatch_window_list;
33958 new_list = dispatch_window_list1;
33960 new_list->num_insn = 0;
33961 new_list->num_uops = 0;
33962 new_list->window_size = 0;
33963 new_list->next = NULL;
33964 new_list->prev = NULL;
33965 new_list->window_num = window_num;
33966 new_list->num_imm = 0;
33967 new_list->num_imm_32 = 0;
33968 new_list->num_imm_64 = 0;
33969 new_list->imm_size = 0;
33970 new_list->num_loads = 0;
33971 new_list->num_stores = 0;
33972 new_list->violation = false;
33974 for (i = 0; i < MAX_INSN; i++)
33976 new_list->window[i].insn = NULL;
33977 new_list->window[i].group = disp_no_group;
33978 new_list->window[i].path = no_path;
33979 new_list->window[i].byte_len = 0;
33980 new_list->window[i].imm_bytes = 0;
33985 /* This function allocates and initializes a dispatch window and the
33986 list container holding a pointer to the window. */
33988 static dispatch_windows *
33989 allocate_window (void)
33991 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
33992 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
33997 /* This routine initializes the dispatch scheduling information. It
33998 initiates building dispatch scheduler tables and constructs the
33999 first dispatch window. */
34002 init_dispatch_sched (void)
34004 /* Allocate a dispatch list and a window. */
34005 dispatch_window_list = allocate_window ();
34006 dispatch_window_list1 = allocate_window ();
34011 /* This function returns true if a branch is detected. End of a basic block
34012 does not have to be a branch, but here we assume only branches end a
34016 is_end_basic_block (enum dispatch_group group)
34018 return group == disp_branch;
34021 /* This function is called when the end of a window processing is reached. */
34024 process_end_window (void)
34026 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
34027 if (dispatch_window_list->next)
34029 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
34030 gcc_assert (dispatch_window_list->window_size
34031 + dispatch_window_list1->window_size <= 48);
34037 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
34038 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
for 48 bytes of instructions.  Note that these windows are not dispatch
windows whose size is DISPATCH_WINDOW_SIZE.  */
34042 static dispatch_windows *
34043 allocate_next_window (int window_num)
34045 if (window_num == 0)
34047 if (dispatch_window_list->next)
34050 return dispatch_window_list;
34053 dispatch_window_list->next = dispatch_window_list1;
34054 dispatch_window_list1->prev = dispatch_window_list;
34056 return dispatch_window_list1;
34059 /* Increment the number of immediate operands of an instruction. */
34062 find_constant_1 (rtx *in_rtx, imm_info *imm_values)
34067 switch ( GET_CODE (*in_rtx))
34072 (imm_values->imm)++;
34073 if (x86_64_immediate_operand (*in_rtx, SImode))
34074 (imm_values->imm32)++;
34076 (imm_values->imm64)++;
34080 (imm_values->imm)++;
34081 (imm_values->imm64)++;
34085 if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
34087 (imm_values->imm)++;
34088 (imm_values->imm32)++;
34099 /* Compute number of immediate operands of an instruction. */
34102 find_constant (rtx in_rtx, imm_info *imm_values)
34104 for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
34105 (rtx_function) find_constant_1, (void *) imm_values);
34108 /* Return total size of immediate operands of an instruction along with number
34109 of corresponding immediate-operands. It initializes its parameters to zero
before calling FIND_CONSTANT.
INSN is the input instruction.  IMM is the total of immediates.
IMM32 is the number of 32 bit immediates.  IMM64 is the number of 64
bit immediates.  */
34116 get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
34118 imm_info imm_values = {0, 0, 0};
34120 find_constant (insn, &imm_values);
34121 *imm = imm_values.imm;
34122 *imm32 = imm_values.imm32;
34123 *imm64 = imm_values.imm64;
34124 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
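/* Illustrative sketch (added for exposition, not part of the original
   source; the helper name is hypothetical): the byte accounting that
   get_num_immediates returns -- each 32-bit immediate contributes 4
   bytes and each 64-bit immediate 8 bytes to a window's imm_size.  */

static int
model_imm_size (int imm32, int imm64)
{
  return imm32 * 4 + imm64 * 8;
}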
/* This function indicates if an operand of an instruction is an
   immediate.  */
34131 has_immediate (rtx insn)
34133 int num_imm_operand;
34134 int num_imm32_operand;
34135 int num_imm64_operand;
34138 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
34139 &num_imm64_operand);
34143 /* Return single or double path for instructions. */
34145 static enum insn_path
34146 get_insn_path (rtx insn)
34148 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
34150 if ((int)path == 0)
34151 return path_single;
34153 if ((int)path == 1)
34154 return path_double;
34159 /* Return insn dispatch group. */
34161 static enum dispatch_group
34162 get_insn_group (rtx insn)
34164 enum dispatch_group group = get_mem_group (insn);
34168 if (is_branch (insn))
34169 return disp_branch;
34174 if (has_immediate (insn))
34177 if (is_prefetch (insn))
34178 return disp_prefetch;
34180 return disp_no_group;
34183 /* Count number of GROUP restricted instructions in a dispatch
34184 window WINDOW_LIST. */
34187 count_num_restricted (rtx insn, dispatch_windows *window_list)
34189 enum dispatch_group group = get_insn_group (insn);
34191 int num_imm_operand;
34192 int num_imm32_operand;
34193 int num_imm64_operand;
34195 if (group == disp_no_group)
34198 if (group == disp_imm)
34200 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
34201 &num_imm64_operand);
34202 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
34203 || num_imm_operand + window_list->num_imm > MAX_IMM
34204 || (num_imm32_operand > 0
34205 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
34206 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
34207 || (num_imm64_operand > 0
34208 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
34209 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
34210 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
34211 && num_imm64_operand > 0
34212 && ((window_list->num_imm_64 > 0
34213 && window_list->num_insn >= 2)
34214 || window_list->num_insn >= 3)))
34220 if ((group == disp_load_store
34221 && (window_list->num_loads >= MAX_LOAD
34222 || window_list->num_stores >= MAX_STORE))
34223 || ((group == disp_load
34224 || group == disp_prefetch)
34225 && window_list->num_loads >= MAX_LOAD)
34226 || (group == disp_store
34227 && window_list->num_stores >= MAX_STORE))
34233 /* This function returns true if insn satisfies dispatch rules on the
34234 last window scheduled. */
34237 fits_dispatch_window (rtx insn)
34239 dispatch_windows *window_list = dispatch_window_list;
34240 dispatch_windows *window_list_next = dispatch_window_list->next;
34241 unsigned int num_restrict;
34242 enum dispatch_group group = get_insn_group (insn);
34243 enum insn_path path = get_insn_path (insn);
34246 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
34247 instructions should be given the lowest priority in the
scheduling process in the Haifa scheduler to make sure they will be
scheduled in the same dispatch window as the reference to them.  */
34250 if (group == disp_jcc || group == disp_cmp)
34253 /* Check nonrestricted. */
34254 if (group == disp_no_group || group == disp_branch)
34257 /* Get last dispatch window. */
34258 if (window_list_next)
34259 window_list = window_list_next;
34261 if (window_list->window_num == 1)
34263 sum = window_list->prev->window_size + window_list->window_size;
34266 || (min_insn_size (insn) + sum) >= 48)
34267 /* Window 1 is full. Go for next window. */
34271 num_restrict = count_num_restricted (insn, window_list);
34273 if (num_restrict > num_allowable_groups[group])
34276 /* See if it fits in the first window. */
34277 if (window_list->window_num == 0)
/* The first window should have only single and double path
   uops.  */
34281 if (path == path_double
34282 && (window_list->num_uops + 2) > MAX_INSN)
34284 else if (path != path_single)
34290 /* Add an instruction INSN with NUM_UOPS micro-operations to the
34291 dispatch window WINDOW_LIST. */
34294 add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
34296 int byte_len = min_insn_size (insn);
34297 int num_insn = window_list->num_insn;
34299 sched_insn_info *window = window_list->window;
34300 enum dispatch_group group = get_insn_group (insn);
34301 enum insn_path path = get_insn_path (insn);
34302 int num_imm_operand;
34303 int num_imm32_operand;
34304 int num_imm64_operand;
34306 if (!window_list->violation && group != disp_cmp
34307 && !fits_dispatch_window (insn))
34308 window_list->violation = true;
34310 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
34311 &num_imm64_operand);
34313 /* Initialize window with new instruction. */
34314 window[num_insn].insn = insn;
34315 window[num_insn].byte_len = byte_len;
34316 window[num_insn].group = group;
34317 window[num_insn].path = path;
34318 window[num_insn].imm_bytes = imm_size;
34320 window_list->window_size += byte_len;
34321 window_list->num_insn = num_insn + 1;
34322 window_list->num_uops = window_list->num_uops + num_uops;
34323 window_list->imm_size += imm_size;
34324 window_list->num_imm += num_imm_operand;
34325 window_list->num_imm_32 += num_imm32_operand;
34326 window_list->num_imm_64 += num_imm64_operand;
34328 if (group == disp_store)
34329 window_list->num_stores += 1;
34330 else if (group == disp_load
34331 || group == disp_prefetch)
34332 window_list->num_loads += 1;
34333 else if (group == disp_load_store)
34335 window_list->num_stores += 1;
34336 window_list->num_loads += 1;
34340 /* Adds a scheduled instruction, INSN, to the current dispatch window.
34341 If the total bytes of instructions or the number of instructions in
the window exceeds the allowable limit, it allocates a new window.  */
34345 add_to_dispatch_window (rtx insn)
34348 dispatch_windows *window_list;
34349 dispatch_windows *next_list;
34350 dispatch_windows *window0_list;
34351 enum insn_path path;
34352 enum dispatch_group insn_group;
34360 if (INSN_CODE (insn) < 0)
34363 byte_len = min_insn_size (insn);
34364 window_list = dispatch_window_list;
34365 next_list = window_list->next;
34366 path = get_insn_path (insn);
34367 insn_group = get_insn_group (insn);
34369 /* Get the last dispatch window. */
34371 window_list = dispatch_window_list->next;
34373 if (path == path_single)
34375 else if (path == path_double)
34378 insn_num_uops = (int) path;
34380 /* If current window is full, get a new window.
34381 Window number zero is full, if MAX_INSN uops are scheduled in it.
34382 Window number one is full, if window zero's bytes plus window
34383 one's bytes is 32, or if the bytes of the new instruction added
to the total makes it greater than 48, or it already has MAX_INSN
34385 instructions in it. */
34386 num_insn = window_list->num_insn;
34387 num_uops = window_list->num_uops;
34388 window_num = window_list->window_num;
34389 insn_fits = fits_dispatch_window (insn);
34391 if (num_insn >= MAX_INSN
34392 || num_uops + insn_num_uops > MAX_INSN
34395 window_num = ~window_num & 1;
34396 window_list = allocate_next_window (window_num);
34399 if (window_num == 0)
34401 add_insn_window (insn, window_list, insn_num_uops);
34402 if (window_list->num_insn >= MAX_INSN
34403 && insn_group == disp_branch)
34405 process_end_window ();
34409 else if (window_num == 1)
34411 window0_list = window_list->prev;
34412 sum = window0_list->window_size + window_list->window_size;
34414 || (byte_len + sum) >= 48)
34416 process_end_window ();
34417 window_list = dispatch_window_list;
34420 add_insn_window (insn, window_list, insn_num_uops);
34423 gcc_unreachable ();
34425 if (is_end_basic_block (insn_group))
/* End of basic block is reached; do the end-of-basic-block processing.  */
34428 process_end_window ();
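/* Illustrative sketch (added for exposition, not part of the original
   source; the helper name is hypothetical): the window-selection rule
   applied above.  A window is abandoned when it already holds MAX_INSN
   instructions, when adding the insn's uops would exceed MAX_INSN, or
   when the insn fails the dispatch-window restrictions; the index then
   flips between 0 and 1 via ~window_num & 1.  */

static int
model_pick_window (int window_num, int num_insn, int num_uops,
		   int insn_num_uops, int insn_fits)
{
  if (num_insn >= MAX_INSN
      || num_uops + insn_num_uops > MAX_INSN
      || !insn_fits)
    return ~window_num & 1;	/* switch to the other window */
  return window_num;
}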
34433 /* Print the dispatch window, WINDOW_NUM, to FILE. */
34435 DEBUG_FUNCTION static void
34436 debug_dispatch_window_file (FILE *file, int window_num)
34438 dispatch_windows *list;
34441 if (window_num == 0)
34442 list = dispatch_window_list;
34444 list = dispatch_window_list1;
34446 fprintf (file, "Window #%d:\n", list->window_num);
34447 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
34448 list->num_insn, list->num_uops, list->window_size);
34449 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
34450 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
34452 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
34454 fprintf (file, " insn info:\n");
34456 for (i = 0; i < MAX_INSN; i++)
34458 if (!list->window[i].insn)
34460 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
34461 i, group_name[list->window[i].group],
34462 i, (void *)list->window[i].insn,
34463 i, list->window[i].path,
34464 i, list->window[i].byte_len,
34465 i, list->window[i].imm_bytes);
34469 /* Print to stdout a dispatch window. */
34471 DEBUG_FUNCTION void
34472 debug_dispatch_window (int window_num)
34474 debug_dispatch_window_file (stdout, window_num);
34477 /* Print INSN dispatch information to FILE. */
34479 DEBUG_FUNCTION static void
34480 debug_insn_dispatch_info_file (FILE *file, rtx insn)
34483 enum insn_path path;
34484 enum dispatch_group group;
34486 int num_imm_operand;
34487 int num_imm32_operand;
34488 int num_imm64_operand;
34490 if (INSN_CODE (insn) < 0)
34493 byte_len = min_insn_size (insn);
34494 path = get_insn_path (insn);
34495 group = get_insn_group (insn);
34496 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
34497 &num_imm64_operand);
34499 fprintf (file, " insn info:\n");
34500 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
34501 group_name[group], path, byte_len);
34502 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
34503 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
/* Print to stdout the status of the ready list with respect to
   dispatch windows.  */
34509 DEBUG_FUNCTION void
34510 debug_ready_dispatch (void)
34513 int no_ready = number_in_ready ();
34515 fprintf (stdout, "Number of ready: %d\n", no_ready);
34517 for (i = 0; i < no_ready; i++)
34518 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
34521 /* This routine is the driver of the dispatch scheduler. */
34524 do_dispatch (rtx insn, int mode)
34526 if (mode == DISPATCH_INIT)
34527 init_dispatch_sched ();
34528 else if (mode == ADD_TO_DISPATCH_WINDOW)
34529 add_to_dispatch_window (insn);
34532 /* Return TRUE if Dispatch Scheduling is supported. */
34535 has_dispatch (rtx insn, int action)
34537 if (ix86_tune == PROCESSOR_BDVER1 && flag_dispatch_scheduler)
34543 case IS_DISPATCH_ON:
34548 return is_cmp (insn);
34550 case DISPATCH_VIOLATION:
34551 return dispatch_violation ();
34553 case FITS_DISPATCH_WINDOW:
34554 return fits_dispatch_window (insn);
34560 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
34561 place emms and femms instructions. */
34563 static enum machine_mode
34564 ix86_preferred_simd_mode (enum machine_mode mode)
34581 if (TARGET_AVX && !TARGET_PREFER_AVX128)
34587 if (!TARGET_VECTORIZE_DOUBLE)
34589 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
34591 else if (TARGET_SSE2)
/* If AVX is enabled then try vectorizing with both 256bit and 128bit
   vectors.  */
34603 static unsigned int
34604 ix86_autovectorize_vector_sizes (void)
34606 return (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
34609 /* Initialize the GCC target structure. */
34610 #undef TARGET_RETURN_IN_MEMORY
34611 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
34613 #undef TARGET_LEGITIMIZE_ADDRESS
34614 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
34616 #undef TARGET_ATTRIBUTE_TABLE
34617 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
34618 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
34619 # undef TARGET_MERGE_DECL_ATTRIBUTES
34620 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
34623 #undef TARGET_COMP_TYPE_ATTRIBUTES
34624 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
34626 #undef TARGET_INIT_BUILTINS
34627 #define TARGET_INIT_BUILTINS ix86_init_builtins
34628 #undef TARGET_BUILTIN_DECL
34629 #define TARGET_BUILTIN_DECL ix86_builtin_decl
34630 #undef TARGET_EXPAND_BUILTIN
34631 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
34633 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
34634 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
34635 ix86_builtin_vectorized_function
34637 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
34638 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
34640 #undef TARGET_BUILTIN_RECIPROCAL
34641 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
34643 #undef TARGET_ASM_FUNCTION_EPILOGUE
34644 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
#undef TARGET_ENCODE_SECTION_INFO
#ifndef SUBTARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
#endif
#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_BYTE_OP
#define TARGET_ASM_BYTE_OP ASM_BYTE

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif
#undef TARGET_PROFILE_BEFORE_PROLOGUE
#define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND ix86_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra

#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
#endif
#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START x86_file_start

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE ix86_option_override

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
#define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list

#undef TARGET_ENUM_VA_LIST_P
#define TARGET_ENUM_VA_LIST_P ix86_enum_va_list

#undef TARGET_FN_ABI_VA_LIST
#define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list

#undef TARGET_CANONICAL_VA_LIST_TYPE
#define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start

#undef TARGET_MD_ASM_CLOBBERS
#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers

#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG ix86_function_arg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
#undef TARGET_UPDATE_STACK_BOUNDARY
#define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
#undef TARGET_GET_DRAP_RTX
#define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_STATIC_CHAIN
#define TARGET_STATIC_CHAIN ix86_static_chain
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
#undef TARGET_RETURN_POPS_ARGS
#define TARGET_RETURN_POPS_ARGS ix86_return_pops_args

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
#endif
#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ix86_mangle_type
#ifndef TARGET_MACHO
#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
#endif
#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD ix86_secondary_reload

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
#undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
#define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  ix86_builtin_vectorization_cost
#undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
#define TARGET_VECTORIZE_BUILTIN_VEC_PERM \
  ix86_vectorize_builtin_vec_perm
#undef TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK
#define TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK \
  ix86_vectorize_builtin_vec_perm_ok
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  ix86_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  ix86_autovectorize_vector_sizes

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE ix86_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE ix86_function_specific_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT ix86_function_specific_print

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P ix86_can_inline_p

#undef TARGET_EXPAND_TO_RTL_HOOK
#define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ix86_can_eliminate

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry

#undef TARGET_ASM_CODE_END
#define TARGET_ASM_CODE_END ix86_code_end

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
#if TARGET_MACHO
#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS darwin_rename_builtins
#endif
struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-i386.h"