/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "diagnostic-core.h"
#include "basic-block.h"
#include "target-def.h"
#include "langhooks.h"
#include "tm-constrs.h"
#include "dwarf2out.h"
#include "sched-int.h"
enum upper_128bits_state
{
  unknown = 0,
  unused,
  used
};
typedef struct block_info_def
{
  /* State of the upper 128bits of AVX registers at exit.  */
  enum upper_128bits_state state;
  /* TRUE if state of the upper 128bits of AVX registers is unchanged
     in this block.  */
  bool unchanged;
  /* TRUE if block has been processed.  */
  bool processed;
  /* TRUE if block has been scanned.  */
  bool scanned;
  /* Previous state of the upper 128bits of AVX registers at entry.  */
  enum upper_128bits_state prev;
} *block_info;

#define BLOCK_INFO(B) ((block_info) (B)->aux)
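
/* Example (editor's illustration, not original text): the per-block
   record above lives in the bb->aux pointer that alloc_aux_for_blocks
   sets up below, so a typical access reads

     if (BLOCK_INFO (bb)->processed)
       return false;  */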
enum call_avx256_state
{
  /* Callee returns 256bit AVX register.  */
  callee_return_avx256 = -1,
  /* Callee returns and passes 256bit AVX register.  */
  callee_return_pass_avx256,
  /* Callee passes 256bit AVX register.  */
  callee_pass_avx256,
  /* Callee doesn't return nor pass 256bit AVX register, or no
     256bit AVX register in function return.  */
  call_no_avx256,
  /* vzeroupper intrinsic.  */
  vzeroupper_intrinsic
};
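
/* Editor's note (illustration): these enum values travel as the
   immediate operand of the vzeroupper UNSPEC_VOLATILE pattern, which
   is why move_or_delete_vzeroupper_2 below recovers them with

     avx256 = INTVAL (XVECEXP (pat, 0, 0));

   and compares the result against callee_return_avx256 etc.  */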
/* Check if a 256bit AVX register is referenced in stores.  */

static void
check_avx256_stores (rtx dest, const_rtx set, void *data)
{
  if ((REG_P (dest)
       && VALID_AVX256_REG_MODE (GET_MODE (dest)))
      || (GET_CODE (set) == SET
          && REG_P (SET_SRC (set))
          && VALID_AVX256_REG_MODE (GET_MODE (SET_SRC (set)))))
    {
      enum upper_128bits_state *state
        = (enum upper_128bits_state *) data;
      *state = used;
    }
}
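
/* Example (editor's illustration, not part of the original file): the
   callback above is driven by note_stores, as done in
   move_or_delete_vzeroupper_2 below:

     enum upper_128bits_state state = unused;
     note_stores (PATTERN (insn), check_avx256_stores, &state);

   Afterwards STATE is `used' iff INSN sets a 256bit AVX register, or
   copies one via a SET whose source is such a register.  */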
/* Helper function for move_or_delete_vzeroupper_1.  Look for vzeroupper
   in basic block BB.  Delete it if upper 128bit AVX registers are
   unused.  If it isn't deleted, move it to just before a jump insn.

   STATE is state of the upper 128bits of AVX registers at entry.  */
static void
move_or_delete_vzeroupper_2 (basic_block bb,
                             enum upper_128bits_state state)
{
  rtx insn, bb_end;
  rtx vzeroupper_insn = NULL_RTX;
  rtx pat;
  int avx256;
  bool unchanged;

  if (BLOCK_INFO (bb)->unchanged)
    {
      if (dump_file)
        fprintf (dump_file, " [bb %i] unchanged: upper 128bits: %d\n",
                 bb->index, state);

      BLOCK_INFO (bb)->state = state;
      return;
    }
  if (BLOCK_INFO (bb)->scanned && BLOCK_INFO (bb)->prev == state)
    {
      if (dump_file)
        fprintf (dump_file, " [bb %i] scanned: upper 128bits: %d\n",
                 bb->index, BLOCK_INFO (bb)->state);
      return;
    }

  BLOCK_INFO (bb)->prev = state;

  if (dump_file)
    fprintf (dump_file, " [bb %i] entry: upper 128bits: %d\n",
             bb->index, state);

  unchanged = true;
  /* BB_END changes when it is deleted.  */
  bb_end = BB_END (bb);
  insn = BB_HEAD (bb);
  while (insn != bb_end)
    {
      insn = NEXT_INSN (insn);

      if (!NONDEBUG_INSN_P (insn))
        continue;

      /* Move vzeroupper before jump/call.  */
      if (JUMP_P (insn) || CALL_P (insn))
        {
          if (!vzeroupper_insn)
            continue;

          if (PREV_INSN (insn) != vzeroupper_insn)
            {
              if (dump_file)
                {
                  fprintf (dump_file, "Move vzeroupper after:\n");
                  print_rtl_single (dump_file, PREV_INSN (insn));
                  fprintf (dump_file, "before:\n");
                  print_rtl_single (dump_file, insn);
                }
              reorder_insns_nobb (vzeroupper_insn, vzeroupper_insn,
                                  PREV_INSN (insn));
            }
          vzeroupper_insn = NULL_RTX;
          continue;
        }
      pat = PATTERN (insn);

      /* Check insn for vzeroupper intrinsic.  */
      if (GET_CODE (pat) == UNSPEC_VOLATILE
          && XINT (pat, 1) == UNSPECV_VZEROUPPER)
        {
          if (dump_file)
            {
              /* Found vzeroupper intrinsic.  */
              fprintf (dump_file, "Found vzeroupper:\n");
              print_rtl_single (dump_file, insn);
            }
        }
      else
        {
          /* Check insn for vzeroall intrinsic.  */
          if (GET_CODE (pat) == PARALLEL
              && GET_CODE (XVECEXP (pat, 0, 0)) == UNSPEC_VOLATILE
              && XINT (XVECEXP (pat, 0, 0), 1) == UNSPECV_VZEROALL)
            {
              state = unused;
              unchanged = false;

              /* Delete pending vzeroupper insertion.  */
              if (vzeroupper_insn)
                {
                  delete_insn (vzeroupper_insn);
                  vzeroupper_insn = NULL_RTX;
                }
            }
          else if (state != used)
            {
              note_stores (pat, check_avx256_stores, &state);
              if (state == used)
                unchanged = false;
            }
          continue;
        }
      /* Process vzeroupper intrinsic.  */
      avx256 = INTVAL (XVECEXP (pat, 0, 0));

      if (state == unused)
        {
          /* Since the upper 128bits are cleared, callee must not pass
             256bit AVX register.  We only need to check if callee
             returns 256bit AVX register.  */
          if (avx256 == callee_return_avx256)
            {
              state = used;
              unchanged = false;
            }

          /* Remove unnecessary vzeroupper since upper 128bits are
             cleared.  */
          if (dump_file)
            {
              fprintf (dump_file, "Delete redundant vzeroupper:\n");
              print_rtl_single (dump_file, insn);
            }
          delete_insn (insn);
        }
      else
        {
          /* Set state to UNUSED if callee doesn't return 256bit AVX
             register.  */
          if (avx256 != callee_return_pass_avx256)
            state = unused;

          if (avx256 == callee_return_pass_avx256
              || avx256 == callee_pass_avx256)
            {
              /* Must remove vzeroupper since callee passes in 256bit
                 AVX register.  */
              if (dump_file)
                {
                  fprintf (dump_file, "Delete callee pass vzeroupper:\n");
                  print_rtl_single (dump_file, insn);
                }
              delete_insn (insn);
            }
          else
            {
              vzeroupper_insn = insn;
              unchanged = false;
            }
        }
    }

  BLOCK_INFO (bb)->state = state;
  BLOCK_INFO (bb)->unchanged = unchanged;
  BLOCK_INFO (bb)->scanned = true;

  if (dump_file)
    fprintf (dump_file, " [bb %i] exit: %s: upper 128bits: %d\n",
             bb->index, unchanged ? "unchanged" : "changed",
             state);
}
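
/* Editor's note (illustration): the dataflow works over the tiny
   lattice unknown / unused / used.  A worked example: if every
   predecessor of a block exits with `unused', a vzeroupper in the
   block is redundant and gets deleted; if any predecessor exits with
   `used', the vzeroupper must stay and is only moved next to the
   jump/call; a predecessor still at `unknown' leaves the block
   unprocessed so a later round of move_or_delete_vzeroupper revisits
   it, unless UNKNOWN_IS_UNUSED is in effect.  */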
/* Helper function for move_or_delete_vzeroupper.  Process vzeroupper
   in BLOCK and check its predecessor blocks.  Treat UNKNOWN state
   as USED if UNKNOWN_IS_UNUSED is true.  Return TRUE if the exit
   state is changed.  */

static bool
move_or_delete_vzeroupper_1 (basic_block block, bool unknown_is_unused)
{
  edge e;
  edge_iterator ei;
  enum upper_128bits_state state, old_state, new_state;
  bool seen_unknown;

  if (dump_file)
    fprintf (dump_file, " Process [bb %i]: status: %d\n",
             block->index, BLOCK_INFO (block)->processed);

  if (BLOCK_INFO (block)->processed)
    return false;

  state = unused;
  /* Check all predecessor edges of this block.  */
  seen_unknown = false;
  FOR_EACH_EDGE (e, ei, block->preds)
    {
      if (e->src == block)
        continue;
      switch (BLOCK_INFO (e->src)->state)
        {
        case unknown:
          if (!unknown_is_unused)
            seen_unknown = true;
          break;
        case unused:
          break;
        case used:
          state = used;
          goto done;
        }
    }

  if (seen_unknown)
    state = unknown;
done:
  old_state = BLOCK_INFO (block)->state;
  move_or_delete_vzeroupper_2 (block, state);
  new_state = BLOCK_INFO (block)->state;

  if (state != unknown || new_state == used)
    BLOCK_INFO (block)->processed = true;

  /* Need to rescan if the upper 128bits of AVX registers are changed
     to USED at exit.  */
  if (new_state != old_state)
    {
      if (new_state == used)
        cfun->machine->rescan_vzeroupper_p = 1;

      return true;
    }
  else
    return false;
}
/* Go through the instruction stream looking for vzeroupper.  Delete
   it if upper 128bit AVX registers are unused.  If it isn't deleted,
   move it to just before a jump insn.  */

static void
move_or_delete_vzeroupper (void)
{
  edge e;
  edge_iterator ei;
  basic_block bb;
  fibheap_t worklist, pending, fibheap_swap;
  sbitmap visited, in_worklist, in_pending, sbitmap_swap;
  int *bb_order;
  int *rc_order;
  int i;

  /* Set up block info for each basic block.  */
  alloc_aux_for_blocks (sizeof (struct block_info_def));
  /* Process outgoing edges of entry point.  */
  if (dump_file)
    fprintf (dump_file, "Process outgoing edges of entry point\n");

  FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR->succs)
    {
      move_or_delete_vzeroupper_2 (e->dest,
                                   cfun->machine->caller_pass_avx256_p
                                   ? used : unused);
      BLOCK_INFO (e->dest)->processed = true;
    }
  /* Compute reverse completion order of depth first search of the CFG
     so that the data-flow runs faster.  */
  rc_order = XNEWVEC (int, n_basic_blocks - NUM_FIXED_BLOCKS);
  bb_order = XNEWVEC (int, last_basic_block);
  pre_and_rev_post_order_compute (NULL, rc_order, false);
  for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
    bb_order[rc_order[i]] = i;
  free (rc_order);
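
  /* Editor's illustration: after the loop above, bb_order maps a basic
     block index to its position in reverse completion order.  E.g.
     with rc_order = {2, 4, 3} we get bb_order[2] = 0, bb_order[4] = 1,
     bb_order[3] = 2, so fibheap_extract_min below hands out blocks in
     roughly topological order and the dataflow converges in fewer
     rounds.  */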
  worklist = fibheap_new ();
  pending = fibheap_new ();
  visited = sbitmap_alloc (last_basic_block);
  in_worklist = sbitmap_alloc (last_basic_block);
  in_pending = sbitmap_alloc (last_basic_block);
  sbitmap_zero (in_worklist);

  /* Don't check outgoing edges of entry point.  */
  sbitmap_ones (in_pending);
  FOR_EACH_BB (bb)
    if (BLOCK_INFO (bb)->processed)
      RESET_BIT (in_pending, bb->index);
    else
      {
        move_or_delete_vzeroupper_1 (bb, false);
        fibheap_insert (pending, bb_order[bb->index], bb);
      }

  if (dump_file)
    fprintf (dump_file, "Check remaining basic blocks\n");
  while (!fibheap_empty (pending))
    {
      fibheap_swap = pending;
      pending = worklist;
      worklist = fibheap_swap;
      sbitmap_swap = in_pending;
      in_pending = in_worklist;
      in_worklist = sbitmap_swap;

      sbitmap_zero (visited);

      cfun->machine->rescan_vzeroupper_p = 0;

      while (!fibheap_empty (worklist))
        {
          bb = (basic_block) fibheap_extract_min (worklist);
          RESET_BIT (in_worklist, bb->index);
          gcc_assert (!TEST_BIT (visited, bb->index));
          if (!TEST_BIT (visited, bb->index))
            {
              edge_iterator ei;

              SET_BIT (visited, bb->index);

              if (move_or_delete_vzeroupper_1 (bb, false))
                FOR_EACH_EDGE (e, ei, bb->succs)
                  {
                    if (e->dest == EXIT_BLOCK_PTR
                        || BLOCK_INFO (e->dest)->processed)
                      continue;

                    if (TEST_BIT (visited, e->dest->index))
                      {
                        if (!TEST_BIT (in_pending, e->dest->index))
                          {
                            /* Send E->DEST to next round.  */
                            SET_BIT (in_pending, e->dest->index);
                            fibheap_insert (pending,
                                            bb_order[e->dest->index],
                                            e->dest);
                          }
                      }
                    else if (!TEST_BIT (in_worklist, e->dest->index))
                      {
                        /* Add E->DEST to current round.  */
                        SET_BIT (in_worklist, e->dest->index);
                        fibheap_insert (worklist, bb_order[e->dest->index],
                                        e->dest);
                      }
                  }
            }
        }

      if (!cfun->machine->rescan_vzeroupper_p)
        break;
    }

  free (bb_order);
  fibheap_delete (worklist);
  fibheap_delete (pending);
  sbitmap_free (visited);
  sbitmap_free (in_worklist);
  sbitmap_free (in_pending);

  if (dump_file)
    fprintf (dump_file, "Process remaining basic blocks\n");

  FOR_EACH_BB (bb)
    move_or_delete_vzeroupper_1 (bb, true);

  free_aux_for_blocks ();
}
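
/* Editor's sketch (not from the original source): the loop above is a
   classic two-heap worklist scheme.  Each round drains `worklist' in
   bb_order priority; a block whose exit state changed pushes its
   not-yet-processed successors either back into the current round (if
   not visited yet) or into `pending' for the next round (if already
   visited).  Iteration stops as soon as a round completes without
   rescan_vzeroupper_p being set, i.e. no exit state flipped to
   `used'.  */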
static rtx legitimize_dllimport_symbol (rtx, bool);

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
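
/* Example (editor's illustration): MODE_INDEX selects the per-mode row
   of the mult and divide cost arrays in struct processor_costs, e.g.

     ix86_size_cost.mult_init[MODE_INDEX (SImode)]

   picks the SImode (index 2) multiply startup cost.  The field name
   mult_init follows the struct processor_costs declaration in i386.h;
   treat it as an assumption here.  */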
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)

#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
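
/* Editor's note on the stringop tables below (an illustration, not
   original text): a stringop_algs value is {alg-for-unknown-size,
   {{max, alg}, ...}}, where the {max, alg} list is consulted for known
   block sizes (the first entry whose MAX covers the size wins, -1 is
   the catch-all).  So

     {{rep_prefix_4_byte, {{128, loop}, {-1, rep_prefix_1_byte}}}, ...}

   reads: unknown-size blocks use rep_prefix_4_byte; known blocks up to
   128 bytes use an inline loop, anything larger uses
   rep_prefix_1_byte.  Each cost table carries one such entry for 32bit
   and one for 64bit code; DUMMY_STRINGOP_ALGS fills the 64bit slot for
   32bit-only tunings.  */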
const
struct processor_costs ix86_size_cost = {  /* costs for tuning for size */
  COSTS_N_BYTES (2),   /* cost of an add instruction */
  COSTS_N_BYTES (3),   /* cost of a lea instruction */
  COSTS_N_BYTES (2),   /* variable shift costs */
  COSTS_N_BYTES (3),   /* constant shift costs */
  {COSTS_N_BYTES (3),  /* cost of starting multiply for QI */
   COSTS_N_BYTES (3),  /* HI */
   COSTS_N_BYTES (3),  /* SI */
   COSTS_N_BYTES (3),  /* DI */
   COSTS_N_BYTES (5)}, /* other */
  0,                   /* cost of multiply per each bit set */
  {COSTS_N_BYTES (3),  /* cost of a divide/mod for QI */
   COSTS_N_BYTES (3),  /* HI */
   COSTS_N_BYTES (3),  /* SI */
   COSTS_N_BYTES (3),  /* DI */
   COSTS_N_BYTES (5)}, /* other */
  COSTS_N_BYTES (3),   /* cost of movsx */
  COSTS_N_BYTES (3),   /* cost of movzx */
  0,                   /* "large" insn */
  2,                   /* cost for loading QImode using movzbl */
  {2, 2, 2},           /* cost of loading integer registers
                          in QImode, HImode and SImode.
                          Relative to reg-reg move (2).  */
  {2, 2, 2},           /* cost of storing integer registers */
  2,                   /* cost of reg,reg fld/fst */
  {2, 2, 2},           /* cost of loading fp registers
                          in SFmode, DFmode and XFmode */
  {2, 2, 2},           /* cost of storing fp registers
                          in SFmode, DFmode and XFmode */
  3,                   /* cost of moving MMX register */
  {3, 3},              /* cost of loading MMX registers
                          in SImode and DImode */
  {3, 3},              /* cost of storing MMX registers
                          in SImode and DImode */
  3,                   /* cost of moving SSE register */
  {3, 3, 3},           /* cost of loading SSE registers
                          in SImode, DImode and TImode */
  {3, 3, 3},           /* cost of storing SSE registers
                          in SImode, DImode and TImode */
  3,                   /* MMX or SSE register to integer */
  0,                   /* size of l1 cache */
  0,                   /* size of l2 cache */
  0,                   /* size of prefetch block */
  0,                   /* number of parallel prefetches */
  COSTS_N_BYTES (2),   /* cost of FADD and FSUB insns.  */
  COSTS_N_BYTES (2),   /* cost of FMUL instruction.  */
  COSTS_N_BYTES (2),   /* cost of FDIV instruction.  */
  COSTS_N_BYTES (2),   /* cost of FABS instruction.  */
  COSTS_N_BYTES (2),   /* cost of FCHS instruction.  */
  COSTS_N_BYTES (2),   /* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  1,                   /* scalar_stmt_cost.  */
  1,                   /* scalar load_cost.  */
  1,                   /* scalar_store_cost.  */
  1,                   /* vec_stmt_cost.  */
  1,                   /* vec_to_scalar_cost.  */
  1,                   /* scalar_to_vec_cost.  */
  1,                   /* vec_align_load_cost.  */
  1,                   /* vec_unalign_load_cost.  */
  1,                   /* vec_store_cost.  */
  1,                   /* cond_taken_branch_cost.  */
  1,                   /* cond_not_taken_branch_cost.  */
};
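
/* Editor's note (an assumption based on the rest of this file): these
   tables are not used directly; option handling points the global

     const struct processor_costs *ix86_cost = &pentium_cost;

   at the entry matching the selected -mtune, and swaps in
   ix86_size_cost when optimizing for size.  */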
/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {  /* 386 specific costs */
  COSTS_N_INSNS (1),   /* cost of an add instruction */
  COSTS_N_INSNS (1),   /* cost of a lea instruction */
  COSTS_N_INSNS (3),   /* variable shift costs */
  COSTS_N_INSNS (2),   /* constant shift costs */
  {COSTS_N_INSNS (6),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (6),  /* HI */
   COSTS_N_INSNS (6),  /* SI */
   COSTS_N_INSNS (6),  /* DI */
   COSTS_N_INSNS (6)}, /* other */
  COSTS_N_INSNS (1),   /* cost of multiply per each bit set */
  {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23), /* HI */
   COSTS_N_INSNS (23), /* SI */
   COSTS_N_INSNS (23), /* DI */
   COSTS_N_INSNS (23)},/* other */
  COSTS_N_INSNS (3),   /* cost of movsx */
  COSTS_N_INSNS (2),   /* cost of movzx */
  15,                  /* "large" insn */
  4,                   /* cost for loading QImode using movzbl */
  {2, 4, 2},           /* cost of loading integer registers
                          in QImode, HImode and SImode.
                          Relative to reg-reg move (2).  */
  {2, 4, 2},           /* cost of storing integer registers */
  2,                   /* cost of reg,reg fld/fst */
  {8, 8, 8},           /* cost of loading fp registers
                          in SFmode, DFmode and XFmode */
  {8, 8, 8},           /* cost of storing fp registers
                          in SFmode, DFmode and XFmode */
  2,                   /* cost of moving MMX register */
  {4, 8},              /* cost of loading MMX registers
                          in SImode and DImode */
  {4, 8},              /* cost of storing MMX registers
                          in SImode and DImode */
  2,                   /* cost of moving SSE register */
  {4, 8, 16},          /* cost of loading SSE registers
                          in SImode, DImode and TImode */
  {4, 8, 16},          /* cost of storing SSE registers
                          in SImode, DImode and TImode */
  3,                   /* MMX or SSE register to integer */
  0,                   /* size of l1 cache */
  0,                   /* size of l2 cache */
  0,                   /* size of prefetch block */
  0,                   /* number of parallel prefetches */
  COSTS_N_INSNS (23),  /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (27),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (88),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (22),  /* cost of FABS instruction.  */
  COSTS_N_INSNS (24),  /* cost of FCHS instruction.  */
  COSTS_N_INSNS (122), /* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                   /* scalar_stmt_cost.  */
  1,                   /* scalar load_cost.  */
  1,                   /* scalar_store_cost.  */
  1,                   /* vec_stmt_cost.  */
  1,                   /* vec_to_scalar_cost.  */
  1,                   /* scalar_to_vec_cost.  */
  1,                   /* vec_align_load_cost.  */
  2,                   /* vec_unalign_load_cost.  */
  1,                   /* vec_store_cost.  */
  3,                   /* cond_taken_branch_cost.  */
  1,                   /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs i486_cost = {  /* 486 specific costs */
  COSTS_N_INSNS (1),   /* cost of an add instruction */
  COSTS_N_INSNS (1),   /* cost of a lea instruction */
  COSTS_N_INSNS (3),   /* variable shift costs */
  COSTS_N_INSNS (2),   /* constant shift costs */
  {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
   COSTS_N_INSNS (12), /* HI */
   COSTS_N_INSNS (12), /* SI */
   COSTS_N_INSNS (12), /* DI */
   COSTS_N_INSNS (12)},/* other */
  1,                   /* cost of multiply per each bit set */
  {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (40), /* HI */
   COSTS_N_INSNS (40), /* SI */
   COSTS_N_INSNS (40), /* DI */
   COSTS_N_INSNS (40)},/* other */
  COSTS_N_INSNS (3),   /* cost of movsx */
  COSTS_N_INSNS (2),   /* cost of movzx */
  15,                  /* "large" insn */
  4,                   /* cost for loading QImode using movzbl */
  {2, 4, 2},           /* cost of loading integer registers
                          in QImode, HImode and SImode.
                          Relative to reg-reg move (2).  */
  {2, 4, 2},           /* cost of storing integer registers */
  2,                   /* cost of reg,reg fld/fst */
  {8, 8, 8},           /* cost of loading fp registers
                          in SFmode, DFmode and XFmode */
  {8, 8, 8},           /* cost of storing fp registers
                          in SFmode, DFmode and XFmode */
  2,                   /* cost of moving MMX register */
  {4, 8},              /* cost of loading MMX registers
                          in SImode and DImode */
  {4, 8},              /* cost of storing MMX registers
                          in SImode and DImode */
  2,                   /* cost of moving SSE register */
  {4, 8, 16},          /* cost of loading SSE registers
                          in SImode, DImode and TImode */
  {4, 8, 16},          /* cost of storing SSE registers
                          in SImode, DImode and TImode */
  3,                   /* MMX or SSE register to integer */
  4,                   /* size of l1 cache.  486 has 8kB cache
                          shared for code and data, so 4kB is
                          not really precise.  */
  4,                   /* size of l2 cache */
  0,                   /* size of prefetch block */
  0,                   /* number of parallel prefetches */
  COSTS_N_INSNS (8),   /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (16),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (73),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),   /* cost of FABS instruction.  */
  COSTS_N_INSNS (3),   /* cost of FCHS instruction.  */
  COSTS_N_INSNS (83),  /* cost of FSQRT instruction.  */
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                   /* scalar_stmt_cost.  */
  1,                   /* scalar load_cost.  */
  1,                   /* scalar_store_cost.  */
  1,                   /* vec_stmt_cost.  */
  1,                   /* vec_to_scalar_cost.  */
  1,                   /* scalar_to_vec_cost.  */
  1,                   /* vec_align_load_cost.  */
  2,                   /* vec_unalign_load_cost.  */
  1,                   /* vec_store_cost.  */
  3,                   /* cond_taken_branch_cost.  */
  1,                   /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1),   /* cost of an add instruction */
  COSTS_N_INSNS (1),   /* cost of a lea instruction */
  COSTS_N_INSNS (4),   /* variable shift costs */
  COSTS_N_INSNS (1),   /* constant shift costs */
  {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
   COSTS_N_INSNS (11), /* HI */
   COSTS_N_INSNS (11), /* SI */
   COSTS_N_INSNS (11), /* DI */
   COSTS_N_INSNS (11)},/* other */
  0,                   /* cost of multiply per each bit set */
  {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (25), /* HI */
   COSTS_N_INSNS (25), /* SI */
   COSTS_N_INSNS (25), /* DI */
   COSTS_N_INSNS (25)},/* other */
  COSTS_N_INSNS (3),   /* cost of movsx */
  COSTS_N_INSNS (2),   /* cost of movzx */
  8,                   /* "large" insn */
  6,                   /* cost for loading QImode using movzbl */
  {2, 4, 2},           /* cost of loading integer registers
                          in QImode, HImode and SImode.
                          Relative to reg-reg move (2).  */
  {2, 4, 2},           /* cost of storing integer registers */
  2,                   /* cost of reg,reg fld/fst */
  {2, 2, 6},           /* cost of loading fp registers
                          in SFmode, DFmode and XFmode */
  {4, 4, 6},           /* cost of storing fp registers
                          in SFmode, DFmode and XFmode */
  8,                   /* cost of moving MMX register */
  {8, 8},              /* cost of loading MMX registers
                          in SImode and DImode */
  {8, 8},              /* cost of storing MMX registers
                          in SImode and DImode */
  2,                   /* cost of moving SSE register */
  {4, 8, 16},          /* cost of loading SSE registers
                          in SImode, DImode and TImode */
  {4, 8, 16},          /* cost of storing SSE registers
                          in SImode, DImode and TImode */
  3,                   /* MMX or SSE register to integer */
  8,                   /* size of l1 cache.  */
  8,                   /* size of l2 cache */
  0,                   /* size of prefetch block */
  0,                   /* number of parallel prefetches */
  COSTS_N_INSNS (3),   /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (3),   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (39),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),   /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),   /* cost of FCHS instruction.  */
  COSTS_N_INSNS (70),  /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                   /* scalar_stmt_cost.  */
  1,                   /* scalar load_cost.  */
  1,                   /* scalar_store_cost.  */
  1,                   /* vec_stmt_cost.  */
  1,                   /* vec_to_scalar_cost.  */
  1,                   /* scalar_to_vec_cost.  */
  1,                   /* vec_align_load_cost.  */
  2,                   /* vec_unalign_load_cost.  */
  1,                   /* vec_store_cost.  */
  3,                   /* cond_taken_branch_cost.  */
  1,                   /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1),   /* cost of an add instruction */
  COSTS_N_INSNS (1),   /* cost of a lea instruction */
  COSTS_N_INSNS (1),   /* variable shift costs */
  COSTS_N_INSNS (1),   /* constant shift costs */
  {COSTS_N_INSNS (4),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (4),  /* SI */
   COSTS_N_INSNS (4),  /* DI */
   COSTS_N_INSNS (4)}, /* other */
  0,                   /* cost of multiply per each bit set */
  {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (17), /* HI */
   COSTS_N_INSNS (17), /* SI */
   COSTS_N_INSNS (17), /* DI */
   COSTS_N_INSNS (17)},/* other */
  COSTS_N_INSNS (1),   /* cost of movsx */
  COSTS_N_INSNS (1),   /* cost of movzx */
  8,                   /* "large" insn */
  2,                   /* cost for loading QImode using movzbl */
  {4, 4, 4},           /* cost of loading integer registers
                          in QImode, HImode and SImode.
                          Relative to reg-reg move (2).  */
  {2, 2, 2},           /* cost of storing integer registers */
  2,                   /* cost of reg,reg fld/fst */
  {2, 2, 6},           /* cost of loading fp registers
                          in SFmode, DFmode and XFmode */
  {4, 4, 6},           /* cost of storing fp registers
                          in SFmode, DFmode and XFmode */
  2,                   /* cost of moving MMX register */
  {2, 2},              /* cost of loading MMX registers
                          in SImode and DImode */
  {2, 2},              /* cost of storing MMX registers
                          in SImode and DImode */
  2,                   /* cost of moving SSE register */
  {2, 2, 8},           /* cost of loading SSE registers
                          in SImode, DImode and TImode */
  {2, 2, 8},           /* cost of storing SSE registers
                          in SImode, DImode and TImode */
  3,                   /* MMX or SSE register to integer */
  8,                   /* size of l1 cache.  */
  256,                 /* size of l2 cache */
  32,                  /* size of prefetch block */
  6,                   /* number of parallel prefetches */
  COSTS_N_INSNS (3),   /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),   /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),   /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),  /* cost of FSQRT instruction.  */
  /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
     (we ensure the alignment).  For small blocks the inline loop is still a
     noticeable win; for bigger blocks either rep movsl or rep movsb is the
     way to go.  Rep movsb apparently has more expensive startup time on the
     CPU, but after 4K the difference is down in the noise.  */
  {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
                        {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{1024, unrolled_loop},
                        {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                   /* scalar_stmt_cost.  */
  1,                   /* scalar load_cost.  */
  1,                   /* scalar_store_cost.  */
  1,                   /* vec_stmt_cost.  */
  1,                   /* vec_to_scalar_cost.  */
  1,                   /* scalar_to_vec_cost.  */
  1,                   /* vec_align_load_cost.  */
  2,                   /* vec_unalign_load_cost.  */
  1,                   /* vec_store_cost.  */
  3,                   /* cond_taken_branch_cost.  */
  1,                   /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs geode_cost = {
  COSTS_N_INSNS (1),   /* cost of an add instruction */
  COSTS_N_INSNS (1),   /* cost of a lea instruction */
  COSTS_N_INSNS (2),   /* variable shift costs */
  COSTS_N_INSNS (1),   /* constant shift costs */
  {COSTS_N_INSNS (3),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (7),  /* SI */
   COSTS_N_INSNS (7),  /* DI */
   COSTS_N_INSNS (7)}, /* other */
  0,                   /* cost of multiply per each bit set */
  {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23), /* HI */
   COSTS_N_INSNS (39), /* SI */
   COSTS_N_INSNS (39), /* DI */
   COSTS_N_INSNS (39)},/* other */
  COSTS_N_INSNS (1),   /* cost of movsx */
  COSTS_N_INSNS (1),   /* cost of movzx */
  8,                   /* "large" insn */
  1,                   /* cost for loading QImode using movzbl */
  {1, 1, 1},           /* cost of loading integer registers
                          in QImode, HImode and SImode.
                          Relative to reg-reg move (2).  */
  {1, 1, 1},           /* cost of storing integer registers */
  1,                   /* cost of reg,reg fld/fst */
  {1, 1, 1},           /* cost of loading fp registers
                          in SFmode, DFmode and XFmode */
  {4, 6, 6},           /* cost of storing fp registers
                          in SFmode, DFmode and XFmode */

  1,                   /* cost of moving MMX register */
  {1, 1},              /* cost of loading MMX registers
                          in SImode and DImode */
  {1, 1},              /* cost of storing MMX registers
                          in SImode and DImode */
  1,                   /* cost of moving SSE register */
  {1, 1, 1},           /* cost of loading SSE registers
                          in SImode, DImode and TImode */
  {1, 1, 1},           /* cost of storing SSE registers
                          in SImode, DImode and TImode */
  1,                   /* MMX or SSE register to integer */
  64,                  /* size of l1 cache.  */
  128,                 /* size of l2 cache.  */
  32,                  /* size of prefetch block */
  1,                   /* number of parallel prefetches */
  COSTS_N_INSNS (6),   /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (11),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (47),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),   /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),   /* cost of FCHS instruction.  */
  COSTS_N_INSNS (54),  /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                   /* scalar_stmt_cost.  */
  1,                   /* scalar load_cost.  */
  1,                   /* scalar_store_cost.  */
  1,                   /* vec_stmt_cost.  */
  1,                   /* vec_to_scalar_cost.  */
  1,                   /* scalar_to_vec_cost.  */
  1,                   /* vec_align_load_cost.  */
  2,                   /* vec_unalign_load_cost.  */
  1,                   /* vec_store_cost.  */
  3,                   /* cond_taken_branch_cost.  */
  1,                   /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs k6_cost = {
  COSTS_N_INSNS (1),   /* cost of an add instruction */
  COSTS_N_INSNS (2),   /* cost of a lea instruction */
  COSTS_N_INSNS (1),   /* variable shift costs */
  COSTS_N_INSNS (1),   /* constant shift costs */
  {COSTS_N_INSNS (3),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (3),  /* HI */
   COSTS_N_INSNS (3),  /* SI */
   COSTS_N_INSNS (3),  /* DI */
   COSTS_N_INSNS (3)}, /* other */
  0,                   /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (18), /* HI */
   COSTS_N_INSNS (18), /* SI */
   COSTS_N_INSNS (18), /* DI */
   COSTS_N_INSNS (18)},/* other */
  COSTS_N_INSNS (2),   /* cost of movsx */
  COSTS_N_INSNS (2),   /* cost of movzx */
  8,                   /* "large" insn */
  3,                   /* cost for loading QImode using movzbl */
  {4, 5, 4},           /* cost of loading integer registers
                          in QImode, HImode and SImode.
                          Relative to reg-reg move (2).  */
  {2, 3, 2},           /* cost of storing integer registers */
  4,                   /* cost of reg,reg fld/fst */
  {6, 6, 6},           /* cost of loading fp registers
                          in SFmode, DFmode and XFmode */
  {4, 4, 4},           /* cost of storing fp registers
                          in SFmode, DFmode and XFmode */
  2,                   /* cost of moving MMX register */
  {2, 2},              /* cost of loading MMX registers
                          in SImode and DImode */
  {2, 2},              /* cost of storing MMX registers
                          in SImode and DImode */
  2,                   /* cost of moving SSE register */
  {2, 2, 8},           /* cost of loading SSE registers
                          in SImode, DImode and TImode */
  {2, 2, 8},           /* cost of storing SSE registers
                          in SImode, DImode and TImode */
  6,                   /* MMX or SSE register to integer */
  32,                  /* size of l1 cache.  */
  32,                  /* size of l2 cache.  Some models
                          have integrated l2 cache, but
                          optimizing for k6 is not important
                          enough to worry about that.  */
  32,                  /* size of prefetch block */
  1,                   /* number of parallel prefetches */
  COSTS_N_INSNS (2),   /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (2),   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),   /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),   /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),  /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                   /* scalar_stmt_cost.  */
  1,                   /* scalar load_cost.  */
  1,                   /* scalar_store_cost.  */
  1,                   /* vec_stmt_cost.  */
  1,                   /* vec_to_scalar_cost.  */
  1,                   /* scalar_to_vec_cost.  */
  1,                   /* vec_align_load_cost.  */
  2,                   /* vec_unalign_load_cost.  */
  1,                   /* vec_store_cost.  */
  3,                   /* cond_taken_branch_cost.  */
  1,                   /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1),   /* cost of an add instruction */
  COSTS_N_INSNS (2),   /* cost of a lea instruction */
  COSTS_N_INSNS (1),   /* variable shift costs */
  COSTS_N_INSNS (1),   /* constant shift costs */
  {COSTS_N_INSNS (5),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (5),  /* HI */
   COSTS_N_INSNS (5),  /* SI */
   COSTS_N_INSNS (5),  /* DI */
   COSTS_N_INSNS (5)}, /* other */
  0,                   /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26), /* HI */
   COSTS_N_INSNS (42), /* SI */
   COSTS_N_INSNS (74), /* DI */
   COSTS_N_INSNS (74)},/* other */
  COSTS_N_INSNS (1),   /* cost of movsx */
  COSTS_N_INSNS (1),   /* cost of movzx */
  8,                   /* "large" insn */
  4,                   /* cost for loading QImode using movzbl */
  {3, 4, 3},           /* cost of loading integer registers
                          in QImode, HImode and SImode.
                          Relative to reg-reg move (2).  */
  {3, 4, 3},           /* cost of storing integer registers */
  4,                   /* cost of reg,reg fld/fst */
  {4, 4, 12},          /* cost of loading fp registers
                          in SFmode, DFmode and XFmode */
  {6, 6, 8},           /* cost of storing fp registers
                          in SFmode, DFmode and XFmode */
  2,                   /* cost of moving MMX register */
  {4, 4},              /* cost of loading MMX registers
                          in SImode and DImode */
  {4, 4},              /* cost of storing MMX registers
                          in SImode and DImode */
  2,                   /* cost of moving SSE register */
  {4, 4, 6},           /* cost of loading SSE registers
                          in SImode, DImode and TImode */
  {4, 4, 5},           /* cost of storing SSE registers
                          in SImode, DImode and TImode */
  5,                   /* MMX or SSE register to integer */
  64,                  /* size of l1 cache.  */
  256,                 /* size of l2 cache.  */
  64,                  /* size of prefetch block */
  6,                   /* number of parallel prefetches */
  5,                   /* Branch cost */
  COSTS_N_INSNS (4),   /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (24),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),   /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),   /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),  /* cost of FSQRT instruction.  */
  /* For some reason, Athlon deals better with REP prefix (relative to loops)
     compared to K8.  Alignment becomes important after 8 bytes for memcpy and
     128 bytes for memset.  */
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                   /* scalar_stmt_cost.  */
  1,                   /* scalar load_cost.  */
  1,                   /* scalar_store_cost.  */
  1,                   /* vec_stmt_cost.  */
  1,                   /* vec_to_scalar_cost.  */
  1,                   /* scalar_to_vec_cost.  */
  1,                   /* vec_align_load_cost.  */
  2,                   /* vec_unalign_load_cost.  */
  1,                   /* vec_store_cost.  */
  3,                   /* cond_taken_branch_cost.  */
  1,                   /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs k8_cost = {
  COSTS_N_INSNS (1),   /* cost of an add instruction */
  COSTS_N_INSNS (2),   /* cost of a lea instruction */
  COSTS_N_INSNS (1),   /* variable shift costs */
  COSTS_N_INSNS (1),   /* constant shift costs */
  {COSTS_N_INSNS (3),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (3),  /* SI */
   COSTS_N_INSNS (4),  /* DI */
   COSTS_N_INSNS (5)}, /* other */
  0,                   /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26), /* HI */
   COSTS_N_INSNS (42), /* SI */
   COSTS_N_INSNS (74), /* DI */
   COSTS_N_INSNS (74)},/* other */
  COSTS_N_INSNS (1),   /* cost of movsx */
  COSTS_N_INSNS (1),   /* cost of movzx */
  8,                   /* "large" insn */
  4,                   /* cost for loading QImode using movzbl */
  {3, 4, 3},           /* cost of loading integer registers
                          in QImode, HImode and SImode.
                          Relative to reg-reg move (2).  */
  {3, 4, 3},           /* cost of storing integer registers */
  4,                   /* cost of reg,reg fld/fst */
  {4, 4, 12},          /* cost of loading fp registers
                          in SFmode, DFmode and XFmode */
  {6, 6, 8},           /* cost of storing fp registers
                          in SFmode, DFmode and XFmode */
  2,                   /* cost of moving MMX register */
  {3, 3},              /* cost of loading MMX registers
                          in SImode and DImode */
  {4, 4},              /* cost of storing MMX registers
                          in SImode and DImode */
  2,                   /* cost of moving SSE register */
  {4, 3, 6},           /* cost of loading SSE registers
                          in SImode, DImode and TImode */
  {4, 4, 5},           /* cost of storing SSE registers
                          in SImode, DImode and TImode */
  5,                   /* MMX or SSE register to integer */
  64,                  /* size of l1 cache.  */
  512,                 /* size of l2 cache.  */
  64,                  /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                 /* number of parallel prefetches */
  3,                   /* Branch cost */
  COSTS_N_INSNS (4),   /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),   /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),   /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),  /* cost of FSQRT instruction.  */
  /* K8 has optimized REP instruction for medium sized blocks, but for very
     small blocks it is better to use loop.  For large blocks, libcall can
     do nontemporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                   /* scalar_stmt_cost.  */
  2,                   /* scalar load_cost.  */
  2,                   /* scalar_store_cost.  */
  5,                   /* vec_stmt_cost.  */
  0,                   /* vec_to_scalar_cost.  */
  2,                   /* scalar_to_vec_cost.  */
  2,                   /* vec_align_load_cost.  */
  3,                   /* vec_unalign_load_cost.  */
  3,                   /* vec_store_cost.  */
  3,                   /* cond_taken_branch_cost.  */
  2,                   /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs amdfam10_cost = {
  COSTS_N_INSNS (1),   /* cost of an add instruction */
  COSTS_N_INSNS (2),   /* cost of a lea instruction */
  COSTS_N_INSNS (1),   /* variable shift costs */
  COSTS_N_INSNS (1),   /* constant shift costs */
  {COSTS_N_INSNS (3),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (3),  /* SI */
   COSTS_N_INSNS (4),  /* DI */
   COSTS_N_INSNS (5)}, /* other */
  0,                   /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35), /* HI */
   COSTS_N_INSNS (51), /* SI */
   COSTS_N_INSNS (83), /* DI */
   COSTS_N_INSNS (83)},/* other */
  COSTS_N_INSNS (1),   /* cost of movsx */
  COSTS_N_INSNS (1),   /* cost of movzx */
  8,                   /* "large" insn */
  4,                   /* cost for loading QImode using movzbl */
  {3, 4, 3},           /* cost of loading integer registers
                          in QImode, HImode and SImode.
                          Relative to reg-reg move (2).  */
  {3, 4, 3},           /* cost of storing integer registers */
  4,                   /* cost of reg,reg fld/fst */
  {4, 4, 12},          /* cost of loading fp registers
                          in SFmode, DFmode and XFmode */
  {6, 6, 8},           /* cost of storing fp registers
                          in SFmode, DFmode and XFmode */
  2,                   /* cost of moving MMX register */
  {3, 3},              /* cost of loading MMX registers
                          in SImode and DImode */
  {4, 4},              /* cost of storing MMX registers
                          in SImode and DImode */
  2,                   /* cost of moving SSE register */
  {4, 4, 3},           /* cost of loading SSE registers
                          in SImode, DImode and TImode */
  {4, 4, 5},           /* cost of storing SSE registers
                          in SImode, DImode and TImode */
  3,                   /* MMX or SSE register to integer */
                       /* On K8:
                           MOVD reg64, xmmreg Double FSTORE 4
                           MOVD reg32, xmmreg Double FSTORE 4
                          On AMDFAM10:
                           MOVD reg64, xmmreg Double FADD 3
                                                      1/1  1/1
                           MOVD reg32, xmmreg Double FADD 3
                                                      1/1  1/1 */
  64,                  /* size of l1 cache.  */
  512,                 /* size of l2 cache.  */
  64,                  /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                 /* number of parallel prefetches */
  2,                   /* Branch cost */
  COSTS_N_INSNS (4),   /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),   /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),   /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),  /* cost of FSQRT instruction.  */

  /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall can
     do nontemporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                   /* scalar_stmt_cost.  */
  2,                   /* scalar load_cost.  */
  2,                   /* scalar_store_cost.  */
  6,                   /* vec_stmt_cost.  */
  0,                   /* vec_to_scalar_cost.  */
  2,                   /* scalar_to_vec_cost.  */
  2,                   /* vec_align_load_cost.  */
  2,                   /* vec_unalign_load_cost.  */
  2,                   /* vec_store_cost.  */
  2,                   /* cond_taken_branch_cost.  */
  1,                   /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs bdver1_cost = {
  COSTS_N_INSNS (1),   /* cost of an add instruction */
  COSTS_N_INSNS (1),   /* cost of a lea instruction */
  COSTS_N_INSNS (1),   /* variable shift costs */
  COSTS_N_INSNS (1),   /* constant shift costs */
  {COSTS_N_INSNS (4),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (4),  /* SI */
   COSTS_N_INSNS (6),  /* DI */
   COSTS_N_INSNS (6)}, /* other */
  0,                   /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35), /* HI */
   COSTS_N_INSNS (51), /* SI */
   COSTS_N_INSNS (83), /* DI */
   COSTS_N_INSNS (83)},/* other */
  COSTS_N_INSNS (1),   /* cost of movsx */
  COSTS_N_INSNS (1),   /* cost of movzx */
  8,                   /* "large" insn */
  4,                   /* cost for loading QImode using movzbl */
  {5, 5, 4},           /* cost of loading integer registers
                          in QImode, HImode and SImode.
                          Relative to reg-reg move (2).  */
  {4, 4, 4},           /* cost of storing integer registers */
  2,                   /* cost of reg,reg fld/fst */
  {5, 5, 12},          /* cost of loading fp registers
                          in SFmode, DFmode and XFmode */
  {4, 4, 8},           /* cost of storing fp registers
                          in SFmode, DFmode and XFmode */
  2,                   /* cost of moving MMX register */
  {4, 4},              /* cost of loading MMX registers
                          in SImode and DImode */
  {4, 4},              /* cost of storing MMX registers
                          in SImode and DImode */
  2,                   /* cost of moving SSE register */
  {4, 4, 4},           /* cost of loading SSE registers
                          in SImode, DImode and TImode */
  {4, 4, 4},           /* cost of storing SSE registers
                          in SImode, DImode and TImode */
  2,                   /* MMX or SSE register to integer */
                       /* On K8:
                           MOVD reg64, xmmreg Double FSTORE 4
                           MOVD reg32, xmmreg Double FSTORE 4
                          On AMDFAM10:
                           MOVD reg64, xmmreg Double FADD 3
                                                      1/1  1/1
                           MOVD reg32, xmmreg Double FADD 3
                                                      1/1  1/1 */
  16,                  /* size of l1 cache.  */
  2048,                /* size of l2 cache.  */
  64,                  /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                 /* number of parallel prefetches */
  2,                   /* Branch cost */
  COSTS_N_INSNS (6),   /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),   /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),   /* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),  /* cost of FSQRT instruction.  */

  /* BDVER1 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall
     can do nontemporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  6,                   /* scalar_stmt_cost.  */
  4,                   /* scalar load_cost.  */
  4,                   /* scalar_store_cost.  */
  6,                   /* vec_stmt_cost.  */
  0,                   /* vec_to_scalar_cost.  */
  2,                   /* scalar_to_vec_cost.  */
  4,                   /* vec_align_load_cost.  */
  4,                   /* vec_unalign_load_cost.  */
  4,                   /* vec_store_cost.  */
  2,                   /* cond_taken_branch_cost.  */
  1,                   /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs btver1_cost = {
  COSTS_N_INSNS (1),   /* cost of an add instruction */
  COSTS_N_INSNS (2),   /* cost of a lea instruction */
  COSTS_N_INSNS (1),   /* variable shift costs */
  COSTS_N_INSNS (1),   /* constant shift costs */
  {COSTS_N_INSNS (3),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (3),  /* SI */
   COSTS_N_INSNS (4),  /* DI */
   COSTS_N_INSNS (5)}, /* other */
  0,                   /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35), /* HI */
   COSTS_N_INSNS (51), /* SI */
   COSTS_N_INSNS (83), /* DI */
   COSTS_N_INSNS (83)},/* other */
  COSTS_N_INSNS (1),   /* cost of movsx */
  COSTS_N_INSNS (1),   /* cost of movzx */
  8,                   /* "large" insn */
  4,                   /* cost for loading QImode using movzbl */
  {3, 4, 3},           /* cost of loading integer registers
                          in QImode, HImode and SImode.
                          Relative to reg-reg move (2).  */
  {3, 4, 3},           /* cost of storing integer registers */
  4,                   /* cost of reg,reg fld/fst */
  {4, 4, 12},          /* cost of loading fp registers
                          in SFmode, DFmode and XFmode */
  {6, 6, 8},           /* cost of storing fp registers
                          in SFmode, DFmode and XFmode */
  2,                   /* cost of moving MMX register */
  {3, 3},              /* cost of loading MMX registers
                          in SImode and DImode */
  {4, 4},              /* cost of storing MMX registers
                          in SImode and DImode */
  2,                   /* cost of moving SSE register */
  {4, 4, 3},           /* cost of loading SSE registers
                          in SImode, DImode and TImode */
  {4, 4, 5},           /* cost of storing SSE registers
                          in SImode, DImode and TImode */
  3,                   /* MMX or SSE register to integer */
                       /* On K8:
                           MOVD reg64, xmmreg Double FSTORE 4
                           MOVD reg32, xmmreg Double FSTORE 4
                          On AMDFAM10:
                           MOVD reg64, xmmreg Double FADD 3
                                                      1/1  1/1
                           MOVD reg32, xmmreg Double FADD 3
                                                      1/1  1/1 */
  32,                  /* size of l1 cache.  */
  512,                 /* size of l2 cache.  */
  64,                  /* size of prefetch block */
  100,                 /* number of parallel prefetches */
  2,                   /* Branch cost */
  COSTS_N_INSNS (4),   /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),   /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),   /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),  /* cost of FSQRT instruction.  */

  /* BTVER1 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall can
     do nontemporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                   /* scalar_stmt_cost.  */
  2,                   /* scalar load_cost.  */
  2,                   /* scalar_store_cost.  */
  6,                   /* vec_stmt_cost.  */
  0,                   /* vec_to_scalar_cost.  */
  2,                   /* scalar_to_vec_cost.  */
  2,                   /* vec_align_load_cost.  */
  2,                   /* vec_unalign_load_cost.  */
  2,                   /* vec_store_cost.  */
  2,                   /* cond_taken_branch_cost.  */
  1,                   /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs pentium4_cost = {
  COSTS_N_INSNS (1),   /* cost of an add instruction */
  COSTS_N_INSNS (3),   /* cost of a lea instruction */
  COSTS_N_INSNS (4),   /* variable shift costs */
  COSTS_N_INSNS (4),   /* constant shift costs */
  {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
   COSTS_N_INSNS (15), /* HI */
   COSTS_N_INSNS (15), /* SI */
   COSTS_N_INSNS (15), /* DI */
   COSTS_N_INSNS (15)},/* other */
  0,                   /* cost of multiply per each bit set */
  {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (56), /* HI */
   COSTS_N_INSNS (56), /* SI */
   COSTS_N_INSNS (56), /* DI */
   COSTS_N_INSNS (56)},/* other */
  COSTS_N_INSNS (1),   /* cost of movsx */
  COSTS_N_INSNS (1),   /* cost of movzx */
  16,                  /* "large" insn */
  2,                   /* cost for loading QImode using movzbl */
  {4, 5, 4},           /* cost of loading integer registers
                          in QImode, HImode and SImode.
                          Relative to reg-reg move (2).  */
  {2, 3, 2},           /* cost of storing integer registers */
  2,                   /* cost of reg,reg fld/fst */
  {2, 2, 6},           /* cost of loading fp registers
                          in SFmode, DFmode and XFmode */
  {4, 4, 6},           /* cost of storing fp registers
                          in SFmode, DFmode and XFmode */
  2,                   /* cost of moving MMX register */
  {2, 2},              /* cost of loading MMX registers
                          in SImode and DImode */
  {2, 2},              /* cost of storing MMX registers
                          in SImode and DImode */
  12,                  /* cost of moving SSE register */
  {12, 12, 12},        /* cost of loading SSE registers
                          in SImode, DImode and TImode */
  {2, 2, 8},           /* cost of storing SSE registers
                          in SImode, DImode and TImode */
  10,                  /* MMX or SSE register to integer */
  8,                   /* size of l1 cache.  */
  256,                 /* size of l2 cache.  */
  64,                  /* size of prefetch block */
  6,                   /* number of parallel prefetches */
  2,                   /* Branch cost */
  COSTS_N_INSNS (5),   /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (7),   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (43),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),   /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),   /* cost of FCHS instruction.  */
  COSTS_N_INSNS (43),  /* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
              {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                   /* scalar_stmt_cost.  */
  1,                   /* scalar load_cost.  */
  1,                   /* scalar_store_cost.  */
  1,                   /* vec_stmt_cost.  */
  1,                   /* vec_to_scalar_cost.  */
  1,                   /* scalar_to_vec_cost.  */
  1,                   /* vec_align_load_cost.  */
  2,                   /* vec_unalign_load_cost.  */
  1,                   /* vec_store_cost.  */
  3,                   /* cond_taken_branch_cost.  */
  1,                   /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs nocona_cost = {
  COSTS_N_INSNS (1),   /* cost of an add instruction */
  COSTS_N_INSNS (1),   /* cost of a lea instruction */
  COSTS_N_INSNS (1),   /* variable shift costs */
  COSTS_N_INSNS (1),   /* constant shift costs */
  {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
   COSTS_N_INSNS (10), /* HI */
   COSTS_N_INSNS (10), /* SI */
   COSTS_N_INSNS (10), /* DI */
   COSTS_N_INSNS (10)},/* other */
  0,                   /* cost of multiply per each bit set */
  {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (66), /* HI */
   COSTS_N_INSNS (66), /* SI */
   COSTS_N_INSNS (66), /* DI */
   COSTS_N_INSNS (66)},/* other */
  COSTS_N_INSNS (1),   /* cost of movsx */
  COSTS_N_INSNS (1),   /* cost of movzx */
  16,                  /* "large" insn */
  17,                  /* MOVE_RATIO */
  4,                   /* cost for loading QImode using movzbl */
  {4, 4, 4},           /* cost of loading integer registers
                          in QImode, HImode and SImode.
                          Relative to reg-reg move (2).  */
  {4, 4, 4},           /* cost of storing integer registers */
  3,                   /* cost of reg,reg fld/fst */
  {12, 12, 12},        /* cost of loading fp registers
                          in SFmode, DFmode and XFmode */
  {4, 4, 4},           /* cost of storing fp registers
                          in SFmode, DFmode and XFmode */
  6,                   /* cost of moving MMX register */
  {12, 12},            /* cost of loading MMX registers
                          in SImode and DImode */
  {12, 12},            /* cost of storing MMX registers
                          in SImode and DImode */
  6,                   /* cost of moving SSE register */
  {12, 12, 12},        /* cost of loading SSE registers
                          in SImode, DImode and TImode */
  {12, 12, 12},        /* cost of storing SSE registers
                          in SImode, DImode and TImode */
  8,                   /* MMX or SSE register to integer */
  8,                   /* size of l1 cache.  */
  1024,                /* size of l2 cache.  */
  128,                 /* size of prefetch block */
  8,                   /* number of parallel prefetches */
  1,                   /* Branch cost */
  COSTS_N_INSNS (6),   /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (40),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),   /* cost of FABS instruction.  */
  COSTS_N_INSNS (3),   /* cost of FCHS instruction.  */
  COSTS_N_INSNS (44),  /* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
              {100000, unrolled_loop}, {-1, libcall}}}},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
              {-1, libcall}}},
   {libcall, {{24, loop}, {64, unrolled_loop},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,                   /* scalar_stmt_cost.  */
  1,                   /* scalar load_cost.  */
  1,                   /* scalar_store_cost.  */
  1,                   /* vec_stmt_cost.  */
  1,                   /* vec_to_scalar_cost.  */
  1,                   /* scalar_to_vec_cost.  */
  1,                   /* vec_align_load_cost.  */
  2,                   /* vec_unalign_load_cost.  */
  1,                   /* vec_store_cost.  */
  3,                   /* cond_taken_branch_cost.  */
  1,                   /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs atom_cost = {
  COSTS_N_INSNS (1),     /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
  COSTS_N_INSNS (1),     /* variable shift costs */
  COSTS_N_INSNS (1),     /* constant shift costs */
  {COSTS_N_INSNS (3),    /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),    /* HI */
   COSTS_N_INSNS (3),    /* SI */
   COSTS_N_INSNS (4),    /* DI */
   COSTS_N_INSNS (2)},   /* other */
  0,                     /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),   /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),   /* HI */
   COSTS_N_INSNS (42),   /* SI */
   COSTS_N_INSNS (74),   /* DI */
   COSTS_N_INSNS (74)},  /* other */
  COSTS_N_INSNS (1),     /* cost of movsx */
  COSTS_N_INSNS (1),     /* cost of movzx */
  8,                     /* "large" insn */
  17,                    /* MOVE_RATIO */
  2,                     /* cost for loading QImode using movzbl */
  {4, 4, 4},             /* cost of loading integer registers
                            in QImode, HImode and SImode.
                            Relative to reg-reg move (2).  */
  {4, 4, 4},             /* cost of storing integer registers */
  4,                     /* cost of reg,reg fld/fst */
  {12, 12, 12},          /* cost of loading fp registers
                            in SFmode, DFmode and XFmode */
  {6, 6, 8},             /* cost of storing fp registers
                            in SFmode, DFmode and XFmode */
  2,                     /* cost of moving MMX register */
  {8, 8},                /* cost of loading MMX registers
                            in SImode and DImode */
  {8, 8},                /* cost of storing MMX registers
                            in SImode and DImode */
  2,                     /* cost of moving SSE register */
  {8, 8, 8},             /* cost of loading SSE registers
                            in SImode, DImode and TImode */
  {8, 8, 8},             /* cost of storing SSE registers
                            in SImode, DImode and TImode */
  5,                     /* MMX or SSE register to integer */
  32,                    /* size of l1 cache.  */
  256,                   /* size of l2 cache.  */
  64,                    /* size of prefetch block */
  6,                     /* number of parallel prefetches */
  3,                     /* Branch cost */
  COSTS_N_INSNS (8),     /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),     /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),    /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),     /* cost of FABS instruction.  */
  COSTS_N_INSNS (8),     /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),    /* cost of FSQRT instruction.  */
  {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {64, rep_prefix_4_byte},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {15, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{24, loop}, {32, unrolled_loop},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,                     /* scalar_stmt_cost.  */
  1,                     /* scalar load_cost.  */
  1,                     /* scalar_store_cost.  */
  1,                     /* vec_stmt_cost.  */
  1,                     /* vec_to_scalar_cost.  */
  1,                     /* scalar_to_vec_cost.  */
  1,                     /* vec_align_load_cost.  */
  2,                     /* vec_unalign_load_cost.  */
  1,                     /* vec_store_cost.  */
  3,                     /* cond_taken_branch_cost.  */
  1,                     /* cond_not_taken_branch_cost.  */
};
1639 /* Generic64 should produce code tuned for Nocona and K8. */
1641 struct processor_costs generic64_cost = {
1642 COSTS_N_INSNS (1), /* cost of an add instruction */
1643 /* On all chips taken into consideration lea is 2 cycles and more. With
1644 this cost however our current implementation of synth_mult results in
1645 use of unnecessary temporary registers causing regression on several
1646 SPECfp benchmarks. */
1647 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1648 COSTS_N_INSNS (1), /* variable shift costs */
1649 COSTS_N_INSNS (1), /* constant shift costs */
1650 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1651 COSTS_N_INSNS (4), /* HI */
1652 COSTS_N_INSNS (3), /* SI */
1653 COSTS_N_INSNS (4), /* DI */
1654 COSTS_N_INSNS (2)}, /* other */
1655 0, /* cost of multiply per each bit set */
1656 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1657 COSTS_N_INSNS (26), /* HI */
1658 COSTS_N_INSNS (42), /* SI */
1659 COSTS_N_INSNS (74), /* DI */
1660 COSTS_N_INSNS (74)}, /* other */
1661 COSTS_N_INSNS (1), /* cost of movsx */
1662 COSTS_N_INSNS (1), /* cost of movzx */
1663 8, /* "large" insn */
1664 17, /* MOVE_RATIO */
1665 4, /* cost for loading QImode using movzbl */
1666 {4, 4, 4}, /* cost of loading integer registers
1667 in QImode, HImode and SImode.
1668 Relative to reg-reg move (2). */
1669 {4, 4, 4}, /* cost of storing integer registers */
1670 4, /* cost of reg,reg fld/fst */
1671 {12, 12, 12}, /* cost of loading fp registers
1672 in SFmode, DFmode and XFmode */
1673 {6, 6, 8}, /* cost of storing fp registers
1674 in SFmode, DFmode and XFmode */
1675 2, /* cost of moving MMX register */
1676 {8, 8}, /* cost of loading MMX registers
1677 in SImode and DImode */
1678 {8, 8}, /* cost of storing MMX registers
1679 in SImode and DImode */
1680 2, /* cost of moving SSE register */
1681 {8, 8, 8}, /* cost of loading SSE registers
1682 in SImode, DImode and TImode */
1683 {8, 8, 8}, /* cost of storing SSE registers
1684 in SImode, DImode and TImode */
1685 5, /* MMX or SSE register to integer */
1686 32, /* size of l1 cache. */
1687 512, /* size of l2 cache. */
1688 64, /* size of prefetch block */
1689 6, /* number of parallel prefetches */
1690 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1691 value is increased to the perhaps more appropriate value of 5. */
1692 3, /* Branch cost */
1693 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1694 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1695 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1696 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1697 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1698 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1699 {DUMMY_STRINGOP_ALGS,
1700 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1701 {DUMMY_STRINGOP_ALGS,
1702 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1703 1, /* scalar_stmt_cost. */
1704 1, /* scalar load_cost. */
1705 1, /* scalar_store_cost. */
1706 1, /* vec_stmt_cost. */
1707 1, /* vec_to_scalar_cost. */
1708 1, /* scalar_to_vec_cost. */
1709 1, /* vec_align_load_cost. */
1710 2, /* vec_unalign_load_cost. */
1711 1, /* vec_store_cost. */
1712 3, /* cond_taken_branch_cost. */
1713 1, /* cond_not_taken_branch_cost. */
1714 };
1716 /* Generic32 should produce code tuned for PPro, Pentium4, Nocona,
1717 Athlon and K8. */
1718 static const
1719 struct processor_costs generic32_cost = {
1720 COSTS_N_INSNS (1), /* cost of an add instruction */
1721 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1722 COSTS_N_INSNS (1), /* variable shift costs */
1723 COSTS_N_INSNS (1), /* constant shift costs */
1724 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1725 COSTS_N_INSNS (4), /* HI */
1726 COSTS_N_INSNS (3), /* SI */
1727 COSTS_N_INSNS (4), /* DI */
1728 COSTS_N_INSNS (2)}, /* other */
1729 0, /* cost of multiply per each bit set */
1730 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1731 COSTS_N_INSNS (26), /* HI */
1732 COSTS_N_INSNS (42), /* SI */
1733 COSTS_N_INSNS (74), /* DI */
1734 COSTS_N_INSNS (74)}, /* other */
1735 COSTS_N_INSNS (1), /* cost of movsx */
1736 COSTS_N_INSNS (1), /* cost of movzx */
1737 8, /* "large" insn */
1738 17, /* MOVE_RATIO */
1739 4, /* cost for loading QImode using movzbl */
1740 {4, 4, 4}, /* cost of loading integer registers
1741 in QImode, HImode and SImode.
1742 Relative to reg-reg move (2). */
1743 {4, 4, 4}, /* cost of storing integer registers */
1744 4, /* cost of reg,reg fld/fst */
1745 {12, 12, 12}, /* cost of loading fp registers
1746 in SFmode, DFmode and XFmode */
1747 {6, 6, 8}, /* cost of storing fp registers
1748 in SFmode, DFmode and XFmode */
1749 2, /* cost of moving MMX register */
1750 {8, 8}, /* cost of loading MMX registers
1751 in SImode and DImode */
1752 {8, 8}, /* cost of storing MMX registers
1753 in SImode and DImode */
1754 2, /* cost of moving SSE register */
1755 {8, 8, 8}, /* cost of loading SSE registers
1756 in SImode, DImode and TImode */
1757 {8, 8, 8}, /* cost of storing SSE registers
1758 in SImode, DImode and TImode */
1759 5, /* MMX or SSE register to integer */
1760 32, /* size of l1 cache. */
1761 256, /* size of l2 cache. */
1762 64, /* size of prefetch block */
1763 6, /* number of parallel prefetches */
1764 3, /* Branch cost */
1765 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1766 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1767 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1768 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1769 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1770 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1771 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1772 DUMMY_STRINGOP_ALGS},
1773 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1774 DUMMY_STRINGOP_ALGS},
1775 1, /* scalar_stmt_cost. */
1776 1, /* scalar load_cost. */
1777 1, /* scalar_store_cost. */
1778 1, /* vec_stmt_cost. */
1779 1, /* vec_to_scalar_cost. */
1780 1, /* scalar_to_vec_cost. */
1781 1, /* vec_align_load_cost. */
1782 2, /* vec_unalign_load_cost. */
1783 1, /* vec_store_cost. */
1784 3, /* cond_taken_branch_cost. */
1785 1, /* cond_not_taken_branch_cost. */
1786 };
1788 const struct processor_costs *ix86_cost = &pentium_cost;
1790 /* Processor feature/optimization bitmasks. */
1791 #define m_386 (1<<PROCESSOR_I386)
1792 #define m_486 (1<<PROCESSOR_I486)
1793 #define m_PENT (1<<PROCESSOR_PENTIUM)
1794 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1795 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1796 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1797 #define m_CORE2_32 (1<<PROCESSOR_CORE2_32)
1798 #define m_CORE2_64 (1<<PROCESSOR_CORE2_64)
1799 #define m_COREI7_32 (1<<PROCESSOR_COREI7_32)
1800 #define m_COREI7_64 (1<<PROCESSOR_COREI7_64)
1801 #define m_COREI7 (m_COREI7_32 | m_COREI7_64)
1802 #define m_CORE2I7_32 (m_CORE2_32 | m_COREI7_32)
1803 #define m_CORE2I7_64 (m_CORE2_64 | m_COREI7_64)
1804 #define m_CORE2I7 (m_CORE2I7_32 | m_CORE2I7_64)
1805 #define m_ATOM (1<<PROCESSOR_ATOM)
1807 #define m_GEODE (1<<PROCESSOR_GEODE)
1808 #define m_K6 (1<<PROCESSOR_K6)
1809 #define m_K6_GEODE (m_K6 | m_GEODE)
1810 #define m_K8 (1<<PROCESSOR_K8)
1811 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1812 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1813 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1814 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
1815 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
1816 #define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10 | m_BDVER1 | m_BTVER1)
1818 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1819 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1821 /* Generic instruction choice should be common subset of supported CPUs
1822 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1823 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1825 /* Feature tests against the various tunings. */
1826 unsigned char ix86_tune_features[X86_TUNE_LAST];
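/* Editor's note (illustrative; not in the original source): a sketch of how
   this array is expected to be filled in -- the real code lives in
   ix86_option_override_internal later in this file:

     ix86_tune_mask = 1u << ix86_tune;
     for (i = 0; i < X86_TUNE_LAST; ++i)
       ix86_tune_features[i]
	 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);

   i.e. each X86_TUNE_* entry becomes 1 exactly when the bit for the CPU
   selected by -mtune is set in the table below.  */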
1828 /* Feature tests against the various tunings used to create ix86_tune_features
1829 based on the processor mask. */
1830 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
1831 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1832 negatively, so enabling it for Generic64 seems like a good code size
1833 tradeoff. We can't enable it for 32bit generic because it does not
1834 work well with PPro base chips. */
1835 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2I7_64 | m_GENERIC64,
1837 /* X86_TUNE_PUSH_MEMORY */
1838 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1839 | m_NOCONA | m_CORE2I7 | m_GENERIC,
1841 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1842 m_486 | m_PENT,
1844 /* X86_TUNE_UNROLL_STRLEN */
1845 m_486 | m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_K6
1846 | m_CORE2I7 | m_GENERIC,
1848 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1849 m_ATOM | m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1850 | m_CORE2I7 | m_GENERIC,
1852 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1853 on simulation result. But after P4 was made, no performance benefit
1854 was observed with branch hints. It also increases the code size.
1855 As a result, icc never generates branch hints. */
1856 0,
1858 /* X86_TUNE_DOUBLE_WITH_ADD */
1859 ~m_386,
1861 /* X86_TUNE_USE_SAHF */
1862 m_ATOM | m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER1 | m_BTVER1
1863 | m_PENT4 | m_NOCONA | m_CORE2I7 | m_GENERIC,
1865 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1866 partial dependencies. */
1867 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA
1868 | m_CORE2I7 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1870 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1871 register stalls on the Generic32 compilation setting as well. However,
1872 in the current implementation the partial register stalls are not eliminated
1873 very well - they can be introduced via subregs synthesized by combine
1874 and can happen in caller/callee saving sequences. Because this option
1875 pays back little on PPro based chips and is in conflict with partial reg
1876 dependencies used by Athlon/P4 based chips, it is better to leave it off
1877 for generic32 for now. */
1878 m_PPRO,
1880 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1881 m_CORE2I7 | m_GENERIC,
1883 /* X86_TUNE_USE_HIMODE_FIOP */
1884 m_386 | m_486 | m_K6_GEODE,
1886 /* X86_TUNE_USE_SIMODE_FIOP */
1887 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_ATOM | m_CORE2I7 | m_GENERIC),
1889 /* X86_TUNE_USE_MOV0 */
1890 m_K6,
1892 /* X86_TUNE_USE_CLTD */
1893 ~(m_PENT | m_ATOM | m_K6 | m_CORE2I7 | m_GENERIC),
1895 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1896 m_PENT4,
1898 /* X86_TUNE_SPLIT_LONG_MOVES */
1899 m_PPRO,
1901 /* X86_TUNE_READ_MODIFY_WRITE */
1902 ~m_PENT,
1904 /* X86_TUNE_READ_MODIFY */
1905 ~(m_PENT | m_PPRO),
1907 /* X86_TUNE_PROMOTE_QIMODE */
1908 m_K6_GEODE | m_PENT | m_ATOM | m_386 | m_486 | m_AMD_MULTIPLE
1909 | m_CORE2I7 | m_GENERIC /* | m_PENT4 ? */,
1911 /* X86_TUNE_FAST_PREFIX */
1912 ~(m_PENT | m_486 | m_386),
1914 /* X86_TUNE_SINGLE_STRINGOP */
1915 m_386 | m_PENT4 | m_NOCONA,
1917 /* X86_TUNE_QIMODE_MATH */
1918 ~0,
1920 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1921 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1922 might be considered for Generic32 if our scheme for avoiding partial
1923 stalls was more effective. */
1924 ~m_PPRO,
1926 /* X86_TUNE_PROMOTE_QI_REGS */
1927 0,
1929 /* X86_TUNE_PROMOTE_HI_REGS */
1930 m_PPRO,
1932 /* X86_TUNE_SINGLE_POP: Enable if single pop insn is preferred
1933 over esp addition. */
1934 m_386 | m_486 | m_PENT | m_PPRO,
1936 /* X86_TUNE_DOUBLE_POP: Enable if double pop insn is preferred
1937 over esp addition. */
1938 m_PENT,
1940 /* X86_TUNE_SINGLE_PUSH: Enable if single push insn is preferred
1941 over esp subtraction. */
1942 m_386 | m_486 | m_PENT | m_K6_GEODE,
1944 /* X86_TUNE_DOUBLE_PUSH. Enable if double push insn is preferred
1945 over esp subtraction. */
1946 m_PENT | m_K6_GEODE,
1948 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1949 for DFmode copies */
1950 ~(m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2I7
1951 | m_GENERIC | m_GEODE),
1953 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1954 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2I7 | m_GENERIC,
1956 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1957 conflict here in between PPro/Pentium4 based chips that treat 128bit
1958 SSE registers as single units versus K8 based chips that divide SSE
1959 registers to two 64bit halves. This knob promotes all store destinations
1960 to be 128bit to allow register renaming on 128bit SSE units, but usually
1961 results in one extra microop on 64bit SSE units. Experimental results
1962 show that disabling this option on P4 brings over 20% SPECfp regression,
1963 while enabling it on K8 brings roughly 2.4% regression that can be partly
1964 masked by careful scheduling of moves. */
1965 m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2I7 | m_GENERIC
1966 | m_AMDFAM10 | m_BDVER1,
1968 /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
1969 m_AMDFAM10 | m_BDVER1 | m_BTVER1 | m_COREI7,
1971 /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
1972 m_BDVER1 | m_COREI7,
1974 /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
1975 0,
1977 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1978 are resolved on SSE register parts instead of whole registers, so we may
1979 maintain just the lower part of scalar values in the proper format, leaving the
1980 upper part undefined. */
1981 m_ATHLON_K8,
1983 /* X86_TUNE_SSE_TYPELESS_STORES */
1984 m_AMD_MULTIPLE,
1986 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1987 m_PPRO | m_PENT4 | m_NOCONA,
1989 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1990 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2I7 | m_GENERIC,
1992 /* X86_TUNE_PROLOGUE_USING_MOVE */
1993 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2I7 | m_GENERIC,
1995 /* X86_TUNE_EPILOGUE_USING_MOVE */
1996 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2I7 | m_GENERIC,
1998 /* X86_TUNE_SHIFT1 */
1999 ~m_486,
2001 /* X86_TUNE_USE_FFREEP */
2002 m_AMD_MULTIPLE,
2004 /* X86_TUNE_INTER_UNIT_MOVES */
2005 ~(m_AMD_MULTIPLE | m_GENERIC),
2007 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
2008 ~(m_AMDFAM10 | m_BDVER1),
2010 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
2011 than 4 branch instructions in the 16 byte window. */
2012 m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2I7
2013 | m_GENERIC,
2015 /* X86_TUNE_SCHEDULE */
2016 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_ATOM | m_CORE2I7
2017 | m_GENERIC,
2019 /* X86_TUNE_USE_BT */
2020 m_AMD_MULTIPLE | m_ATOM | m_CORE2I7 | m_GENERIC,
2022 /* X86_TUNE_USE_INCDEC */
2023 ~(m_PENT4 | m_NOCONA | m_CORE2I7 | m_GENERIC | m_ATOM),
2025 /* X86_TUNE_PAD_RETURNS */
2026 m_AMD_MULTIPLE | m_CORE2I7 | m_GENERIC,
2028 /* X86_TUNE_PAD_SHORT_FUNCTION: Pad short function. */
2029 m_ATOM,
2031 /* X86_TUNE_EXT_80387_CONSTANTS */
2032 m_K6_GEODE | m_ATHLON_K8 | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO
2033 | m_CORE2I7 | m_GENERIC,
2035 /* X86_TUNE_SHORTEN_X87_SSE */
2036 ~m_K8,
2038 /* X86_TUNE_AVOID_VECTOR_DECODE */
2039 m_K8 | m_CORE2I7_64 | m_GENERIC64,
2041 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
2042 and SImode multiply, but 386 and 486 do HImode multiply faster. */
2043 ~(m_386 | m_486),
2045 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
2046 vector path on AMD machines. */
2047 m_K8 | m_CORE2I7_64 | m_GENERIC64 | m_AMDFAM10 | m_BDVER1 | m_BTVER1,
2049 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
2050 machines. */
2051 m_K8 | m_CORE2I7_64 | m_GENERIC64 | m_AMDFAM10 | m_BDVER1 | m_BTVER1,
2053 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
2054 than a MOV. */
2055 m_PENT,
2057 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
2058 but one byte longer. */
2059 m_PENT,
2061 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
2062 operand that cannot be represented using a modRM byte. The XOR
2063 replacement is long decoded, so this split helps here as well. */
2064 m_K6,
2066 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
2067 from FP to FP. */
2068 m_AMDFAM10 | m_CORE2I7 | m_GENERIC,
2070 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
2071 from integer to FP. */
2072 m_AMDFAM10,
2074 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
2075 with a subsequent conditional jump instruction into a single
2076 compare-and-branch uop. */
2077 m_BDVER1,
2079 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
2080 will impact LEA instruction selection. */
2081 m_ATOM,
2083 /* X86_TUNE_VECTORIZE_DOUBLE: Enable double precision vector
2084 instructions. */
2085 ~m_ATOM,
2087 /* X86_TUNE_SOFTWARE_PREFETCHING_BENEFICIAL: Enable software prefetching
2088 at -O3. For the moment, the prefetching seems badly tuned for Intel
2089 chips. */
2090 m_K6_GEODE | m_AMD_MULTIPLE
2091 };
2093 /* Feature tests against the various architecture variations. */
2094 unsigned char ix86_arch_features[X86_ARCH_LAST];
2096 /* Feature tests against the various architecture variations, used to create
2097 ix86_arch_features based on the processor mask. */
2098 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2099 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
2100 ~(m_386 | m_486 | m_PENT | m_K6),
2102 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2103 ~m_386,
2105 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2106 ~(m_386 | m_486),
2108 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2109 ~m_386,
2111 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2112 ~m_386,
2113 };
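/* Editor's note (illustrative; not in the original source):
   ix86_arch_features is assumed to be filled from this table the same way
   as ix86_tune_features, but with the -march selection (1u << ix86_arch)
   as the mask; headers then test it through macros such as TARGET_CMOVE.  */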
2115 static const unsigned int x86_accumulate_outgoing_args
2116 = m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2I7
2117 | m_GENERIC;
2119 static const unsigned int x86_arch_always_fancy_math_387
2120 = m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4
2121 | m_NOCONA | m_CORE2I7 | m_GENERIC;
2123 /* In case the average insn count for single function invocation is
2124 lower than this constant, emit fast (but longer) prologue and
2125 epilogue code. */
2126 #define FAST_PROLOGUE_INSN_COUNT 20
2128 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
2129 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2130 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2131 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2133 /* Array of the smallest class containing reg number REGNO, indexed by
2134 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2136 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2137 {
2138 /* ax, dx, cx, bx */
2139 AREG, DREG, CREG, BREG,
2140 /* si, di, bp, sp */
2141 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2142 /* FP registers */
2143 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2144 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2145 /* arg pointer */
2146 NON_Q_REGS,
2147 /* flags, fpsr, fpcr, frame */
2148 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2149 /* SSE registers */
2150 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2151 SSE_REGS, SSE_REGS,
2152 /* MMX registers */
2153 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2154 MMX_REGS, MMX_REGS,
2155 /* REX registers */
2156 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2157 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2158 /* SSE REX registers */
2159 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2160 SSE_REGS, SSE_REGS,
2161 };
2163 /* The "default" register map used in 32bit mode. */
2165 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2166 {
2167 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2168 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2169 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2170 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2171 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2172 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2173 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2174 };
2176 /* The "default" register map used in 64bit mode. */
2178 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2179 {
2180 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2181 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2182 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2183 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2184 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2185 8, 9, 10, 11, 12, 13, 14, 15, /* extended integer registers */
2186 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2187 };
2189 /* Define the register numbers to be used in Dwarf debugging information.
2190 The SVR4 reference port C compiler uses the following register numbers
2191 in its Dwarf output code:
2192 0 for %eax (gcc regno = 0)
2193 1 for %ecx (gcc regno = 2)
2194 2 for %edx (gcc regno = 1)
2195 3 for %ebx (gcc regno = 3)
2196 4 for %esp (gcc regno = 7)
2197 5 for %ebp (gcc regno = 6)
2198 6 for %esi (gcc regno = 4)
2199 7 for %edi (gcc regno = 5)
2200 The following three DWARF register numbers are never generated by
2201 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2202 believes these numbers have these meanings.
2203 8 for %eip (no gcc equivalent)
2204 9 for %eflags (gcc regno = 17)
2205 10 for %trapno (no gcc equivalent)
2206 It is not at all clear how we should number the FP stack registers
2207 for the x86 architecture. If the version of SDB on x86/svr4 were
2208 a bit less brain dead with respect to floating-point then we would
2209 have a precedent to follow with respect to DWARF register numbers
2210 for x86 FP registers, but the SDB on x86/svr4 is so completely
2211 broken with respect to FP registers that it is hardly worth thinking
2212 of it as something to strive for compatibility with.
2213 The version of x86/svr4 SDB I have at the moment does (partially)
2214 seem to believe that DWARF register number 11 is associated with
2215 the x86 register %st(0), but that's about all. Higher DWARF
2216 register numbers don't seem to be associated with anything in
2217 particular, and even for DWARF regno 11, SDB only seems to under-
2218 stand that it should say that a variable lives in %st(0) (when
2219 asked via an `=' command) if we said it was in DWARF regno 11,
2220 but SDB still prints garbage when asked for the value of the
2221 variable in question (via a `/' command).
2222 (Also note that the labels SDB prints for various FP stack regs
2223 when doing an `x' command are all wrong.)
2224 Note that these problems generally don't affect the native SVR4
2225 C compiler because it doesn't allow the use of -O with -g and
2226 because when it is *not* optimizing, it allocates a memory
2227 location for each floating-point variable, and the memory
2228 location is what gets described in the DWARF AT_location
2229 attribute for the variable in question.
2230 Regardless of the severe mental illness of the x86/svr4 SDB, we
2231 do something sensible here and we use the following DWARF
2232 register numbers. Note that these are all stack-top-relative
2233 numbers.
2234 11 for %st(0) (gcc regno = 8)
2235 12 for %st(1) (gcc regno = 9)
2236 13 for %st(2) (gcc regno = 10)
2237 14 for %st(3) (gcc regno = 11)
2238 15 for %st(4) (gcc regno = 12)
2239 16 for %st(5) (gcc regno = 13)
2240 17 for %st(6) (gcc regno = 14)
2241 18 for %st(7) (gcc regno = 15)
2242 */
2243 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2244 {
2245 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2246 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2247 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2248 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2249 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2250 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2251 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2252 };
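/* Editor's note (illustrative; not in the original source): these maps are
   indexed by gcc register number and yield the debug-format number.  For
   example, %esi is gcc regno 4, and svr4_dbx_register_map[4] == 6, matching
   "6 for %esi (gcc regno = 4)" in the comment above; the general registers
   agree between the two 32-bit maps, while the fp-stack entries differ
   (12..19 versus the stack-top-relative 11..18).  */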
2254 /* Define parameter passing and return registers. */
2256 static int const x86_64_int_parameter_registers[6] =
2257 {
2258 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2259 };
2261 static int const x86_64_ms_abi_int_parameter_registers[4] =
2262 {
2263 CX_REG, DX_REG, R8_REG, R9_REG
2264 };
2266 static int const x86_64_int_return_registers[4] =
2267 {
2268 AX_REG, DX_REG, DI_REG, SI_REG
2269 };
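/* Editor's note (illustrative; not in the original source): per the x86-64
   psABI, a value classified as two INTEGER eightbytes is returned in the
   first two of these registers, i.e. %rax:%rdx; the extra slots allow up to
   four integer pieces to be handed back.  See the ABI classification and
   ix86_function_value code below for the logic that consumes these tables.  */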
2271 /* Define the structure for the machine field in struct function. */
2273 struct GTY(()) stack_local_entry {
2274 unsigned short mode;
2275 unsigned short n;
2276 rtx rtl;
2277 struct stack_local_entry *next;
2278 };
2280 /* Structure describing stack frame layout.
2281 Stack grows downward:
2283 [arguments]
2284 <- ARG_POINTER
2285 saved pc
2287 saved static chain if ix86_static_chain_on_stack
2289 saved frame pointer if frame_pointer_needed
2290 <- HARD_FRAME_POINTER
2291 [saved regs]
2292 <- reg_save_offset
2293 [padding0]
2295 [saved SSE regs]
2296 <- sse_regs_save_offset
2297 [padding1] |
2298 | <- FRAME_POINTER
2299 [va_arg registers] |
2301 [frame] |
2303 [padding2] | = to_allocate
2304 <- STACK_POINTER
2305 */
2306 struct ix86_frame
2307 {
2308 int nsseregs;
2309 int nregs;
2310 int va_arg_size;
2311 int red_zone_size;
2312 int outgoing_arguments_size;
2313 HOST_WIDE_INT frame;
2315 /* The offsets relative to ARG_POINTER. */
2316 HOST_WIDE_INT frame_pointer_offset;
2317 HOST_WIDE_INT hard_frame_pointer_offset;
2318 HOST_WIDE_INT stack_pointer_offset;
2319 HOST_WIDE_INT hfp_save_offset;
2320 HOST_WIDE_INT reg_save_offset;
2321 HOST_WIDE_INT sse_reg_save_offset;
2323 /* When save_regs_using_mov is set, emit prologue using
2324 move instead of push instructions. */
2325 bool save_regs_using_mov;
2326 };
2328 /* Which unit we are generating floating point math for. */
2329 enum fpmath_unit ix86_fpmath;
2331 /* Which cpu are we scheduling for. */
2332 enum attr_cpu ix86_schedule;
2334 /* Which cpu are we optimizing for. */
2335 enum processor_type ix86_tune;
2337 /* Which instruction set architecture to use. */
2338 enum processor_type ix86_arch;
2340 /* true if sse prefetch instruction is not NOOP. */
2341 int x86_prefetch_sse;
2343 /* -mstackrealign option */
2344 static const char ix86_force_align_arg_pointer_string[]
2345 = "force_align_arg_pointer";
2347 static rtx (*ix86_gen_leave) (void);
2348 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2349 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2350 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2351 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2352 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2353 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2354 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2355 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2356 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2358 /* Preferred alignment for stack boundary in bits. */
2359 unsigned int ix86_preferred_stack_boundary;
2361 /* Alignment for incoming stack boundary in bits specified at
2362 command line. */
2363 static unsigned int ix86_user_incoming_stack_boundary;
2365 /* Default alignment for incoming stack boundary in bits. */
2366 static unsigned int ix86_default_incoming_stack_boundary;
2368 /* Alignment for incoming stack boundary in bits. */
2369 unsigned int ix86_incoming_stack_boundary;
2371 /* Calling abi specific va_list type nodes. */
2372 static GTY(()) tree sysv_va_list_type_node;
2373 static GTY(()) tree ms_va_list_type_node;
2375 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2376 char internal_label_prefix[16];
2377 int internal_label_prefix_len;
2379 /* Fence to use after loop using movnt. */
2380 tree x86_mfence;
2382 /* Register class used for passing given 64bit part of the argument.
2383 These represent classes as documented by the PS ABI, with the exception
2384 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
2385 use SF or DFmode move instead of DImode to avoid reformatting penalties.
2387 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2388 whenever possible (upper half does contain padding). */
2389 enum x86_64_reg_class
2390 {
2391 X86_64_NO_CLASS,
2392 X86_64_INTEGER_CLASS,
2393 X86_64_INTEGERSI_CLASS,
2394 X86_64_SSE_CLASS,
2395 X86_64_SSESF_CLASS,
2396 X86_64_SSEDF_CLASS,
2397 X86_64_SSEUP_CLASS,
2398 X86_64_X87_CLASS,
2399 X86_64_X87UP_CLASS,
2400 X86_64_COMPLEX_X87_CLASS,
2401 X86_64_MEMORY_CLASS
2402 };
2404 #define MAX_CLASSES 4
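/* Editor's note (illustrative; not in the original source): under the
   classification described above, for

     struct s { double d; int i; };

   the first eightbyte (d) gets X86_64_SSEDF_CLASS and the second (i) gets
   X86_64_INTEGERSI_CLASS, so the struct travels in one SSE and one integer
   register.  MAX_CLASSES is 4 because the largest non-MEMORY value spans
   four eightbytes (e.g. a complex long double, X86_64_COMPLEX_X87_CLASS).  */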
2406 /* Table of constants used by fldpi, fldln2, etc.... */
2407 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2408 static bool ext_80387_constants_init = 0;
2411 static struct machine_function * ix86_init_machine_status (void);
2412 static rtx ix86_function_value (const_tree, const_tree, bool);
2413 static bool ix86_function_value_regno_p (const unsigned int);
2414 static unsigned int ix86_function_arg_boundary (enum machine_mode,
2415 const_tree);
2416 static rtx ix86_static_chain (const_tree, bool);
2417 static int ix86_function_regparm (const_tree, const_tree);
2418 static void ix86_compute_frame_layout (struct ix86_frame *);
2419 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
2420 rtx, rtx, int);
2421 static void ix86_add_new_builtins (int);
2422 static rtx ix86_expand_vec_perm_builtin (tree);
2423 static tree ix86_canonical_va_list_type (tree);
2424 static void predict_jump (int);
2425 static unsigned int split_stack_prologue_scratch_regno (void);
2426 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2428 enum ix86_function_specific_strings
2429 {
2430 IX86_FUNCTION_SPECIFIC_ARCH,
2431 IX86_FUNCTION_SPECIFIC_TUNE,
2432 IX86_FUNCTION_SPECIFIC_FPMATH,
2433 IX86_FUNCTION_SPECIFIC_MAX
2434 };
2436 static char *ix86_target_string (int, int, const char *, const char *,
2437 const char *, bool);
2438 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
2439 static void ix86_function_specific_save (struct cl_target_option *);
2440 static void ix86_function_specific_restore (struct cl_target_option *);
2441 static void ix86_function_specific_print (FILE *, int,
2442 struct cl_target_option *);
2443 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2444 static bool ix86_valid_target_attribute_inner_p (tree, char *[]);
2445 static bool ix86_can_inline_p (tree, tree);
2446 static void ix86_set_current_function (tree);
2447 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2449 static enum calling_abi ix86_function_abi (const_tree);
2452 #ifndef SUBTARGET32_DEFAULT_CPU
2453 #define SUBTARGET32_DEFAULT_CPU "i386"
2454 #endif
2456 /* The svr4 ABI for the i386 says that records and unions are returned
2457 in memory. */
2458 #ifndef DEFAULT_PCC_STRUCT_RETURN
2459 #define DEFAULT_PCC_STRUCT_RETURN 1
2460 #endif
2462 /* Whether -mtune= or -march= were specified */
2463 static int ix86_tune_defaulted;
2464 static int ix86_arch_specified;
2466 /* Define a set of ISAs which are available when a given ISA is
2467 enabled. MMX and SSE ISAs are handled separately. */
2469 #define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
2470 #define OPTION_MASK_ISA_3DNOW_SET \
2471 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
2473 #define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
2474 #define OPTION_MASK_ISA_SSE2_SET \
2475 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
2476 #define OPTION_MASK_ISA_SSE3_SET \
2477 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
2478 #define OPTION_MASK_ISA_SSSE3_SET \
2479 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
2480 #define OPTION_MASK_ISA_SSE4_1_SET \
2481 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
2482 #define OPTION_MASK_ISA_SSE4_2_SET \
2483 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
2484 #define OPTION_MASK_ISA_AVX_SET \
2485 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_SSE4_2_SET)
2486 #define OPTION_MASK_ISA_FMA_SET \
2487 (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_AVX_SET)
2489 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
2490 as -msse4.2. */
2491 #define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
2493 #define OPTION_MASK_ISA_SSE4A_SET \
2494 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
2495 #define OPTION_MASK_ISA_FMA4_SET \
2496 (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_SSE4A_SET \
2497 | OPTION_MASK_ISA_AVX_SET)
2498 #define OPTION_MASK_ISA_XOP_SET \
2499 (OPTION_MASK_ISA_XOP | OPTION_MASK_ISA_FMA4_SET)
2500 #define OPTION_MASK_ISA_LWP_SET \
2501 OPTION_MASK_ISA_LWP
2503 /* AES and PCLMUL need SSE2 because they use xmm registers */
2504 #define OPTION_MASK_ISA_AES_SET \
2505 (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET)
2506 #define OPTION_MASK_ISA_PCLMUL_SET \
2507 (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET)
2509 #define OPTION_MASK_ISA_ABM_SET \
2510 (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
2512 #define OPTION_MASK_ISA_BMI_SET OPTION_MASK_ISA_BMI
2513 #define OPTION_MASK_ISA_TBM_SET OPTION_MASK_ISA_TBM
2514 #define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
2515 #define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
2516 #define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF
2517 #define OPTION_MASK_ISA_MOVBE_SET OPTION_MASK_ISA_MOVBE
2518 #define OPTION_MASK_ISA_CRC32_SET OPTION_MASK_ISA_CRC32
2520 #define OPTION_MASK_ISA_FSGSBASE_SET OPTION_MASK_ISA_FSGSBASE
2521 #define OPTION_MASK_ISA_RDRND_SET OPTION_MASK_ISA_RDRND
2522 #define OPTION_MASK_ISA_F16C_SET \
2523 (OPTION_MASK_ISA_F16C | OPTION_MASK_ISA_AVX_SET)
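/* Editor's note (illustrative; not in the original source): the _SET macros
   chain, so one option turns on everything it implies.  For instance,
   OPTION_MASK_ISA_SSE3_SET expands to
   (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE),
   which is why the -msse3 handler below needs only a single |= to enable
   the whole prerequisite chain.  */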
2525 /* Define a set of ISAs which aren't available when a given ISA is
2526 disabled. MMX and SSE ISAs are handled separately. */
2528 #define OPTION_MASK_ISA_MMX_UNSET \
2529 (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
2530 #define OPTION_MASK_ISA_3DNOW_UNSET \
2531 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
2532 #define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
2534 #define OPTION_MASK_ISA_SSE_UNSET \
2535 (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
2536 #define OPTION_MASK_ISA_SSE2_UNSET \
2537 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
2538 #define OPTION_MASK_ISA_SSE3_UNSET \
2539 (OPTION_MASK_ISA_SSE3 \
2540 | OPTION_MASK_ISA_SSSE3_UNSET \
2541 | OPTION_MASK_ISA_SSE4A_UNSET )
2542 #define OPTION_MASK_ISA_SSSE3_UNSET \
2543 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
2544 #define OPTION_MASK_ISA_SSE4_1_UNSET \
2545 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
2546 #define OPTION_MASK_ISA_SSE4_2_UNSET \
2547 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET )
2548 #define OPTION_MASK_ISA_AVX_UNSET \
2549 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET \
2550 | OPTION_MASK_ISA_FMA4_UNSET | OPTION_MASK_ISA_F16C_UNSET)
2551 #define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA
2553 /* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should be the same
2554 as -mno-sse4.1. */
2555 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
2557 #define OPTION_MASK_ISA_SSE4A_UNSET \
2558 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_FMA4_UNSET)
2560 #define OPTION_MASK_ISA_FMA4_UNSET \
2561 (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_XOP_UNSET)
2562 #define OPTION_MASK_ISA_XOP_UNSET OPTION_MASK_ISA_XOP
2563 #define OPTION_MASK_ISA_LWP_UNSET OPTION_MASK_ISA_LWP
2565 #define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
2566 #define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
2567 #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
2568 #define OPTION_MASK_ISA_BMI_UNSET OPTION_MASK_ISA_BMI
2569 #define OPTION_MASK_ISA_TBM_UNSET OPTION_MASK_ISA_TBM
2570 #define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
2571 #define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
2572 #define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
2573 #define OPTION_MASK_ISA_MOVBE_UNSET OPTION_MASK_ISA_MOVBE
2574 #define OPTION_MASK_ISA_CRC32_UNSET OPTION_MASK_ISA_CRC32
2576 #define OPTION_MASK_ISA_FSGSBASE_UNSET OPTION_MASK_ISA_FSGSBASE
2577 #define OPTION_MASK_ISA_RDRND_UNSET OPTION_MASK_ISA_RDRND
2578 #define OPTION_MASK_ISA_F16C_UNSET OPTION_MASK_ISA_F16C
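/* Editor's note (illustrative; not in the original source): the _UNSET
   macros chain in the opposite direction, clearing dependents rather than
   prerequisites.  E.g. OPTION_MASK_ISA_SSE2_UNSET removes SSE2 together
   with SSE3, SSSE3, SSE4.x, SSE4A, AVX and their dependents, so -mno-sse2
   cannot leave a later ISA enabled that still needs SSE2.  */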
2580 /* Vectorization library interface and handlers. */
2581 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2583 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2584 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2586 /* Processor target table, indexed by processor number */
2587 struct ptt
2588 {
2589 const struct processor_costs *cost; /* Processor costs */
2590 const int align_loop; /* Default alignments. */
2591 const int align_loop_max_skip;
2592 const int align_jump;
2593 const int align_jump_max_skip;
2594 const int align_func;
2595 };
2597 static const struct ptt processor_target_table[PROCESSOR_max] =
2598 {
2599 {&i386_cost, 4, 3, 4, 3, 4},
2600 {&i486_cost, 16, 15, 16, 15, 16},
2601 {&pentium_cost, 16, 7, 16, 7, 16},
2602 {&pentiumpro_cost, 16, 15, 16, 10, 16},
2603 {&geode_cost, 0, 0, 0, 0, 0},
2604 {&k6_cost, 32, 7, 32, 7, 32},
2605 {&athlon_cost, 16, 7, 16, 7, 16},
2606 {&pentium4_cost, 0, 0, 0, 0, 0},
2607 {&k8_cost, 16, 7, 16, 7, 16},
2608 {&nocona_cost, 0, 0, 0, 0, 0},
2609 /* Core 2 32-bit. */
2610 {&generic32_cost, 16, 10, 16, 10, 16},
2611 /* Core 2 64-bit. */
2612 {&generic64_cost, 16, 10, 16, 10, 16},
2613 /* Core i7 32-bit. */
2614 {&generic32_cost, 16, 10, 16, 10, 16},
2615 /* Core i7 64-bit. */
2616 {&generic64_cost, 16, 10, 16, 10, 16},
2617 {&generic32_cost, 16, 7, 16, 7, 16},
2618 {&generic64_cost, 16, 10, 16, 10, 16},
2619 {&amdfam10_cost, 32, 24, 32, 7, 32},
2620 {&bdver1_cost, 32, 24, 32, 7, 32},
2621 {&btver1_cost, 32, 24, 32, 7, 32},
2622 {&atom_cost, 16, 7, 16, 7, 16}
2623 };
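/* Editor's note (illustrative; not in the original source): this table is
   indexed by the enum processor_type values behind the m_* masks above, so
   e.g. processor_target_table[PROCESSOR_K8].cost is &k8_cost, and its
   align_loop of 16 is presumably what seeds the default -falign-loops for
   -mtune=k8 during option override.  */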
2625 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
2626 {
2627 "generic",
2628 "i386",
2629 "i486",
2630 "pentium",
2631 "pentium-mmx",
2632 "pentiumpro",
2633 "pentium2",
2634 "pentium3",
2635 "pentium4",
2636 "pentium-m",
2637 "prescott",
2638 "nocona",
2639 "core2",
2640 "corei7",
2641 "atom",
2642 "geode",
2643 "k6",
2644 "k6-2",
2645 "k6-3",
2646 "athlon",
2647 "athlon-4",
2648 "k8",
2649 "amdfam10",
2650 "bdver1",
2651 "btver1"
2652 };
2654 /* Return true if a red-zone is in use. */
2656 static inline bool
2657 ix86_using_red_zone (void)
2658 {
2659 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2660 }
2662 /* Implement TARGET_HANDLE_OPTION. */
2664 static bool
2665 ix86_handle_option (struct gcc_options *opts,
2666 struct gcc_options *opts_set ATTRIBUTE_UNUSED,
2667 const struct cl_decoded_option *decoded,
2668 location_t loc)
2669 {
2670 size_t code = decoded->opt_index;
2671 int value = decoded->value;
2673 switch (code)
2674 {
2675 case OPT_mmmx:
2676 if (value)
2677 {
2678 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
2679 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
2680 }
2681 else
2682 {
2683 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
2684 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
2685 }
2686 return true;
2688 case OPT_m3dnow:
2689 if (value)
2690 {
2691 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
2692 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
2693 }
2694 else
2695 {
2696 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
2697 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
2698 }
2699 return true;
2701 case OPT_m3dnow_a:
2702 return false;
2704 case OPT_msse:
2705 if (value)
2706 {
2707 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
2708 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
2709 }
2710 else
2711 {
2712 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
2713 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
2714 }
2715 return true;
2717 case OPT_msse2:
2718 if (value)
2719 {
2720 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
2721 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
2722 }
2723 else
2724 {
2725 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
2726 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
2727 }
2728 return true;
2730 case OPT_msse3:
2731 if (value)
2732 {
2733 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
2734 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
2735 }
2736 else
2737 {
2738 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
2739 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
2740 }
2741 return true;
2743 case OPT_mssse3:
2744 if (value)
2745 {
2746 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
2747 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
2748 }
2749 else
2750 {
2751 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
2752 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
2753 }
2754 return true;
2756 case OPT_msse4_1:
2757 if (value)
2758 {
2759 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
2760 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
2761 }
2762 else
2763 {
2764 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
2765 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
2766 }
2767 return true;
2769 case OPT_msse4_2:
2770 if (value)
2771 {
2772 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
2773 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
2774 }
2775 else
2776 {
2777 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
2778 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
2779 }
2780 return true;
2782 case OPT_mavx:
2783 if (value)
2784 {
2785 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX_SET;
2786 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_SET;
2787 }
2788 else
2789 {
2790 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_AVX_UNSET;
2791 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_UNSET;
2792 }
2793 return true;
2795 case OPT_mfma:
2796 if (value)
2797 {
2798 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA_SET;
2799 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_SET;
2800 }
2801 else
2802 {
2803 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_FMA_UNSET;
2804 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_UNSET;
2805 }
2806 return true;
2808 case OPT_msse4:
2809 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
2810 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
2811 return true;
2813 case OPT_mno_sse4:
2814 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
2815 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
2816 return true;
2818 case OPT_msse4a:
2819 if (value)
2820 {
2821 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
2822 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
2823 }
2824 else
2825 {
2826 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
2827 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
2828 }
2829 return true;
2831 case OPT_mfma4:
2832 if (value)
2833 {
2834 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4_SET;
2835 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_SET;
2836 }
2837 else
2838 {
2839 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_FMA4_UNSET;
2840 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_UNSET;
2841 }
2842 return true;
2844 case OPT_mxop:
2845 if (value)
2846 {
2847 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP_SET;
2848 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_SET;
2849 }
2850 else
2851 {
2852 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_XOP_UNSET;
2853 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_UNSET;
2854 }
2855 return true;
2857 case OPT_mlwp:
2858 if (value)
2859 {
2860 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP_SET;
2861 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_SET;
2862 }
2863 else
2864 {
2865 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_LWP_UNSET;
2866 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_UNSET;
2867 }
2868 return true;
2870 case OPT_mabm:
2871 if (value)
2872 {
2873 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM_SET;
2874 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_SET;
2875 }
2876 else
2877 {
2878 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_ABM_UNSET;
2879 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_UNSET;
2880 }
2881 return true;
2883 case OPT_mbmi:
2884 if (value)
2885 {
2886 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI_SET;
2887 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_BMI_SET;
2888 }
2889 else
2890 {
2891 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_BMI_UNSET;
2892 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_BMI_UNSET;
2893 }
2894 return true;
2896 case OPT_mtbm:
2897 if (value)
2898 {
2899 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM_SET;
2900 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_TBM_SET;
2901 }
2902 else
2903 {
2904 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_TBM_UNSET;
2905 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_TBM_UNSET;
2906 }
2907 return true;
2909 case OPT_mpopcnt:
2910 if (value)
2911 {
2912 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT_SET;
2913 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_SET;
2914 }
2915 else
2916 {
2917 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_POPCNT_UNSET;
2918 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_UNSET;
2919 }
2920 return true;
2922 case OPT_msahf:
2923 if (value)
2924 {
2925 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF_SET;
2926 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_SET;
2927 }
2928 else
2929 {
2930 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_SAHF_UNSET;
2931 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_UNSET;
2932 }
2933 return true;
2935 case OPT_mcx16:
2936 if (value)
2937 {
2938 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16_SET;
2939 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_SET;
2940 }
2941 else
2942 {
2943 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_CX16_UNSET;
2944 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_UNSET;
2945 }
2946 return true;
2948 case OPT_mmovbe:
2949 if (value)
2950 {
2951 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVBE_SET;
2952 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_SET;
2953 }
2954 else
2955 {
2956 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_MOVBE_UNSET;
2957 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_UNSET;
2958 }
2959 return true;
2961 case OPT_mcrc32:
2962 if (value)
2963 {
2964 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CRC32_SET;
2965 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_SET;
2966 }
2967 else
2968 {
2969 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_CRC32_UNSET;
2970 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_UNSET;
2971 }
2972 return true;
2974 case OPT_maes:
2975 if (value)
2976 {
2977 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AES_SET;
2978 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_SET;
2979 }
2980 else
2981 {
2982 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_AES_UNSET;
2983 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_UNSET;
2984 }
2985 return true;
2987 case OPT_mpclmul:
2988 if (value)
2989 {
2990 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL_SET;
2991 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_SET;
2992 }
2993 else
2994 {
2995 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_PCLMUL_UNSET;
2996 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_UNSET;
2997 }
2998 return true;
3000 case OPT_mfsgsbase:
3001 if (value)
3002 {
3003 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE_SET;
3004 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_FSGSBASE_SET;
3005 }
3006 else
3007 {
3008 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_FSGSBASE_UNSET;
3009 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_FSGSBASE_UNSET;
3010 }
3011 return true;
3013 case OPT_mrdrnd:
3014 if (value)
3015 {
3016 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND_SET;
3017 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_RDRND_SET;
3018 }
3019 else
3020 {
3021 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_RDRND_UNSET;
3022 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_RDRND_UNSET;
3023 }
3024 return true;
3026 case OPT_mf16c:
3027 if (value)
3028 {
3029 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C_SET;
3030 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_F16C_SET;
3031 }
3032 else
3033 {
3034 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_F16C_UNSET;
3035 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_F16C_UNSET;
3036 }
3037 return true;
3039 /* Comes from final.c -- no real reason to change it. */
3040 #define MAX_CODE_ALIGN 16
3042 case OPT_malign_loops_:
3043 warning_at (loc, 0, "-malign-loops is obsolete, use -falign-loops");
3044 if (value > MAX_CODE_ALIGN)
3045 error_at (loc, "-malign-loops=%d is not between 0 and %d",
3046 value, MAX_CODE_ALIGN);
3047 else
3048 opts->x_align_loops = 1 << value;
3049 return true;
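/* Editor's note (illustrative; not in the original source): the value is a
   log2, e.g. -malign-loops=4 stores 1 << 4 == 16 into opts->x_align_loops,
   i.e. 16-byte loop alignment; anything above MAX_CODE_ALIGN (16) is
   rejected by the error above.  The two cases below follow the same
   pattern for jumps and functions.  */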
3051 case OPT_malign_jumps_:
3052 warning_at (loc, 0, "-malign-jumps is obsolete, use -falign-jumps");
3053 if (value > MAX_CODE_ALIGN)
3054 error_at (loc, "-malign-jumps=%d is not between 0 and %d",
3055 value, MAX_CODE_ALIGN);
3056 else
3057 opts->x_align_jumps = 1 << value;
3058 return true;
3060 case OPT_malign_functions_:
3061 warning_at (loc, 0,
3062 "-malign-functions is obsolete, use -falign-functions");
3063 if (value > MAX_CODE_ALIGN)
3064 error_at (loc, "-malign-functions=%d is not between 0 and %d",
3065 value, MAX_CODE_ALIGN);
3066 else
3067 opts->x_align_functions = 1 << value;
3068 return true;
3070 case OPT_mbranch_cost_:
3071 if (value > 5)
3072 {
3073 error_at (loc, "-mbranch-cost=%d is not between 0 and 5", value);
3074 opts->x_ix86_branch_cost = 5;
3075 }
3076 return true;
3078 default:
3079 return true;
3080 }
3081 }
3083 /* Return a string that documents the current -m options. The caller is
3084 responsible for freeing the string. */
3087 ix86_target_string (int isa, int flags, const char *arch, const char *tune,
3088 const char *fpmath, bool add_nl_p)
3090 struct ix86_target_opts
3091 {
3092 const char *option; /* option string */
3093 int mask; /* isa mask options */
3094 };
3096 /* This table is ordered so that options like -msse4.2 that imply
3097 preceding options are matched first. */
3098 static struct ix86_target_opts isa_opts[] =
3099 {
3100 { "-m64", OPTION_MASK_ISA_64BIT },
3101 { "-mfma4", OPTION_MASK_ISA_FMA4 },
3102 { "-mfma", OPTION_MASK_ISA_FMA },
3103 { "-mxop", OPTION_MASK_ISA_XOP },
3104 { "-mlwp", OPTION_MASK_ISA_LWP },
3105 { "-msse4a", OPTION_MASK_ISA_SSE4A },
3106 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
3107 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
3108 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
3109 { "-msse3", OPTION_MASK_ISA_SSE3 },
3110 { "-msse2", OPTION_MASK_ISA_SSE2 },
3111 { "-msse", OPTION_MASK_ISA_SSE },
3112 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
3113 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
3114 { "-mmmx", OPTION_MASK_ISA_MMX },
3115 { "-mabm", OPTION_MASK_ISA_ABM },
3116 { "-mbmi", OPTION_MASK_ISA_BMI },
3117 { "-mtbm", OPTION_MASK_ISA_TBM },
3118 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
3119 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
3120 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
3121 { "-maes", OPTION_MASK_ISA_AES },
3122 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
3123 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
3124 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
3125 { "-mf16c", OPTION_MASK_ISA_F16C },
3129 static struct ix86_target_opts flag_opts[] =
3131 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
3132 { "-m80387", MASK_80387 },
3133 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
3134 { "-malign-double", MASK_ALIGN_DOUBLE },
3135 { "-mcld", MASK_CLD },
3136 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
3137 { "-mieee-fp", MASK_IEEE_FP },
3138 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
3139 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
3140 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
3141 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
3142 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
3143 { "-mno-push-args", MASK_NO_PUSH_ARGS },
3144 { "-mno-red-zone", MASK_NO_RED_ZONE },
3145 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
3146 { "-mrecip", MASK_RECIP },
3147 { "-mrtd", MASK_RTD },
3148 { "-msseregparm", MASK_SSEREGPARM },
3149 { "-mstack-arg-probe", MASK_STACK_PROBE },
3150 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
3151 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
3152 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
3153 { "-mvzeroupper", MASK_VZEROUPPER },
3154 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
3155 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
3158 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
3161 char target_other[40];
3170 memset (opts, '\0', sizeof (opts));
3172 /* Add -march= option. */
3173 if (arch)
3174 {
3175 opts[num][0] = "-march=";
3176 opts[num++][1] = arch;
3177 }
3179 /* Add -mtune= option. */
3180 if (tune)
3181 {
3182 opts[num][0] = "-mtune=";
3183 opts[num++][1] = tune;
3184 }
3186 /* Pick out the options in isa options. */
3187 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
3188 {
3189 if ((isa & isa_opts[i].mask) != 0)
3190 {
3191 opts[num++][0] = isa_opts[i].option;
3192 isa &= ~ isa_opts[i].mask;
3193 }
3194 }
3196 if (isa && add_nl_p)
3197 {
3198 opts[num++][0] = isa_other;
3199 sprintf (isa_other, "(other isa: %#x)", isa);
3200 }
3202 /* Add flag options. */
3203 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
3204 {
3205 if ((flags & flag_opts[i].mask) != 0)
3206 {
3207 opts[num++][0] = flag_opts[i].option;
3208 flags &= ~ flag_opts[i].mask;
3209 }
3210 }
3212 if (flags && add_nl_p)
3213 {
3214 opts[num++][0] = target_other;
3215 sprintf (target_other, "(other flags: %#x)", flags);
3216 }
3218 /* Add -mfpmath= option. */
3219 if (fpmath)
3220 {
3221 opts[num][0] = "-mfpmath=";
3222 opts[num++][1] = fpmath;
3223 }
3225 /* Any options? */
3226 if (num == 0)
3227 return NULL;
3229 gcc_assert (num < ARRAY_SIZE (opts));
3231 /* Size the string. */
3232 len = 0;
3233 sep_len = (add_nl_p) ? 3 : 1;
3234 for (i = 0; i < num; i++)
3235 {
3236 len += sep_len;
3237 for (j = 0; j < 2; j++)
3238 if (opts[i][j])
3239 len += strlen (opts[i][j]);
3240 }
3242 /* Build the string. */
3243 ret = ptr = (char *) xmalloc (len);
3244 line_len = 0;
3246 for (i = 0; i < num; i++)
3247 {
3248 size_t len2[2];
3250 for (j = 0; j < 2; j++)
3251 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
3258 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
3266 for (j = 0; j < 2; j++)
3267 if (opts[i][j])
3268 {
3269 memcpy (ptr, opts[i][j], len2[j]);
3270 ptr += len2[j];
3271 line_len += len2[j];
3272 }
3273 }
3275 *ptr = '\0';
3276 gcc_assert (ret + len >= ptr);
3278 return ret;
3279 }
3281 /* Return true, if profiling code should be emitted before
3282 prologue. Otherwise it returns false.
3283 Note: For x86 with "hotfix" it is sorried, i.e. rejected with sorry (). */
3284 static bool
3285 ix86_profile_before_prologue (void)
3286 {
3287 return flag_fentry != 0;
3288 }
3290 /* Function that is callable from the debugger to print the current
3291 options. */
3292 static void
3293 ix86_debug_options (void)
3294 {
3295 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
3296 ix86_arch_string, ix86_tune_string,
3297 ix86_fpmath_string, true);
3299 if (opts)
3300 {
3301 fprintf (stderr, "%s\n\n", opts);
3302 free (opts);
3303 }
3304 else
3305 fputs ("<no options>\n\n", stderr);
3307 return;
3308 }
3310 /* Override various settings based on options. If MAIN_ARGS_P, the
3311 options are from the command line, otherwise they are from
3312 attribute(target). */
3314 static void
3315 ix86_option_override_internal (bool main_args_p)
3316 {
3317 int i;
3318 unsigned int ix86_arch_mask, ix86_tune_mask;
3319 const bool ix86_tune_specified = (ix86_tune_string != NULL);
3320 const char *prefix;
3321 const char *suffix;
3322 const char *sw;
3324 enum pta_flags
3325 {
3326 PTA_SSE = 1 << 0,
3327 PTA_SSE2 = 1 << 1,
3328 PTA_SSE3 = 1 << 2,
3329 PTA_MMX = 1 << 3,
3330 PTA_PREFETCH_SSE = 1 << 4,
3331 PTA_3DNOW = 1 << 5,
3332 PTA_3DNOW_A = 1 << 6,
3333 PTA_64BIT = 1 << 7,
3334 PTA_SSSE3 = 1 << 8,
3335 PTA_CX16 = 1 << 9,
3336 PTA_POPCNT = 1 << 10,
3337 PTA_ABM = 1 << 11,
3338 PTA_SSE4A = 1 << 12,
3339 PTA_NO_SAHF = 1 << 13,
3340 PTA_SSE4_1 = 1 << 14,
3341 PTA_SSE4_2 = 1 << 15,
3342 PTA_AES = 1 << 16,
3343 PTA_PCLMUL = 1 << 17,
3344 PTA_AVX = 1 << 18,
3345 PTA_FMA = 1 << 19,
3346 PTA_MOVBE = 1 << 20,
3347 PTA_FMA4 = 1 << 21,
3348 PTA_XOP = 1 << 22,
3349 PTA_LWP = 1 << 23,
3350 PTA_FSGSBASE = 1 << 24,
3351 PTA_RDRND = 1 << 25,
3352 PTA_F16C = 1 << 26,
3353 PTA_BMI = 1 << 27,
3354 PTA_TBM = 1 << 28
3355 /* if this reaches 32, need to widen struct pta flags below */
3356 };
3358 static struct pta
3359 {
3360 const char *const name; /* processor name or nickname. */
3361 const enum processor_type processor;
3362 const enum attr_cpu schedule;
3363 const unsigned /*enum pta_flags*/ flags;
3364 }
3365 const processor_alias_table[] =
3366 {
3367 {"i386", PROCESSOR_I386, CPU_NONE, 0},
3368 {"i486", PROCESSOR_I486, CPU_NONE, 0},
3369 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3370 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3371 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
3372 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
3373 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
3374 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
3375 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
3376 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3377 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3378 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
3379 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3381 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3383 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3384 PTA_MMX | PTA_SSE | PTA_SSE2},
3385 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
3386 PTA_MMX | PTA_SSE | PTA_SSE2},
3387 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
3388 PTA_MMX | PTA_SSE | PTA_SSE2},
3389 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
3390 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
3391 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
3392 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3393 | PTA_CX16 | PTA_NO_SAHF},
3394 {"core2", PROCESSOR_CORE2_64, CPU_CORE2,
3395 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3396 | PTA_SSSE3 | PTA_CX16},
3397 {"corei7", PROCESSOR_COREI7_64, CPU_COREI7,
3398 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3399 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_CX16},
3400 {"corei7-avx", PROCESSOR_COREI7_64, CPU_COREI7,
3401 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3402 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX
3403 | PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL},
3404 {"atom", PROCESSOR_ATOM, CPU_ATOM,
3405 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3406 | PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
3407 {"geode", PROCESSOR_GEODE, CPU_GEODE,
3408 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3409 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3410 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
3411 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
3412 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3413 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3414 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3415 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3416 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3417 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
3418 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3419 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
3420 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3421 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
3422 {"x86-64", PROCESSOR_K8, CPU_K8,
3423 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
3424 {"k8", PROCESSOR_K8, CPU_K8,
3425 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3426 | PTA_SSE2 | PTA_NO_SAHF},
3427 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3428 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3429 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3430 {"opteron", PROCESSOR_K8, CPU_K8,
3431 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3432 | PTA_SSE2 | PTA_NO_SAHF},
3433 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3434 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3435 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3436 {"athlon64", PROCESSOR_K8, CPU_K8,
3437 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3438 | PTA_SSE2 | PTA_NO_SAHF},
3439 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3440 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3441 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3442 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3443 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3444 | PTA_SSE2 | PTA_NO_SAHF},
3445 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3446 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3447 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
3448 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3449 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3450 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
3451 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3452 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3453 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3454 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3455 | PTA_XOP | PTA_LWP},
3456 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC64,
3457 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3458 PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16},
3459 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
3460 0 /* flags are only used for -march switch. */ },
3461 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
3462 PTA_64BIT /* flags are only used for -march switch. */ },
3463 };
3465 int const pta_size = ARRAY_SIZE (processor_alias_table);
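/* Editor's note (illustrative; not in the original source): ix86_arch_string
   is looked up in processor_alias_table by the for-loop further below; the
   matching entry sets ix86_arch/ix86_schedule, and each PTA_* bit seeds the
   corresponding OPTION_MASK_ISA_* flag unless the user set that ISA
   explicitly -- e.g. -march=core2 implies MMX through SSSE3 plus CX16.  */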
3467 /* Set up prefix/suffix so the error messages refer to either the command
3468 line argument, or the attribute(target). */
3477 prefix = "option(\"";
3482 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3483 SUBTARGET_OVERRIDE_OPTIONS;
3484 #endif
3486 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3487 SUBSUBTARGET_OVERRIDE_OPTIONS;
3488 #endif
3490 /* -fPIC is the default for x86_64. */
3491 if (TARGET_MACHO && TARGET_64BIT)
3492 flag_pic = 2;
3494 /* Need to check -mtune=generic first. */
3495 if (ix86_tune_string)
3496 {
3497 if (!strcmp (ix86_tune_string, "generic")
3498 || !strcmp (ix86_tune_string, "i686")
3499 /* As special support for cross compilers we read -mtune=native
3500 as -mtune=generic. With native compilers we won't see the
3501 -mtune=native, as it was changed by the driver. */
3502 || !strcmp (ix86_tune_string, "native"))
3505 ix86_tune_string = "generic64";
3507 ix86_tune_string = "generic32";
3509 /* If this call is for setting the option attribute, allow the
3510 generic32/generic64 that was previously set. */
3511 else if (!main_args_p
3512 && (!strcmp (ix86_tune_string, "generic32")
3513 || !strcmp (ix86_tune_string, "generic64")))
3515 else if (!strncmp (ix86_tune_string, "generic", 7))
3516 error ("bad value (%s) for %stune=%s %s",
3517 ix86_tune_string, prefix, suffix, sw);
3518 else if (!strcmp (ix86_tune_string, "x86-64"))
3519 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3520 "%stune=k8%s or %stune=generic%s instead as appropriate",
3521 prefix, suffix, prefix, suffix, prefix, suffix);
3522 }
3523 else
3524 {
3525 if (ix86_arch_string)
3526 ix86_tune_string = ix86_arch_string;
3527 if (!ix86_tune_string)
3528 {
3529 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
3530 ix86_tune_defaulted = 1;
3531 }
3533 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
3534 need to use a sensible tune option. */
3535 if (!strcmp (ix86_tune_string, "generic")
3536 || !strcmp (ix86_tune_string, "x86-64")
3537 || !strcmp (ix86_tune_string, "i686"))
3540 ix86_tune_string = "generic64";
3542 ix86_tune_string = "generic32";
3546 if (ix86_stringop_alg == rep_prefix_8_byte && !TARGET_64BIT)
3547 {
3548 /* rep; movq isn't available in 32-bit code. */
3549 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3550 ix86_stringop_alg = no_stringop;
3551 }
3553 if (!ix86_arch_string)
3554 ix86_arch_string = TARGET_64BIT ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3555 else
3556 ix86_arch_specified = 1;
3558 if (!global_options_set.x_ix86_abi)
3559 ix86_abi = DEFAULT_ABI;
3561 if (global_options_set.x_ix86_cmodel)
3562 {
3563 switch (ix86_cmodel)
3564 {
3565 case CM_SMALL:
3566 case CM_SMALL_PIC:
3567 if (flag_pic)
3568 ix86_cmodel = CM_SMALL_PIC;
3569 if (!TARGET_64BIT)
3570 error ("code model %qs not supported in the %s bit mode",
3571 "small", "32");
3572 break;
3574 case CM_MEDIUM:
3575 case CM_MEDIUM_PIC:
3576 if (flag_pic)
3577 ix86_cmodel = CM_MEDIUM_PIC;
3578 if (!TARGET_64BIT)
3579 error ("code model %qs not supported in the %s bit mode",
3580 "medium", "32");
3581 break;
3583 case CM_LARGE:
3584 case CM_LARGE_PIC:
3585 if (flag_pic)
3586 ix86_cmodel = CM_LARGE_PIC;
3587 if (!TARGET_64BIT)
3588 error ("code model %qs not supported in the %s bit mode",
3589 "large", "32");
3590 break;
3592 case CM_32:
3593 if (flag_pic)
3594 error ("code model %s does not support PIC mode", "32");
3595 if (TARGET_64BIT)
3596 error ("code model %qs not supported in the %s bit mode",
3597 "32", "64");
3598 break;
3600 case CM_KERNEL:
3601 if (flag_pic)
3602 {
3603 error ("code model %s does not support PIC mode", "kernel");
3604 ix86_cmodel = CM_32;
3605 }
3606 if (!TARGET_64BIT)
3607 error ("code model %qs not supported in the %s bit mode",
3608 "kernel", "32");
3609 break;
3611 default:
3612 gcc_unreachable ();
3613 }
3614 }
3615 else
3616 {
3617 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3618 use of rip-relative addressing. This eliminates fixups that
3619 would otherwise be needed if this object is to be placed in a
3620 DLL, and is essentially just as efficient as direct addressing. */
3621 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
3622 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
3623 else if (TARGET_64BIT)
3624 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
3626 ix86_cmodel = CM_32;
3628 if (TARGET_MACHO && ix86_asm_dialect == ASM_INTEL)
3630 error ("-masm=intel not supported in this configuration");
3631 ix86_asm_dialect = ASM_ATT;
3633 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3634 sorry ("%i-bit mode not compiled in",
3635 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3637 for (i = 0; i < pta_size; i++)
3638 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
3640 ix86_schedule = processor_alias_table[i].schedule;
3641 ix86_arch = processor_alias_table[i].processor;
3642 /* Default cpu tuning to the architecture. */
3643 ix86_tune = ix86_arch;
3645 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3646 error ("CPU you selected does not support x86-64 "
3649 if (processor_alias_table[i].flags & PTA_MMX
3650 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3651 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3652 if (processor_alias_table[i].flags & PTA_3DNOW
3653 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3654 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3655 if (processor_alias_table[i].flags & PTA_3DNOW_A
3656 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3657 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3658 if (processor_alias_table[i].flags & PTA_SSE
3659 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3660 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3661 if (processor_alias_table[i].flags & PTA_SSE2
3662 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3663 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3664 if (processor_alias_table[i].flags & PTA_SSE3
3665 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3666 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3667 if (processor_alias_table[i].flags & PTA_SSSE3
3668 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3669 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3670 if (processor_alias_table[i].flags & PTA_SSE4_1
3671 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3672 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3673 if (processor_alias_table[i].flags & PTA_SSE4_2
3674 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3675 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3676 if (processor_alias_table[i].flags & PTA_AVX
3677 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3678 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3679 if (processor_alias_table[i].flags & PTA_FMA
3680 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3681 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3682 if (processor_alias_table[i].flags & PTA_SSE4A
3683 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3684 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3685 if (processor_alias_table[i].flags & PTA_FMA4
3686 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3687 ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3688 if (processor_alias_table[i].flags & PTA_XOP
3689 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3690 ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3691 if (processor_alias_table[i].flags & PTA_LWP
3692 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3693 ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3694 if (processor_alias_table[i].flags & PTA_ABM
3695 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3696 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3697 if (processor_alias_table[i].flags & PTA_BMI
3698 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3699 ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3700 if (processor_alias_table[i].flags & PTA_TBM
3701 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3702 ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3703 if (processor_alias_table[i].flags & PTA_CX16
3704 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3705 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3706 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3707 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3708 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3709 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
3710 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3711 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3712 if (processor_alias_table[i].flags & PTA_MOVBE
3713 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3714 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3715 if (processor_alias_table[i].flags & PTA_AES
3716 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3717 ix86_isa_flags |= OPTION_MASK_ISA_AES;
3718 if (processor_alias_table[i].flags & PTA_PCLMUL
3719 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3720 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3721 if (processor_alias_table[i].flags & PTA_FSGSBASE
3722 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3723 ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3724 if (processor_alias_table[i].flags & PTA_RDRND
3725 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3726 ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3727 if (processor_alias_table[i].flags & PTA_F16C
3728 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3729 ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3730 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3731 x86_prefetch_sse = true;
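	/* Editorial note, not from the original sources: each PTA_* test
	   above only turns an ISA on when the user did not set it
	   explicitly, because ix86_isa_flags_explicit records every
	   explicit -m/-mno- ISA option.  For example, a hypothetical
	   "-march=bdver1 -mno-xop" keeps XOP disabled even though bdver1
	   lists PTA_XOP.  */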
3736 if (!strcmp (ix86_arch_string, "generic"))
3737 error ("generic CPU can be used only for %stune=%s %s",
3738 prefix, suffix, sw);
3739 else if (!strncmp (ix86_arch_string, "generic", 7) || i == pta_size)
3740 error ("bad value (%s) for %sarch=%s %s",
3741 ix86_arch_string, prefix, suffix, sw);
3743 ix86_arch_mask = 1u << ix86_arch;
3744 for (i = 0; i < X86_ARCH_LAST; ++i)
3745 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3747 for (i = 0; i < pta_size; i++)
3748 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
3750 ix86_schedule = processor_alias_table[i].schedule;
3751 ix86_tune = processor_alias_table[i].processor;
3754 if (!(processor_alias_table[i].flags & PTA_64BIT))
3756 if (ix86_tune_defaulted)
3758 ix86_tune_string = "x86-64";
3759 for (i = 0; i < pta_size; i++)
3760 if (! strcmp (ix86_tune_string,
3761 processor_alias_table[i].name))
3763 ix86_schedule = processor_alias_table[i].schedule;
3764 ix86_tune = processor_alias_table[i].processor;
3767 error ("CPU you selected does not support x86-64 "
3773 /* Adjust tuning when compiling for 32-bit ABI. */
3776 case PROCESSOR_GENERIC64:
3777 ix86_tune = PROCESSOR_GENERIC32;
3778 ix86_schedule = CPU_PENTIUMPRO;
3781 case PROCESSOR_CORE2_64:
3782 ix86_tune = PROCESSOR_CORE2_32;
3785 case PROCESSOR_COREI7_64:
3786 ix86_tune = PROCESSOR_COREI7_32;
3793 /* Intel CPUs have always interpreted SSE prefetch instructions as
3794 NOPs; so, we can enable SSE prefetch instructions even when
3795 -mtune (rather than -march) points us to a processor that has them.
3796 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3797 higher processors. */
3799 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3800 x86_prefetch_sse = true;
3804 if (ix86_tune_specified && i == pta_size)
3805 error ("bad value (%s) for %stune=%s %s",
3806 ix86_tune_string, prefix, suffix, sw);
3808 ix86_tune_mask = 1u << ix86_tune;
3809 for (i = 0; i < X86_TUNE_LAST; ++i)
3810 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
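  /* Editorial sketch of how a tune feature computed above is consumed
     elsewhere (TARGET_USE_LEAVE is a macro in i386.h, quoted here only
     for illustration):

	 #define TARGET_USE_LEAVE  ix86_tune_features[X86_TUNE_USE_LEAVE]

     so each X86_TUNE_* capability reduces to one array lookup seeded by
     the single bit-test against ix86_tune_mask above.  */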
3812 #ifndef USE_IX86_FRAME_POINTER
3813 #define USE_IX86_FRAME_POINTER 0
3816 #ifndef USE_X86_64_FRAME_POINTER
3817 #define USE_X86_64_FRAME_POINTER 0
3820 /* Set the default values for switches whose default depends on TARGET_64BIT
3821 in case they weren't overwritten by command line options. */
3824 if (optimize > 1 && !global_options_set.x_flag_zee)
3826 if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
3827 flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3828 if (flag_asynchronous_unwind_tables == 2)
3829 flag_unwind_tables = flag_asynchronous_unwind_tables = 1;
3830 if (flag_pcc_struct_return == 2)
3831 flag_pcc_struct_return = 0;
3835 if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
3836 flag_omit_frame_pointer = !(USE_IX86_FRAME_POINTER || optimize_size);
3837 if (flag_asynchronous_unwind_tables == 2)
3838 flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3839 if (flag_pcc_struct_return == 2)
3840 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3844 ix86_cost = &ix86_size_cost;
3846 ix86_cost = processor_target_table[ix86_tune].cost;
3848 /* Arrange to set up i386_stack_locals for all functions. */
3849 init_machine_status = ix86_init_machine_status;
3851 /* Validate -mregparm= value. */
3852 if (global_options_set.x_ix86_regparm)
3855 warning (0, "-mregparm is ignored in 64-bit mode");
3856 if (ix86_regparm > REGPARM_MAX)
3858 error ("-mregparm=%d is not between 0 and %d",
3859 ix86_regparm, REGPARM_MAX);
3864 ix86_regparm = REGPARM_MAX;
3866 /* Default align_* from the processor table. */
3867 if (align_loops == 0)
3869 align_loops = processor_target_table[ix86_tune].align_loop;
3870 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3872 if (align_jumps == 0)
3874 align_jumps = processor_target_table[ix86_tune].align_jump;
3875 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3877 if (align_functions == 0)
3879 align_functions = processor_target_table[ix86_tune].align_func;
3882 /* Provide default for -mbranch-cost= value. */
3883 if (!global_options_set.x_ix86_branch_cost)
3884 ix86_branch_cost = ix86_cost->branch_cost;
3888 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3890 /* Enable by default the SSE and MMX builtins. Do allow the user to
3891 explicitly disable any of these. In particular, disabling SSE and
3892 MMX for kernel code is extremely useful. */
3893 if (!ix86_arch_specified)
3895 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3896 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
warning (0, "%srtd%s is ignored in 64-bit mode", prefix, suffix);
3903 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3905 if (!ix86_arch_specified)
3907 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
      /* The i386 ABI does not specify a red zone.  It still makes sense
	 to use one when the programmer takes care to keep the stack from
	 being destroyed.  */
3911 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3912 target_flags |= MASK_NO_RED_ZONE;
3915 /* Keep nonleaf frame pointers. */
3916 if (flag_omit_frame_pointer)
3917 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3918 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3919 flag_omit_frame_pointer = 1;
3921 /* If we're doing fast math, we don't care about comparison order
3922 wrt NaNs. This lets us use a shorter comparison sequence. */
3923 if (flag_finite_math_only)
3924 target_flags &= ~MASK_IEEE_FP;
3926 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3927 since the insns won't need emulation. */
3928 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3929 target_flags &= ~MASK_NO_FANCY_MATH_387;
3931 /* Likewise, if the target doesn't have a 387, or we've specified
3932 software floating point, don't use 387 inline intrinsics. */
3934 target_flags |= MASK_NO_FANCY_MATH_387;
3936 /* Turn on MMX builtins for -msse. */
3939 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3940 x86_prefetch_sse = true;
3943 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3944 if (TARGET_SSE4_2 || TARGET_ABM)
3945 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3947 /* Validate -mpreferred-stack-boundary= value or default it to
3948 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3949 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3950 if (global_options_set.x_ix86_preferred_stack_boundary_arg)
3952 int min = (TARGET_64BIT ? 4 : 2);
3953 int max = (TARGET_SEH ? 4 : 12);
3955 if (ix86_preferred_stack_boundary_arg < min
3956 || ix86_preferred_stack_boundary_arg > max)
3959 error ("-mpreferred-stack-boundary is not supported "
3962 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
3963 ix86_preferred_stack_boundary_arg, min, max);
3966 ix86_preferred_stack_boundary
3967 = (1 << ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
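      /* Worked example (editorial): -mpreferred-stack-boundary=4 yields
	 (1 << 4) * BITS_PER_UNIT = 16 * 8 = 128 bits, i.e. a 16-byte
	 boundary, which is also the minimum accepted in 64-bit mode.  */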
3970 /* Set the default value for -mstackrealign. */
3971 if (ix86_force_align_arg_pointer == -1)
3972 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3974 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3976 /* Validate -mincoming-stack-boundary= value or default it to
3977 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3978 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3979 if (global_options_set.x_ix86_incoming_stack_boundary_arg)
3981 if (ix86_incoming_stack_boundary_arg < (TARGET_64BIT ? 4 : 2)
3982 || ix86_incoming_stack_boundary_arg > 12)
3983 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3984 ix86_incoming_stack_boundary_arg, TARGET_64BIT ? 4 : 2);
3987 ix86_user_incoming_stack_boundary
3988 = (1 << ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
3989 ix86_incoming_stack_boundary
3990 = ix86_user_incoming_stack_boundary;
3994 /* Accept -msseregparm only if at least SSE support is enabled. */
3995 if (TARGET_SSEREGPARM
3997 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3999 ix86_fpmath = TARGET_FPMATH_DEFAULT;
4000 if (ix86_fpmath_string != 0)
4002 if (! strcmp (ix86_fpmath_string, "387"))
4003 ix86_fpmath = FPMATH_387;
4004 else if (! strcmp (ix86_fpmath_string, "sse"))
4008 warning (0, "SSE instruction set disabled, using 387 arithmetics");
4009 ix86_fpmath = FPMATH_387;
4012 ix86_fpmath = FPMATH_SSE;
4014 else if (! strcmp (ix86_fpmath_string, "387,sse")
4015 || ! strcmp (ix86_fpmath_string, "387+sse")
4016 || ! strcmp (ix86_fpmath_string, "sse,387")
4017 || ! strcmp (ix86_fpmath_string, "sse+387")
4018 || ! strcmp (ix86_fpmath_string, "both"))
4022 warning (0, "SSE instruction set disabled, using 387 arithmetics");
4023 ix86_fpmath = FPMATH_387;
4025 else if (!TARGET_80387)
4027 warning (0, "387 instruction set disabled, using SSE arithmetics");
4028 ix86_fpmath = FPMATH_SSE;
4031 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
4034 error ("bad value (%s) for %sfpmath=%s %s",
4035 ix86_fpmath_string, prefix, suffix, sw);
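  /* Editorial summary of the -mfpmath= spellings handled above:
       "387"                          -> FPMATH_387
       "sse"                          -> FPMATH_SSE (387 if SSE is off)
       "387,sse" / "sse+387" / "both" -> FPMATH_SSE | FPMATH_387
     anything else reaches the error above.  */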
4038 /* If the i387 is disabled, then do not return values in it. */
4040 target_flags &= ~MASK_FLOAT_RETURNS;
4042 /* Use external vectorized library in vectorizing intrinsics. */
4043 if (global_options_set.x_ix86_veclibabi_type)
4044 switch (ix86_veclibabi_type)
4046 case ix86_veclibabi_type_svml:
4047 ix86_veclib_handler = ix86_veclibabi_svml;
4050 case ix86_veclibabi_type_acml:
4051 ix86_veclib_handler = ix86_veclibabi_acml;
4058 if ((!USE_IX86_FRAME_POINTER
4059 || (x86_accumulate_outgoing_args & ix86_tune_mask))
4060 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
4062 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4064 /* ??? Unwind info is not correct around the CFG unless either a frame
4065 pointer is present or M_A_O_A is set. Fixing this requires rewriting
unwind info generation to be aware of the CFG and propagating states
     around edges.  */
4068 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
4069 || flag_exceptions || flag_non_call_exceptions)
4070 && flag_omit_frame_pointer
4071 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4073 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
4074 warning (0, "unwind tables currently require either a frame pointer "
4075 "or %saccumulate-outgoing-args%s for correctness",
4077 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4080 /* If stack probes are required, the space used for large function
4081 arguments on the stack must also be probed, so enable
4082 -maccumulate-outgoing-args so this happens in the prologue. */
4083 if (TARGET_STACK_PROBE
4084 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4086 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
4087 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
4088 "for correctness", prefix, suffix);
4089 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
  /* For sane SSE instruction set generation we need the fcomi
     instruction.  It is safe to enable all CMOVE instructions.  */
4097 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
4100 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
4101 p = strchr (internal_label_prefix, 'X');
4102 internal_label_prefix_len = p - internal_label_prefix;
  /* When the scheduling description is not available, disable the
     scheduler pass so it won't slow down compilation and make x87
     code slower.  */
4108 if (!TARGET_SCHEDULE)
4109 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
4111 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4112 ix86_cost->simultaneous_prefetches,
4113 global_options.x_param_values,
4114 global_options_set.x_param_values);
4115 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, ix86_cost->prefetch_block,
4116 global_options.x_param_values,
4117 global_options_set.x_param_values);
4118 maybe_set_param_value (PARAM_L1_CACHE_SIZE, ix86_cost->l1_cache_size,
4119 global_options.x_param_values,
4120 global_options_set.x_param_values);
4121 maybe_set_param_value (PARAM_L2_CACHE_SIZE, ix86_cost->l2_cache_size,
4122 global_options.x_param_values,
4123 global_options_set.x_param_values);
/* Enable software prefetching at -O3 for CPUs where prefetching is helpful.  */
4126 if (flag_prefetch_loop_arrays < 0
4129 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
4130 flag_prefetch_loop_arrays = 1;
4132 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
4133 can be optimized to ap = __builtin_next_arg (0). */
4134 if (!TARGET_64BIT && !flag_split_stack)
4135 targetm.expand_builtin_va_start = NULL;
4139 ix86_gen_leave = gen_leave_rex64;
4140 ix86_gen_add3 = gen_adddi3;
4141 ix86_gen_sub3 = gen_subdi3;
4142 ix86_gen_sub3_carry = gen_subdi3_carry;
4143 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
4144 ix86_gen_monitor = gen_sse3_monitor64;
4145 ix86_gen_andsp = gen_anddi3;
4146 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
4147 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
4148 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
4152 ix86_gen_leave = gen_leave;
4153 ix86_gen_add3 = gen_addsi3;
4154 ix86_gen_sub3 = gen_subsi3;
4155 ix86_gen_sub3_carry = gen_subsi3_carry;
4156 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
4157 ix86_gen_monitor = gen_sse3_monitor;
4158 ix86_gen_andsp = gen_andsi3;
4159 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
4160 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
4161 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
4165 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
4167 target_flags |= MASK_CLD & ~target_flags_explicit;
4170 if (!TARGET_64BIT && flag_pic)
4172 if (flag_fentry > 0)
4173 sorry ("-mfentry isn%'t supported for 32-bit in combination "
4177 else if (TARGET_SEH)
4179 if (flag_fentry == 0)
4180 sorry ("-mno-fentry isn%'t compatible with SEH");
4183 else if (flag_fentry < 0)
4185 #if defined(PROFILE_BEFORE_PROLOGUE)
/* Save the initial options in case the user uses function specific options.  */
4194 target_option_default_node = target_option_current_node
4195 = build_target_option_node ();
/* When not optimizing for size, enable vzeroupper optimization for
4200 TARGET_AVX with -fexpensive-optimizations and split 32-byte
4201 AVX unaligned load/store. */
4204 if (flag_expensive_optimizations
4205 && !(target_flags_explicit & MASK_VZEROUPPER))
4206 target_flags |= MASK_VZEROUPPER;
4207 if (!(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
4208 target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
4209 if (!(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_STORE))
4210 target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
4215 /* Disable vzeroupper pass if TARGET_AVX is disabled. */
4216 target_flags &= ~MASK_VZEROUPPER;
/* Return TRUE if VAL is passed in a register with a 256bit AVX mode.  */
4223 function_pass_avx256_p (const_rtx val)
4228 if (REG_P (val) && VALID_AVX256_REG_MODE (GET_MODE (val)))
4231 if (GET_CODE (val) == PARALLEL)
4236 for (i = XVECLEN (val, 0) - 1; i >= 0; i--)
4238 r = XVECEXP (val, 0, i);
4239 if (GET_CODE (r) == EXPR_LIST
4241 && REG_P (XEXP (r, 0))
4242 && (GET_MODE (XEXP (r, 0)) == OImode
4243 || VALID_AVX256_REG_MODE (GET_MODE (XEXP (r, 0)))))
4251 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4254 ix86_option_override (void)
4256 ix86_option_override_internal (true);
4259 /* Update register usage after having seen the compiler flags. */
4262 ix86_conditional_register_usage (void)
4267 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4269 if (fixed_regs[i] > 1)
4270 fixed_regs[i] = (fixed_regs[i] == (TARGET_64BIT ? 3 : 2));
4271 if (call_used_regs[i] > 1)
4272 call_used_regs[i] = (call_used_regs[i] == (TARGET_64BIT ? 3 : 2));
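      /* Editorial note on the encoding: in the initial register tables a
	 value of 2 marks a register fixed/call-used only in 32-bit mode
	 and 3 only in 64-bit mode; the comparisons above collapse those
	 to plain 0/1 for the current target.  */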
4275 /* The PIC register, if it exists, is fixed. */
4276 j = PIC_OFFSET_TABLE_REGNUM;
4277 if (j != INVALID_REGNUM)
4278 fixed_regs[j] = call_used_regs[j] = 1;
4280 /* The 64-bit MS_ABI changes the set of call-used registers. */
4281 if (TARGET_64BIT_MS_ABI)
4283 call_used_regs[SI_REG] = 0;
4284 call_used_regs[DI_REG] = 0;
4285 call_used_regs[XMM6_REG] = 0;
4286 call_used_regs[XMM7_REG] = 0;
4287 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4288 call_used_regs[i] = 0;
4291 /* The default setting of CLOBBERED_REGS is for 32-bit; add in the
4292 other call-clobbered regs for 64-bit. */
4295 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
4297 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4298 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
4299 && call_used_regs[i])
4300 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
4303 /* If MMX is disabled, squash the registers. */
4305 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4306 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
4307 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4309 /* If SSE is disabled, squash the registers. */
4311 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4312 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
4313 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4315 /* If the FPU is disabled, squash the registers. */
4316 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
4317 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4318 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
4319 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4321 /* If 32-bit, squash the 64-bit registers. */
4324 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
4326 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
/* Save the current options.  */
4335 ix86_function_specific_save (struct cl_target_option *ptr)
4337 ptr->arch = ix86_arch;
4338 ptr->schedule = ix86_schedule;
4339 ptr->tune = ix86_tune;
4340 ptr->fpmath = ix86_fpmath;
4341 ptr->branch_cost = ix86_branch_cost;
4342 ptr->tune_defaulted = ix86_tune_defaulted;
4343 ptr->arch_specified = ix86_arch_specified;
4344 ptr->x_ix86_isa_flags_explicit = ix86_isa_flags_explicit;
4345 ptr->ix86_target_flags_explicit = target_flags_explicit;
4347 /* The fields are char but the variables are not; make sure the
4348 values fit in the fields. */
4349 gcc_assert (ptr->arch == ix86_arch);
4350 gcc_assert (ptr->schedule == ix86_schedule);
4351 gcc_assert (ptr->tune == ix86_tune);
4352 gcc_assert (ptr->fpmath == ix86_fpmath);
4353 gcc_assert (ptr->branch_cost == ix86_branch_cost);
/* Restore the current options.  */
4359 ix86_function_specific_restore (struct cl_target_option *ptr)
4361 enum processor_type old_tune = ix86_tune;
4362 enum processor_type old_arch = ix86_arch;
4363 unsigned int ix86_arch_mask, ix86_tune_mask;
4366 ix86_arch = (enum processor_type) ptr->arch;
4367 ix86_schedule = (enum attr_cpu) ptr->schedule;
4368 ix86_tune = (enum processor_type) ptr->tune;
4369 ix86_fpmath = (enum fpmath_unit) ptr->fpmath;
4370 ix86_branch_cost = ptr->branch_cost;
4371 ix86_tune_defaulted = ptr->tune_defaulted;
4372 ix86_arch_specified = ptr->arch_specified;
4373 ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
4374 target_flags_explicit = ptr->ix86_target_flags_explicit;
/* Recreate the arch feature tests if the arch changed.  */
4377 if (old_arch != ix86_arch)
4379 ix86_arch_mask = 1u << ix86_arch;
4380 for (i = 0; i < X86_ARCH_LAST; ++i)
4381 ix86_arch_features[i]
4382 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
/* Recreate the tune optimization tests.  */
4386 if (old_tune != ix86_tune)
4388 ix86_tune_mask = 1u << ix86_tune;
4389 for (i = 0; i < X86_TUNE_LAST; ++i)
4390 ix86_tune_features[i]
4391 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
/* Print the current options.  */
4398 ix86_function_specific_print (FILE *file, int indent,
4399 struct cl_target_option *ptr)
4402 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
4403 NULL, NULL, NULL, false);
4405 fprintf (file, "%*sarch = %d (%s)\n",
4408 ((ptr->arch < TARGET_CPU_DEFAULT_max)
4409 ? cpu_names[ptr->arch]
4412 fprintf (file, "%*stune = %d (%s)\n",
4415 ((ptr->tune < TARGET_CPU_DEFAULT_max)
4416 ? cpu_names[ptr->tune]
4419 fprintf (file, "%*sfpmath = %d%s%s\n", indent, "", ptr->fpmath,
4420 (ptr->fpmath & FPMATH_387) ? ", 387" : "",
4421 (ptr->fpmath & FPMATH_SSE) ? ", sse" : "");
4422 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4426 fprintf (file, "%*s%s\n", indent, "", target_string);
4427 free (target_string);
4432 /* Inner function to process the attribute((target(...))), take an argument and
set the current options from the argument.  If we have a list, recursively go
   over the list.  */
4437 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[])
4442 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4443 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4444 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4445 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4460 enum ix86_opt_type type;
4465 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4466 IX86_ATTR_ISA ("abm", OPT_mabm),
4467 IX86_ATTR_ISA ("bmi", OPT_mbmi),
4468 IX86_ATTR_ISA ("tbm", OPT_mtbm),
4469 IX86_ATTR_ISA ("aes", OPT_maes),
4470 IX86_ATTR_ISA ("avx", OPT_mavx),
4471 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4472 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4473 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4474 IX86_ATTR_ISA ("sse", OPT_msse),
4475 IX86_ATTR_ISA ("sse2", OPT_msse2),
4476 IX86_ATTR_ISA ("sse3", OPT_msse3),
4477 IX86_ATTR_ISA ("sse4", OPT_msse4),
4478 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4479 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4480 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4481 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4482 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4483 IX86_ATTR_ISA ("xop", OPT_mxop),
4484 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4485 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4486 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4487 IX86_ATTR_ISA ("f16c", OPT_mf16c),
4489 /* string options */
4490 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4491 IX86_ATTR_STR ("fpmath=", IX86_FUNCTION_SPECIFIC_FPMATH),
4492 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
4495 IX86_ATTR_YES ("cld",
4499 IX86_ATTR_NO ("fancy-math-387",
4500 OPT_mfancy_math_387,
4501 MASK_NO_FANCY_MATH_387),
4503 IX86_ATTR_YES ("ieee-fp",
4507 IX86_ATTR_YES ("inline-all-stringops",
4508 OPT_minline_all_stringops,
4509 MASK_INLINE_ALL_STRINGOPS),
4511 IX86_ATTR_YES ("inline-stringops-dynamically",
4512 OPT_minline_stringops_dynamically,
4513 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4515 IX86_ATTR_NO ("align-stringops",
4516 OPT_mno_align_stringops,
4517 MASK_NO_ALIGN_STRINGOPS),
4519 IX86_ATTR_YES ("recip",
4525 /* If this is a list, recurse to get the options. */
4526 if (TREE_CODE (args) == TREE_LIST)
4530 for (; args; args = TREE_CHAIN (args))
4531 if (TREE_VALUE (args)
4532 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args), p_strings))
4538 else if (TREE_CODE (args) != STRING_CST)
4541 /* Handle multiple arguments separated by commas. */
4542 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4544 while (next_optstr && *next_optstr != '\0')
4546 char *p = next_optstr;
4548 char *comma = strchr (next_optstr, ',');
4549 const char *opt_string;
4550 size_t len, opt_len;
4555 enum ix86_opt_type type = ix86_opt_unknown;
4561 len = comma - next_optstr;
4562 next_optstr = comma + 1;
4570 /* Recognize no-xxx. */
4571 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4580 /* Find the option. */
4583 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4585 type = attrs[i].type;
4586 opt_len = attrs[i].len;
4587 if (ch == attrs[i].string[0]
4588 && ((type != ix86_opt_str) ? len == opt_len : len > opt_len)
4589 && memcmp (p, attrs[i].string, opt_len) == 0)
4592 mask = attrs[i].mask;
4593 opt_string = attrs[i].string;
4598 /* Process the option. */
4601 error ("attribute(target(\"%s\")) is unknown", orig_p);
4605 else if (type == ix86_opt_isa)
4607 struct cl_decoded_option decoded;
4609 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
4610 ix86_handle_option (&global_options, &global_options_set,
4611 &decoded, input_location);
4614 else if (type == ix86_opt_yes || type == ix86_opt_no)
4616 if (type == ix86_opt_no)
4617 opt_set_p = !opt_set_p;
4620 target_flags |= mask;
4622 target_flags &= ~mask;
4625 else if (type == ix86_opt_str)
4629 error ("option(\"%s\") was already specified", opt_string);
4633 p_strings[opt] = xstrdup (p + opt_len);
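      /* Editorial walk-through of the loop above: a hypothetical
	 attribute((target("sse4.2,no-avx,arch=core2"))) is split at the
	 commas; "sse4.2" is an ix86_opt_isa entry, the "no-" prefix
	 flips opt_set_p for "avx", and "arch=core2" is an ix86_opt_str
	 entry whose value is saved in p_strings for
	 ix86_valid_target_attribute_tree to apply later.  */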
4643 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4646 ix86_valid_target_attribute_tree (tree args)
4648 const char *orig_arch_string = ix86_arch_string;
4649 const char *orig_tune_string = ix86_tune_string;
4650 const char *orig_fpmath_string = ix86_fpmath_string;
4651 int orig_tune_defaulted = ix86_tune_defaulted;
4652 int orig_arch_specified = ix86_arch_specified;
4653 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL, NULL };
4656 struct cl_target_option *def
4657 = TREE_TARGET_OPTION (target_option_default_node);
4659 /* Process each of the options on the chain. */
4660 if (! ix86_valid_target_attribute_inner_p (args, option_strings))
4663 /* If the changed options are different from the default, rerun
4664 ix86_option_override_internal, and then save the options away.
The string options are attribute options, and will be undone
4666 when we copy the save structure. */
4667 if (ix86_isa_flags != def->x_ix86_isa_flags
4668 || target_flags != def->x_target_flags
4669 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4670 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4671 || option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
4673 /* If we are using the default tune= or arch=, undo the string assigned,
4674 and use the default. */
4675 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4676 ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4677 else if (!orig_arch_specified)
4678 ix86_arch_string = NULL;
4680 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4681 ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4682 else if (orig_tune_defaulted)
4683 ix86_tune_string = NULL;
4685 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4686 if (option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
4687 ix86_fpmath_string = option_strings[IX86_FUNCTION_SPECIFIC_FPMATH];
4688 else if (!TARGET_64BIT && TARGET_SSE)
4689 ix86_fpmath_string = "sse,387";
4691 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4692 ix86_option_override_internal (false);
4694 /* Add any builtin functions with the new isa if any. */
4695 ix86_add_new_builtins (ix86_isa_flags);
/* Save the current options unless we are validating options for #pragma.  */
4699 t = build_target_option_node ();
4701 ix86_arch_string = orig_arch_string;
4702 ix86_tune_string = orig_tune_string;
4703 ix86_fpmath_string = orig_fpmath_string;
/* Free up memory allocated to hold the strings.  */
4706 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4707 free (option_strings[i]);
4713 /* Hook to validate attribute((target("string"))). */
4716 ix86_valid_target_attribute_p (tree fndecl,
4717 tree ARG_UNUSED (name),
4719 int ARG_UNUSED (flags))
4721 struct cl_target_option cur_target;
4723 tree old_optimize = build_optimization_node ();
4724 tree new_target, new_optimize;
4725 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4727 /* If the function changed the optimization levels as well as setting target
4728 options, start with the optimizations specified. */
4729 if (func_optimize && func_optimize != old_optimize)
4730 cl_optimization_restore (&global_options,
4731 TREE_OPTIMIZATION (func_optimize));
4733 /* The target attributes may also change some optimization flags, so update
4734 the optimization options if necessary. */
4735 cl_target_option_save (&cur_target, &global_options);
4736 new_target = ix86_valid_target_attribute_tree (args);
4737 new_optimize = build_optimization_node ();
4744 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4746 if (old_optimize != new_optimize)
4747 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4750 cl_target_option_restore (&global_options, &cur_target);
4752 if (old_optimize != new_optimize)
4753 cl_optimization_restore (&global_options,
4754 TREE_OPTIMIZATION (old_optimize));
4760 /* Hook to determine if one function can safely inline another. */
4763 ix86_can_inline_p (tree caller, tree callee)
4766 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
4767 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
4769 /* If callee has no option attributes, then it is ok to inline. */
/* If caller has no option attributes, but callee does, then it is not ok
     to inline.  */
4775 else if (!caller_tree)
4780 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
4781 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
  /* Callee's ISA options should be a subset of the caller's, i.e. an SSE4
     function can inline an SSE2 function, but an SSE2 function can't
     inline an SSE4 function.  */
4786 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
4787 != callee_opts->x_ix86_isa_flags)
4790 /* See if we have the same non-isa options. */
4791 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
4794 /* See if arch, tune, etc. are the same. */
4795 else if (caller_opts->arch != callee_opts->arch)
4798 else if (caller_opts->tune != callee_opts->tune)
4801 else if (caller_opts->fpmath != callee_opts->fpmath)
4804 else if (caller_opts->branch_cost != callee_opts->branch_cost)
4815 /* Remember the last target of ix86_set_current_function. */
4816 static GTY(()) tree ix86_previous_fndecl;
4818 /* Establish appropriate back-end context for processing the function
4819 FNDECL. The argument might be NULL to indicate processing at top
4820 level, outside of any function scope. */
4822 ix86_set_current_function (tree fndecl)
4824 /* Only change the context if the function changes. This hook is called
4825 several times in the course of compiling a function, and we don't want to
4826 slow things down too much or call target_reinit when it isn't safe. */
4827 if (fndecl && fndecl != ix86_previous_fndecl)
4829 tree old_tree = (ix86_previous_fndecl
4830 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
4833 tree new_tree = (fndecl
4834 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
4837 ix86_previous_fndecl = fndecl;
4838 if (old_tree == new_tree)
4843 cl_target_option_restore (&global_options,
4844 TREE_TARGET_OPTION (new_tree));
4850 struct cl_target_option *def
4851 = TREE_TARGET_OPTION (target_option_current_node);
4853 cl_target_option_restore (&global_options, def);
4860 /* Return true if this goes in large data/bss. */
4863 ix86_in_large_data_p (tree exp)
4865 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
4868 /* Functions are never large data. */
4869 if (TREE_CODE (exp) == FUNCTION_DECL)
4872 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
4874 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
4875 if (strcmp (section, ".ldata") == 0
4876 || strcmp (section, ".lbss") == 0)
4882 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
4884 /* If this is an incomplete type with size 0, then we can't put it
4885 in data because it might be too big when completed. */
4886 if (!size || size > ix86_section_threshold)
4893 /* Switch to the appropriate section for output of DECL.
4894 DECL is either a `VAR_DECL' node or a constant of some sort.
4895 RELOC indicates whether forming the initial value of DECL requires
4896 link-time relocations. */
4898 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
4902 x86_64_elf_select_section (tree decl, int reloc,
4903 unsigned HOST_WIDE_INT align)
4905 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4906 && ix86_in_large_data_p (decl))
4908 const char *sname = NULL;
4909 unsigned int flags = SECTION_WRITE;
4910 switch (categorize_decl_for_section (decl, reloc))
4915 case SECCAT_DATA_REL:
4916 sname = ".ldata.rel";
4918 case SECCAT_DATA_REL_LOCAL:
4919 sname = ".ldata.rel.local";
4921 case SECCAT_DATA_REL_RO:
4922 sname = ".ldata.rel.ro";
4924 case SECCAT_DATA_REL_RO_LOCAL:
4925 sname = ".ldata.rel.ro.local";
4929 flags |= SECTION_BSS;
4932 case SECCAT_RODATA_MERGE_STR:
4933 case SECCAT_RODATA_MERGE_STR_INIT:
4934 case SECCAT_RODATA_MERGE_CONST:
4938 case SECCAT_SRODATA:
	  /* We don't split these for the medium model.  Place them into
	     default sections and hope for the best.  */
4951 /* We might get called with string constants, but get_named_section
4952 doesn't like them as they are not DECLs. Also, we need to set
4953 flags in that case. */
4955 return get_section (sname, flags, NULL);
4956 return get_named_section (decl, sname, reloc);
4959 return default_elf_select_section (decl, reloc, align);
4962 /* Build up a unique section name, expressed as a
4963 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
4964 RELOC indicates whether the initial value of EXP requires
4965 link-time relocations. */
4967 static void ATTRIBUTE_UNUSED
4968 x86_64_elf_unique_section (tree decl, int reloc)
4970 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4971 && ix86_in_large_data_p (decl))
4973 const char *prefix = NULL;
4974 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
4975 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
4977 switch (categorize_decl_for_section (decl, reloc))
4980 case SECCAT_DATA_REL:
4981 case SECCAT_DATA_REL_LOCAL:
4982 case SECCAT_DATA_REL_RO:
4983 case SECCAT_DATA_REL_RO_LOCAL:
4984 prefix = one_only ? ".ld" : ".ldata";
4987 prefix = one_only ? ".lb" : ".lbss";
4990 case SECCAT_RODATA_MERGE_STR:
4991 case SECCAT_RODATA_MERGE_STR_INIT:
4992 case SECCAT_RODATA_MERGE_CONST:
4993 prefix = one_only ? ".lr" : ".lrodata";
4995 case SECCAT_SRODATA:
	  /* We don't split these for the medium model.  Place them into
	     default sections and hope for the best.  */
5008 const char *name, *linkonce;
5011 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
5012 name = targetm.strip_name_encoding (name);
5014 /* If we're using one_only, then there needs to be a .gnu.linkonce
5015 prefix to the section name. */
5016 linkonce = one_only ? ".gnu.linkonce" : "";
5018 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
5020 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
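      /* Editorial example of the names built above: a one-only large-data
	 variable "foo" gets ".gnu.linkonce.ld.foo", while an ordinary
	 large-data variable gets ".ldata.foo".  */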
5024 default_unique_section (decl, reloc);
5027 #ifdef COMMON_ASM_OP
5028 /* This says how to output assembler code to declare an
5029 uninitialized external linkage data object.
For medium model x86-64 we need to use the .largecomm opcode for
   large objects.  */
5034 x86_elf_aligned_common (FILE *file,
5035 const char *name, unsigned HOST_WIDE_INT size,
5038 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5039 && size > (unsigned int)ix86_section_threshold)
5040 fputs (".largecomm\t", file);
5042 fputs (COMMON_ASM_OP, file);
5043 assemble_name (file, name);
5044 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
5045 size, align / BITS_PER_UNIT);
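  /* Editorial sample of the output: a 70000-byte object "buf" aligned to
     256 bits under the medium model (default threshold 65536) emits

	.largecomm	buf,70000,32

     while smaller objects take the COMMON_ASM_OP branch.  */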
5049 /* Utility function for targets to use in implementing
5050 ASM_OUTPUT_ALIGNED_BSS. */
5053 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
5054 const char *name, unsigned HOST_WIDE_INT size,
5057 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5058 && size > (unsigned int)ix86_section_threshold)
5059 switch_to_section (get_named_section (decl, ".lbss", 0));
5061 switch_to_section (bss_section);
5062 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
5063 #ifdef ASM_DECLARE_OBJECT_NAME
5064 last_assemble_variable_decl = decl;
5065 ASM_DECLARE_OBJECT_NAME (file, name, decl);
/* The standard thing is just to output a label for the object.  */
5068 ASM_OUTPUT_LABEL (file, name);
5069 #endif /* ASM_DECLARE_OBJECT_NAME */
5070 ASM_OUTPUT_SKIP (file, size ? size : 1);
5073 static const struct default_options ix86_option_optimization_table[] =
5075 /* Turn off -fschedule-insns by default. It tends to make the
5076 problem with not enough registers even worse. */
5077 #ifdef INSN_SCHEDULING
5078 { OPT_LEVELS_ALL, OPT_fschedule_insns, NULL, 0 },
5081 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
5082 SUBTARGET_OPTIMIZATION_OPTIONS,
5084 { OPT_LEVELS_NONE, 0, NULL, 0 }
5087 /* Implement TARGET_OPTION_INIT_STRUCT. */
5090 ix86_option_init_struct (struct gcc_options *opts)
5093 /* The Darwin libraries never set errno, so we might as well
5094 avoid calling them when that's the only reason we would. */
5095 opts->x_flag_errno_math = 0;
5097 opts->x_flag_pcc_struct_return = 2;
5098 opts->x_flag_asynchronous_unwind_tables = 2;
5099 opts->x_flag_vect_cost_model = 1;
5102 /* Decide whether we must probe the stack before any space allocation
5103 on this target. It's essentially TARGET_STACK_PROBE except when
5104 -fstack-check causes the stack to be already probed differently. */
5107 ix86_target_stack_probe (void)
5109 /* Do not probe the stack twice if static stack checking is enabled. */
5110 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5113 return TARGET_STACK_PROBE;
5116 /* Decide whether we can make a sibling call to a function. DECL is the
5117 declaration of the function being targeted by the call and EXP is the
5118 CALL_EXPR representing the call. */
5121 ix86_function_ok_for_sibcall (tree decl, tree exp)
5123 tree type, decl_or_type;
5126 /* If we are generating position-independent code, we cannot sibcall
5127 optimize any indirect call, or a direct call to a global function,
5128 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
5132 && (!decl || !targetm.binds_local_p (decl)))
5135 /* If we need to align the outgoing stack, then sibcalling would
5136 unalign the stack, which may break the called function. */
5137 if (ix86_minimum_incoming_stack_boundary (true)
5138 < PREFERRED_STACK_BOUNDARY)
5143 decl_or_type = decl;
5144 type = TREE_TYPE (decl);
5148 /* We're looking at the CALL_EXPR, we need the type of the function. */
5149 type = CALL_EXPR_FN (exp); /* pointer expression */
5150 type = TREE_TYPE (type); /* pointer type */
5151 type = TREE_TYPE (type); /* function type */
5152 decl_or_type = type;
/* Check that the return value locations are the same.  For example,
5156 if we are returning floats on the 80387 register stack, we cannot
5157 make a sibcall from a function that doesn't return a float to a
5158 function that does or, conversely, from a function that does return
5159 a float to a function that doesn't; the necessary stack adjustment
5160 would not be executed. This is also the place we notice
5161 differences in the return value ABI. Note that it is ok for one
5162 of the functions to have void return type as long as the return
5163 value of the other is passed in a register. */
5164 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
5165 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5167 if (STACK_REG_P (a) || STACK_REG_P (b))
5169 if (!rtx_equal_p (a, b))
5172 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
/* Disable sibcall if we need to generate vzeroupper after the
     callee returns.  */
5176 if (TARGET_VZEROUPPER
5177 && cfun->machine->callee_return_avx256_p
5178 && !cfun->machine->caller_return_avx256_p)
5181 else if (!rtx_equal_p (a, b))
5186 /* The SYSV ABI has more call-clobbered registers;
5187 disallow sibcalls from MS to SYSV. */
5188 if (cfun->machine->call_abi == MS_ABI
5189 && ix86_function_type_abi (type) == SYSV_ABI)
5194 /* If this call is indirect, we'll need to be able to use a
5195 call-clobbered register for the address of the target function.
5196 Make sure that all such registers are not used for passing
5197 parameters. Note that DLLIMPORT functions are indirect. */
5199 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
5201 if (ix86_function_regparm (type, NULL) >= 3)
5203 /* ??? Need to count the actual number of registers to be used,
5204 not the possible number of registers. Fix later. */
5210 /* Otherwise okay. That also includes certain types of indirect calls. */
5214 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
5215 and "sseregparm" calling convention attributes;
5216 arguments as in struct attribute_spec.handler. */
5219 ix86_handle_cconv_attribute (tree *node, tree name,
5221 int flags ATTRIBUTE_UNUSED,
5224 if (TREE_CODE (*node) != FUNCTION_TYPE
5225 && TREE_CODE (*node) != METHOD_TYPE
5226 && TREE_CODE (*node) != FIELD_DECL
5227 && TREE_CODE (*node) != TYPE_DECL)
5229 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5231 *no_add_attrs = true;
/* Can combine regparm with all attributes but fastcall and thiscall.  */
5236 if (is_attribute_p ("regparm", name))
5240 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5242 error ("fastcall and regparm attributes are not compatible");
5245 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5247 error ("regparam and thiscall attributes are not compatible");
5250 cst = TREE_VALUE (args);
5251 if (TREE_CODE (cst) != INTEGER_CST)
5253 warning (OPT_Wattributes,
5254 "%qE attribute requires an integer constant argument",
5256 *no_add_attrs = true;
5258 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
5260 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
5262 *no_add_attrs = true;
5270 /* Do not warn when emulating the MS ABI. */
5271 if ((TREE_CODE (*node) != FUNCTION_TYPE
5272 && TREE_CODE (*node) != METHOD_TYPE)
5273 || ix86_function_type_abi (*node) != MS_ABI)
5274 warning (OPT_Wattributes, "%qE attribute ignored",
5276 *no_add_attrs = true;
5280 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
5281 if (is_attribute_p ("fastcall", name))
5283 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5285 error ("fastcall and cdecl attributes are not compatible");
5287 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5289 error ("fastcall and stdcall attributes are not compatible");
5291 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
5293 error ("fastcall and regparm attributes are not compatible");
5295 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5297 error ("fastcall and thiscall attributes are not compatible");
/* Can combine stdcall with fastcall (redundant), regparm and
     sseregparm.  */
5303 else if (is_attribute_p ("stdcall", name))
5305 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5307 error ("stdcall and cdecl attributes are not compatible");
5309 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5311 error ("stdcall and fastcall attributes are not compatible");
5313 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5315 error ("stdcall and thiscall attributes are not compatible");
5319 /* Can combine cdecl with regparm and sseregparm. */
5320 else if (is_attribute_p ("cdecl", name))
5322 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5324 error ("stdcall and cdecl attributes are not compatible");
5326 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5328 error ("fastcall and cdecl attributes are not compatible");
5330 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5332 error ("cdecl and thiscall attributes are not compatible");
5335 else if (is_attribute_p ("thiscall", name))
5337 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
warning (OPT_Wattributes, "%qE attribute is used for non-class method",
5340 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5342 error ("stdcall and thiscall attributes are not compatible");
5344 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5346 error ("fastcall and thiscall attributes are not compatible");
5348 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5350 error ("cdecl and thiscall attributes are not compatible");
5354 /* Can combine sseregparm with all attributes. */
/* This function determines the calling convention from TYPE.  */
5362 ix86_get_callcvt (const_tree type)
5364 unsigned int ret = 0;
5369 return IX86_CALLCVT_CDECL;
5371 attrs = TYPE_ATTRIBUTES (type);
5372 if (attrs != NULL_TREE)
5374 if (lookup_attribute ("cdecl", attrs))
5375 ret |= IX86_CALLCVT_CDECL;
5376 else if (lookup_attribute ("stdcall", attrs))
5377 ret |= IX86_CALLCVT_STDCALL;
5378 else if (lookup_attribute ("fastcall", attrs))
5379 ret |= IX86_CALLCVT_FASTCALL;
5380 else if (lookup_attribute ("thiscall", attrs))
5381 ret |= IX86_CALLCVT_THISCALL;
/* Regparm isn't allowed for thiscall and fastcall.  */
5384 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
5386 if (lookup_attribute ("regparm", attrs))
5387 ret |= IX86_CALLCVT_REGPARM;
5388 if (lookup_attribute ("sseregparm", attrs))
5389 ret |= IX86_CALLCVT_SSEREGPARM;
5392 if (IX86_BASE_CALLCVT(ret) != 0)
5396 is_stdarg = stdarg_p (type);
5397 if (TARGET_RTD && !is_stdarg)
5398 return IX86_CALLCVT_STDCALL | ret;
5402 || TREE_CODE (type) != METHOD_TYPE
5403 || ix86_function_type_abi (type) != MS_ABI)
5404 return IX86_CALLCVT_CDECL | ret;
5406 return IX86_CALLCVT_THISCALL;
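/* Editorial examples of the results above: a plain 32-bit function type
   with no attributes yields IX86_CALLCVT_CDECL; the same type under
   -mrtd with a fixed argument list yields IX86_CALLCVT_STDCALL; a
   METHOD_TYPE under the MS ABI with no explicit convention yields
   IX86_CALLCVT_THISCALL.  */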
5409 /* Return 0 if the attributes for two types are incompatible, 1 if they
5410 are compatible, and 2 if they are nearly compatible (which causes a
5411 warning to be generated). */
5414 ix86_comp_type_attributes (const_tree type1, const_tree type2)
5416 unsigned int ccvt1, ccvt2;
5418 if (TREE_CODE (type1) != FUNCTION_TYPE
5419 && TREE_CODE (type1) != METHOD_TYPE)
5422 ccvt1 = ix86_get_callcvt (type1);
5423 ccvt2 = ix86_get_callcvt (type2);
5426 if (ix86_function_regparm (type1, NULL)
5427 != ix86_function_regparm (type2, NULL))
5433 /* Return the regparm value for a function with the indicated TYPE and DECL.
5434 DECL may be NULL when calling function indirectly
5435 or considering a libcall. */
5438 ix86_function_regparm (const_tree type, const_tree decl)
5445 return (ix86_function_type_abi (type) == SYSV_ABI
5446 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5447 ccvt = ix86_get_callcvt (type);
5448 regparm = ix86_regparm;
5450 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
5452 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5455 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5459 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5461 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5464 /* Use register calling convention for local functions when possible. */
5466 && TREE_CODE (decl) == FUNCTION_DECL
5468 && !(profile_flag && !flag_fentry))
5470 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5471 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
5472 if (i && i->local && i->can_change_signature)
5474 int local_regparm, globals = 0, regno;
5476 /* Make sure no regparm register is taken by a
5477 fixed register variable. */
5478 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
5479 if (fixed_regs[local_regparm])
5482 /* We don't want to use regparm(3) for nested functions as
5483 these use a static chain pointer in the third argument. */
5484 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
5487 /* In 32-bit mode save a register for the split stack. */
5488 if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
	      /* Each fixed register usage increases register pressure,
		 so fewer registers should be used for argument passing.
		 This functionality can be overridden by an explicit
		 regparm value.  */
5495 for (regno = 0; regno <= DI_REG; regno++)
5496 if (fixed_regs[regno])
5500 = globals < local_regparm ? local_regparm - globals : 0;
5502 if (local_regparm > regparm)
5503 regparm = local_regparm;
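	  /* Worked example (editorial): with REGPARM_MAX == 3 on ia32 and
	     one global register variable occupying e.g. %edi, globals is 1
	     and the local function is limited to the equivalent of
	     regparm(2).  */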
5510 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5511 DFmode (2) arguments in SSE registers for a function with the
5512 indicated TYPE and DECL. DECL may be NULL when calling function
5513 indirectly or considering a libcall. Otherwise return 0. */
5516 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
5518 gcc_assert (!TARGET_64BIT);
5520 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5521 by the sseregparm attribute. */
5522 if (TARGET_SSEREGPARM
5523 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
5530 error ("calling %qD with attribute sseregparm without "
5531 "SSE/SSE2 enabled", decl);
5533 error ("calling %qT with attribute sseregparm without "
5534 "SSE/SSE2 enabled", type);
5542 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5543 (and DFmode for SSE2) arguments in SSE registers. */
5544 if (decl && TARGET_SSE_MATH && optimize
5545 && !(profile_flag && !flag_fentry))
5547 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5548 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
5549 if (i && i->local && i->can_change_signature)
5550 return TARGET_SSE2 ? 2 : 1;
5556 /* Return true if EAX is live at the start of the function. Used by
5557 ix86_expand_prologue to determine if we need special help before
5558 calling allocate_stack_worker. */
5561 ix86_eax_live_at_start_p (void)
5563 /* Cheat. Don't bother working forward from ix86_function_regparm
5564 to the function type to whether an actual argument is located in
5565 eax. Instead just look at cfg info, which is still close enough
5566 to correct at this point. This gives false positives for broken
5567 functions that might use uninitialized data that happens to be
5568 allocated in eax, but who cares? */
5569 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
5573 ix86_keep_aggregate_return_pointer (tree fntype)
5579 attr = lookup_attribute ("callee_pop_aggregate_return",
5580 TYPE_ATTRIBUTES (fntype));
5582 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
/* For 32-bit MS-ABI the default is to keep the aggregate
     return pointer.  */
5586 if (ix86_function_type_abi (fntype) == MS_ABI)
5589 return KEEP_AGGREGATE_RETURN_POINTER != 0;
5592 /* Value is the number of bytes of arguments automatically
5593 popped when returning from a subroutine call.
5594 FUNDECL is the declaration node of the function (as a tree),
5595 FUNTYPE is the data type of the function (as a tree),
5596 or for a library call it is an identifier node for the subroutine name.
5597 SIZE is the number of bytes of arguments passed on the stack.
5599 On the 80386, the RTD insn may be used to pop them if the number
5600 of args is fixed, but if the number is variable then the caller
5601 must pop them all. RTD can't be used for library calls now
5602 because the library is compiled with the Unix compiler.
5603 Use of RTD is a selectable option, since it is incompatible with
5604 standard Unix calling sequences. If the option is not selected,
5605 the caller must always pop the args.
5607 The attribute stdcall is equivalent to RTD on a per module basis. */
5610 ix86_return_pops_args (tree fundecl, tree funtype, int size)
5614 /* None of the 64-bit ABIs pop arguments. */
5618 ccvt = ix86_get_callcvt (funtype);
5620 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
5621 | IX86_CALLCVT_THISCALL)) != 0
5622 && ! stdarg_p (funtype))
5625 /* Lose any fake structure return argument if it is passed on the stack. */
5626 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
5627 && !ix86_keep_aggregate_return_pointer (funtype))
5629 int nregs = ix86_function_regparm (funtype, fundecl);
5631 return GET_MODE_SIZE (Pmode);
5637 /* Argument support functions. */
5639 /* Return true when register REGNO may be used to pass function parameters. */
5641 ix86_function_arg_regno_p (int regno)
5644 const int *parm_regs;
5649 return (regno < REGPARM_MAX
5650 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
5652 return (regno < REGPARM_MAX
5653 || (TARGET_MMX && MMX_REGNO_P (regno)
5654 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
5655 || (TARGET_SSE && SSE_REGNO_P (regno)
5656 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
5661 if (SSE_REGNO_P (regno) && TARGET_SSE)
5666 if (TARGET_SSE && SSE_REGNO_P (regno)
5667 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
5671 /* TODO: The function should depend on current function ABI but
5672 builtins.c would need updating then. Therefore we use the default ABI. */
5675 /* RAX is used as hidden argument to va_arg functions. */
5676 if (ix86_abi == SYSV_ABI && regno == AX_REG)
5679 if (ix86_abi == MS_ABI)
5680 parm_regs = x86_64_ms_abi_int_parameter_registers;
5682 parm_regs = x86_64_int_parameter_registers;
5683 for (i = 0; i < (ix86_abi == MS_ABI
5684 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
5685 if (regno == parm_regs[i])
5690 /* Return true if we do not know how to pass TYPE solely in registers. */
5693 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
5695 if (must_pass_in_stack_var_size_or_pad (mode, type))
5698 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5699 The layout_type routine is crafty and tries to trick us into passing
5700 currently unsupported vector types on the stack by using TImode. */
5701 return (!TARGET_64BIT && mode == TImode
5702 && type && TREE_CODE (type) != VECTOR_TYPE);
5705 /* Return the size, in bytes, of the area reserved for arguments passed
5706 in registers for the function represented by FNDECL, depending on the ABI used. */
5709 ix86_reg_parm_stack_space (const_tree fndecl)
5711 enum calling_abi call_abi = SYSV_ABI;
5712 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
5713 call_abi = ix86_function_abi (fndecl);
5715 call_abi = ix86_function_type_abi (fndecl);
5716 if (TARGET_64BIT && call_abi == MS_ABI)
5721 /* Return SYSV_ABI or MS_ABI, dependent on FNTYPE, specifying the call ABI used. */
5724 ix86_function_type_abi (const_tree fntype)
5726 if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
5728 enum calling_abi abi = ix86_abi;
5729 if (abi == SYSV_ABI)
5731 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
5734 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
5742 ix86_function_ms_hook_prologue (const_tree fn)
5744 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
5746 if (decl_function_context (fn) != NULL_TREE)
5747 error_at (DECL_SOURCE_LOCATION (fn),
5748 "ms_hook_prologue is not compatible with nested function");
5755 static enum calling_abi
5756 ix86_function_abi (const_tree fndecl)
5760 return ix86_function_type_abi (TREE_TYPE (fndecl));
5763 /* Return SYSV_ABI or MS_ABI, dependent on cfun, specifying the call ABI used by the current function. */
5766 ix86_cfun_abi (void)
5770 return cfun->machine->call_abi;
5773 /* Write the extra assembler code needed to declare a function properly. */
5776 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
5779 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
5783 int i, filler_count = (TARGET_64BIT ? 32 : 16);
5784 unsigned int filler_cc = 0xcccccccc;
5786 for (i = 0; i < filler_count; i += 4)
5787 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
5790 #ifdef SUBTARGET_ASM_UNWIND_INIT
5791 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
5794 ASM_OUTPUT_LABEL (asm_out_file, fname);
5796 /* Output magic byte marker, if hot-patch attribute is set. */
5801 /* leaq [%rsp + 0], %rsp */
5802 asm_fprintf (asm_out_file, ASM_BYTE
5803 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
5807 /* movl.s %edi, %edi
5809 movl.s %esp, %ebp */
5810 asm_fprintf (asm_out_file, ASM_BYTE
5811 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
5817 extern void init_regs (void);
5819 /* Implementation of the call ABI switching target hook. The call
5820 register sets specific to FNDECL are selected. See also
5821 ix86_conditional_register_usage for more details. */
5823 ix86_call_abi_override (const_tree fndecl)
5825 if (fndecl == NULL_TREE)
5826 cfun->machine->call_abi = ix86_abi;
5828 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
5831 /* 64-bit MS and SYSV ABIs have different sets of call-used registers. Avoid
5832 expensive re-initialization of init_regs each time we switch function context
5833 since this is needed only during RTL expansion. */
5835 ix86_maybe_switch_abi (void)
5838 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
5842 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5843 for a call to a function whose data type is FNTYPE.
5844 For a library call, FNTYPE is 0. */
5847 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
5848 tree fntype, /* tree ptr for function decl */
5849 rtx libname, /* SYMBOL_REF of library name or 0 */
5853 struct cgraph_local_info *i;
5856 memset (cum, 0, sizeof (*cum));
5858 /* Initialize for the current callee. */
5861 cfun->machine->callee_pass_avx256_p = false;
5862 cfun->machine->callee_return_avx256_p = false;
5867 i = cgraph_local_info (fndecl);
5868 cum->call_abi = ix86_function_abi (fndecl);
5869 fnret_type = TREE_TYPE (TREE_TYPE (fndecl));
5874 cum->call_abi = ix86_function_type_abi (fntype);
5876 fnret_type = TREE_TYPE (fntype);
5881 if (TARGET_VZEROUPPER && fnret_type)
5883 rtx fnret_value = ix86_function_value (fnret_type, fntype,
5885 if (function_pass_avx256_p (fnret_value))
5887 /* The return value of this function uses 256bit AVX modes. */
5889 cfun->machine->callee_return_avx256_p = true;
5891 cfun->machine->caller_return_avx256_p = true;
5895 cum->caller = caller;
5897 /* Set up the number of registers to use for passing arguments. */
5899 if (TARGET_64BIT && cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
5900 sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
5901 "or subtarget optimization implying it");
5902 cum->nregs = ix86_regparm;
5905 cum->nregs = (cum->call_abi == SYSV_ABI
5906 ? X86_64_REGPARM_MAX
5907 : X86_64_MS_REGPARM_MAX);
5911 cum->sse_nregs = SSE_REGPARM_MAX;
5914 cum->sse_nregs = (cum->call_abi == SYSV_ABI
5915 ? X86_64_SSE_REGPARM_MAX
5916 : X86_64_MS_SSE_REGPARM_MAX);
5920 cum->mmx_nregs = MMX_REGPARM_MAX;
5921 cum->warn_avx = true;
5922 cum->warn_sse = true;
5923 cum->warn_mmx = true;
5925 /* Because the type might mismatch between caller and callee, we need to
5926 use the actual type of the function for local calls.
5927 FIXME: cgraph_analyze can be told to actually record if function uses
5928 va_start so for local functions maybe_vaarg can be made aggressive
5930 FIXME: once the type system is fixed, we won't need this code anymore. */
5931 if (i && i->local && i->can_change_signature)
5932 fntype = TREE_TYPE (fndecl);
5933 cum->maybe_vaarg = (fntype
5934 ? (!prototype_p (fntype) || stdarg_p (fntype))
5939 /* If there are variable arguments, then we won't pass anything
5940 in registers in 32-bit mode. */
5941 if (stdarg_p (fntype))
5952 /* Use ecx and edx registers if function has fastcall attribute,
5953 else look for regparm information. */
5956 unsigned int ccvt = ix86_get_callcvt (fntype);
5957 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5960 cum->fastcall = 1; /* Same first register as in fastcall. */
5962 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5968 cum->nregs = ix86_function_regparm (fntype, fndecl);
5971 /* Set up the number of SSE registers used for passing SFmode
5972 and DFmode arguments. Warn for mismatching ABI. */
5973 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
5977 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
5978 But in the case of vector types, it is some vector mode.
5980 When we have only some of our vector isa extensions enabled, then there
5981 are some modes for which vector_mode_supported_p is false. For these
5982 modes, the generic vector support in gcc will choose some non-vector mode
5983 in order to implement the type. By computing the natural mode, we'll
5984 select the proper ABI location for the operand and not depend on whatever
5985 the middle-end decides to do with these vector types.
5987 The middle-end can't deal with vector types > 16 bytes. In this
5988 case, we return the original mode and warn of the ABI change if CUM isn't NULL. */
5991 static enum machine_mode
5992 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum)
5994 enum machine_mode mode = TYPE_MODE (type);
5996 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
5998 HOST_WIDE_INT size = int_size_in_bytes (type);
5999 if ((size == 8 || size == 16 || size == 32)
6000 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
6001 && TYPE_VECTOR_SUBPARTS (type) > 1)
6003 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
6005 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6006 mode = MIN_MODE_VECTOR_FLOAT;
6008 mode = MIN_MODE_VECTOR_INT;
6010 /* Get the mode which has this inner mode and number of units. */
6011 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
6012 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
6013 && GET_MODE_INNER (mode) == innermode)
6015 if (size == 32 && !TARGET_AVX)
6017 static bool warnedavx;
6024 warning (0, "AVX vector argument without AVX "
6025 "enabled changes the ABI");
6027 return TYPE_MODE (type);
6040 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
6041 this may not agree with the mode that the type system has chosen for the
6042 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
6043 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
6046 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
6051 if (orig_mode != BLKmode)
6052 tmp = gen_rtx_REG (orig_mode, regno);
6055 tmp = gen_rtx_REG (mode, regno);
6056 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
6057 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
6063 /* x86-64 register passing implementation. See the x86-64 ABI for details. The goal
6064 of this code is to classify each 8 bytes of an incoming argument by register
6065 class and assign registers accordingly. */
6067 /* Return the union class of CLASS1 and CLASS2.
6068 See the x86-64 PS ABI for details. */
6070 static enum x86_64_reg_class
6071 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
6073 /* Rule #1: If both classes are equal, this is the resulting class. */
6074 if (class1 == class2)
6077 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is the other class. */
6079 if (class1 == X86_64_NO_CLASS)
6081 if (class2 == X86_64_NO_CLASS)
6084 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
6085 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
6086 return X86_64_MEMORY_CLASS;
6088 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
6089 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
6090 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
6091 return X86_64_INTEGERSI_CLASS;
6092 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
6093 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
6094 return X86_64_INTEGER_CLASS;
6096 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class, MEMORY is used. */
6098 if (class1 == X86_64_X87_CLASS
6099 || class1 == X86_64_X87UP_CLASS
6100 || class1 == X86_64_COMPLEX_X87_CLASS
6101 || class2 == X86_64_X87_CLASS
6102 || class2 == X86_64_X87UP_CLASS
6103 || class2 == X86_64_COMPLEX_X87_CLASS)
6104 return X86_64_MEMORY_CLASS;
6106 /* Rule #6: Otherwise class SSE is used. */
6107 return X86_64_SSE_CLASS;
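/* A worked example of the rules above: for

     struct s { int i; float f; };

   both fields land in the same eightbyte; the int contributes
   X86_64_INTEGERSI_CLASS and the float (at a nonzero bit offset)
   X86_64_SSE_CLASS, so rule #4 merges them to X86_64_INTEGER_CLASS
   and the whole struct travels in one integer register.  For
   struct { float a; float b; } the merge of SSESF with SSE falls
   through to rule #6 and yields X86_64_SSE_CLASS, i.e. one SSE
   register.  */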
6110 /* Classify the argument of type TYPE and mode MODE.
6111 CLASSES will be filled by the register class used to pass each word
6112 of the operand. The number of words is returned. In case the parameter
6113 should be passed in memory, 0 is returned. As a special case for zero
6114 sized containers, classes[0] will be NO_CLASS and 1 is returned.
6116 BIT_OFFSET is used internally for handling records; it specifies the
6117 offset in bits modulo 256 to avoid overflow cases.
6119 See the x86-64 PS ABI for details.
6123 classify_argument (enum machine_mode mode, const_tree type,
6124 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
6126 HOST_WIDE_INT bytes =
6127 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6128 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6130 /* Variable sized entities are always passed/returned in memory. */
6134 if (mode != VOIDmode
6135 && targetm.calls.must_pass_in_stack (mode, type))
6138 if (type && AGGREGATE_TYPE_P (type))
6142 enum x86_64_reg_class subclasses[MAX_CLASSES];
6144 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
6148 for (i = 0; i < words; i++)
6149 classes[i] = X86_64_NO_CLASS;
6151 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
6152 signal the memory class, so handle it as a special case. */
6155 classes[0] = X86_64_NO_CLASS;
6159 /* Classify each field of record and merge classes. */
6160 switch (TREE_CODE (type))
6163 /* And now merge the fields of the structure. */
6164 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6166 if (TREE_CODE (field) == FIELD_DECL)
6170 if (TREE_TYPE (field) == error_mark_node)
6173 /* Bitfields are always classified as integer. Handle them
6174 early, since later code would consider them to be
6175 misaligned integers. */
6176 if (DECL_BIT_FIELD (field))
6178 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
6179 i < ((int_bit_position (field) + (bit_offset % 64))
6180 + tree_low_cst (DECL_SIZE (field), 0)
6183 merge_classes (X86_64_INTEGER_CLASS,
6190 type = TREE_TYPE (field);
6192 /* Flexible array member is ignored. */
6193 if (TYPE_MODE (type) == BLKmode
6194 && TREE_CODE (type) == ARRAY_TYPE
6195 && TYPE_SIZE (type) == NULL_TREE
6196 && TYPE_DOMAIN (type) != NULL_TREE
6197 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
6202 if (!warned && warn_psabi)
6205 inform (input_location,
6206 "the ABI of passing struct with"
6207 " a flexible array member has"
6208 " changed in GCC 4.4");
6212 num = classify_argument (TYPE_MODE (type), type,
6214 (int_bit_position (field)
6215 + bit_offset) % 256);
6218 pos = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
6219 for (i = 0; i < num && (i + pos) < words; i++)
6221 merge_classes (subclasses[i], classes[i + pos]);
6228 /* Arrays are handled as small records. */
6231 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
6232 TREE_TYPE (type), subclasses, bit_offset);
6236 /* The partial classes are now full classes. */
6237 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
6238 subclasses[0] = X86_64_SSE_CLASS;
6239 if (subclasses[0] == X86_64_INTEGERSI_CLASS
6240 && !((bit_offset % 64) == 0 && bytes == 4))
6241 subclasses[0] = X86_64_INTEGER_CLASS;
6243 for (i = 0; i < words; i++)
6244 classes[i] = subclasses[i % num];
6249 case QUAL_UNION_TYPE:
6250 /* Unions are similar to RECORD_TYPE but the offset is always 0. */
6252 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6254 if (TREE_CODE (field) == FIELD_DECL)
6258 if (TREE_TYPE (field) == error_mark_node)
6261 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
6262 TREE_TYPE (field), subclasses,
6266 for (i = 0; i < num; i++)
6267 classes[i] = merge_classes (subclasses[i], classes[i]);
6278 /* When size > 16 bytes, if the first eightbyte isn't
6279 X86_64_SSE_CLASS or any of the others isn't
6280 X86_64_SSEUP_CLASS, everything should be passed in memory. */
6282 if (classes[0] != X86_64_SSE_CLASS)
6285 for (i = 1; i < words; i++)
6286 if (classes[i] != X86_64_SSEUP_CLASS)
6290 /* Final merger cleanup. */
6291 for (i = 0; i < words; i++)
6293 /* If one class is MEMORY, everything should be passed in memory. */
6295 if (classes[i] == X86_64_MEMORY_CLASS)
6298 /* The X86_64_SSEUP_CLASS should always be preceded by
6299 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6300 if (classes[i] == X86_64_SSEUP_CLASS
6301 && classes[i - 1] != X86_64_SSE_CLASS
6302 && classes[i - 1] != X86_64_SSEUP_CLASS)
6304 /* The first one should never be X86_64_SSEUP_CLASS. */
6305 gcc_assert (i != 0);
6306 classes[i] = X86_64_SSE_CLASS;
6309 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6310 everything should be passed in memory. */
6311 if (classes[i] == X86_64_X87UP_CLASS
6312 && (classes[i - 1] != X86_64_X87_CLASS))
6316 /* The first one should never be X86_64_X87UP_CLASS. */
6317 gcc_assert (i != 0);
6318 if (!warned && warn_psabi)
6321 inform (input_location,
6322 "the ABI of passing union with long double"
6323 " has changed in GCC 4.4");
6331 /* Compute the alignment needed. We align all types to natural boundaries,
6332 with the exception of XFmode, which is aligned to 64 bits. */
6333 if (mode != VOIDmode && mode != BLKmode)
6335 int mode_alignment = GET_MODE_BITSIZE (mode);
6338 mode_alignment = 128;
6339 else if (mode == XCmode)
6340 mode_alignment = 256;
6341 if (COMPLEX_MODE_P (mode))
6342 mode_alignment /= 2;
6343 /* Misaligned fields are always returned in memory. */
6344 if (bit_offset % mode_alignment)
6348 /* For V1xx modes, just use the base mode. */
6349 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
6350 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
6351 mode = GET_MODE_INNER (mode);
6353 /* Classification of atomic types. */
6358 classes[0] = X86_64_SSE_CLASS;
6361 classes[0] = X86_64_SSE_CLASS;
6362 classes[1] = X86_64_SSEUP_CLASS;
6372 int size = (bit_offset % 64) + (int) GET_MODE_BITSIZE (mode);
6376 classes[0] = X86_64_INTEGERSI_CLASS;
6379 else if (size <= 64)
6381 classes[0] = X86_64_INTEGER_CLASS;
6384 else if (size <= 64+32)
6386 classes[0] = X86_64_INTEGER_CLASS;
6387 classes[1] = X86_64_INTEGERSI_CLASS;
6390 else if (size <= 64+64)
6392 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6400 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6404 /* OImode shouldn't be used directly. */
6409 if (!(bit_offset % 64))
6410 classes[0] = X86_64_SSESF_CLASS;
6412 classes[0] = X86_64_SSE_CLASS;
6415 classes[0] = X86_64_SSEDF_CLASS;
6418 classes[0] = X86_64_X87_CLASS;
6419 classes[1] = X86_64_X87UP_CLASS;
6422 classes[0] = X86_64_SSE_CLASS;
6423 classes[1] = X86_64_SSEUP_CLASS;
6426 classes[0] = X86_64_SSE_CLASS;
6427 if (!(bit_offset % 64))
6433 if (!warned && warn_psabi)
6436 inform (input_location,
6437 "the ABI of passing structure with complex float"
6438 " member has changed in GCC 4.4");
6440 classes[1] = X86_64_SSESF_CLASS;
6444 classes[0] = X86_64_SSEDF_CLASS;
6445 classes[1] = X86_64_SSEDF_CLASS;
6448 classes[0] = X86_64_COMPLEX_X87_CLASS;
6451 /* This mode is larger than 16 bytes. */
6459 classes[0] = X86_64_SSE_CLASS;
6460 classes[1] = X86_64_SSEUP_CLASS;
6461 classes[2] = X86_64_SSEUP_CLASS;
6462 classes[3] = X86_64_SSEUP_CLASS;
6470 classes[0] = X86_64_SSE_CLASS;
6471 classes[1] = X86_64_SSEUP_CLASS;
6479 classes[0] = X86_64_SSE_CLASS;
6485 gcc_assert (VECTOR_MODE_P (mode));
6490 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
6492 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
6493 classes[0] = X86_64_INTEGERSI_CLASS;
6495 classes[0] = X86_64_INTEGER_CLASS;
6496 classes[1] = X86_64_INTEGER_CLASS;
6497 return 1 + (bytes > 8);
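/* Some classifications the function above produces (a sketch; exact
   results also depend on alignment and the enabled ISAs):

     struct { double d; int i; }   -> { SSEDF, INTEGERSI }, 2 words
     struct { long l; long m; }    -> { INTEGER, INTEGER }, 2 words
     struct { char c[24]; }        -> 0 (over 16 bytes, non-SSE: memory)
     __m256 (with TARGET_AVX)      -> { SSE, SSEUP, SSEUP, SSEUP }  */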
6501 /* Examine the argument and set the number of registers required in each
6502 class. Return 0 iff the parameter should be passed in memory. */
6504 examine_argument (enum machine_mode mode, const_tree type, int in_return,
6505 int *int_nregs, int *sse_nregs)
6507 enum x86_64_reg_class regclass[MAX_CLASSES];
6508 int n = classify_argument (mode, type, regclass, 0);
6514 for (n--; n >= 0; n--)
6515 switch (regclass[n])
6517 case X86_64_INTEGER_CLASS:
6518 case X86_64_INTEGERSI_CLASS:
6521 case X86_64_SSE_CLASS:
6522 case X86_64_SSESF_CLASS:
6523 case X86_64_SSEDF_CLASS:
6526 case X86_64_NO_CLASS:
6527 case X86_64_SSEUP_CLASS:
6529 case X86_64_X87_CLASS:
6530 case X86_64_X87UP_CLASS:
6534 case X86_64_COMPLEX_X87_CLASS:
6535 return in_return ? 2 : 0;
6536 case X86_64_MEMORY_CLASS:
6542 /* Construct container for the argument used by GCC interface. See
6543 FUNCTION_ARG for the detailed description. */
6546 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
6547 const_tree type, int in_return, int nintregs, int nsseregs,
6548 const int *intreg, int sse_regno)
6550 /* The following variables hold the static issued_error state. */
6551 static bool issued_sse_arg_error;
6552 static bool issued_sse_ret_error;
6553 static bool issued_x87_ret_error;
6555 enum machine_mode tmpmode;
6557 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6558 enum x86_64_reg_class regclass[MAX_CLASSES];
6562 int needed_sseregs, needed_intregs;
6563 rtx exp[MAX_CLASSES];
6566 n = classify_argument (mode, type, regclass, 0);
6569 if (!examine_argument (mode, type, in_return, &needed_intregs,
6572 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
6575 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
6576 some less clueful developer tries to use floating-point anyway. */
6577 if (needed_sseregs && !TARGET_SSE)
6581 if (!issued_sse_ret_error)
6583 error ("SSE register return with SSE disabled");
6584 issued_sse_ret_error = true;
6587 else if (!issued_sse_arg_error)
6589 error ("SSE register argument with SSE disabled");
6590 issued_sse_arg_error = true;
6595 /* Likewise, error if the ABI requires us to return values in the
6596 x87 registers and the user specified -mno-80387. */
6597 if (!TARGET_80387 && in_return)
6598 for (i = 0; i < n; i++)
6599 if (regclass[i] == X86_64_X87_CLASS
6600 || regclass[i] == X86_64_X87UP_CLASS
6601 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
6603 if (!issued_x87_ret_error)
6605 error ("x87 register return with x87 disabled");
6606 issued_x87_ret_error = true;
6611 /* First construct simple cases. Avoid SCmode, since we want to use
6612 a single register to pass this type. */
6613 if (n == 1 && mode != SCmode)
6614 switch (regclass[0])
6616 case X86_64_INTEGER_CLASS:
6617 case X86_64_INTEGERSI_CLASS:
6618 return gen_rtx_REG (mode, intreg[0]);
6619 case X86_64_SSE_CLASS:
6620 case X86_64_SSESF_CLASS:
6621 case X86_64_SSEDF_CLASS:
6622 if (mode != BLKmode)
6623 return gen_reg_or_parallel (mode, orig_mode,
6624 SSE_REGNO (sse_regno));
6626 case X86_64_X87_CLASS:
6627 case X86_64_COMPLEX_X87_CLASS:
6628 return gen_rtx_REG (mode, FIRST_STACK_REG);
6629 case X86_64_NO_CLASS:
6630 /* Zero sized array, struct or class. */
6635 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
6636 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
6637 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
6639 && regclass[0] == X86_64_SSE_CLASS
6640 && regclass[1] == X86_64_SSEUP_CLASS
6641 && regclass[2] == X86_64_SSEUP_CLASS
6642 && regclass[3] == X86_64_SSEUP_CLASS
6644 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
6647 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
6648 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
6649 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
6650 && regclass[1] == X86_64_INTEGER_CLASS
6651 && (mode == CDImode || mode == TImode || mode == TFmode)
6652 && intreg[0] + 1 == intreg[1])
6653 return gen_rtx_REG (mode, intreg[0]);
6655 /* Otherwise figure out the entries of the PARALLEL. */
6656 for (i = 0; i < n; i++)
6660 switch (regclass[i])
6662 case X86_64_NO_CLASS:
6664 case X86_64_INTEGER_CLASS:
6665 case X86_64_INTEGERSI_CLASS:
6666 /* Merge TImodes on aligned occasions here too. */
6667 if (i * 8 + 8 > bytes)
6668 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
6669 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
6673 /* We've requested 24 bytes we don't have a mode for. Use DImode. */
6674 if (tmpmode == BLKmode)
6676 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6677 gen_rtx_REG (tmpmode, *intreg),
6681 case X86_64_SSESF_CLASS:
6682 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6683 gen_rtx_REG (SFmode,
6684 SSE_REGNO (sse_regno)),
6688 case X86_64_SSEDF_CLASS:
6689 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6690 gen_rtx_REG (DFmode,
6691 SSE_REGNO (sse_regno)),
6695 case X86_64_SSE_CLASS:
6703 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
6713 && regclass[1] == X86_64_SSEUP_CLASS
6714 && regclass[2] == X86_64_SSEUP_CLASS
6715 && regclass[3] == X86_64_SSEUP_CLASS);
6722 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6723 gen_rtx_REG (tmpmode,
6724 SSE_REGNO (sse_regno)),
6733 /* Empty aligned struct, union or class. */
6737 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
6738 for (i = 0; i < nexps; i++)
6739 XVECEXP (ret, 0, i) = exp [i];
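/* As a sketch of the PARALLEL case: a value classified as
   { SSEDF, INTEGERSI } (say struct { double d; int i; }) hits none of
   the simple cases above, so the loop builds two EXPR_LISTs -- a
   DFmode SSE register at byte offset 0 and an SImode integer register
   at byte offset 8 -- wrapped in a single PARALLEL.  */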
6743 /* Update the data in CUM to advance over an argument of mode MODE
6744 and data type TYPE. (TYPE is null for libcalls where that information
6745 may not be available.) */
6748 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6749 const_tree type, HOST_WIDE_INT bytes,
6750 HOST_WIDE_INT words)
6766 cum->words += words;
6767 cum->nregs -= words;
6768 cum->regno += words;
6770 if (cum->nregs <= 0)
6778 /* OImode shouldn't be used directly. */
6782 if (cum->float_in_sse < 2)
6785 if (cum->float_in_sse < 1)
6802 if (!type || !AGGREGATE_TYPE_P (type))
6804 cum->sse_words += words;
6805 cum->sse_nregs -= 1;
6806 cum->sse_regno += 1;
6807 if (cum->sse_nregs <= 0)
6821 if (!type || !AGGREGATE_TYPE_P (type))
6823 cum->mmx_words += words;
6824 cum->mmx_nregs -= 1;
6825 cum->mmx_regno += 1;
6826 if (cum->mmx_nregs <= 0)
6837 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6838 const_tree type, HOST_WIDE_INT words, bool named)
6840 int int_nregs, sse_nregs;
6842 /* Unnamed 256bit vector mode parameters are passed on the stack. */
6843 if (!named && VALID_AVX256_REG_MODE (mode))
6846 if (examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
6847 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
6849 cum->nregs -= int_nregs;
6850 cum->sse_nregs -= sse_nregs;
6851 cum->regno += int_nregs;
6852 cum->sse_regno += sse_nregs;
6856 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
6857 cum->words = (cum->words + align - 1) & ~(align - 1);
6858 cum->words += words;
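/* A small example of the bookkeeping above, assuming a fresh SysV
   CUMULATIVE_ARGS: advancing over (int, double) consumes one integer
   and one SSE register, leaving cum->regno == 1, cum->sse_regno == 1,
   cum->nregs == 5 and cum->sse_nregs == 7; only arguments that no
   longer fit in the remaining registers take the stack path and bump
   cum->words.  */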
6863 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
6864 HOST_WIDE_INT words)
6866 /* Otherwise, this should be passed indirectly. */
6867 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
6869 cum->words += words;
6877 /* Update the data in CUM to advance over an argument of mode MODE and
6878 data type TYPE. (TYPE is null for libcalls where that information
6879 may not be available.) */
6882 ix86_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6883 const_tree type, bool named)
6885 HOST_WIDE_INT bytes, words;
6887 if (mode == BLKmode)
6888 bytes = int_size_in_bytes (type);
6890 bytes = GET_MODE_SIZE (mode);
6891 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6894 mode = type_natural_mode (type, NULL);
6896 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6897 function_arg_advance_ms_64 (cum, bytes, words);
6898 else if (TARGET_64BIT)
6899 function_arg_advance_64 (cum, mode, type, words, named);
6901 function_arg_advance_32 (cum, mode, type, bytes, words);
6904 /* Define where to put the arguments to a function.
6905 Value is zero to push the argument on the stack,
6906 or a hard register in which to store the argument.
6908 MODE is the argument's machine mode.
6909 TYPE is the data type of the argument (as a tree).
6910 This is null for libcalls where that information may
6912 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6913 the preceding args and about the function being called.
6914 NAMED is nonzero if this argument is a named parameter
6915 (otherwise it is an extra parameter matching an ellipsis). */
6918 function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6919 enum machine_mode orig_mode, const_tree type,
6920 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
6922 static bool warnedsse, warnedmmx;
6924 /* Avoid the AL settings for the Unix64 ABI. */
6925 if (mode == VOIDmode)
6941 if (words <= cum->nregs)
6943 int regno = cum->regno;
6945 /* Fastcall allocates the first two DWORD (SImode) or
6946 smaller arguments to ECX and EDX if it isn't an aggregate type. */
6952 || (type && AGGREGATE_TYPE_P (type)))
6955 /* ECX, not EAX, is the first allocated register. */
6956 if (regno == AX_REG)
6959 return gen_rtx_REG (mode, regno);
6964 if (cum->float_in_sse < 2)
6967 if (cum->float_in_sse < 1)
6971 /* In 32bit, we pass TImode in xmm registers. */
6978 if (!type || !AGGREGATE_TYPE_P (type))
6980 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
6983 warning (0, "SSE vector argument without SSE enabled "
6987 return gen_reg_or_parallel (mode, orig_mode,
6988 cum->sse_regno + FIRST_SSE_REG);
6993 /* OImode shouldn't be used directly. */
7002 if (!type || !AGGREGATE_TYPE_P (type))
7005 return gen_reg_or_parallel (mode, orig_mode,
7006 cum->sse_regno + FIRST_SSE_REG);
7016 if (!type || !AGGREGATE_TYPE_P (type))
7018 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
7021 warning (0, "MMX vector argument without MMX enabled "
7025 return gen_reg_or_parallel (mode, orig_mode,
7026 cum->mmx_regno + FIRST_MMX_REG);
7035 function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7036 enum machine_mode orig_mode, const_tree type, bool named)
7038 /* Handle a hidden AL argument containing the number of registers
7039 for varargs x86-64 functions. */
7040 if (mode == VOIDmode)
7041 return GEN_INT (cum->maybe_vaarg
7042 ? (cum->sse_nregs < 0
7043 ? X86_64_SSE_REGPARM_MAX
7058 /* Unnamed 256bit vector mode parameters are passed on the stack. */
7064 return construct_container (mode, orig_mode, type, 0, cum->nregs,
7066 &x86_64_int_parameter_registers [cum->regno],
7071 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7072 enum machine_mode orig_mode, bool named,
7073 HOST_WIDE_INT bytes)
7077 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
7078 We use the value -2 to specify that the current function call is MS_ABI. */
7079 if (mode == VOIDmode)
7080 return GEN_INT (-2);
7082 /* If we've run out of registers, it goes on the stack. */
7083 if (cum->nregs == 0)
7086 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
7088 /* Only floating point modes are passed in anything but integer regs. */
7089 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
7092 regno = cum->regno + FIRST_SSE_REG;
7097 /* Unnamed floating parameters are passed in both the
7098 SSE and integer registers. */
7099 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
7100 t2 = gen_rtx_REG (mode, regno);
7101 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
7102 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
7103 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
7106 /* Handle aggregate types passed in registers. */
7107 if (orig_mode == BLKmode)
7109 if (bytes > 0 && bytes <= 8)
7110 mode = (bytes > 4 ? DImode : SImode);
7111 if (mode == BLKmode)
7115 return gen_reg_or_parallel (mode, orig_mode, regno);
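/* For reference, the Microsoft x64 convention this implements assigns
   argument slots positionally: slot 0 -> rcx or xmm0, slot 1 -> rdx
   or xmm1, slot 2 -> r8 or xmm2, slot 3 -> r9 or xmm3, with
   everything past the fourth slot on the stack; cum->regno indexes
   that single sequence for both register files.  */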
7118 /* Return where to put the arguments to a function.
7119 Return zero to push the argument on the stack, or a hard register in which to store the argument.
7121 MODE is the argument's machine mode. TYPE is the data type of the
7122 argument. It is null for libcalls where that information may not be
7123 available. CUM gives information about the preceding args and about
7124 the function being called. NAMED is nonzero if this argument is a
7125 named parameter (otherwise it is an extra parameter matching an
7129 ix86_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
7130 const_tree type, bool named)
7132 enum machine_mode mode = omode;
7133 HOST_WIDE_INT bytes, words;
7136 if (mode == BLKmode)
7137 bytes = int_size_in_bytes (type);
7139 bytes = GET_MODE_SIZE (mode);
7140 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7142 /* To simplify the code below, represent vector types with a vector mode
7143 even if MMX/SSE are not active. */
7144 if (type && TREE_CODE (type) == VECTOR_TYPE)
7145 mode = type_natural_mode (type, cum);
7147 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7148 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
7149 else if (TARGET_64BIT)
7150 arg = function_arg_64 (cum, mode, omode, type, named);
7152 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
7154 if (TARGET_VZEROUPPER && function_pass_avx256_p (arg))
7156 /* This argument uses 256bit AVX modes. */
7158 cfun->machine->callee_pass_avx256_p = true;
7160 cfun->machine->caller_pass_avx256_p = true;
7166 /* A C expression that indicates when an argument must be passed by
7167 reference. If nonzero for an argument, a copy of that argument is
7168 made in memory and a pointer to the argument is passed instead of
7169 the argument itself. The pointer is passed in whatever way is
7170 appropriate for passing a pointer to that type. */
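/* For example, under the Windows x64 rules handled below,
   struct { char c[3]; } (size 3) is passed by reference, while
   struct { int a, b; } (size 8) is passed by value in a register;
   an aggregate of any size other than 1, 2, 4 or 8 bytes, and
   __m128, go by reference.  */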
7173 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
7174 enum machine_mode mode ATTRIBUTE_UNUSED,
7175 const_tree type, bool named ATTRIBUTE_UNUSED)
7177 /* See Windows x64 Software Convention. */
7178 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7180 int msize = (int) GET_MODE_SIZE (mode);
7183 /* Arrays are passed by reference. */
7184 if (TREE_CODE (type) == ARRAY_TYPE)
7187 if (AGGREGATE_TYPE_P (type))
7189 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
7190 are passed by reference. */
7191 msize = int_size_in_bytes (type);
7195 /* __m128 is passed by reference. */
7197 case 1: case 2: case 4: case 8:
7203 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
7209 /* Return true when TYPE should be 128bit aligned for 32bit argument
7210 passing ABI. XXX: This function is obsolete and is only used for
7211 checking psABI compatibility with previous versions of GCC. */
7214 ix86_compat_aligned_value_p (const_tree type)
7216 enum machine_mode mode = TYPE_MODE (type);
7217 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
7221 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
7223 if (TYPE_ALIGN (type) < 128)
7226 if (AGGREGATE_TYPE_P (type))
7228 /* Walk the aggregates recursively. */
7229 switch (TREE_CODE (type))
7233 case QUAL_UNION_TYPE:
7237 /* Walk all the structure fields. */
7238 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7240 if (TREE_CODE (field) == FIELD_DECL
7241 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
7248 /* Just for use if some languages pass arrays by value. */
7249 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
7260 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
7261 XXX: This function is obsolete and is only used for checking psABI
7262 compatibility with previous versions of GCC. */
7265 ix86_compat_function_arg_boundary (enum machine_mode mode,
7266 const_tree type, unsigned int align)
7268 /* In 32bit, only _Decimal128 and __float128 are aligned to their
7269 natural boundaries. */
7270 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
7272 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
7273 make an exception for SSE modes since these require 128bit
7276 The handling here differs from field_alignment. ICC aligns MMX
7277 arguments to 4 byte boundaries, while structure fields are aligned
7278 to 8 byte boundaries. */
7281 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
7282 align = PARM_BOUNDARY;
7286 if (!ix86_compat_aligned_value_p (type))
7287 align = PARM_BOUNDARY;
7290 if (align > BIGGEST_ALIGNMENT)
7291 align = BIGGEST_ALIGNMENT;
7295 /* Return true when TYPE should be 128bit aligned for the 32bit argument passing ABI. */
7299 ix86_contains_aligned_value_p (const_tree type)
7301 enum machine_mode mode = TYPE_MODE (type);
7303 if (mode == XFmode || mode == XCmode)
7306 if (TYPE_ALIGN (type) < 128)
7309 if (AGGREGATE_TYPE_P (type))
7311 /* Walk the aggregates recursively. */
7312 switch (TREE_CODE (type))
7316 case QUAL_UNION_TYPE:
7320 /* Walk all the structure fields. */
7321 for (field = TYPE_FIELDS (type);
7323 field = DECL_CHAIN (field))
7325 if (TREE_CODE (field) == FIELD_DECL
7326 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
7333 /* Just for use if some languages pass arrays by value. */
7334 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
7343 return TYPE_ALIGN (type) >= 128;
7348 /* Return the alignment boundary, in bits, of an argument with the
7349 specified mode and type. */
7352 ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
7357 /* Since the main variant type is used for the call, convert the
7358 type to its main variant. */
7359 type = TYPE_MAIN_VARIANT (type);
7360 align = TYPE_ALIGN (type);
7363 align = GET_MODE_ALIGNMENT (mode);
7364 if (align < PARM_BOUNDARY)
7365 align = PARM_BOUNDARY;
7369 unsigned int saved_align = align;
7373 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
7376 if (mode == XFmode || mode == XCmode)
7377 align = PARM_BOUNDARY;
7379 else if (!ix86_contains_aligned_value_p (type))
7380 align = PARM_BOUNDARY;
7383 align = PARM_BOUNDARY;
7388 && align != ix86_compat_function_arg_boundary (mode, type,
7392 inform (input_location,
7393 "The ABI for passing parameters with %d-byte"
7394 " alignment has changed in GCC 4.6",
7395 align / BITS_PER_UNIT);
7402 /* Return true if REGNO is a possible register number for a function value. */
7405 ix86_function_value_regno_p (const unsigned int regno)
7412 case FIRST_FLOAT_REG:
7413 /* TODO: The function should depend on current function ABI but
7414 builtins.c would need updating then. Therefore we use the default ABI. */
7416 if (TARGET_64BIT && ix86_abi == MS_ABI)
7418 return TARGET_FLOAT_RETURNS_IN_80387;
7424 if (TARGET_MACHO || TARGET_64BIT)
7432 /* Define how to find the value returned by a function.
7433 VALTYPE is the data type of the value (as a tree).
7434 If the precise function being called is known, FUNC is its FUNCTION_DECL;
7435 otherwise, FUNC is 0. */
7438 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
7439 const_tree fntype, const_tree fn)
7443 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
7444 we normally prevent this case when mmx is not available. However
7445 some ABIs may require the result to be returned like DImode. */
7446 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7447 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
7449 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
7450 we prevent this case when sse is not available. However some ABIs
7451 may require the result to be returned like integer TImode. */
7452 else if (mode == TImode
7453 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7454 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
7456 /* 32-byte vector modes in %ymm0. */
7457 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
7458 regno = TARGET_AVX ? FIRST_SSE_REG : 0;
7460 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
7461 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
7462 regno = FIRST_FLOAT_REG;
7464 /* Most things go in %eax. */
7467 /* Override FP return register with %xmm0 for local functions when
7468 SSE math is enabled or for functions with sseregparm attribute. */
7469 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
7471 int sse_level = ix86_function_sseregparm (fntype, fn, false);
7472 if ((sse_level >= 1 && mode == SFmode)
7473 || (sse_level == 2 && mode == DFmode))
7474 regno = FIRST_SSE_REG;
7477 /* OImode shouldn't be used directly. */
7478 gcc_assert (mode != OImode);
7480 return gen_rtx_REG (orig_mode, regno);
7484 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
7489 /* Handle libcalls, which don't provide a type node. */
7490 if (valtype == NULL)
7502 return gen_rtx_REG (mode, FIRST_SSE_REG);
7505 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
7509 return gen_rtx_REG (mode, AX_REG);
7513 ret = construct_container (mode, orig_mode, valtype, 1,
7514 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
7515 x86_64_int_return_registers, 0);
7517 /* For zero sized structures, construct_container returns NULL, but we
7518 need to keep the rest of the compiler happy by returning a meaningful value. */
7520 ret = gen_rtx_REG (orig_mode, AX_REG);
7526 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
7528 unsigned int regno = AX_REG;
7532 switch (GET_MODE_SIZE (mode))
7535 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7536 && !COMPLEX_MODE_P (mode))
7537 regno = FIRST_SSE_REG;
7541 if (mode == SFmode || mode == DFmode)
7542 regno = FIRST_SSE_REG;
7548 return gen_rtx_REG (orig_mode, regno);
7552 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
7553 enum machine_mode orig_mode, enum machine_mode mode)
7555 const_tree fn, fntype;
7558 if (fntype_or_decl && DECL_P (fntype_or_decl))
7559 fn = fntype_or_decl;
7560 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
7562 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
7563 return function_value_ms_64 (orig_mode, mode);
7564 else if (TARGET_64BIT)
7565 return function_value_64 (orig_mode, mode, valtype);
7567 return function_value_32 (orig_mode, mode, fntype, fn);
7571 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
7572 bool outgoing ATTRIBUTE_UNUSED)
7574 enum machine_mode mode, orig_mode;
7576 orig_mode = TYPE_MODE (valtype);
7577 mode = type_natural_mode (valtype, NULL);
7578 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
7582 ix86_libcall_value (enum machine_mode mode)
7584 return ix86_function_value_1 (NULL, NULL, mode, mode);
7587 /* Return true iff TYPE is returned in memory. */
7589 static bool ATTRIBUTE_UNUSED
7590 return_in_memory_32 (const_tree type, enum machine_mode mode)
7594 if (mode == BLKmode)
7597 size = int_size_in_bytes (type);
7599 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
7602 if (VECTOR_MODE_P (mode) || mode == TImode)
7604 /* User-created vectors small enough to fit in EAX. */
7608 /* MMX/3dNow values are returned in MM0,
7609 except when it doesn't exist or the ABI prescribes otherwise. */
7611 return !TARGET_MMX || TARGET_VECT8_RETURNS;
7613 /* SSE values are returned in XMM0, except when it doesn't exist. */
7617 /* AVX values are returned in YMM0, except when it doesn't exist. */
7628 /* OImode shouldn't be used directly. */
7629 gcc_assert (mode != OImode);
7634 static bool ATTRIBUTE_UNUSED
7635 return_in_memory_64 (const_tree type, enum machine_mode mode)
7637 int needed_intregs, needed_sseregs;
7638 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
7641 static bool ATTRIBUTE_UNUSED
7642 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
7644 HOST_WIDE_INT size = int_size_in_bytes (type);
7646 /* __m128 is returned in xmm0. */
7647 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7648 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
7651 /* Otherwise, the size must be exactly 1, 2, 4, or 8 bytes. */
7652 return size != 1 && size != 2 && size != 4 && size != 8;
7656 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7658 #ifdef SUBTARGET_RETURN_IN_MEMORY
7659 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
7661 const enum machine_mode mode = type_natural_mode (type, NULL);
7665 if (ix86_function_type_abi (fntype) == MS_ABI)
7666 return return_in_memory_ms_64 (type, mode);
7668 return return_in_memory_64 (type, mode);
7671 return return_in_memory_32 (type, mode);
7675 /* When returning SSE vector types, we have a choice of either
7676 (1) being abi incompatible with a -march switch, or
7677 (2) generating an error.
7678 Given no good solution, I think the safest thing is one warning.
7679 The user won't be able to use -Werror, but....
7681 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
7682 called in response to actually generating a caller or callee that
7683 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
7684 via aggregate_value_p for general type probing from tree-ssa. */
7687 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
7689 static bool warnedsse, warnedmmx;
7691 if (!TARGET_64BIT && type)
7693 /* Look at the return type of the function, not the function type. */
7694 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
7696 if (!TARGET_SSE && !warnedsse)
7699 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7702 warning (0, "SSE vector return without SSE enabled "
7707 if (!TARGET_MMX && !warnedmmx)
7709 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7712 warning (0, "MMX vector return without MMX enabled "
7722 /* Create the va_list data type. */
7724 /* Return the calling convention specific va_list data type.
7725 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
7728 ix86_build_builtin_va_list_abi (enum calling_abi abi)
7730 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
7732 /* For i386 we use a plain pointer to the argument area. */
7733 if (!TARGET_64BIT || abi == MS_ABI)
7734 return build_pointer_type (char_type_node);
7736 record = lang_hooks.types.make_type (RECORD_TYPE);
7737 type_decl = build_decl (BUILTINS_LOCATION,
7738 TYPE_DECL, get_identifier ("__va_list_tag"), record);
7740 f_gpr = build_decl (BUILTINS_LOCATION,
7741 FIELD_DECL, get_identifier ("gp_offset"),
7742 unsigned_type_node);
7743 f_fpr = build_decl (BUILTINS_LOCATION,
7744 FIELD_DECL, get_identifier ("fp_offset"),
7745 unsigned_type_node);
7746 f_ovf = build_decl (BUILTINS_LOCATION,
7747 FIELD_DECL, get_identifier ("overflow_arg_area"),
7749 f_sav = build_decl (BUILTINS_LOCATION,
7750 FIELD_DECL, get_identifier ("reg_save_area"),
7753 va_list_gpr_counter_field = f_gpr;
7754 va_list_fpr_counter_field = f_fpr;
7756 DECL_FIELD_CONTEXT (f_gpr) = record;
7757 DECL_FIELD_CONTEXT (f_fpr) = record;
7758 DECL_FIELD_CONTEXT (f_ovf) = record;
7759 DECL_FIELD_CONTEXT (f_sav) = record;
7761 TYPE_STUB_DECL (record) = type_decl;
7762 TYPE_NAME (record) = type_decl;
7763 TYPE_FIELDS (record) = f_gpr;
7764 DECL_CHAIN (f_gpr) = f_fpr;
7765 DECL_CHAIN (f_fpr) = f_ovf;
7766 DECL_CHAIN (f_ovf) = f_sav;
7768 layout_type (record);
7770 /* The correct type is an array type of one element. */
7771 return build_array_type (record, build_index_type (size_zero_node));
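/* At the C level, the record built above matches the psABI's

     typedef struct {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __va_list_tag;
     typedef __va_list_tag __builtin_va_list[1];

   gp_offset and fp_offset index into reg_save_area, while
   overflow_arg_area walks the stack-passed arguments.  */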
7774 /* Set up the builtin va_list data type and for 64-bit the additional
7775 calling convention specific va_list data types. */
7778 ix86_build_builtin_va_list (void)
7780 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
7782 /* Initialize ABI specific va_list builtin types. */
7786 if (ix86_abi == MS_ABI)
7788 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
7789 if (TREE_CODE (t) != RECORD_TYPE)
7790 t = build_variant_type_copy (t);
7791 sysv_va_list_type_node = t;
7796 if (TREE_CODE (t) != RECORD_TYPE)
7797 t = build_variant_type_copy (t);
7798 sysv_va_list_type_node = t;
7800 if (ix86_abi != MS_ABI)
7802 t = ix86_build_builtin_va_list_abi (MS_ABI);
7803 if (TREE_CODE (t) != RECORD_TYPE)
7804 t = build_variant_type_copy (t);
7805 ms_va_list_type_node = t;
7810 if (TREE_CODE (t) != RECORD_TYPE)
7811 t = build_variant_type_copy (t);
7812 ms_va_list_type_node = t;
7819 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
7822 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
7828 /* GPR size of varargs save area. */
7829 if (cfun->va_list_gpr_size)
7830 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
7832 ix86_varargs_gpr_size = 0;
7834 /* FPR size of varargs save area. We don't need it if we don't pass
7835 anything in SSE registers. */
7836 if (TARGET_SSE && cfun->va_list_fpr_size)
7837 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
7839 ix86_varargs_fpr_size = 0;
7841 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
7844 save_area = frame_pointer_rtx;
7845 set = get_varargs_alias_set ();
7847 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
7848 if (max > X86_64_REGPARM_MAX)
7849 max = X86_64_REGPARM_MAX;
7851 for (i = cum->regno; i < max; i++)
7853 mem = gen_rtx_MEM (Pmode,
7854 plus_constant (save_area, i * UNITS_PER_WORD));
7855 MEM_NOTRAP_P (mem) = 1;
7856 set_mem_alias_set (mem, set);
7857 emit_move_insn (mem, gen_rtx_REG (Pmode,
7858 x86_64_int_parameter_registers[i]));
7861 if (ix86_varargs_fpr_size)
7863 enum machine_mode smode;
7866 /* Now emit code to save SSE registers. The AX parameter contains the number
7867 of SSE parameter registers used to call this function, though all we
7868 actually check here is the zero/non-zero status. */
7870 label = gen_label_rtx ();
7871 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
7872 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
7875 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
7876 we used movdqa (i.e. TImode) instead? Perhaps even better would
7877 be if we could determine the real mode of the data, via a hook
7878 into pass_stdarg. Ignore all that for now. */
7880 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
7881 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
7883 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
7884 if (max > X86_64_SSE_REGPARM_MAX)
7885 max = X86_64_SSE_REGPARM_MAX;
7887 for (i = cum->sse_regno; i < max; ++i)
7889 mem = plus_constant (save_area, i * 16 + ix86_varargs_gpr_size);
7890 mem = gen_rtx_MEM (smode, mem);
7891 MEM_NOTRAP_P (mem) = 1;
7892 set_mem_alias_set (mem, set);
7893 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
7895 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
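/* The register save area laid out by the two loops above is what
   ix86_va_start later assumes: up to X86_64_REGPARM_MAX * 8 == 48
   bytes of GPR slots followed by up to X86_64_SSE_REGPARM_MAX * 16
   == 128 bytes of XMM slots.  */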
7903 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
7905 alias_set_type set = get_varargs_alias_set ();
7908 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
7912 mem = gen_rtx_MEM (Pmode,
7913 plus_constant (virtual_incoming_args_rtx,
7914 i * UNITS_PER_WORD));
7915 MEM_NOTRAP_P (mem) = 1;
7916 set_mem_alias_set (mem, set);
7918 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
7919 emit_move_insn (mem, reg);
7924 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7925 tree type, int *pretend_size ATTRIBUTE_UNUSED,
7928 CUMULATIVE_ARGS next_cum;
7931 /* This argument doesn't appear to be used anymore. Which is good,
7932 because the old code here didn't suppress rtl generation. */
7933 gcc_assert (!no_rtl);
7938 fntype = TREE_TYPE (current_function_decl);
7940 /* For varargs, we do not want to skip the dummy va_dcl argument.
7941 For stdargs, we do want to skip the last named argument. */
7943 if (stdarg_p (fntype))
7944 ix86_function_arg_advance (&next_cum, mode, type, true);
7946 if (cum->call_abi == MS_ABI)
7947 setup_incoming_varargs_ms_64 (&next_cum);
7949 setup_incoming_varargs_64 (&next_cum);
7952 /* Check if TYPE is of kind va_list char *. */
7955 is_va_list_char_pointer (tree type)
7959 /* For 32-bit it is always true. */
7962 canonic = ix86_canonical_va_list_type (type);
7963 return (canonic == ms_va_list_type_node
7964 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
7967 /* Implement va_start. */
7970 ix86_va_start (tree valist, rtx nextarg)
7972 HOST_WIDE_INT words, n_gpr, n_fpr;
7973 tree f_gpr, f_fpr, f_ovf, f_sav;
7974 tree gpr, fpr, ovf, sav, t;
7978 if (flag_split_stack
7979 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7981 unsigned int scratch_regno;
7983 /* When we are splitting the stack, we can't refer to the stack
7984 arguments using internal_arg_pointer, because they may be on
7985 the old stack. The split stack prologue will arrange to
7986 leave a pointer to the old stack arguments in a scratch
7987 register, which we here copy to a pseudo-register. The split
7988 stack prologue can't set the pseudo-register directly because
7989 it (the prologue) runs before any registers have been saved. */
7991 scratch_regno = split_stack_prologue_scratch_regno ();
7992 if (scratch_regno != INVALID_REGNUM)
7996 reg = gen_reg_rtx (Pmode);
7997 cfun->machine->split_stack_varargs_pointer = reg;
8000 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
8004 push_topmost_sequence ();
8005 emit_insn_after (seq, entry_of_function ());
8006 pop_topmost_sequence ();
8010 /* Only the 64bit target needs something special. */
8011 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8013 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8014 std_expand_builtin_va_start (valist, nextarg);
8019 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
8020 next = expand_binop (ptr_mode, add_optab,
8021 cfun->machine->split_stack_varargs_pointer,
8022 crtl->args.arg_offset_rtx,
8023 NULL_RTX, 0, OPTAB_LIB_WIDEN);
8024 convert_move (va_r, next, 0);
8029 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8030 f_fpr = DECL_CHAIN (f_gpr);
8031 f_ovf = DECL_CHAIN (f_fpr);
8032 f_sav = DECL_CHAIN (f_ovf);
8034 valist = build_simple_mem_ref (valist);
8035 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
8036 /* The following should be folded into the MEM_REF offset. */
8037 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
8039 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
8041 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
8043 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
8046 /* Count number of gp and fp argument registers used. */
8047 words = crtl->args.info.words;
8048 n_gpr = crtl->args.info.regno;
8049 n_fpr = crtl->args.info.sse_regno;
8051 if (cfun->va_list_gpr_size)
8053 type = TREE_TYPE (gpr);
8054 t = build2 (MODIFY_EXPR, type,
8055 gpr, build_int_cst (type, n_gpr * 8));
8056 TREE_SIDE_EFFECTS (t) = 1;
8057 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8060 if (TARGET_SSE && cfun->va_list_fpr_size)
8062 type = TREE_TYPE (fpr);
8063 t = build2 (MODIFY_EXPR, type, fpr,
8064 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
8065 TREE_SIDE_EFFECTS (t) = 1;
8066 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
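/* E.g. for "void f (int a, ...)" one GPR and no SSE registers are
   consumed by the named argument, so gp_offset starts at 8 and
   fp_offset at 8 * X86_64_REGPARM_MAX == 48; va_arg then serves GPR
   arguments from reg_save_area + gp_offset upwards.  */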
8069 /* Find the overflow area. */
8070 type = TREE_TYPE (ovf);
8071 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8072 ovf_rtx = crtl->args.internal_arg_pointer;
8074 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
8075 t = make_tree (type, ovf_rtx);
8077 t = build2 (POINTER_PLUS_EXPR, type, t,
8078 size_int (words * UNITS_PER_WORD));
8079 t = build2 (MODIFY_EXPR, type, ovf, t);
8080 TREE_SIDE_EFFECTS (t) = 1;
8081 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8083 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
8085 /* Find the register save area.
8086 The function prologue saves it right above the stack frame. */
8087 type = TREE_TYPE (sav);
8088 t = make_tree (type, frame_pointer_rtx);
8089 if (!ix86_varargs_gpr_size)
8090 t = build2 (POINTER_PLUS_EXPR, type, t,
8091 size_int (-8 * X86_64_REGPARM_MAX));
8092 t = build2 (MODIFY_EXPR, type, sav, t);
8093 TREE_SIDE_EFFECTS (t) = 1;
8094 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8098 /* Implement va_arg. */
8101 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
8104 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
8105 tree f_gpr, f_fpr, f_ovf, f_sav;
8106 tree gpr, fpr, ovf, sav, t;
8108 tree lab_false, lab_over = NULL_TREE;
8113 enum machine_mode nat_mode;
8114 unsigned int arg_boundary;
8116 /* Only the 64bit target needs something special. */
8117 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8118 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
8120 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8121 f_fpr = DECL_CHAIN (f_gpr);
8122 f_ovf = DECL_CHAIN (f_fpr);
8123 f_sav = DECL_CHAIN (f_ovf);
8125 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
8126 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
8127 valist = build_va_arg_indirect_ref (valist);
8128 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
8129 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
8130 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
8132 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
8134 type = build_pointer_type (type);
8135 size = int_size_in_bytes (type);
8136 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
8138 nat_mode = type_natural_mode (type, NULL);
8147 /* Unnamed 256bit vector mode parameters are passed on the stack. */
8148 if (!TARGET_64BIT_MS_ABI)
8155 container = construct_container (nat_mode, TYPE_MODE (type),
8156 type, 0, X86_64_REGPARM_MAX,
8157 X86_64_SSE_REGPARM_MAX, intreg,
8162 /* Pull the value out of the saved registers. */
8164 addr = create_tmp_var (ptr_type_node, "addr");
8168 int needed_intregs, needed_sseregs;
8170 tree int_addr, sse_addr;
8172 lab_false = create_artificial_label (UNKNOWN_LOCATION);
8173 lab_over = create_artificial_label (UNKNOWN_LOCATION);
8175 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
8177 need_temp = (!REG_P (container)
8178 && ((needed_intregs && TYPE_ALIGN (type) > 64)
8179 || TYPE_ALIGN (type) > 128));
8181 /* If we are passing a structure, verify that it is a consecutive block
8182 in the register save area. If not, we need to do moves. */
8183 if (!need_temp && !REG_P (container))
8185 /* Verify that all registers are strictly consecutive. */
8186 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
8190 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8192 rtx slot = XVECEXP (container, 0, i);
8193 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
8194 || INTVAL (XEXP (slot, 1)) != i * 16)
8202 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8204 rtx slot = XVECEXP (container, 0, i);
8205 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
8206 || INTVAL (XEXP (slot, 1)) != i * 8)
8218 int_addr = create_tmp_var (ptr_type_node, "int_addr");
8219 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
8222 /* First ensure that we fit completely in registers. */
8225 t = build_int_cst (TREE_TYPE (gpr),
8226 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
8227 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
8228 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8229 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8230 gimplify_and_add (t, pre_p);
8234 t = build_int_cst (TREE_TYPE (fpr),
8235 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
8236 + X86_64_REGPARM_MAX * 8);
8237 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
8238 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8239 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8240 gimplify_and_add (t, pre_p);
8243 /* Compute index to start of area used for integer regs. */
8246 /* int_addr = gpr + sav; */
8247 t = fold_convert (sizetype, gpr);
8248 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
8249 gimplify_assign (int_addr, t, pre_p);
8253 /* sse_addr = fpr + sav; */
8254 t = fold_convert (sizetype, fpr);
8255 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
8256 gimplify_assign (sse_addr, t, pre_p);
8260 int i, prev_size = 0;
8261 tree temp = create_tmp_var (type, "va_arg_tmp");
8264 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
8265 gimplify_assign (addr, t, pre_p);
8267 for (i = 0; i < XVECLEN (container, 0); i++)
8269 rtx slot = XVECEXP (container, 0, i);
8270 rtx reg = XEXP (slot, 0);
8271 enum machine_mode mode = GET_MODE (reg);
8277 tree dest_addr, dest;
8278 int cur_size = GET_MODE_SIZE (mode);
8280 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
8281 prev_size = INTVAL (XEXP (slot, 1));
8282 if (prev_size + cur_size > size)
8284 cur_size = size - prev_size;
8285 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
8286 if (mode == BLKmode)
8289 piece_type = lang_hooks.types.type_for_mode (mode, 1);
8290 if (mode == GET_MODE (reg))
8291 addr_type = build_pointer_type (piece_type);
8293 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8295 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8298 if (SSE_REGNO_P (REGNO (reg)))
8300 src_addr = sse_addr;
8301 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
8305 src_addr = int_addr;
8306 src_offset = REGNO (reg) * 8;
8308 src_addr = fold_convert (addr_type, src_addr);
8309 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
8310 size_int (src_offset));
8312 dest_addr = fold_convert (daddr_type, addr);
8313 dest_addr = fold_build2 (POINTER_PLUS_EXPR, daddr_type, dest_addr,
8314 size_int (prev_size));
8315 if (cur_size == GET_MODE_SIZE (mode))
8317 src = build_va_arg_indirect_ref (src_addr);
8318 dest = build_va_arg_indirect_ref (dest_addr);
8320 gimplify_assign (dest, src, pre_p);
8325 = build_call_expr (implicit_built_in_decls[BUILT_IN_MEMCPY],
8326 3, dest_addr, src_addr,
8327 size_int (cur_size));
8328 gimplify_and_add (copy, pre_p);
8330 prev_size += cur_size;
8336 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
8337 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
8338 gimplify_assign (gpr, t, pre_p);
8343 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
8344 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
8345 gimplify_assign (fpr, t, pre_p);
8348 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
8350 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
8353 /* ... otherwise out of the overflow area. */
8355 /* When we align a parameter on the stack for the caller, if its
8356 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
8357 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We match the callee
8358 here with the caller. */
8359 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
8360 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
8361 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
8363 /* Care for on-stack alignment if needed. */
8364 if (arg_boundary <= 64 || size == 0)
8368 HOST_WIDE_INT align = arg_boundary / 8;
8369 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
8370 size_int (align - 1));
8371 t = fold_convert (sizetype, t);
8372 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
8374 t = fold_convert (TREE_TYPE (ovf), t);
8377 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
8378 gimplify_assign (addr, t, pre_p);
8380 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
8381 size_int (rsize * UNITS_PER_WORD));
8382 gimplify_assign (unshare_expr (ovf), t, pre_p);
8385 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
8387 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
8388 addr = fold_convert (ptrtype, addr);
8391 addr = build_va_arg_indirect_ref (addr);
8392 return build_va_arg_indirect_ref (addr);
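/* Illustrative sketch (hedged, not the emitted GIMPLE itself): for a
   small integer argument the sequence built above behaves like the
   textbook System V x86-64 va_arg algorithm, where 48 ==
   X86_64_REGPARM_MAX * 8 is the size of the GPR part of the save area:

       if (ap->gp_offset < 48)                 -- still fits in a register
         {
           addr = ap->reg_save_area + ap->gp_offset;
           ap->gp_offset += 8;
         }
       else                                    -- take it from the stack
         {
           addr = ap->overflow_arg_area;       -- after any realignment
           ap->overflow_arg_area += 8;
         }
       result = *(int *) addr;

   lab_false/lab_over above mark the two arms of that branch.  */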
8395 /* Return true if OPNUM's MEM should be matched
8396 in movabs* patterns. */
8399 ix86_check_movabs (rtx insn, int opnum)
8403 set = PATTERN (insn);
8404 if (GET_CODE (set) == PARALLEL)
8405 set = XVECEXP (set, 0, 0);
8406 gcc_assert (GET_CODE (set) == SET);
8407 mem = XEXP (set, opnum);
8408 while (GET_CODE (mem) == SUBREG)
8409 mem = SUBREG_REG (mem);
8410 gcc_assert (MEM_P (mem));
8411 return volatile_ok || !MEM_VOLATILE_P (mem);
8414 /* Initialize the table of extra 80387 mathematical constants. */
8417 init_ext_80387_constants (void)
8419 static const char * cst[5] =
8421 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
8422 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
8423 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
8424 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
8425 "3.1415926535897932385128089594061862044", /* 4: fldpi */
8429 for (i = 0; i < 5; i++)
8431 real_from_string (&ext_80387_constants_table[i], cst[i]);
8432 /* Ensure each constant is rounded to XFmode precision. */
8433 real_convert (&ext_80387_constants_table[i],
8434 XFmode, &ext_80387_constants_table[i]);
8437 ext_80387_constants_init = 1;
8440 /* Return non-zero if the constant is something that
8441 can be loaded with a special instruction. */
8444 standard_80387_constant_p (rtx x)
8446 enum machine_mode mode = GET_MODE (x);
8450 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
8453 if (x == CONST0_RTX (mode))
8455 if (x == CONST1_RTX (mode))
8458 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8460 /* For XFmode constants, try to find a special 80387 instruction when
8461 optimizing for size or on those CPUs that benefit from them. */
8463 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
8467 if (! ext_80387_constants_init)
8468 init_ext_80387_constants ();
8470 for (i = 0; i < 5; i++)
8471 if (real_identical (&r, &ext_80387_constants_table[i]))
8475 /* A load of the constant -0.0 or -1.0 will be split into an
8476 fldz;fchs or fld1;fchs sequence. */
8477 if (real_isnegzero (&r))
8479 if (real_identical (&r, &dconstm1))
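/* For reference (hedged summary of the code above and the opcode table
   below): the nonzero return values correspond to these single-insn
   loads:

       1 -> fldz   (+0.0)         6 -> fldl2t (log2(10))
       2 -> fld1   (+1.0)         7 -> fldpi  (pi)
       3 -> fldlg2 (log10(2))     8 -> fldz; fchs  (-0.0)
       4 -> fldln2 (ln(2))        9 -> fld1; fchs  (-1.0)
       5 -> fldl2e (log2(e))

   standard_80387_constant_opcode performs this translation.  */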
8485 /* Return the opcode of the special instruction to be used to load the constant X. */
8489 standard_80387_constant_opcode (rtx x)
8491 switch (standard_80387_constant_p (x))
8515 /* Return the CONST_DOUBLE representing the 80387 constant that is
8516 loaded by the specified special instruction. The argument IDX
8517 matches the return value from standard_80387_constant_p. */
8520 standard_80387_constant_rtx (int idx)
8524 if (! ext_80387_constants_init)
8525 init_ext_80387_constants ();
8541 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
8545 /* Return 1 if X is all 0s and 2 if X is all 1s
8546 in a supported SSE vector mode. */
8549 standard_sse_constant_p (rtx x)
8551 enum machine_mode mode = GET_MODE (x);
8553 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
8555 if (vector_all_ones_operand (x, mode))
8571 /* Return the opcode of the special instruction to be used to load the constant X. */
8575 standard_sse_constant_opcode (rtx insn, rtx x)
8577 switch (standard_sse_constant_p (x))
8580 switch (get_attr_mode (insn))
8583 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
8585 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8586 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
8588 return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0";
8590 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8591 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
8593 return TARGET_AVX ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0";
8595 return "vxorps\t%x0, %x0, %x0";
8597 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8598 return "vxorps\t%x0, %x0, %x0";
8600 return "vxorpd\t%x0, %x0, %x0";
8602 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8603 return "vxorps\t%x0, %x0, %x0";
8605 return "vpxor\t%x0, %x0, %x0";
8610 return TARGET_AVX ? "vpcmpeqd\t%0, %0, %0" : "pcmpeqd\t%0, %0";
8617 /* Return true if OP contains a symbol reference. */
8620 symbolic_reference_mentioned_p (rtx op)
8625 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
8628 fmt = GET_RTX_FORMAT (GET_CODE (op));
8629 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
8635 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
8636 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
8640 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
8647 /* Return true if it is appropriate to emit `ret' instructions in the
8648 body of a function. Do this only if the epilogue is simple, needing a
8649 couple of insns. Prior to reloading, we can't tell how many registers
8650 must be saved, so return false then. Return false if there is no frame
8651 marker to de-allocate. */
8654 ix86_can_use_return_insn_p (void)
8656 struct ix86_frame frame;
8658 if (! reload_completed || frame_pointer_needed)
8661 /* Don't allow more than 32k pop, since that's all we can do
8662 with one instruction. */
8663 if (crtl->args.pops_args && crtl->args.size >= 32768)
8666 ix86_compute_frame_layout (&frame);
8667 return (frame.stack_pointer_offset == UNITS_PER_WORD
8668 && (frame.nregs + frame.nsseregs) == 0);
8671 /* Value should be nonzero if functions must have frame pointers.
8672 Zero means the frame pointer need not be set up (and parms may
8673 be accessed via the stack pointer) in functions that seem suitable. */
8676 ix86_frame_pointer_required (void)
8678 /* If we accessed previous frames, then the generated code expects
8679 to be able to access the saved ebp value in our frame. */
8680 if (cfun->machine->accesses_prev_frame)
8683 /* Several x86 OSes need a frame pointer for other reasons,
8684 usually pertaining to setjmp. */
8685 if (SUBTARGET_FRAME_POINTER_REQUIRED)
8688 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
8689 turns off the frame pointer by default. Turn it back on now if
8690 this is not a leaf function. */
8691 if (TARGET_OMIT_LEAF_FRAME_POINTER
8692 && (!current_function_is_leaf
8693 || ix86_current_function_calls_tls_descriptor))
8696 if (crtl->profile && !flag_fentry)
8702 /* Record that the current function accesses previous call frames. */
8705 ix86_setup_frame_addresses (void)
8707 cfun->machine->accesses_prev_frame = 1;
8710 #ifndef USE_HIDDEN_LINKONCE
8711 # if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
8712 # define USE_HIDDEN_LINKONCE 1
8714 # define USE_HIDDEN_LINKONCE 0
8718 static int pic_labels_used;
8720 /* Fills in the label name that should be used for a pc thunk for
8721 the given register. */
8724 get_pc_thunk_name (char name[32], unsigned int regno)
8726 gcc_assert (!TARGET_64BIT);
8728 if (USE_HIDDEN_LINKONCE)
8729 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
8731 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
8735 /* This function generates code for -fpic that loads %ebx with
8736 the return address of the caller and then returns. */
8739 ix86_code_end (void)
8744 for (regno = AX_REG; regno <= SP_REG; regno++)
8749 if (!(pic_labels_used & (1 << regno)))
8752 get_pc_thunk_name (name, regno);
8754 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
8755 get_identifier (name),
8756 build_function_type_list (void_type_node, NULL_TREE));
8757 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
8758 NULL_TREE, void_type_node);
8759 TREE_PUBLIC (decl) = 1;
8760 TREE_STATIC (decl) = 1;
8765 switch_to_section (darwin_sections[text_coal_section]);
8766 fputs ("\t.weak_definition\t", asm_out_file);
8767 assemble_name (asm_out_file, name);
8768 fputs ("\n\t.private_extern\t", asm_out_file);
8769 assemble_name (asm_out_file, name);
8770 putc ('\n', asm_out_file);
8771 ASM_OUTPUT_LABEL (asm_out_file, name);
8772 DECL_WEAK (decl) = 1;
8776 if (USE_HIDDEN_LINKONCE)
8778 DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
8780 targetm.asm_out.unique_section (decl, 0);
8781 switch_to_section (get_named_section (decl, NULL, 0));
8783 targetm.asm_out.globalize_label (asm_out_file, name);
8784 fputs ("\t.hidden\t", asm_out_file);
8785 assemble_name (asm_out_file, name);
8786 putc ('\n', asm_out_file);
8787 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
8791 switch_to_section (text_section);
8792 ASM_OUTPUT_LABEL (asm_out_file, name);
8795 DECL_INITIAL (decl) = make_node (BLOCK);
8796 current_function_decl = decl;
8797 init_function_start (decl);
8798 first_function_block_is_cold = false;
8799 /* Make sure unwind info is emitted for the thunk if needed. */
8800 final_start_function (emit_barrier (), asm_out_file, 1);
8802 /* Pad stack IP move with 4 instructions (two NOPs count
8803 as one instruction). */
8804 if (TARGET_PAD_SHORT_FUNCTION)
8809 fputs ("\tnop\n", asm_out_file);
8812 xops[0] = gen_rtx_REG (Pmode, regno);
8813 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
8814 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
8815 fputs ("\tret\n", asm_out_file);
8816 final_end_function ();
8817 init_insn_lengths ();
8818 free_after_compilation (cfun);
8820 current_function_decl = NULL;
8823 if (flag_split_stack)
8824 file_end_indicate_split_stack ();
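/* Illustrative output (hedged): for -fpic on ia32 the loop above emits
   one thunk per PIC register actually used, e.g. for %ebx:

       __i686.get_pc_thunk.bx:
               movl    (%esp), %ebx
               ret

   The return address pushed by the caller's CALL is copied into the
   register, which gives the caller its own PC for GOT addressing.  */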
8827 /* Emit code for the SET_GOT patterns. */
8830 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
8836 if (TARGET_VXWORKS_RTP && flag_pic)
8838 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
8839 xops[2] = gen_rtx_MEM (Pmode,
8840 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
8841 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
8843 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
8844 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
8845 an unadorned address. */
8846 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
8847 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
8848 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
8852 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
8854 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
8856 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
8859 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
8862 output_asm_insn ("call\t%a2", xops);
8863 #ifdef DWARF2_UNWIND_INFO
8864 /* The call to the next label acts as a push. */
8865 if (dwarf2out_do_frame ())
8869 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8870 gen_rtx_PLUS (Pmode,
8873 RTX_FRAME_RELATED_P (insn) = 1;
8874 dwarf2out_frame_debug (insn, true);
8881 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8882 is what will be referenced by the Mach-O PIC subsystem. */
8884 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8887 targetm.asm_out.internal_label (asm_out_file, "L",
8888 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
8892 output_asm_insn ("pop%z0\t%0", xops);
8893 #ifdef DWARF2_UNWIND_INFO
8894 /* The pop clobbers dest, but doesn't restore it
8895 for unwind info purposes. */
8896 if (dwarf2out_do_frame ())
8900 insn = emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
8901 dwarf2out_frame_debug (insn, true);
8902 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8903 gen_rtx_PLUS (Pmode,
8906 RTX_FRAME_RELATED_P (insn) = 1;
8907 dwarf2out_frame_debug (insn, true);
8916 get_pc_thunk_name (name, REGNO (dest));
8917 pic_labels_used |= 1 << REGNO (dest);
8919 #ifdef DWARF2_UNWIND_INFO
8920 /* Ensure all queued register saves are flushed before the call. */
8922 if (dwarf2out_do_frame ())
8923 dwarf2out_flush_queued_reg_saves ();
8925 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
8926 xops[2] = gen_rtx_MEM (QImode, xops[2]);
8927 output_asm_insn ("call\t%X2", xops);
8928 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8929 is what will be referenced by the Mach-O PIC subsystem. */
8932 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8934 targetm.asm_out.internal_label (asm_out_file, "L",
8935 CODE_LABEL_NUMBER (label));
8942 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
8943 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
8945 output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
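/* Illustrative output (hedged): the two SET_GOT flavours printed above
   look like this on ia32:

       call    .L2                     -- !TARGET_DEEP_BRANCH_PREDICTION
   .L2:
       popl    %ebx
       addl    $_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx

       call    __i686.get_pc_thunk.bx  -- with deep branch prediction
       addl    $_GLOBAL_OFFSET_TABLE_, %ebx

   The thunk variant keeps call/return pairs matched for the return
   stack predictor, hence its use under TARGET_DEEP_BRANCH_PREDICTION.  */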
8950 /* Generate an "push" pattern for input ARG. */
8955 struct machine_function *m = cfun->machine;
8957 if (m->fs.cfa_reg == stack_pointer_rtx)
8958 m->fs.cfa_offset += UNITS_PER_WORD;
8959 m->fs.sp_offset += UNITS_PER_WORD;
8961 return gen_rtx_SET (VOIDmode,
8963 gen_rtx_PRE_DEC (Pmode,
8964 stack_pointer_rtx)),
8968 /* Generate an "pop" pattern for input ARG. */
8973 return gen_rtx_SET (VOIDmode,
8976 gen_rtx_POST_INC (Pmode,
8977 stack_pointer_rtx)));
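/* Illustrative RTL (hedged): in 64-bit mode gen_push and gen_pop build
   the canonical push/pop patterns, e.g. for %rbp:

       (set (mem:DI (pre_dec:DI (reg:DI sp))) (reg:DI bp))    -- push
       (set (reg:DI bp) (mem:DI (post_inc:DI (reg:DI sp))))   -- pop

   Note that gen_push also updates cfun->machine->fs, so later CFA
   computations account for the UNITS_PER_WORD stack adjustment.  */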
8980 /* Return >= 0 if there is an unused call-clobbered register available
8981 for the entire function. */
8984 ix86_select_alt_pic_regnum (void)
8986 if (current_function_is_leaf
8988 && !ix86_current_function_calls_tls_descriptor)
8991 /* Can't use the same register for both PIC and DRAP. */
8993 drap = REGNO (crtl->drap_reg);
8996 for (i = 2; i >= 0; --i)
8997 if (i != drap && !df_regs_ever_live_p (i))
9001 return INVALID_REGNUM;
9004 /* Return 1 if we need to save REGNO. */
9006 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
9008 if (pic_offset_table_rtx
9009 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
9010 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
9012 || crtl->calls_eh_return
9013 || crtl->uses_const_pool))
9015 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
9020 if (crtl->calls_eh_return && maybe_eh_return)
9025 unsigned test = EH_RETURN_DATA_REGNO (i);
9026 if (test == INVALID_REGNUM)
9033 if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
9036 return (df_regs_ever_live_p (regno)
9037 && !call_used_regs[regno]
9038 && !fixed_regs[regno]
9039 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
9042 /* Return the number of saved general purpose registers. */
9045 ix86_nsaved_regs (void)
9050 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9051 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9056 /* Return the number of saved SSE registers. */
9059 ix86_nsaved_sseregs (void)
9064 if (!TARGET_64BIT_MS_ABI)
9066 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9067 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9072 /* Given FROM and TO register numbers, say whether this elimination is
9073 allowed. If stack alignment is needed, we can only replace argument
9074 pointer with hard frame pointer, or replace frame pointer with stack
9075 pointer. Otherwise, frame pointer elimination is automatically
9076 handled and all other eliminations are valid. */
9079 ix86_can_eliminate (const int from, const int to)
9081 if (stack_realign_fp)
9082 return ((from == ARG_POINTER_REGNUM
9083 && to == HARD_FRAME_POINTER_REGNUM)
9084 || (from == FRAME_POINTER_REGNUM
9085 && to == STACK_POINTER_REGNUM));
9087 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
9090 /* Return the offset between two registers, one to be eliminated, and the other
9091 its replacement, at the start of a routine. */
9094 ix86_initial_elimination_offset (int from, int to)
9096 struct ix86_frame frame;
9097 ix86_compute_frame_layout (&frame);
9099 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9100 return frame.hard_frame_pointer_offset;
9101 else if (from == FRAME_POINTER_REGNUM
9102 && to == HARD_FRAME_POINTER_REGNUM)
9103 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
9106 gcc_assert (to == STACK_POINTER_REGNUM);
9108 if (from == ARG_POINTER_REGNUM)
9109 return frame.stack_pointer_offset;
9111 gcc_assert (from == FRAME_POINTER_REGNUM);
9112 return frame.stack_pointer_offset - frame.frame_pointer_offset;
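/* Summary (hedged): each elimination is the difference between two of
   the offsets computed by ix86_compute_frame_layout, e.g.

       ARG_POINTER   -> HARD_FRAME_POINTER : hard_frame_pointer_offset
       ARG_POINTER   -> STACK_POINTER      : stack_pointer_offset
       FRAME_POINTER -> STACK_POINTER      : stack_pointer_offset
                                             - frame_pointer_offset

   so once the layout is fixed, every eliminable register sits at a
   constant distance from its replacement.  */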
9116 /* In a dynamically-aligned function, we can't know the offset from
9117 stack pointer to frame pointer, so we must ensure that setjmp
9118 eliminates fp against the hard fp (%ebp) rather than trying to
9119 index from %esp up to the top of the frame across a gap that is
9120 of unknown (at compile-time) size. */
9122 ix86_builtin_setjmp_frame_value (void)
9124 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
9127 /* On the x86 -fsplit-stack and -fstack-protector both use the same
9128 field in the TCB, so they cannot be used together. */
9131 ix86_supports_split_stack (bool report ATTRIBUTE_UNUSED,
9132 struct gcc_options *opts ATTRIBUTE_UNUSED)
9136 #ifndef TARGET_THREAD_SPLIT_STACK_OFFSET
9138 error ("%<-fsplit-stack%> currently only supported on GNU/Linux");
9141 if (!HAVE_GAS_CFI_PERSONALITY_DIRECTIVE)
9144 error ("%<-fsplit-stack%> requires "
9145 "assembler support for CFI directives");
9153 /* When using -fsplit-stack, the allocation routines set a field in
9154 the TCB to the bottom of the stack plus this much space, measured in bytes. */
9157 #define SPLIT_STACK_AVAILABLE 256
9159 /* Fill the ix86_frame structure describing the frame of the current function. */
9162 ix86_compute_frame_layout (struct ix86_frame *frame)
9164 unsigned int stack_alignment_needed;
9165 HOST_WIDE_INT offset;
9166 unsigned int preferred_alignment;
9167 HOST_WIDE_INT size = get_frame_size ();
9168 HOST_WIDE_INT to_allocate;
9170 frame->nregs = ix86_nsaved_regs ();
9171 frame->nsseregs = ix86_nsaved_sseregs ();
9173 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
9174 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
9176 /* The 64-bit MS ABI seems to require stack alignment of 16 at all times,
9177 except for function prologues and leaf functions. */
9178 if ((TARGET_64BIT_MS_ABI && preferred_alignment < 16)
9179 && (!current_function_is_leaf || cfun->calls_alloca != 0
9180 || ix86_current_function_calls_tls_descriptor))
9182 preferred_alignment = 16;
9183 stack_alignment_needed = 16;
9184 crtl->preferred_stack_boundary = 128;
9185 crtl->stack_alignment_needed = 128;
9188 gcc_assert (!size || stack_alignment_needed);
9189 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
9190 gcc_assert (preferred_alignment <= stack_alignment_needed);
9192 /* For SEH we have to limit the amount of code movement into the prologue.
9193 At present we do this via a BLOCKAGE, at which point there's very little
9194 scheduling that can be done, which means that there's very little point
9195 in doing anything except PUSHs. */
9197 cfun->machine->use_fast_prologue_epilogue = false;
9199 /* During reload iteration the number of registers saved can change.
9200 Recompute the value as needed. Do not recompute when the number of registers
9201 didn't change, as reload calls this function multiple times and does not
9202 expect the decision to change within a single iteration. */
9203 else if (!optimize_function_for_size_p (cfun)
9204 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
9206 int count = frame->nregs;
9207 struct cgraph_node *node = cgraph_get_node (current_function_decl);
9209 cfun->machine->use_fast_prologue_epilogue_nregs = count;
9211 /* The fast prologue uses move instead of push to save registers. This
9212 is significantly longer, but also executes faster as modern hardware
9213 can execute the moves in parallel, but can't do that for push/pop.
9215 Be careful about choosing which prologue to emit: when the function takes
9216 many instructions to execute, we may use the slow version, as well as
9217 when the function is known to be outside a hot spot (this is known only
9218 with feedback). Weight the size of the function by the number of registers
9219 to save, as it is cheap to use one or two push instructions but very
9220 slow to use many of them. */
9222 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
9223 if (node->frequency < NODE_FREQUENCY_NORMAL
9224 || (flag_branch_probabilities
9225 && node->frequency < NODE_FREQUENCY_HOT))
9226 cfun->machine->use_fast_prologue_epilogue = false;
9228 cfun->machine->use_fast_prologue_epilogue
9229 = !expensive_function_p (count);
9231 if (TARGET_PROLOGUE_USING_MOVE
9232 && cfun->machine->use_fast_prologue_epilogue)
9233 frame->save_regs_using_mov = true;
9235 frame->save_regs_using_mov = false;
9237 /* If static stack checking is enabled and done with probes, the registers
9238 need to be saved before allocating the frame. */
9239 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
9240 frame->save_regs_using_mov = false;
9242 /* Skip return address. */
9243 offset = UNITS_PER_WORD;
9245 /* Skip pushed static chain. */
9246 if (ix86_static_chain_on_stack)
9247 offset += UNITS_PER_WORD;
9249 /* Skip saved base pointer. */
9250 if (frame_pointer_needed)
9251 offset += UNITS_PER_WORD;
9252 frame->hfp_save_offset = offset;
9254 /* The traditional frame pointer location is at the top of the frame. */
9255 frame->hard_frame_pointer_offset = offset;
9257 /* Register save area. */
9258 offset += frame->nregs * UNITS_PER_WORD;
9259 frame->reg_save_offset = offset;
9261 /* Align and set SSE register save area. */
9262 if (frame->nsseregs)
9264 /* The only ABI that has saved SSE registers (Win64) also has a
9265 16-byte aligned default stack, and thus we don't need to be
9266 within the re-aligned local stack frame to save them. */
9267 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
9268 offset = (offset + 16 - 1) & -16;
9269 offset += frame->nsseregs * 16;
9271 frame->sse_reg_save_offset = offset;
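/* Worked example of the rounding idiom above: (offset + 16 - 1) & -16
   rounds OFFSET up to a multiple of 16.  With offset == 40 and two SSE
   registers to save:

       (40 + 15) & -16  ==  48
       48 + 2 * 16      ==  80  ==  sse_reg_save_offset

   The same AND-with-negated-alignment trick is used below for the
   local frame, the va_arg area and outgoing-argument alignment.  */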
9273 /* The re-aligned stack starts here. Values before this point are not
9274 directly comparable with values below this point. In order to make
9275 sure that no value happens to be the same before and after, force
9276 the alignment computation below to add a non-zero value. */
9277 if (stack_realign_fp)
9278 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
9281 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
9282 offset += frame->va_arg_size;
9284 /* Align start of frame for local function. */
9285 if (stack_realign_fp
9286 || offset != frame->sse_reg_save_offset
9288 || !current_function_is_leaf
9289 || cfun->calls_alloca
9290 || ix86_current_function_calls_tls_descriptor)
9291 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
9293 /* Frame pointer points here. */
9294 frame->frame_pointer_offset = offset;
9298 /* Add the outgoing arguments area. It can be skipped if we eliminated
9299 all the function calls as dead code.
9300 Skipping is however impossible when the function calls alloca. The alloca
9301 expander assumes that the last crtl->outgoing_args_size bytes
9302 of the stack frame are unused. */
9303 if (ACCUMULATE_OUTGOING_ARGS
9304 && (!current_function_is_leaf || cfun->calls_alloca
9305 || ix86_current_function_calls_tls_descriptor))
9307 offset += crtl->outgoing_args_size;
9308 frame->outgoing_arguments_size = crtl->outgoing_args_size;
9311 frame->outgoing_arguments_size = 0;
9313 /* Align the stack boundary. Only needed if we're calling another function or using alloca. */
9315 if (!current_function_is_leaf || cfun->calls_alloca
9316 || ix86_current_function_calls_tls_descriptor)
9317 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
9319 /* We've reached end of stack frame. */
9320 frame->stack_pointer_offset = offset;
9322 /* The size the prologue needs to allocate. */
9323 to_allocate = offset - frame->sse_reg_save_offset;
9325 if ((!to_allocate && frame->nregs <= 1)
9326 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
9327 frame->save_regs_using_mov = false;
9329 if (ix86_using_red_zone ()
9330 && current_function_sp_is_unchanging
9331 && current_function_is_leaf
9332 && !ix86_current_function_calls_tls_descriptor)
9334 frame->red_zone_size = to_allocate;
9335 if (frame->save_regs_using_mov)
9336 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
9337 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
9338 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
9341 frame->red_zone_size = 0;
9342 frame->stack_pointer_offset -= frame->red_zone_size;
9344 /* The SEH frame pointer location is near the bottom of the frame.
9345 This is enforced by the fact that the difference between the
9346 stack pointer and the frame pointer is limited to 240 bytes in
9347 the unwind data structure. */
9352 /* If we can leave the frame pointer where it is, do so. */
9353 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
9354 if (diff > 240 || (diff & 15) != 0)
9356 /* Ideally we'd determine what portion of the local stack frame
9357 (within the constraint of the lowest 240) is most heavily used.
9358 But without that complication, simply bias the frame pointer
9359 by 128 bytes so as to maximize the amount of the local stack
9360 frame that is addressable with 8-bit offsets. */
9361 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
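/* Summary (hedged) of the layout computed above, as distances below
   the CFA, growing downward:

       return address                     <- offset UNITS_PER_WORD
       [pushed static chain]
       [saved frame pointer]              <- hard_frame_pointer_offset
       GPR save area                      <- reg_save_offset
       SSE save area (16-byte aligned)    <- sse_reg_save_offset
       va_arg register spill area
       local variables                    <- frame_pointer_offset
       outgoing arguments
       end of frame                       <- stack_pointer_offset
                                             (minus any red zone)  */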
9366 /* This is semi-inlined memory_address_length, but simplified
9367 since we know that we're always dealing with reg+offset, and
9368 to avoid having to create and discard all that rtl. */
9371 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
9377 /* EBP and R13 cannot be encoded without an offset. */
9378 len = (regno == BP_REG || regno == R13_REG);
9380 else if (IN_RANGE (offset, -128, 127))
9383 /* ESP and R12 must be encoded with a SIB byte. */
9384 if (regno == SP_REG || regno == R12_REG)
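/* Worked examples (hedged) of the extra encoding bytes counted here,
   beyond the ModRM byte itself:

       (%rax)       -> 0   base register alone
       (%rbp)       -> 1   EBP/R13 need at least a disp8, even for 0
       -64(%rsp)    -> 2   disp8 plus the mandatory SIB byte for ESP/R12
       512(%rbx)    -> 4   offsets outside [-128, 127] force a disp32

   choose_baseaddr below uses these lengths to pick the cheapest base.  */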
9390 /* Return an RTX that points to CFA_OFFSET within the stack frame.
9391 The valid base registers are taken from CFUN->MACHINE->FS. */
9394 choose_baseaddr (HOST_WIDE_INT cfa_offset)
9396 const struct machine_function *m = cfun->machine;
9397 rtx base_reg = NULL;
9398 HOST_WIDE_INT base_offset = 0;
9400 if (m->use_fast_prologue_epilogue)
9402 /* Choose the base register most likely to allow the most scheduling
9403 opportunities. Generally FP is valid throughout the function,
9404 while DRAP must be reloaded within the epilogue. But choose either
9405 over the SP due to increased encoding size. */
9409 base_reg = hard_frame_pointer_rtx;
9410 base_offset = m->fs.fp_offset - cfa_offset;
9412 else if (m->fs.drap_valid)
9414 base_reg = crtl->drap_reg;
9415 base_offset = 0 - cfa_offset;
9417 else if (m->fs.sp_valid)
9419 base_reg = stack_pointer_rtx;
9420 base_offset = m->fs.sp_offset - cfa_offset;
9425 HOST_WIDE_INT toffset;
9428 /* Choose the base register with the smallest address encoding.
9429 With a tie, choose FP > DRAP > SP. */
9432 base_reg = stack_pointer_rtx;
9433 base_offset = m->fs.sp_offset - cfa_offset;
9434 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
9436 if (m->fs.drap_valid)
9438 toffset = 0 - cfa_offset;
9439 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
9442 base_reg = crtl->drap_reg;
9443 base_offset = toffset;
9449 toffset = m->fs.fp_offset - cfa_offset;
9450 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
9453 base_reg = hard_frame_pointer_rtx;
9454 base_offset = toffset;
9459 gcc_assert (base_reg != NULL);
9461 return plus_constant (base_reg, base_offset);
9464 /* Emit code to save registers in the prologue. */
9467 ix86_emit_save_regs (void)
9472 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
9473 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9475 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
9476 RTX_FRAME_RELATED_P (insn) = 1;
9480 /* Emit a single register save at CFA - CFA_OFFSET. */
9483 ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
9484 HOST_WIDE_INT cfa_offset)
9486 struct machine_function *m = cfun->machine;
9487 rtx reg = gen_rtx_REG (mode, regno);
9488 rtx mem, addr, base, insn;
9490 addr = choose_baseaddr (cfa_offset);
9491 mem = gen_frame_mem (mode, addr);
9493 /* For SSE saves, we need to indicate the 128-bit alignment. */
9494 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
9496 insn = emit_move_insn (mem, reg);
9497 RTX_FRAME_RELATED_P (insn) = 1;
9500 if (GET_CODE (base) == PLUS)
9501 base = XEXP (base, 0);
9502 gcc_checking_assert (REG_P (base));
9504 /* When saving registers into a re-aligned local stack frame, avoid
9505 any tricky guessing by dwarf2out. */
9506 if (m->fs.realigned)
9508 gcc_checking_assert (stack_realign_drap);
9510 if (regno == REGNO (crtl->drap_reg))
9512 /* A bit of a hack. We force the DRAP register to be saved in
9513 the re-aligned stack frame, which provides us with a copy
9514 of the CFA that will last past the prologue. Install it. */
9515 gcc_checking_assert (cfun->machine->fs.fp_valid);
9516 addr = plus_constant (hard_frame_pointer_rtx,
9517 cfun->machine->fs.fp_offset - cfa_offset);
9518 mem = gen_rtx_MEM (mode, addr);
9519 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
9523 /* The frame pointer is a stable reference within the
9524 aligned frame. Use it. */
9525 gcc_checking_assert (cfun->machine->fs.fp_valid);
9526 addr = plus_constant (hard_frame_pointer_rtx,
9527 cfun->machine->fs.fp_offset - cfa_offset);
9528 mem = gen_rtx_MEM (mode, addr);
9529 add_reg_note (insn, REG_CFA_EXPRESSION,
9530 gen_rtx_SET (VOIDmode, mem, reg));
9534 /* The memory may not be relative to the current CFA register,
9535 which means that we may need to generate a new pattern for
9536 use by the unwind info. */
9537 else if (base != m->fs.cfa_reg)
9539 addr = plus_constant (m->fs.cfa_reg, m->fs.cfa_offset - cfa_offset);
9540 mem = gen_rtx_MEM (mode, addr);
9541 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
9545 /* Emit code to save registers using MOV insns.
9546 First register is stored at CFA - CFA_OFFSET. */
9548 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
9552 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9553 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9555 ix86_emit_save_reg_using_mov (Pmode, regno, cfa_offset);
9556 cfa_offset -= UNITS_PER_WORD;
9560 /* Emit code to save SSE registers using MOV insns.
9561 First register is stored at CFA - CFA_OFFSET. */
9563 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
9567 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9568 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9570 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
9575 static GTY(()) rtx queued_cfa_restores;
9577 /* Add a REG_CFA_RESTORE REG note to INSN, or queue it until the next stack
9578 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
9579 Don't add the note if the previously saved value will be left untouched
9580 within the stack red zone till return, as unwinders can find the same value
9581 in the register and on the stack. */
9584 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
9586 if (cfa_offset <= cfun->machine->fs.red_zone_offset)
9591 add_reg_note (insn, REG_CFA_RESTORE, reg);
9592 RTX_FRAME_RELATED_P (insn) = 1;
9596 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
9599 /* Add queued REG_CFA_RESTORE notes, if any, to INSN. */
9602 ix86_add_queued_cfa_restore_notes (rtx insn)
9605 if (!queued_cfa_restores)
9607 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
9609 XEXP (last, 1) = REG_NOTES (insn);
9610 REG_NOTES (insn) = queued_cfa_restores;
9611 queued_cfa_restores = NULL_RTX;
9612 RTX_FRAME_RELATED_P (insn) = 1;
9615 /* Expand prologue or epilogue stack adjustment.
9616 The pattern exists to put a dependency on all ebp-based memory accesses.
9617 STYLE should be negative if instructions should be marked as frame related,
9618 zero if the %r11 register is live and cannot be freely used, and positive otherwise. */
9622 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
9623 int style, bool set_cfa)
9625 struct machine_function *m = cfun->machine;
9627 bool add_frame_related_expr = false;
9630 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
9631 else if (x86_64_immediate_operand (offset, DImode))
9632 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
9636 /* r11 is used by indirect sibcall return as well, set before the
9637 epilogue and used after the epilogue. */
9639 tmp = gen_rtx_REG (DImode, R11_REG);
9642 gcc_assert (src != hard_frame_pointer_rtx
9643 && dest != hard_frame_pointer_rtx);
9644 tmp = hard_frame_pointer_rtx;
9646 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
9648 add_frame_related_expr = true;
9650 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
9653 insn = emit_insn (insn);
9655 ix86_add_queued_cfa_restore_notes (insn);
9661 gcc_assert (m->fs.cfa_reg == src);
9662 m->fs.cfa_offset += INTVAL (offset);
9663 m->fs.cfa_reg = dest;
9665 r = gen_rtx_PLUS (Pmode, src, offset);
9666 r = gen_rtx_SET (VOIDmode, dest, r);
9667 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
9668 RTX_FRAME_RELATED_P (insn) = 1;
9672 RTX_FRAME_RELATED_P (insn) = 1;
9673 if (add_frame_related_expr)
9675 rtx r = gen_rtx_PLUS (Pmode, src, offset);
9676 r = gen_rtx_SET (VOIDmode, dest, r);
9677 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
9681 if (dest == stack_pointer_rtx)
9683 HOST_WIDE_INT ooffset = m->fs.sp_offset;
9684 bool valid = m->fs.sp_valid;
9686 if (src == hard_frame_pointer_rtx)
9688 valid = m->fs.fp_valid;
9689 ooffset = m->fs.fp_offset;
9691 else if (src == crtl->drap_reg)
9693 valid = m->fs.drap_valid;
9698 /* Else there are two possibilities: SP itself, which we set
9699 up as the default above, or EH_RETURN_STACKADJ_RTX, which is
9700 taken care of by hand along the eh_return path. */
9701 gcc_checking_assert (src == stack_pointer_rtx
9702 || offset == const0_rtx);
9705 m->fs.sp_offset = ooffset - INTVAL (offset);
9706 m->fs.sp_valid = valid;
9710 /* Find an available register to be used as the dynamic realign argument
9711 pointer register. Such a register will be written in the prologue and
9712 used at the beginning of the body, so it must not be
9713 1. a parameter passing register.
9715 We reuse the static-chain register if it is available. Otherwise, we
9716 use DI for i386 and R13 for x86-64. We chose R13 since it has a longer encoding.
9719 Return: the regno of the chosen register. */
9722 find_drap_reg (void)
9724 tree decl = cfun->decl;
9728 /* Use R13 for a nested function or a function that needs a static chain.
9729 Since a function with a tail call may use any caller-saved
9730 register in the epilogue, DRAP must not use a caller-saved
9731 register in such a case. */
9732 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
9739 /* Use DI for a nested function or a function that needs a static chain.
9740 Since a function with a tail call may use any caller-saved
9741 register in the epilogue, DRAP must not use a caller-saved
9742 register in such a case. */
9743 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
9746 /* Reuse the static chain register if it isn't used for parameter passing. */
9748 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
9750 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
9751 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
9758 /* Return minimum incoming stack alignment. */
9761 ix86_minimum_incoming_stack_boundary (bool sibcall)
9763 unsigned int incoming_stack_boundary;
9765 /* Prefer the one specified at command line. */
9766 if (ix86_user_incoming_stack_boundary)
9767 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
9768 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
9769 if -mstackrealign is used, this isn't a sibcall check, and the
9770 estimated stack alignment is 128 bits. */
9773 && ix86_force_align_arg_pointer
9774 && crtl->stack_alignment_estimated == 128)
9775 incoming_stack_boundary = MIN_STACK_BOUNDARY;
9777 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
9779 /* Incoming stack alignment can be changed for individual functions
9780 via the force_align_arg_pointer attribute. We use the smallest
9781 incoming stack boundary. */
9782 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
9783 && lookup_attribute (ix86_force_align_arg_pointer_string,
9784 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
9785 incoming_stack_boundary = MIN_STACK_BOUNDARY;
9787 /* The incoming stack frame has to be aligned at least at
9788 parm_stack_boundary. */
9789 if (incoming_stack_boundary < crtl->parm_stack_boundary)
9790 incoming_stack_boundary = crtl->parm_stack_boundary;
9792 /* The stack at the entry of main is aligned by the runtime. We use the
9793 smallest incoming stack boundary. */
9794 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
9795 && DECL_NAME (current_function_decl)
9796 && MAIN_NAME_P (DECL_NAME (current_function_decl))
9797 && DECL_FILE_SCOPE_P (current_function_decl))
9798 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
9800 return incoming_stack_boundary;
9803 /* Update incoming stack boundary and estimated stack alignment. */
9806 ix86_update_stack_boundary (void)
9808 ix86_incoming_stack_boundary
9809 = ix86_minimum_incoming_stack_boundary (false);
9811 /* x86_64 varargs need 16-byte stack alignment for the register save area. */
9815 && crtl->stack_alignment_estimated < 128)
9816 crtl->stack_alignment_estimated = 128;
9819 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
9820 needed or an rtx for DRAP otherwise. */
9823 ix86_get_drap_rtx (void)
9825 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
9826 crtl->need_drap = true;
9828 if (stack_realign_drap)
9830 /* Assign DRAP to vDRAP and return vDRAP. */
9831 unsigned int regno = find_drap_reg ();
9836 arg_ptr = gen_rtx_REG (Pmode, regno);
9837 crtl->drap_reg = arg_ptr;
9840 drap_vreg = copy_to_reg (arg_ptr);
9844 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
9847 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
9848 RTX_FRAME_RELATED_P (insn) = 1;
9856 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
9859 ix86_internal_arg_pointer (void)
9861 return virtual_incoming_args_rtx;
9864 struct scratch_reg {
9869 /* Return a short-lived scratch register for use on function entry.
9870 In 32-bit mode, it is valid only after the registers are saved
9871 in the prologue. This register must be released by means of
9872 release_scratch_register_on_entry once it is dead. */
9875 get_scratch_register_on_entry (struct scratch_reg *sr)
9883 /* We always use R11 in 64-bit mode. */
9888 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
9890 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
9891 bool static_chain_p = DECL_STATIC_CHAIN (decl);
9892 int regparm = ix86_function_regparm (fntype, decl);
9894 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
9896 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
9897 for the static chain register. */
9898 if ((regparm < 1 || (fastcall_p && !static_chain_p))
9899 && drap_regno != AX_REG)
9901 else if (regparm < 2 && drap_regno != DX_REG)
9903 /* ecx is the static chain register. */
9904 else if (regparm < 3 && !fastcall_p && !static_chain_p
9905 && drap_regno != CX_REG)
9907 else if (ix86_save_reg (BX_REG, true))
9909 /* esi is the static chain register. */
9910 else if (!(regparm == 3 && static_chain_p)
9911 && ix86_save_reg (SI_REG, true))
9913 else if (ix86_save_reg (DI_REG, true))
9917 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
9922 sr->reg = gen_rtx_REG (Pmode, regno);
9925 rtx insn = emit_insn (gen_push (sr->reg));
9926 RTX_FRAME_RELATED_P (insn) = 1;
9930 /* Release a scratch register obtained from the preceding function. */
9933 release_scratch_register_on_entry (struct scratch_reg *sr)
9937 rtx x, insn = emit_insn (gen_pop (sr->reg));
9939 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
9940 RTX_FRAME_RELATED_P (insn) = 1;
9941 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
9942 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
9943 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
9947 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
9949 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
9952 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
9954 /* We skip the probe for the first interval + a small dope of 4 words and
9955 probe that many bytes past the specified size to maintain a protection
9956 area at the bottom of the stack. */
9957 const int dope = 4 * UNITS_PER_WORD;
9958 rtx size_rtx = GEN_INT (size), last;
9960 /* See if we have a constant small number of probes to generate. If so,
9961 that's the easy case. The run-time loop is made up of 11 insns in the
9962 generic case while the compile-time loop is made up of 3+2*(n-1) insns
9963 for n # of intervals. */
9964 if (size <= 5 * PROBE_INTERVAL)
9966 HOST_WIDE_INT i, adjust;
9967 bool first_probe = true;
9969 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
9970 values of N from 1 until it exceeds SIZE. If only one probe is
9971 needed, this will not generate any code. Then adjust and probe
9972 to PROBE_INTERVAL + SIZE. */
9973 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
9977 adjust = 2 * PROBE_INTERVAL + dope;
9978 first_probe = false;
9981 adjust = PROBE_INTERVAL;
9983 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9984 plus_constant (stack_pointer_rtx, -adjust)));
9985 emit_stack_probe (stack_pointer_rtx);
9989 adjust = size + PROBE_INTERVAL + dope;
9991 adjust = size + PROBE_INTERVAL - i;
9993 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9994 plus_constant (stack_pointer_rtx, -adjust)));
9995 emit_stack_probe (stack_pointer_rtx);
9997 /* Adjust back to account for the additional first interval. */
9998 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9999 plus_constant (stack_pointer_rtx,
10000 PROBE_INTERVAL + dope)));
10003 /* Otherwise, do the same as above, but in a loop. Note that we must be
10004 extra careful with variables wrapping around because we might be at
10005 the very top (or the very bottom) of the address space and we have
10006 to be able to handle this case properly; in particular, we use an
10007 equality test for the loop condition. */
10010 HOST_WIDE_INT rounded_size;
10011 struct scratch_reg sr;
10013 get_scratch_register_on_entry (&sr);
10016 /* Step 1: round SIZE to the previous multiple of the interval. */
10018 rounded_size = size & -PROBE_INTERVAL;
10021 /* Step 2: compute initial and final value of the loop counter. */
10023 /* SP = SP_0 + PROBE_INTERVAL. */
10024 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10025 plus_constant (stack_pointer_rtx,
10026 - (PROBE_INTERVAL + dope))));
10028 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
10029 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
10030 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
10031 gen_rtx_PLUS (Pmode, sr.reg,
10032 stack_pointer_rtx)));
10035 /* Step 3: the loop
10037 while (SP != LAST_ADDR)
10039 SP = SP + PROBE_INTERVAL
10043 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
10044 values of N from 1 until it is equal to ROUNDED_SIZE. */
10046 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
10049 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
10050 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
10052 if (size != rounded_size)
10054 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10055 plus_constant (stack_pointer_rtx,
10056 rounded_size - size)));
10057 emit_stack_probe (stack_pointer_rtx);
10060 /* Adjust back to account for the additional first interval. */
10061 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10062 plus_constant (stack_pointer_rtx,
10063 PROBE_INTERVAL + dope)));
10065 release_scratch_register_on_entry (&sr);
10068 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
10070 /* Even if the stack pointer isn't the CFA register, we need to correctly
10071 describe the adjustments made to it, in particular differentiate the
10072 frame-related ones from the frame-unrelated ones. */
10075 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
10076 XVECEXP (expr, 0, 0)
10077 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10078 plus_constant (stack_pointer_rtx, -size));
10079 XVECEXP (expr, 0, 1)
10080 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10081 plus_constant (stack_pointer_rtx,
10082 PROBE_INTERVAL + dope + size));
10083 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
10084 RTX_FRAME_RELATED_P (last) = 1;
10086 cfun->machine->fs.sp_offset += size;
10089 /* Make sure nothing is scheduled before we are done. */
10090 emit_insn (gen_blockage ());
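/* Worked example (hedged): with PROBE_INTERVAL == 4096, dope == 32 and
   size == 8192, the unrolled branch above emits:

       sub     $8224, %rsp     -- 2*4096 + 32: first interval plus dope
       or      $0, (%rsp)      -- probe
       sub     $4096, %rsp
       or      $0, (%rsp)      -- probe
       add     $4128, %rsp     -- give back PROBE_INTERVAL + dope

   leaving %rsp exactly SIZE bytes lower with each new page touched.  */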
10093 /* Adjust the stack pointer up to REG while probing it. */
10096 output_adjust_stack_and_probe (rtx reg)
10098 static int labelno = 0;
10099 char loop_lab[32], end_lab[32];
10102 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10103 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10105 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10107 /* Jump to END_LAB if SP == LAST_ADDR. */
10108 xops[0] = stack_pointer_rtx;
10110 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10111 fputs ("\tje\t", asm_out_file);
10112 assemble_name_raw (asm_out_file, end_lab);
10113 fputc ('\n', asm_out_file);
10115 /* SP = SP + PROBE_INTERVAL. */
10116 xops[1] = GEN_INT (PROBE_INTERVAL);
10117 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10120 xops[1] = const0_rtx;
10121 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
10123 fprintf (asm_out_file, "\tjmp\t");
10124 assemble_name_raw (asm_out_file, loop_lab);
10125 fputc ('\n', asm_out_file);
10127 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
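/* Illustrative output (hedged): on x86-64 with PROBE_INTERVAL == 4096
   and the last address precomputed into a scratch register, the loop
   printed above is:

   .LPSRL0:
       cmpq    %r11, %rsp
       je      .LPSRE0
       subq    $4096, %rsp
       orq     $0, (%rsp)      -- touch the new page
       jmp     .LPSRL0
   .LPSRE0:

   The equality test (not an ordered compare) keeps the loop correct
   even if the address computation wraps around.  */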
10132 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
10133 inclusive. These are offsets from the current stack pointer. */
10136 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
10138 /* See if we have a constant small number of probes to generate. If so,
10139 that's the easy case. The run-time loop is made up of 7 insns in the
10140 generic case while the compile-time loop is made up of n insns for n # of intervals. */
10142 if (size <= 7 * PROBE_INTERVAL)
10146 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
10147 it exceeds SIZE. If only one probe is needed, this will not
10148 generate any code. Then probe at FIRST + SIZE. */
10149 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10150 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + i)));
10152 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + size)));
10155 /* Otherwise, do the same as above, but in a loop. Note that we must be
10156 extra careful with variables wrapping around because we might be at
10157 the very top (or the very bottom) of the address space and we have
10158 to be able to handle this case properly; in particular, we use an
10159 equality test for the loop condition. */
10162 HOST_WIDE_INT rounded_size, last;
10163 struct scratch_reg sr;
10165 get_scratch_register_on_entry (&sr);
10168 /* Step 1: round SIZE to the previous multiple of the interval. */
10170 rounded_size = size & -PROBE_INTERVAL;
10173 /* Step 2: compute initial and final value of the loop counter. */
10175 /* TEST_OFFSET = FIRST. */
10176 emit_move_insn (sr.reg, GEN_INT (-first));
10178 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
10179 last = first + rounded_size;
10182 /* Step 3: the loop
10184 while (TEST_ADDR != LAST_ADDR)
10186 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
10190 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
10191 until it is equal to ROUNDED_SIZE. */
10193 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
10196 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
10197 that SIZE is equal to ROUNDED_SIZE. */
10199 if (size != rounded_size)
10200 emit_stack_probe (plus_constant (gen_rtx_PLUS (Pmode,
10203 rounded_size - size));
10205 release_scratch_register_on_entry (&sr);
10208 /* Make sure nothing is scheduled before we are done. */
10209 emit_insn (gen_blockage ());
10212 /* Probe a range of stack addresses from REG to END, inclusive. These are
10213 offsets from the current stack pointer. */
10216 output_probe_stack_range (rtx reg, rtx end)
10218 static int labelno = 0;
10219 char loop_lab[32], end_lab[32];
10222 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10223 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10225 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10227 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
10230 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10231 fputs ("\tje\t", asm_out_file);
10232 assemble_name_raw (asm_out_file, end_lab);
10233 fputc ('\n', asm_out_file);
10235 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
10236 xops[1] = GEN_INT (PROBE_INTERVAL);
10237 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10239 /* Probe at TEST_ADDR. */
10240 xops[0] = stack_pointer_rtx;
10242 xops[2] = const0_rtx;
10243 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
10245 fprintf (asm_out_file, "\tjmp\t");
10246 assemble_name_raw (asm_out_file, loop_lab);
10247 fputc ('\n', asm_out_file);
10249 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10254 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
10255 to be generated in the correct form. */
10257 ix86_finalize_stack_realign_flags (void)
10259 /* Check if stack realignment is really needed after reload, and
10260 store the result in cfun. */
10261 unsigned int incoming_stack_boundary
10262 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
10263 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
10264 unsigned int stack_realign = (incoming_stack_boundary
10265 < (current_function_is_leaf
10266 ? crtl->max_used_stack_slot_alignment
10267 : crtl->stack_alignment_needed));
10269 if (crtl->stack_realign_finalized)
10271 /* After stack_realign_needed is finalized, we can no longer update it. */
10273 gcc_assert (crtl->stack_realign_needed == stack_realign);
10277 crtl->stack_realign_needed = stack_realign;
10278 crtl->stack_realign_finalized = true;
10282 /* Expand the prologue into a bunch of separate insns. */
10285 ix86_expand_prologue (void)
10287 struct machine_function *m = cfun->machine;
10290 struct ix86_frame frame;
10291 HOST_WIDE_INT allocate;
10292 bool int_registers_saved;
10294 ix86_finalize_stack_realign_flags ();
10296 /* DRAP should not coexist with stack_realign_fp. */
10297 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
10299 memset (&m->fs, 0, sizeof (m->fs));
10301 /* Initialize CFA state for before the prologue. */
10302 m->fs.cfa_reg = stack_pointer_rtx;
10303 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
10305 /* Track SP offset to the CFA. We continue tracking this after we've
10306 swapped the CFA register away from SP. In the case of re-alignment
10307 this is fudged; we're interested in offsets within the local frame. */
10308 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10309 m->fs.sp_valid = true;
10311 ix86_compute_frame_layout (&frame);
10313 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
10315 /* We should have already generated an error for any use of
10316 ms_hook on a nested function. */
10317 gcc_checking_assert (!ix86_static_chain_on_stack);
10319 /* Check if profiling is active and we shall use the profiling-before-
10320 prologue variant. If so, sorry. */
10321 if (crtl->profile && flag_fentry != 0)
10322 sorry ("ms_hook_prologue attribute isn%'t compatible "
10323 "with -mfentry for 32-bit");
10325 /* In ix86_asm_output_function_label we emitted:
10326	  8b ff	movl.s %edi,%edi
10327	  55	push   %ebp
10328	  8b ec	movl.s %esp,%ebp
10330 This matches the hookable function prologue in Win32 API
10331 functions in Microsoft Windows XP Service Pack 2 and newer.
10332 Wine uses this to enable Windows apps to hook the Win32 API
10333 functions provided by Wine.
10335 What that means is that we've already set up the frame pointer. */
10337 if (frame_pointer_needed
10338 && !(crtl->drap_reg && crtl->stack_realign_needed))
10342 /* We've decided to use the frame pointer already set up.
10343 Describe this to the unwinder by pretending that both
10344 push and mov insns happen right here.
10346 Putting the unwind info here at the end of the ms_hook
10347 is done so that we can make absolutely certain we get
10348 the required byte sequence at the start of the function,
10349 rather than relying on an assembler that can produce
10350 the exact encoding required.
10352 However it does mean (in the unpatched case) that we have
10353 a 1 insn window where the asynchronous unwind info is
10354 incorrect. However, if we placed the unwind info at
10355 its correct location we would have incorrect unwind info
10356 in the patched case. Which is probably all moot since
10357 I don't expect Wine generates dwarf2 unwind info for the
10358 system libraries that use this feature. */
10360 insn = emit_insn (gen_blockage ());
10362 push = gen_push (hard_frame_pointer_rtx);
10363 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
10364 stack_pointer_rtx);
10365 RTX_FRAME_RELATED_P (push) = 1;
10366 RTX_FRAME_RELATED_P (mov) = 1;
10368 RTX_FRAME_RELATED_P (insn) = 1;
10369 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10370 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
10372 /* Note that gen_push incremented m->fs.cfa_offset, even
10373 though we didn't emit the push insn here. */
10374 m->fs.cfa_reg = hard_frame_pointer_rtx;
10375 m->fs.fp_offset = m->fs.cfa_offset;
10376 m->fs.fp_valid = true;
10380 /* The frame pointer is not needed so pop %ebp again.
10381 This leaves us with a pristine state. */
10382 emit_insn (gen_pop (hard_frame_pointer_rtx));
10386 /* The first insn of a function that accepts its static chain on the
10387 stack is to push the register that would be filled in by a direct
10388 call. This insn will be skipped by the trampoline. */
10389 else if (ix86_static_chain_on_stack)
10391 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
10392 emit_insn (gen_blockage ());
10394 /* We don't want to interpret this push insn as a register save,
10395 only as a stack adjustment. The real copy of the register as
10396 a save will be done later, if needed. */
10397 t = plus_constant (stack_pointer_rtx, -UNITS_PER_WORD);
10398 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
10399 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
10400 RTX_FRAME_RELATED_P (insn) = 1;
10403   /* Emit prologue code to adjust stack alignment and set up the DRAP, in
10404      case the DRAP is needed and stack realignment is really needed after reload.  */
10405 if (stack_realign_drap)
10407 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10409 /* Only need to push parameter pointer reg if it is caller saved. */
10410 if (!call_used_regs[REGNO (crtl->drap_reg)])
10412 /* Push arg pointer reg */
10413 insn = emit_insn (gen_push (crtl->drap_reg));
10414 RTX_FRAME_RELATED_P (insn) = 1;
10417 /* Grab the argument pointer. */
10418 t = plus_constant (stack_pointer_rtx, m->fs.sp_offset);
10419 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10420 RTX_FRAME_RELATED_P (insn) = 1;
10421 m->fs.cfa_reg = crtl->drap_reg;
10422 m->fs.cfa_offset = 0;
10424 /* Align the stack. */
10425 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10427 GEN_INT (-align_bytes)));
10428 RTX_FRAME_RELATED_P (insn) = 1;
10430 /* Replicate the return address on the stack so that return
10431 address can be reached via (argp - 1) slot. This is needed
10432 to implement macro RETURN_ADDR_RTX and intrinsic function
10433 expand_builtin_return_addr etc. */
10434 t = plus_constant (crtl->drap_reg, -UNITS_PER_WORD);
10435 t = gen_frame_mem (Pmode, t);
10436 insn = emit_insn (gen_push (t));
10437 RTX_FRAME_RELATED_P (insn) = 1;
10439 /* For the purposes of frame and register save area addressing,
10440 we've started over with a new frame. */
10441 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10442 m->fs.realigned = true;
10445 if (frame_pointer_needed && !m->fs.fp_valid)
10447 /* Note: AT&T enter does NOT have reversed args. Enter is probably
10448 slower on all targets. Also sdb doesn't like it. */
10449 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
10450 RTX_FRAME_RELATED_P (insn) = 1;
10452 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
10454 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
10455 RTX_FRAME_RELATED_P (insn) = 1;
10457 if (m->fs.cfa_reg == stack_pointer_rtx)
10458 m->fs.cfa_reg = hard_frame_pointer_rtx;
10459 m->fs.fp_offset = m->fs.sp_offset;
10460 m->fs.fp_valid = true;
10464 int_registers_saved = (frame.nregs == 0);
10466 if (!int_registers_saved)
10468 /* If saving registers via PUSH, do so now. */
10469 if (!frame.save_regs_using_mov)
10471 ix86_emit_save_regs ();
10472 int_registers_saved = true;
10473 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
10476 /* When using red zone we may start register saving before allocating
10477 the stack frame saving one cycle of the prologue. However, avoid
10478 doing this if we have to probe the stack; at least on x86_64 the
10479 stack probe can turn into a call that clobbers a red zone location. */
10480 else if (ix86_using_red_zone ()
10481 && (! TARGET_STACK_PROBE
10482 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
10484 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10485 int_registers_saved = true;
10489 if (stack_realign_fp)
10491 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10492 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
10494 /* The computation of the size of the re-aligned stack frame means
10495 that we must allocate the size of the register save area before
10496 performing the actual alignment. Otherwise we cannot guarantee
10497 that there's enough storage above the realignment point. */
10498 if (m->fs.sp_offset != frame.sse_reg_save_offset)
10499 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10500 GEN_INT (m->fs.sp_offset
10501 - frame.sse_reg_save_offset),
10504 /* Align the stack. */
10505 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10507 GEN_INT (-align_bytes)));
10509 /* For the purposes of register save area addressing, the stack
10510 pointer is no longer valid. As for the value of sp_offset,
10511 see ix86_compute_frame_layout, which we need to match in order
10512 to pass verification of stack_pointer_offset at the end. */
10513 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
10514 m->fs.sp_valid = false;
10517 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
10519 if (flag_stack_usage)
10521 /* We start to count from ARG_POINTER. */
10522 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
10524 /* If it was realigned, take into account the fake frame. */
10525 if (stack_realign_drap)
10527 if (ix86_static_chain_on_stack)
10528 stack_size += UNITS_PER_WORD;
10530 if (!call_used_regs[REGNO (crtl->drap_reg)])
10531 stack_size += UNITS_PER_WORD;
10533 /* This over-estimates by 1 minimal-stack-alignment-unit but
10534 mitigates that by counting in the new return address slot. */
10535 current_function_dynamic_stack_size
10536 += crtl->stack_alignment_needed / BITS_PER_UNIT;
10539 current_function_static_stack_size = stack_size;
10542 /* The stack has already been decremented by the instruction calling us
10543 so probe if the size is non-negative to preserve the protection area. */
10544 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
10546 /* We expect the registers to be saved when probes are used. */
10547 gcc_assert (int_registers_saved);
10549 if (STACK_CHECK_MOVING_SP)
10551 ix86_adjust_stack_and_probe (allocate);
10556 HOST_WIDE_INT size = allocate;
10558 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
10559 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
10561 if (TARGET_STACK_PROBE)
10562 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
10564 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
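	  /* Added note (the values are illustrative assumptions): if
	     STACK_CHECK_PROTECT were 12288 bytes and the frame 8192
	     bytes, the non-Windows branch above would probe the bytes
	     between sp - 12288 and sp - 20480, leaving the protection
	     area directly below sp untouched, whereas the
	     TARGET_STACK_PROBE (Windows) branch probes
	     size + STACK_CHECK_PROTECT bytes starting at offset 0,
	     since Windows commits stack pages as they are first
	     touched.  */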
10570 else if (!ix86_target_stack_probe ()
10571 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
10573 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10574 GEN_INT (-allocate), -1,
10575 m->fs.cfa_reg == stack_pointer_rtx);
10579 rtx eax = gen_rtx_REG (Pmode, AX_REG);
10581 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
10583 bool eax_live = false;
10584 bool r10_live = false;
10587 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
10588 if (!TARGET_64BIT_MS_ABI)
10589 eax_live = ix86_eax_live_at_start_p ();
10593 emit_insn (gen_push (eax));
10594 allocate -= UNITS_PER_WORD;
10598 r10 = gen_rtx_REG (Pmode, R10_REG);
10599 emit_insn (gen_push (r10));
10600 allocate -= UNITS_PER_WORD;
10603 emit_move_insn (eax, GEN_INT (allocate));
10604 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
10606 /* Use the fact that AX still contains ALLOCATE. */
10607 adjust_stack_insn = (TARGET_64BIT
10608 ? gen_pro_epilogue_adjust_stack_di_sub
10609 : gen_pro_epilogue_adjust_stack_si_sub);
10611 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
10612 stack_pointer_rtx, eax));
10614 /* Note that SEH directives need to continue tracking the stack
10615 pointer even after the frame pointer has been set up. */
10616 if (m->fs.cfa_reg == stack_pointer_rtx || TARGET_SEH)
10618 if (m->fs.cfa_reg == stack_pointer_rtx)
10619 m->fs.cfa_offset += allocate;
10621 RTX_FRAME_RELATED_P (insn) = 1;
10622 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10623 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10624 plus_constant (stack_pointer_rtx,
10627 m->fs.sp_offset += allocate;
10629 if (r10_live && eax_live)
10631 t = choose_baseaddr (m->fs.sp_offset - allocate);
10632 emit_move_insn (r10, gen_frame_mem (Pmode, t));
10633 t = choose_baseaddr (m->fs.sp_offset - allocate - UNITS_PER_WORD);
10634 emit_move_insn (eax, gen_frame_mem (Pmode, t));
10636 else if (eax_live || r10_live)
10638 t = choose_baseaddr (m->fs.sp_offset - allocate);
10639 emit_move_insn ((eax_live ? eax : r10), gen_frame_mem (Pmode, t));
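      /* Added note: the pushes above placed the saved %eax/%r10
	 immediately above the area just allocated, so
	 m->fs.sp_offset - allocate (and one word further down for the
	 second register) addresses their save slots; choose_baseaddr
	 picks whichever of sp, fp or the DRAP is currently valid as
	 the base.  */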
10642 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
10644   /* If we haven't already set up the frame pointer, do so now.  */
10645 if (frame_pointer_needed && !m->fs.fp_valid)
10647 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
10648 GEN_INT (frame.stack_pointer_offset
10649 - frame.hard_frame_pointer_offset));
10650 insn = emit_insn (insn);
10651 RTX_FRAME_RELATED_P (insn) = 1;
10652 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
10654 if (m->fs.cfa_reg == stack_pointer_rtx)
10655 m->fs.cfa_reg = hard_frame_pointer_rtx;
10656 m->fs.fp_offset = frame.hard_frame_pointer_offset;
10657 m->fs.fp_valid = true;
10660 if (!int_registers_saved)
10661 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10662 if (frame.nsseregs)
10663 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
10665 pic_reg_used = false;
10666 if (pic_offset_table_rtx
10667 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
10670 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
10672 if (alt_pic_reg_used != INVALID_REGNUM)
10673 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
10675 pic_reg_used = true;
10682 if (ix86_cmodel == CM_LARGE_PIC)
10684 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
10685 rtx label = gen_label_rtx ();
10686 emit_label (label);
10687 LABEL_PRESERVE_P (label) = 1;
10688 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
10689 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
10690 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
10691 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
10692 pic_offset_table_rtx, tmp_reg));
10695 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
10698 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
10701 /* In the pic_reg_used case, make sure that the got load isn't deleted
10702 when mcount needs it. Blockage to avoid call movement across mcount
10703      call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
10704      note.  */
10705 if (crtl->profile && !flag_fentry && pic_reg_used)
10706 emit_insn (gen_prologue_use (pic_offset_table_rtx));
10708 if (crtl->drap_reg && !crtl->stack_realign_needed)
10710      /* The vDRAP is set up, but after reload it turns out stack realignment
10711	  isn't necessary; here we emit prologue code to set up the DRAP
10712	  without the stack realignment adjustment.  */
10713 t = choose_baseaddr (0);
10714 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10717 /* Prevent instructions from being scheduled into register save push
10718 sequence when access to the redzone area is done through frame pointer.
10719 The offset between the frame pointer and the stack pointer is calculated
10720 relative to the value of the stack pointer at the end of the function
10721 prologue, and moving instructions that access redzone area via frame
10722 pointer inside push sequence violates this assumption. */
10723 if (frame_pointer_needed && frame.red_zone_size)
10724 emit_insn (gen_memory_blockage ());
10726 /* Emit cld instruction if stringops are used in the function. */
10727 if (TARGET_CLD && ix86_current_function_needs_cld)
10728 emit_insn (gen_cld ());
10730 /* SEH requires that the prologue end within 256 bytes of the start of
10731 the function. Prevent instruction schedules that would extend that. */
10733 emit_insn (gen_blockage ());
10736 /* Emit code to restore REG using a POP insn. */
10739 ix86_emit_restore_reg_using_pop (rtx reg)
10741 struct machine_function *m = cfun->machine;
10742 rtx insn = emit_insn (gen_pop (reg));
10744 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
10745 m->fs.sp_offset -= UNITS_PER_WORD;
10747 if (m->fs.cfa_reg == crtl->drap_reg
10748 && REGNO (reg) == REGNO (crtl->drap_reg))
10750 /* Previously we'd represented the CFA as an expression
10751 like *(%ebp - 8). We've just popped that value from
10752 the stack, which means we need to reset the CFA to
10753 the drap register. This will remain until we restore
10754 the stack pointer. */
10755 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
10756 RTX_FRAME_RELATED_P (insn) = 1;
10758 /* This means that the DRAP register is valid for addressing too. */
10759 m->fs.drap_valid = true;
10763 if (m->fs.cfa_reg == stack_pointer_rtx)
10765 rtx x = plus_constant (stack_pointer_rtx, UNITS_PER_WORD);
10766 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10767 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
10768 RTX_FRAME_RELATED_P (insn) = 1;
10770 m->fs.cfa_offset -= UNITS_PER_WORD;
10773 /* When the frame pointer is the CFA, and we pop it, we are
10774 swapping back to the stack pointer as the CFA. This happens
10775 for stack frames that don't allocate other data, so we assume
10776 the stack pointer is now pointing at the return address, i.e.
10777      the function entry state, which makes the offset one word.  */
10778 if (reg == hard_frame_pointer_rtx)
10780 m->fs.fp_valid = false;
10781 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
10783 m->fs.cfa_reg = stack_pointer_rtx;
10784 m->fs.cfa_offset -= UNITS_PER_WORD;
10786 add_reg_note (insn, REG_CFA_DEF_CFA,
10787 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10788 GEN_INT (m->fs.cfa_offset)));
10789 RTX_FRAME_RELATED_P (insn) = 1;
10794 /* Emit code to restore saved registers using POP insns. */
10797 ix86_emit_restore_regs_using_pop (void)
10799 unsigned int regno;
10801 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10802 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
10803 ix86_emit_restore_reg_using_pop (gen_rtx_REG (Pmode, regno));
10806 /* Emit code and notes for the LEAVE instruction. */
10809 ix86_emit_leave (void)
10811 struct machine_function *m = cfun->machine;
10812 rtx insn = emit_insn (ix86_gen_leave ());
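  /* Added note: "leave" is shorthand for "mov %ebp, %esp" followed by
     "pop %ebp", so afterwards the stack pointer sits one word above
     where the frame pointer pointed; the state updates below mirror
     exactly that.  */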
10814 ix86_add_queued_cfa_restore_notes (insn);
10816 gcc_assert (m->fs.fp_valid);
10817 m->fs.sp_valid = true;
10818 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
10819 m->fs.fp_valid = false;
10821 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
10823 m->fs.cfa_reg = stack_pointer_rtx;
10824 m->fs.cfa_offset = m->fs.sp_offset;
10826 add_reg_note (insn, REG_CFA_DEF_CFA,
10827 plus_constant (stack_pointer_rtx, m->fs.sp_offset));
10828 RTX_FRAME_RELATED_P (insn) = 1;
10829 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
10834 /* Emit code to restore saved registers using MOV insns.
10835 First register is restored from CFA - CFA_OFFSET. */
10837 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
10838 bool maybe_eh_return)
10840 struct machine_function *m = cfun->machine;
10841 unsigned int regno;
10843 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10844 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
10846 rtx reg = gen_rtx_REG (Pmode, regno);
10849 mem = choose_baseaddr (cfa_offset);
10850 mem = gen_frame_mem (Pmode, mem);
10851 insn = emit_move_insn (reg, mem);
10853 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
10855 /* Previously we'd represented the CFA as an expression
10856 like *(%ebp - 8). We've just popped that value from
10857 the stack, which means we need to reset the CFA to
10858 the drap register. This will remain until we restore
10859 the stack pointer. */
10860 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
10861 RTX_FRAME_RELATED_P (insn) = 1;
10863 /* This means that the DRAP register is valid for addressing. */
10864 m->fs.drap_valid = true;
10867 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
10869 cfa_offset -= UNITS_PER_WORD;
10873 /* Emit code to restore saved registers using MOV insns.
10874 First register is restored from CFA - CFA_OFFSET. */
10876 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
10877 bool maybe_eh_return)
10879 unsigned int regno;
10881 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10882 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
10884 rtx reg = gen_rtx_REG (V4SFmode, regno);
10887 mem = choose_baseaddr (cfa_offset);
10888 mem = gen_rtx_MEM (V4SFmode, mem);
10889 set_mem_align (mem, 128);
10890 emit_move_insn (reg, mem);
10892 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
10898 /* Restore function stack, frame, and registers. */
10901 ix86_expand_epilogue (int style)
10903 struct machine_function *m = cfun->machine;
10904 struct machine_frame_state frame_state_save = m->fs;
10905 struct ix86_frame frame;
10906 bool restore_regs_via_mov;
10909 ix86_finalize_stack_realign_flags ();
10910 ix86_compute_frame_layout (&frame);
10912 m->fs.sp_valid = (!frame_pointer_needed
10913 || (current_function_sp_is_unchanging
10914 && !stack_realign_fp));
10915 gcc_assert (!m->fs.sp_valid
10916 || m->fs.sp_offset == frame.stack_pointer_offset);
10918 /* The FP must be valid if the frame pointer is present. */
10919 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
10920 gcc_assert (!m->fs.fp_valid
10921 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
10923 /* We must have *some* valid pointer to the stack frame. */
10924 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
10926 /* The DRAP is never valid at this point. */
10927 gcc_assert (!m->fs.drap_valid);
10929 /* See the comment about red zone and frame
10930 pointer usage in ix86_expand_prologue. */
10931 if (frame_pointer_needed && frame.red_zone_size)
10932 emit_insn (gen_memory_blockage ());
10934 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
10935 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
10937 /* Determine the CFA offset of the end of the red-zone. */
10938 m->fs.red_zone_offset = 0;
10939 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
10941 /* The red-zone begins below the return address. */
10942 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
10944 /* When the register save area is in the aligned portion of
10945 the stack, determine the maximum runtime displacement that
10946 matches up with the aligned frame. */
10947 if (stack_realign_drap)
10948 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
10952 /* Special care must be taken for the normal return case of a function
10953 using eh_return: the eax and edx registers are marked as saved, but
10954 not restored along this path. Adjust the save location to match. */
10955 if (crtl->calls_eh_return && style != 2)
10956 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
10958 /* EH_RETURN requires the use of moves to function properly. */
10959 if (crtl->calls_eh_return)
10960 restore_regs_via_mov = true;
10961 /* SEH requires the use of pops to identify the epilogue. */
10962 else if (TARGET_SEH)
10963 restore_regs_via_mov = false;
10964   /* If we're only restoring one register and sp is not valid, then
10965      use a move instruction to restore the register, since it's less
10966      work than reloading sp and popping the register.  */
10967 else if (!m->fs.sp_valid && frame.nregs <= 1)
10968 restore_regs_via_mov = true;
10969 else if (TARGET_EPILOGUE_USING_MOVE
10970 && cfun->machine->use_fast_prologue_epilogue
10971 && (frame.nregs > 1
10972 || m->fs.sp_offset != frame.reg_save_offset))
10973 restore_regs_via_mov = true;
10974 else if (frame_pointer_needed
10976 && m->fs.sp_offset != frame.reg_save_offset)
10977 restore_regs_via_mov = true;
10978 else if (frame_pointer_needed
10979 && TARGET_USE_LEAVE
10980 && cfun->machine->use_fast_prologue_epilogue
10981 && frame.nregs == 1)
10982 restore_regs_via_mov = true;
10984 restore_regs_via_mov = false;
10986 if (restore_regs_via_mov || frame.nsseregs)
10988 /* Ensure that the entire register save area is addressable via
10989 the stack pointer, if we will restore via sp. */
10991 && m->fs.sp_offset > 0x7fffffff
10992 && !(m->fs.fp_valid || m->fs.drap_valid)
10993 && (frame.nsseregs + frame.nregs) != 0)
10995 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10996 GEN_INT (m->fs.sp_offset
10997 - frame.sse_reg_save_offset),
10999 m->fs.cfa_reg == stack_pointer_rtx);
11003 /* If there are any SSE registers to restore, then we have to do it
11004 via moves, since there's obviously no pop for SSE regs. */
11005 if (frame.nsseregs)
11006 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
11009 if (restore_regs_via_mov)
11014 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
11016 /* eh_return epilogues need %ecx added to the stack pointer. */
11019 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
11021 /* Stack align doesn't work with eh_return. */
11022 gcc_assert (!stack_realign_drap);
11023       /* Neither do regparm nested functions.  */
11024 gcc_assert (!ix86_static_chain_on_stack);
11026 if (frame_pointer_needed)
11028 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
11029 t = plus_constant (t, m->fs.fp_offset - UNITS_PER_WORD);
11030 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
11032 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
11033 insn = emit_move_insn (hard_frame_pointer_rtx, t);
11035 /* Note that we use SA as a temporary CFA, as the return
11036 address is at the proper place relative to it. We
11037 pretend this happens at the FP restore insn because
11038 prior to this insn the FP would be stored at the wrong
11039 offset relative to SA, and after this insn we have no
11040 other reasonable register to use for the CFA. We don't
11041 bother resetting the CFA to the SP for the duration of
11042 the return insn. */
11043 add_reg_note (insn, REG_CFA_DEF_CFA,
11044 plus_constant (sa, UNITS_PER_WORD));
11045 ix86_add_queued_cfa_restore_notes (insn);
11046 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
11047 RTX_FRAME_RELATED_P (insn) = 1;
11049 m->fs.cfa_reg = sa;
11050 m->fs.cfa_offset = UNITS_PER_WORD;
11051 m->fs.fp_valid = false;
11053 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
11054 const0_rtx, style, false);
11058 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
11059 t = plus_constant (t, m->fs.sp_offset - UNITS_PER_WORD);
11060 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
11061 ix86_add_queued_cfa_restore_notes (insn);
11063 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
11064 if (m->fs.cfa_offset != UNITS_PER_WORD)
11066 m->fs.cfa_offset = UNITS_PER_WORD;
11067 add_reg_note (insn, REG_CFA_DEF_CFA,
11068 plus_constant (stack_pointer_rtx,
11070 RTX_FRAME_RELATED_P (insn) = 1;
11073 m->fs.sp_offset = UNITS_PER_WORD;
11074 m->fs.sp_valid = true;
11079 /* SEH requires that the function end with (1) a stack adjustment
11080 if necessary, (2) a sequence of pops, and (3) a return or
11081 jump instruction. Prevent insns from the function body from
11082 being scheduled into this sequence. */
11085       /* Prevent a catch region from being adjacent to the standard
11086	  epilogue sequence.  Unfortunately, neither crtl->uses_eh_lsda nor
11087	  several other flags that would be interesting to test are
11088	  set up yet.  */
11089 if (flag_non_call_exceptions)
11090 emit_insn (gen_nops (const1_rtx));
11092 emit_insn (gen_blockage ());
11095 /* First step is to deallocate the stack frame so that we can
11096 pop the registers. */
11097 if (!m->fs.sp_valid)
11099 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
11100 GEN_INT (m->fs.fp_offset
11101 - frame.reg_save_offset),
11104 else if (m->fs.sp_offset != frame.reg_save_offset)
11106 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11107 GEN_INT (m->fs.sp_offset
11108 - frame.reg_save_offset),
11110 m->fs.cfa_reg == stack_pointer_rtx);
11113 ix86_emit_restore_regs_using_pop ();
11116   /* If we used a frame pointer and haven't already got rid of it,
11117      then do so now.  */
11118 if (m->fs.fp_valid)
11120 /* If the stack pointer is valid and pointing at the frame
11121 pointer store address, then we only need a pop. */
11122 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
11123 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
11124 /* Leave results in shorter dependency chains on CPUs that are
11125 able to grok it fast. */
11126 else if (TARGET_USE_LEAVE
11127 || optimize_function_for_size_p (cfun)
11128 || !cfun->machine->use_fast_prologue_epilogue)
11129 ix86_emit_leave ();
11132 pro_epilogue_adjust_stack (stack_pointer_rtx,
11133 hard_frame_pointer_rtx,
11134 const0_rtx, style, !using_drap);
11135 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
11141 int param_ptr_offset = UNITS_PER_WORD;
11144 gcc_assert (stack_realign_drap);
11146 if (ix86_static_chain_on_stack)
11147 param_ptr_offset += UNITS_PER_WORD;
11148 if (!call_used_regs[REGNO (crtl->drap_reg)])
11149 param_ptr_offset += UNITS_PER_WORD;
11151 insn = emit_insn (gen_rtx_SET
11152 (VOIDmode, stack_pointer_rtx,
11153 gen_rtx_PLUS (Pmode,
11155 GEN_INT (-param_ptr_offset))));
11156 m->fs.cfa_reg = stack_pointer_rtx;
11157 m->fs.cfa_offset = param_ptr_offset;
11158 m->fs.sp_offset = param_ptr_offset;
11159 m->fs.realigned = false;
11161 add_reg_note (insn, REG_CFA_DEF_CFA,
11162 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11163 GEN_INT (param_ptr_offset)));
11164 RTX_FRAME_RELATED_P (insn) = 1;
11166 if (!call_used_regs[REGNO (crtl->drap_reg)])
11167 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
11170 /* At this point the stack pointer must be valid, and we must have
11171 restored all of the registers. We may not have deallocated the
11172 entire stack frame. We've delayed this until now because it may
11173 be possible to merge the local stack deallocation with the
11174 deallocation forced by ix86_static_chain_on_stack. */
11175 gcc_assert (m->fs.sp_valid);
11176 gcc_assert (!m->fs.fp_valid);
11177 gcc_assert (!m->fs.realigned);
11178 if (m->fs.sp_offset != UNITS_PER_WORD)
11180 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11181 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
11185 /* Sibcall epilogues don't want a return instruction. */
11188 m->fs = frame_state_save;
11192 /* Emit vzeroupper if needed. */
11193 if (TARGET_VZEROUPPER
11194 && !TREE_THIS_VOLATILE (cfun->decl)
11195 && !cfun->machine->caller_return_avx256_p)
11196 emit_insn (gen_avx_vzeroupper (GEN_INT (call_no_avx256)));
11198 if (crtl->args.pops_args && crtl->args.size)
11200 rtx popc = GEN_INT (crtl->args.pops_args);
11202       /* i386 can only pop 64K bytes.  If asked to pop more, pop the return
11203	  address, do an explicit add, and jump indirectly to the caller.  */
11205 if (crtl->args.pops_args >= 65536)
11207 rtx ecx = gen_rtx_REG (SImode, CX_REG);
11210 /* There is no "pascal" calling convention in any 64bit ABI. */
11211 gcc_assert (!TARGET_64BIT);
11213 insn = emit_insn (gen_pop (ecx));
11214 m->fs.cfa_offset -= UNITS_PER_WORD;
11215 m->fs.sp_offset -= UNITS_PER_WORD;
11217 add_reg_note (insn, REG_CFA_ADJUST_CFA,
11218 copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
11219 add_reg_note (insn, REG_CFA_REGISTER,
11220 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
11221 RTX_FRAME_RELATED_P (insn) = 1;
11223 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11225 emit_jump_insn (gen_return_indirect_internal (ecx));
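	  /* Added illustration: the sequence emitted for this branch is
	     roughly
		popl	%ecx		# return address
		addl	$N, %esp	# N == crtl->args.pops_args
		jmp	*%ecx
	     (registers and syntax per the configured assembler
	     dialect).  */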
11228 emit_jump_insn (gen_return_pop_internal (popc));
11231 emit_jump_insn (gen_return_internal ());
11233 /* Restore the state back to the state from the prologue,
11234 so that it's correct for the next epilogue. */
11235 m->fs = frame_state_save;
11238 /* Reset from the function's potential modifications. */
11241 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
11242 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
11244 if (pic_offset_table_rtx)
11245 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
11247 /* Mach-O doesn't support labels at the end of objects, so if
11248 it looks like we might want one, insert a NOP. */
11250 rtx insn = get_last_insn ();
11253 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
11254 insn = PREV_INSN (insn);
11258 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
11259 fputs ("\tnop\n", file);
11265 /* Return a scratch register to use in the split stack prologue. The
11266 split stack prologue is used for -fsplit-stack. It is the first
11267 instructions in the function, even before the regular prologue.
11268 The scratch register can be any caller-saved register which is not
11269 used for parameters or for the static chain. */
11271 static unsigned int
11272 split_stack_prologue_scratch_regno (void)
11281 is_fastcall = (lookup_attribute ("fastcall",
11282 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
11284 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
11288 if (DECL_STATIC_CHAIN (cfun->decl))
11290 sorry ("-fsplit-stack does not support fastcall with "
11291 "nested function");
11292 return INVALID_REGNUM;
11296 else if (regparm < 3)
11298 if (!DECL_STATIC_CHAIN (cfun->decl))
11304 sorry ("-fsplit-stack does not support 2 register "
11305 " parameters for a nested function");
11306 return INVALID_REGNUM;
11313 /* FIXME: We could make this work by pushing a register
11314 around the addition and comparison. */
11315 sorry ("-fsplit-stack does not support 3 register parameters");
11316 return INVALID_REGNUM;
11321 /* A SYMBOL_REF for the function which allocates new stack space for
11322    -fsplit-stack.  */
11324 static GTY(()) rtx split_stack_fn;
11326 /* A SYMBOL_REF for the more stack function when using the large
11327    model.  */
11329 static GTY(()) rtx split_stack_fn_large;
11331 /* Handle -fsplit-stack. These are the first instructions in the
11332 function, even before the regular prologue. */
11335 ix86_expand_split_stack_prologue (void)
11337 struct ix86_frame frame;
11338 HOST_WIDE_INT allocate;
11339 unsigned HOST_WIDE_INT args_size;
11340 rtx label, limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
11341 rtx scratch_reg = NULL_RTX;
11342 rtx varargs_label = NULL_RTX;
11345 gcc_assert (flag_split_stack && reload_completed);
11347 ix86_finalize_stack_realign_flags ();
11348 ix86_compute_frame_layout (&frame);
11349 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
11351 /* This is the label we will branch to if we have enough stack
11352 space. We expect the basic block reordering pass to reverse this
11353 branch if optimizing, so that we branch in the unlikely case. */
11354 label = gen_label_rtx ();
11356 /* We need to compare the stack pointer minus the frame size with
11357 the stack boundary in the TCB. The stack boundary always gives
11358 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
11359 can compare directly. Otherwise we need to do an addition. */
11361 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
11362 UNSPEC_STACK_CHECK);
11363 limit = gen_rtx_CONST (Pmode, limit);
11364 limit = gen_rtx_MEM (Pmode, limit);
11365 if (allocate < SPLIT_STACK_AVAILABLE)
11366 current = stack_pointer_rtx;
11369 unsigned int scratch_regno;
11372 /* We need a scratch register to hold the stack pointer minus
11373 the required frame size. Since this is the very start of the
11374 function, the scratch register can be any caller-saved
11375 register which is not used for parameters. */
11376 offset = GEN_INT (- allocate);
11377 scratch_regno = split_stack_prologue_scratch_regno ();
11378 if (scratch_regno == INVALID_REGNUM)
11380 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11381 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
11383 /* We don't use ix86_gen_add3 in this case because it will
11384 want to split to lea, but when not optimizing the insn
11385 will not be split after this point. */
11386 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11387 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11392 emit_move_insn (scratch_reg, offset);
11393 emit_insn (gen_adddi3 (scratch_reg, scratch_reg,
11394 stack_pointer_rtx));
11396 current = scratch_reg;
11399 ix86_expand_branch (GEU, current, limit, label);
11400 jump_insn = get_last_insn ();
11401 JUMP_LABEL (jump_insn) = label;
11403 /* Mark the jump as very likely to be taken. */
11404 add_reg_note (jump_insn, REG_BR_PROB,
11405 GEN_INT (REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100));
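  /* Added note: REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100 encodes a
     99% probability, i.e. we tell the optimizers that in the common
     case there is enough stack and __morestack is not called.  */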
11407 if (split_stack_fn == NULL_RTX)
11408 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
11409 fn = split_stack_fn;
11411 /* Get more stack space. We pass in the desired stack space and the
11412 size of the arguments to copy to the new stack. In 32-bit mode
11413 we push the parameters; __morestack will return on a new stack
11414      anyhow.  In 64-bit mode we pass the parameters in r10 and
11415      r11.  */
11416 allocate_rtx = GEN_INT (allocate);
11417 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
11418 call_fusage = NULL_RTX;
11423 reg10 = gen_rtx_REG (Pmode, R10_REG);
11424 reg11 = gen_rtx_REG (Pmode, R11_REG);
11426 /* If this function uses a static chain, it will be in %r10.
11427 Preserve it across the call to __morestack. */
11428 if (DECL_STATIC_CHAIN (cfun->decl))
11432 rax = gen_rtx_REG (Pmode, AX_REG);
11433 emit_move_insn (rax, reg10);
11434 use_reg (&call_fusage, rax);
11437 if (ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
11439 HOST_WIDE_INT argval;
11441 /* When using the large model we need to load the address
11442 into a register, and we've run out of registers. So we
11443 switch to a different calling convention, and we call a
11444 different function: __morestack_large. We pass the
11445 argument size in the upper 32 bits of r10 and pass the
11446 frame size in the lower 32 bits. */
11447 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
11448 gcc_assert ((args_size & 0xffffffff) == args_size);
11450 if (split_stack_fn_large == NULL_RTX)
11451 split_stack_fn_large =
11452 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
11454 if (ix86_cmodel == CM_LARGE_PIC)
11458 label = gen_label_rtx ();
11459 emit_label (label);
11460 LABEL_PRESERVE_P (label) = 1;
11461 emit_insn (gen_set_rip_rex64 (reg10, label));
11462 emit_insn (gen_set_got_offset_rex64 (reg11, label));
11463 emit_insn (gen_adddi3 (reg10, reg10, reg11));
11464 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
11466 x = gen_rtx_CONST (Pmode, x);
11467 emit_move_insn (reg11, x);
11468 x = gen_rtx_PLUS (Pmode, reg10, reg11);
11469 x = gen_const_mem (Pmode, x);
11470 emit_move_insn (reg11, x);
11473 emit_move_insn (reg11, split_stack_fn_large);
11477 argval = ((args_size << 16) << 16) + allocate;
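	  /* Added note: the double shift computes args_size << 32
	     without the undefined behavior of a single shift by the
	     full type width should HOST_WIDE_INT be only 32 bits wide.
	     E.g. args_size == 0x18 and allocate == 0x2000 gives
	     argval == 0x0000001800002000.  */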
11478 emit_move_insn (reg10, GEN_INT (argval));
11482 emit_move_insn (reg10, allocate_rtx);
11483 emit_move_insn (reg11, GEN_INT (args_size));
11484 use_reg (&call_fusage, reg11);
11487 use_reg (&call_fusage, reg10);
11491 emit_insn (gen_push (GEN_INT (args_size)));
11492 emit_insn (gen_push (allocate_rtx));
11494 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
11495 GEN_INT (UNITS_PER_WORD), constm1_rtx,
11497 add_function_usage_to (call_insn, call_fusage);
11499 /* In order to make call/return prediction work right, we now need
11500 to execute a return instruction. See
11501 libgcc/config/i386/morestack.S for the details on how this works.
11503 For flow purposes gcc must not see this as a return
11504 instruction--we need control flow to continue at the subsequent
11505 label. Therefore, we use an unspec. */
11506 gcc_assert (crtl->args.pops_args < 65536);
11507 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
11509 /* If we are in 64-bit mode and this function uses a static chain,
11510      we saved %r10 in %rax before calling __morestack.  */
11511 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
11512 emit_move_insn (gen_rtx_REG (Pmode, R10_REG),
11513 gen_rtx_REG (Pmode, AX_REG));
11515 /* If this function calls va_start, we need to store a pointer to
11516 the arguments on the old stack, because they may not have been
11517 all copied to the new stack. At this point the old stack can be
11518 found at the frame pointer value used by __morestack, because
11519 __morestack has set that up before calling back to us. Here we
11520 store that pointer in a scratch register, and in
11521      ix86_expand_prologue we store the scratch register in a stack
11522      slot.  */
11523 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11525 unsigned int scratch_regno;
11529 scratch_regno = split_stack_prologue_scratch_regno ();
11530 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11531 frame_reg = gen_rtx_REG (Pmode, BP_REG);
11533      /* 64-bit:
11534	  fp -> old fp value
11535		return address within this function
11536		return address of caller of this function
11537		stack arguments
11538	  So we add three words to get to the stack arguments.
11540	  32-bit:
11541	  fp -> old fp value
11542		return address within this function
11543		first argument to __morestack
11544		second argument to __morestack
11545		return address of caller of this function
11546		stack arguments
11547	  So we add five words to get to the stack arguments.  */
11549 words = TARGET_64BIT ? 3 : 5;
11550 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11551 gen_rtx_PLUS (Pmode, frame_reg,
11552 GEN_INT (words * UNITS_PER_WORD))));
11554 varargs_label = gen_label_rtx ();
11555 emit_jump_insn (gen_jump (varargs_label));
11556 JUMP_LABEL (get_last_insn ()) = varargs_label;
11561 emit_label (label);
11562 LABEL_NUSES (label) = 1;
11564 /* If this function calls va_start, we now have to set the scratch
11565 register for the case where we do not call __morestack. In this
11566 case we need to set it based on the stack pointer. */
11567 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11569 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11570 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11571 GEN_INT (UNITS_PER_WORD))));
11573 emit_label (varargs_label);
11574 LABEL_NUSES (varargs_label) = 1;
11578 /* We may have to tell the dataflow pass that the split stack prologue
11579 is initializing a scratch register. */
11582 ix86_live_on_entry (bitmap regs)
11584 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11586 gcc_assert (flag_split_stack);
11587 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
11591 /* Extract the parts of an RTL expression that is a valid memory address
11592 for an instruction. Return 0 if the structure of the address is
11593    grossly off.  Return -1 if the address contains ASHIFT, so it is not
11594    strictly valid, but is still used for computing the length of the lea instruction.  */
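/* Added worked example: the address
	(plus (reg %ebx) (plus (mult (reg %esi) (const_int 4)) (const_int 12)))
   decomposes into base = %ebx, index = %esi, scale = 4, disp = 12,
   i.e. the operand printed as 12(%ebx,%esi,4) in AT&T syntax.  */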
11597 ix86_decompose_address (rtx addr, struct ix86_address *out)
11599 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
11600 rtx base_reg, index_reg;
11601 HOST_WIDE_INT scale = 1;
11602 rtx scale_rtx = NULL_RTX;
11605 enum ix86_address_seg seg = SEG_DEFAULT;
11607 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
11609 else if (GET_CODE (addr) == PLUS)
11611 rtx addends[4], op;
11619 addends[n++] = XEXP (op, 1);
11622 while (GET_CODE (op) == PLUS);
11627 for (i = n; i >= 0; --i)
11630 switch (GET_CODE (op))
11635 index = XEXP (op, 0);
11636 scale_rtx = XEXP (op, 1);
11642 index = XEXP (op, 0);
11643 tmp = XEXP (op, 1);
11644 if (!CONST_INT_P (tmp))
11646 scale = INTVAL (tmp);
11647 if ((unsigned HOST_WIDE_INT) scale > 3)
11649 scale = 1 << scale;
11653 if (XINT (op, 1) == UNSPEC_TP
11654 && TARGET_TLS_DIRECT_SEG_REFS
11655 && seg == SEG_DEFAULT)
11656 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
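	      /* Added note: this is what lets a TLS access such as
			(plus (unspec [(const_int 0)] UNSPEC_TP) (const ...))
		 be emitted as a %fs:- (64-bit) or %gs:- (32-bit)
		 relative operand, matching where each ABI keeps the
		 thread pointer.  */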
11685 else if (GET_CODE (addr) == MULT)
11687 index = XEXP (addr, 0); /* index*scale */
11688 scale_rtx = XEXP (addr, 1);
11690 else if (GET_CODE (addr) == ASHIFT)
11692 /* We're called for lea too, which implements ashift on occasion. */
11693 index = XEXP (addr, 0);
11694 tmp = XEXP (addr, 1);
11695 if (!CONST_INT_P (tmp))
11697 scale = INTVAL (tmp);
11698 if ((unsigned HOST_WIDE_INT) scale > 3)
11700 scale = 1 << scale;
11704 disp = addr; /* displacement */
11706 /* Extract the integral value of scale. */
11709 if (!CONST_INT_P (scale_rtx))
11711 scale = INTVAL (scale_rtx);
11714 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
11715 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
11717 /* Avoid useless 0 displacement. */
11718 if (disp == const0_rtx && (base || index))
11721   /* Allow arg pointer and stack pointer as index if there is no scaling.  */
11722 if (base_reg && index_reg && scale == 1
11723 && (index_reg == arg_pointer_rtx
11724 || index_reg == frame_pointer_rtx
11725 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
11728 tmp = base, base = index, index = tmp;
11729 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
11732   /* Special case: %ebp cannot be encoded as a base without a displacement.
11733      Similarly %r13.  */
11736 && (base_reg == hard_frame_pointer_rtx
11737 || base_reg == frame_pointer_rtx
11738 || base_reg == arg_pointer_rtx
11739 || (REG_P (base_reg)
11740 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
11741 || REGNO (base_reg) == R13_REG))))
11744 /* Special case: on K6, [%esi] makes the instruction vector decoded.
11745 Avoid this by transforming to [%esi+0].
11746 Reload calls address legitimization without cfun defined, so we need
11747 to test cfun for being non-NULL. */
11748 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
11749 && base_reg && !index_reg && !disp
11750 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
11753 /* Special case: encode reg+reg instead of reg*2. */
11754 if (!base && index && scale == 2)
11755 base = index, base_reg = index_reg, scale = 1;
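  /* Added note: the reg+reg form is shorter, because a scaled index
     with no base must be encoded with a full 32-bit displacement;
     e.g. (%eax,%eax) assembles smaller than 0(,%eax,2).  */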
11757 /* Special case: scaling cannot be encoded without base or displacement. */
11758 if (!base && !disp && index && scale != 1)
11762 out->index = index;
11764 out->scale = scale;
11770 /* Return cost of the memory address x.
11771 For i386, it is better to use a complex address than let gcc copy
11772    the address into a reg and make a new pseudo.  But not if the address
11773    requires two regs - that would mean more pseudos with longer
11774    lifetimes.  */
11776 ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
11778 struct ix86_address parts;
11780 int ok = ix86_decompose_address (x, &parts);
11784 if (parts.base && GET_CODE (parts.base) == SUBREG)
11785 parts.base = SUBREG_REG (parts.base);
11786 if (parts.index && GET_CODE (parts.index) == SUBREG)
11787 parts.index = SUBREG_REG (parts.index);
11789 /* Attempt to minimize number of registers in the address. */
11791 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
11793 && (!REG_P (parts.index)
11794 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
11798 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
11800 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
11801 && parts.base != parts.index)
11804   /* The AMD K6 doesn't like addresses with the ModR/M byte set to 00_xxx_100b,
11805      since its predecode logic can't detect the length of such instructions
11806      and decoding degenerates to vector decoding.  Increase the cost of such
11807      addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
11808      to split such addresses or even refuse such addresses at all.
11810      Following addressing modes are affected:
11811	[base+scale*index]
11812	[scale*index+disp]
11813	[base+index]
11815      The first and last case may be avoidable by explicitly coding the zero in
11816      the memory address, but I don't have an AMD-K6 machine handy to check this
11817      theory.  */
11820 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
11821 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
11822 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
11828 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
11829    this is used to form addresses to local data when -fPIC is in
11830    effect.  */
11833 darwin_local_data_pic (rtx disp)
11835 return (GET_CODE (disp) == UNSPEC
11836 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
11839 /* Determine if a given RTX is a valid constant. We already know this
11840 satisfies CONSTANT_P. */
11843 ix86_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
11845 switch (GET_CODE (x))
11850 if (GET_CODE (x) == PLUS)
11852 if (!CONST_INT_P (XEXP (x, 1)))
11857 if (TARGET_MACHO && darwin_local_data_pic (x))
11860 /* Only some unspecs are valid as "constants". */
11861 if (GET_CODE (x) == UNSPEC)
11862 switch (XINT (x, 1))
11865 case UNSPEC_GOTOFF:
11866 case UNSPEC_PLTOFF:
11867 return TARGET_64BIT;
11869 case UNSPEC_NTPOFF:
11870 x = XVECEXP (x, 0, 0);
11871 return (GET_CODE (x) == SYMBOL_REF
11872 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11873 case UNSPEC_DTPOFF:
11874 x = XVECEXP (x, 0, 0);
11875 return (GET_CODE (x) == SYMBOL_REF
11876 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
11881 /* We must have drilled down to a symbol. */
11882 if (GET_CODE (x) == LABEL_REF)
11884 if (GET_CODE (x) != SYMBOL_REF)
11889 /* TLS symbols are never valid. */
11890 if (SYMBOL_REF_TLS_MODEL (x))
11893 /* DLLIMPORT symbols are never valid. */
11894 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
11895 && SYMBOL_REF_DLLIMPORT_P (x))
11899 /* mdynamic-no-pic */
11900 if (MACHO_DYNAMIC_NO_PIC_P)
11901 return machopic_symbol_defined_p (x);
11906 if (GET_MODE (x) == TImode
11907 && x != CONST0_RTX (TImode)
11913 if (!standard_sse_constant_p (x))
11920 /* Otherwise we handle everything else in the move patterns. */
11924 /* Determine if it's legal to put X into the constant pool. This
11925 is not possible for the address of thread-local symbols, which
11926 is checked above. */
11929 ix86_cannot_force_const_mem (enum machine_mode mode, rtx x)
11931 /* We can always put integral constants and vectors in memory. */
11932 switch (GET_CODE (x))
11942 return !ix86_legitimate_constant_p (mode, x);
11946 /* Nonzero if the constant value X is a legitimate general operand
11947 when generating PIC code. It is given that flag_pic is on and
11948 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
11951 legitimate_pic_operand_p (rtx x)
11955 switch (GET_CODE (x))
11958 inner = XEXP (x, 0);
11959 if (GET_CODE (inner) == PLUS
11960 && CONST_INT_P (XEXP (inner, 1)))
11961 inner = XEXP (inner, 0);
11963 /* Only some unspecs are valid as "constants". */
11964 if (GET_CODE (inner) == UNSPEC)
11965 switch (XINT (inner, 1))
11968 case UNSPEC_GOTOFF:
11969 case UNSPEC_PLTOFF:
11970 return TARGET_64BIT;
11972 x = XVECEXP (inner, 0, 0);
11973 return (GET_CODE (x) == SYMBOL_REF
11974 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11975 case UNSPEC_MACHOPIC_OFFSET:
11976 return legitimate_pic_address_disp_p (x);
11984 return legitimate_pic_address_disp_p (x);
11991 /* Determine if a given CONST RTX is a valid memory displacement
11992    in PIC mode.  */
11995 legitimate_pic_address_disp_p (rtx disp)
11999 /* In 64bit mode we can allow direct addresses of symbols and labels
12000 when they are not dynamic symbols. */
12003 rtx op0 = disp, op1;
12005 switch (GET_CODE (disp))
12011 if (GET_CODE (XEXP (disp, 0)) != PLUS)
12013 op0 = XEXP (XEXP (disp, 0), 0);
12014 op1 = XEXP (XEXP (disp, 0), 1);
12015 if (!CONST_INT_P (op1)
12016 || INTVAL (op1) >= 16*1024*1024
12017 || INTVAL (op1) < -16*1024*1024)
12019 if (GET_CODE (op0) == LABEL_REF)
12021 if (GET_CODE (op0) != SYMBOL_REF)
12026 /* TLS references should always be enclosed in UNSPEC. */
12027 if (SYMBOL_REF_TLS_MODEL (op0))
12029 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
12030 && ix86_cmodel != CM_LARGE_PIC)
12038 if (GET_CODE (disp) != CONST)
12040 disp = XEXP (disp, 0);
12044   /* It is unsafe to allow PLUS expressions here; that would exceed the allowed
12045      displacement range of GOT tables.  We should not need these anyway.  */
12046 if (GET_CODE (disp) != UNSPEC
12047 || (XINT (disp, 1) != UNSPEC_GOTPCREL
12048 && XINT (disp, 1) != UNSPEC_GOTOFF
12049 && XINT (disp, 1) != UNSPEC_PCREL
12050 && XINT (disp, 1) != UNSPEC_PLTOFF))
12053 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
12054 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
12060 if (GET_CODE (disp) == PLUS)
12062 if (!CONST_INT_P (XEXP (disp, 1)))
12064 disp = XEXP (disp, 0);
12068 if (TARGET_MACHO && darwin_local_data_pic (disp))
12071 if (GET_CODE (disp) != UNSPEC)
12074 switch (XINT (disp, 1))
12079 /* We need to check for both symbols and labels because VxWorks loads
12080	 text labels with @GOT rather than @GOTOFF.  See gotoff_operand for
12081	 details.  */
12082 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
12083 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
12084 case UNSPEC_GOTOFF:
12085      /* Refuse GOTOFF in 64-bit mode, since it is always 64 bits when used.
12086	  The ABI also specifies a 32-bit relocation, but we don't produce it
12087	  in the small PIC model at all.  */
12088 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
12089 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
12091 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
12093 case UNSPEC_GOTTPOFF:
12094 case UNSPEC_GOTNTPOFF:
12095 case UNSPEC_INDNTPOFF:
12098 disp = XVECEXP (disp, 0, 0);
12099 return (GET_CODE (disp) == SYMBOL_REF
12100 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
12101 case UNSPEC_NTPOFF:
12102 disp = XVECEXP (disp, 0, 0);
12103 return (GET_CODE (disp) == SYMBOL_REF
12104 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
12105 case UNSPEC_DTPOFF:
12106 disp = XVECEXP (disp, 0, 0);
12107 return (GET_CODE (disp) == SYMBOL_REF
12108 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
12114 /* Recognizes RTL expressions that are valid memory addresses for an
12115 instruction. The MODE argument is the machine mode for the MEM
12116 expression that wants to use this address.
12118    It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
12119    convert common non-canonical forms to canonical form so that they will
12120    be recognized.  */
12123 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
12124 rtx addr, bool strict)
12126 struct ix86_address parts;
12127 rtx base, index, disp;
12128 HOST_WIDE_INT scale;
12130 if (ix86_decompose_address (addr, &parts) <= 0)
12131 /* Decomposition failed. */
12135 index = parts.index;
12137 scale = parts.scale;
12139 /* Validate base register.
12141 Don't allow SUBREG's that span more than a word here. It can lead to spill
12142 failures when the base is one word out of a two word structure, which is
12143 represented internally as a DImode int. */
12151 else if (GET_CODE (base) == SUBREG
12152 && REG_P (SUBREG_REG (base))
12153 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
12155 reg = SUBREG_REG (base);
12157 /* Base is not a register. */
12160 if (GET_MODE (base) != Pmode)
12161 /* Base is not in Pmode. */
12164 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
12165 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
12166 /* Base is not valid. */
12170 /* Validate index register.
12172 Don't allow SUBREG's that span more than a word here -- same as above. */
12180 else if (GET_CODE (index) == SUBREG
12181 && REG_P (SUBREG_REG (index))
12182 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
12184 reg = SUBREG_REG (index);
12186 /* Index is not a register. */
12189 if (GET_MODE (index) != Pmode)
12190 /* Index is not in Pmode. */
12193 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
12194 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
12195 /* Index is not valid. */
12199 /* Validate scale factor. */
12203 /* Scale without index. */
12206 if (scale != 2 && scale != 4 && scale != 8)
12207 /* Scale is not a valid multiplier. */
12211 /* Validate displacement. */
12214 if (GET_CODE (disp) == CONST
12215 && GET_CODE (XEXP (disp, 0)) == UNSPEC
12216 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
12217 switch (XINT (XEXP (disp, 0), 1))
12219       /* Refuse GOTOFF and GOT in 64-bit mode, since they are always 64 bits
12220	  when used.  The ABI also specifies 32-bit relocations, but we don't
12221	  produce them at all and use IP-relative addressing instead.  */
12223 case UNSPEC_GOTOFF:
12224 gcc_assert (flag_pic);
12226 goto is_legitimate_pic;
12228 /* 64bit address unspec. */
12231 case UNSPEC_GOTPCREL:
12233 gcc_assert (flag_pic);
12234 goto is_legitimate_pic;
12236 case UNSPEC_GOTTPOFF:
12237 case UNSPEC_GOTNTPOFF:
12238 case UNSPEC_INDNTPOFF:
12239 case UNSPEC_NTPOFF:
12240 case UNSPEC_DTPOFF:
12243 case UNSPEC_STACK_CHECK:
12244 gcc_assert (flag_split_stack);
12248 /* Invalid address unspec. */
12252 else if (SYMBOLIC_CONST (disp)
12256 && MACHOPIC_INDIRECT
12257 && !machopic_operand_p (disp)
12263 if (TARGET_64BIT && (index || base))
12265 /* foo@dtpoff(%rX) is ok. */
12266 if (GET_CODE (disp) != CONST
12267 || GET_CODE (XEXP (disp, 0)) != PLUS
12268 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
12269 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
12270 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
12271 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
12272 /* Non-constant pic memory reference. */
12275 else if ((!TARGET_MACHO || flag_pic)
12276 && ! legitimate_pic_address_disp_p (disp))
12277 /* Displacement is an invalid pic construct. */
12280 else if (MACHO_DYNAMIC_NO_PIC_P
12281 && !ix86_legitimate_constant_p (Pmode, disp))
12282	    /* Displacement must be referenced via a non-lazy pointer.  */
12286 /* This code used to verify that a symbolic pic displacement
12287 includes the pic_offset_table_rtx register.
12289	 While this is a good idea, unfortunately these constructs may
12290	 be created by the "adds using lea" optimization for incorrect
12299	 This code is nonsensical, but results in addressing the
12300	 GOT table with a pic_offset_table_rtx base.  We can't
12301	 just refuse it easily, since it is matched by the
12302	 "addsi3" pattern, which later gets split to lea in the
12303	 case the output register differs from the input.  While this
12304	 could be handled by a separate addsi pattern for this case
12305	 that never results in lea, disabling this test seems to be the
12306	 easier and correct fix for the crash.  */
12308 else if (GET_CODE (disp) != LABEL_REF
12309 && !CONST_INT_P (disp)
12310 && (GET_CODE (disp) != CONST
12311 || !ix86_legitimate_constant_p (Pmode, disp))
12312 && (GET_CODE (disp) != SYMBOL_REF
12313 || !ix86_legitimate_constant_p (Pmode, disp)))
12314 /* Displacement is not constant. */
12316 else if (TARGET_64BIT
12317 && !x86_64_immediate_operand (disp, VOIDmode))
12318 /* Displacement is out of range. */
12322 /* Everything looks valid. */
12326 /* Determine if a given RTX is a valid constant address. */
12329 constant_address_p (rtx x)
12331 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
12334 /* Return a unique alias set for the GOT. */
12336 static alias_set_type
12337 ix86_GOT_alias_set (void)
12339 static alias_set_type set = -1;
12341 set = new_alias_set ();
12345 /* Return a legitimate reference for ORIG (an address) using the
12346 register REG. If REG is 0, a new pseudo is generated.
12348 There are two types of references that must be handled:
12350 1. Global data references must load the address from the GOT, via
12351 the PIC reg. An insn is emitted to do this load, and the reg is
12354 2. Static data references, constant pool addresses, and code labels
12355 compute the address as an offset from the GOT, whose base is in
12356 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
12357 differentiate them from global data objects. The returned
12358 address is the PIC reg + an unspec constant.
12360 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
12361 reg also appears in the address. */
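/* An illustrative sketch (not the exact emitted RTL): under -fpic on
   ia32, a global access becomes roughly

	(mem (plus pic_reg (const (unspec [foo] UNSPEC_GOT))))

   i.e. a load from the GOT, while a local/static access becomes

	(plus pic_reg (const (unspec [bar] UNSPEC_GOTOFF)))

   i.e. the PIC register plus a link-time constant offset.  */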
12364 legitimize_pic_address (rtx orig, rtx reg)
12367 rtx new_rtx = orig;
12371 if (TARGET_MACHO && !TARGET_64BIT)
12374 reg = gen_reg_rtx (Pmode);
12375 /* Use the generic Mach-O PIC machinery. */
12376 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
12380 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
12382 else if (TARGET_64BIT
12383 && ix86_cmodel != CM_SMALL_PIC
12384 && gotoff_operand (addr, Pmode))
12387 /* This symbol may be referenced via a displacement from the PIC
12388 base address (@GOTOFF). */
12390 if (reload_in_progress)
12391 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12392 if (GET_CODE (addr) == CONST)
12393 addr = XEXP (addr, 0);
12394 if (GET_CODE (addr) == PLUS)
12396 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
12398 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
12401 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
12402 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12404 tmpreg = gen_reg_rtx (Pmode);
12407 emit_move_insn (tmpreg, new_rtx);
12411 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
12412 tmpreg, 1, OPTAB_DIRECT);
12415 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
12417 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
12419 /* This symbol may be referenced via a displacement from the PIC
12420 base address (@GOTOFF). */
12422 if (reload_in_progress)
12423 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12424 if (GET_CODE (addr) == CONST)
12425 addr = XEXP (addr, 0);
12426 if (GET_CODE (addr) == PLUS)
12428 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
12430 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
12433 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
12434 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12435 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12439 emit_move_insn (reg, new_rtx);
12443 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
12444 /* We can't use @GOTOFF for text labels on VxWorks;
12445 see gotoff_operand. */
12446 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
12448 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
12450 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
12451 return legitimize_dllimport_symbol (addr, true);
12452 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
12453 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
12454 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
12456 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
12457 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
12461 /* For x64 PE-COFF there is no GOT table, so we use the address directly. */
12463 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
12465 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
12466 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12469 reg = gen_reg_rtx (Pmode);
12470 emit_move_insn (reg, new_rtx);
12473 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
12475 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
12476 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12477 new_rtx = gen_const_mem (Pmode, new_rtx);
12478 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
12481 reg = gen_reg_rtx (Pmode);
12482 /* Use gen_movsi directly; otherwise the address is loaded
12483 into a register for CSE. We don't want to CSE these
12484 addresses; instead we CSE addresses from the GOT table, so skip this. */
12485 emit_insn (gen_movsi (reg, new_rtx));
12490 /* This symbol must be referenced via a load from the
12491 Global Offset Table (@GOT). */
12493 if (reload_in_progress)
12494 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12495 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
12496 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12498 new_rtx = force_reg (Pmode, new_rtx);
12499 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12500 new_rtx = gen_const_mem (Pmode, new_rtx);
12501 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
12504 reg = gen_reg_rtx (Pmode);
12505 emit_move_insn (reg, new_rtx);
12511 if (CONST_INT_P (addr)
12512 && !x86_64_immediate_operand (addr, VOIDmode))
12516 emit_move_insn (reg, addr);
12520 new_rtx = force_reg (Pmode, addr);
12522 else if (GET_CODE (addr) == CONST)
12524 addr = XEXP (addr, 0);
12526 /* We must match stuff we generate before. Assume the only
12527 unspecs that can get here are ours. Not that we could do
12528 anything with them anyway.... */
12529 if (GET_CODE (addr) == UNSPEC
12530 || (GET_CODE (addr) == PLUS
12531 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
12533 gcc_assert (GET_CODE (addr) == PLUS);
12535 if (GET_CODE (addr) == PLUS)
12537 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
12539 /* Check first to see if this is a constant offset from a @GOTOFF
12540 symbol reference. */
12541 if (gotoff_operand (op0, Pmode)
12542 && CONST_INT_P (op1))
12546 if (reload_in_progress)
12547 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12548 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
12550 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
12551 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12552 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12556 emit_move_insn (reg, new_rtx);
12562 if (INTVAL (op1) < -16*1024*1024
12563 || INTVAL (op1) >= 16*1024*1024)
12565 if (!x86_64_immediate_operand (op1, Pmode))
12566 op1 = force_reg (Pmode, op1);
12567 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
12573 base = legitimize_pic_address (XEXP (addr, 0), reg);
12574 new_rtx = legitimize_pic_address (XEXP (addr, 1),
12575 base == reg ? NULL_RTX : reg);
12577 if (CONST_INT_P (new_rtx))
12578 new_rtx = plus_constant (base, INTVAL (new_rtx));
12581 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
12583 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
12584 new_rtx = XEXP (new_rtx, 1);
12586 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
12594 /* Load the thread pointer. If TO_REG is true, force it into a register. */
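/* Note: the UNSPEC_TP reference built here ends up addressing the
   %fs (64-bit) or %gs (32-bit) segment base, i.e. the TLS thread
   pointer; compare the SEG_FS/SEG_GS check in
   ix86_delegitimize_tls_address below.  */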
12597 get_thread_pointer (bool to_reg)
12601 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
12605 reg = gen_reg_rtx (Pmode);
12606 insn = gen_rtx_SET (VOIDmode, reg, tp);
12607 insn = emit_insn (insn);
12612 /* Construct the SYMBOL_REF for the tls_get_addr function. */
12614 static GTY(()) rtx ix86_tls_symbol;
12617 ix86_tls_get_addr (void)
12619 if (!ix86_tls_symbol)
12622 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
12623 ? "___tls_get_addr" : "__tls_get_addr");
12625 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
12628 return ix86_tls_symbol;
12631 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
12633 static GTY(()) rtx ix86_tls_module_base_symbol;
12636 ix86_tls_module_base (void)
12638 if (!ix86_tls_module_base_symbol)
12640 ix86_tls_module_base_symbol
12641 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
12643 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
12644 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
12647 return ix86_tls_module_base_symbol;
12650 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
12651 false if we expect this to be used for a memory address and true if
12652 we expect to load the address into a register. */
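/* As a rough sketch (assuming GNU TLS), local-exec builds

	tp + (const (unspec [x] UNSPEC_NTPOFF))

   directly, initial-exec first loads the offset from the GOT through
   an UNSPEC_GOTNTPOFF/UNSPEC_GOTTPOFF memory reference, and the
   dynamic models call __tls_get_addr instead.  */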
12655 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
12657 rtx dest, base, off;
12658 rtx pic = NULL_RTX, tp = NULL_RTX;
12663 case TLS_MODEL_GLOBAL_DYNAMIC:
12664 dest = gen_reg_rtx (Pmode);
12669 pic = pic_offset_table_rtx;
12672 pic = gen_reg_rtx (Pmode);
12673 emit_insn (gen_set_got (pic));
12677 if (TARGET_GNU2_TLS)
12680 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
12682 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
12684 tp = get_thread_pointer (true);
12685 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
12687 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
12691 rtx caddr = ix86_tls_get_addr ();
12695 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
12698 emit_call_insn (gen_tls_global_dynamic_64 (rax, x, caddr));
12699 insns = get_insns ();
12702 RTL_CONST_CALL_P (insns) = 1;
12703 emit_libcall_block (insns, dest, rax, x);
12706 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
12710 case TLS_MODEL_LOCAL_DYNAMIC:
12711 base = gen_reg_rtx (Pmode);
12716 pic = pic_offset_table_rtx;
12719 pic = gen_reg_rtx (Pmode);
12720 emit_insn (gen_set_got (pic));
12724 if (TARGET_GNU2_TLS)
12726 rtx tmp = ix86_tls_module_base ();
12729 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
12731 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
12733 tp = get_thread_pointer (true);
12734 set_unique_reg_note (get_last_insn (), REG_EQUIV,
12735 gen_rtx_MINUS (Pmode, tmp, tp));
12739 rtx caddr = ix86_tls_get_addr ();
12743 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, eqv;
12746 emit_call_insn (gen_tls_local_dynamic_base_64 (rax, caddr));
12747 insns = get_insns ();
12750 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
12751 share the LD_BASE result with other LD model accesses. */
12752 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
12753 UNSPEC_TLS_LD_BASE);
12755 RTL_CONST_CALL_P (insns) = 1;
12756 emit_libcall_block (insns, base, rax, eqv);
12759 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
12762 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
12763 off = gen_rtx_CONST (Pmode, off);
12765 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
12767 if (TARGET_GNU2_TLS)
12769 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
12771 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
12775 case TLS_MODEL_INITIAL_EXEC:
12778 if (TARGET_SUN_TLS)
12780 /* The Sun linker took the AMD64 TLS spec literally
12781 and can only handle %rax as the destination of the
12782 initial-exec code sequence. */
12784 dest = gen_reg_rtx (Pmode);
12785 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
12790 type = UNSPEC_GOTNTPOFF;
12794 if (reload_in_progress)
12795 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12796 pic = pic_offset_table_rtx;
12797 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
12799 else if (!TARGET_ANY_GNU_TLS)
12801 pic = gen_reg_rtx (Pmode);
12802 emit_insn (gen_set_got (pic));
12803 type = UNSPEC_GOTTPOFF;
12808 type = UNSPEC_INDNTPOFF;
12811 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
12812 off = gen_rtx_CONST (Pmode, off);
12814 off = gen_rtx_PLUS (Pmode, pic, off);
12815 off = gen_const_mem (Pmode, off);
12816 set_mem_alias_set (off, ix86_GOT_alias_set ());
12818 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12820 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
12821 off = force_reg (Pmode, off);
12822 return gen_rtx_PLUS (Pmode, base, off);
12826 base = get_thread_pointer (true);
12827 dest = gen_reg_rtx (Pmode);
12828 emit_insn (gen_subsi3 (dest, base, off));
12832 case TLS_MODEL_LOCAL_EXEC:
12833 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
12834 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12835 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
12836 off = gen_rtx_CONST (Pmode, off);
12838 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12840 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
12841 return gen_rtx_PLUS (Pmode, base, off);
12845 base = get_thread_pointer (true);
12846 dest = gen_reg_rtx (Pmode);
12847 emit_insn (gen_subsi3 (dest, base, off));
12852 gcc_unreachable ();
12858 /* Create or return the unique __imp_DECL dllimport symbol corresponding
12861 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
12862 htab_t dllimport_map;
12865 get_dllimport_decl (tree decl)
12867 struct tree_map *h, in;
12870 const char *prefix;
12871 size_t namelen, prefixlen;
12876 if (!dllimport_map)
12877 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
12879 in.hash = htab_hash_pointer (decl);
12880 in.base.from = decl;
12881 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
12882 h = (struct tree_map *) *loc;
12886 *loc = h = ggc_alloc_tree_map ();
12888 h->base.from = decl;
12889 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
12890 VAR_DECL, NULL, ptr_type_node);
12891 DECL_ARTIFICIAL (to) = 1;
12892 DECL_IGNORED_P (to) = 1;
12893 DECL_EXTERNAL (to) = 1;
12894 TREE_READONLY (to) = 1;
12896 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
12897 name = targetm.strip_name_encoding (name);
12898 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
12899 ? "*__imp_" : "*__imp__";
12900 namelen = strlen (name);
12901 prefixlen = strlen (prefix);
12902 imp_name = (char *) alloca (namelen + prefixlen + 1);
12903 memcpy (imp_name, prefix, prefixlen);
12904 memcpy (imp_name + prefixlen, name, namelen + 1);
12906 name = ggc_alloc_string (imp_name, namelen + prefixlen);
12907 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
12908 SET_SYMBOL_REF_DECL (rtl, to);
12909 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
12911 rtl = gen_const_mem (Pmode, rtl);
12912 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
12914 SET_DECL_RTL (to, rtl);
12915 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
12920 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
12921 true if we require the result be a register. */
12924 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
12929 gcc_assert (SYMBOL_REF_DECL (symbol));
12930 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
12932 x = DECL_RTL (imp_decl);
12934 x = force_reg (Pmode, x);
12938 /* Try machine-dependent ways of modifying an illegitimate address
12939 to be legitimate. If we find one, return the new, valid address.
12940 This macro is used in only one place: `memory_address' in explow.c.
12942 OLDX is the address as it was before break_out_memory_refs was called.
12943 In some cases it is useful to look at this to decide what needs to be done.
12945 It is always safe for this macro to do nothing. It exists to recognize
12946 opportunities to optimize the output.
12948 For the 80386, we handle X+REG by loading X into a register R and
12949 using R+REG. R will go in a general reg and indexing will be used.
12950 However, if REG is a broken-out memory address or multiplication,
12951 nothing needs to be done because REG can certainly go in a general reg.
12953 When -fpic is used, special handling is needed for symbolic references.
12954 See comments by legitimize_pic_address in i386.c for details. */
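/* For instance, the canonicalization below rewrites

	(plus (ashift (reg) (const_int 2)) (reg))

   into

	(plus (mult (reg) (const_int 4)) (reg))

   so that the address matches the hardware's scaled-index form.  */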
12957 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
12958 enum machine_mode mode)
12963 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
12965 return legitimize_tls_address (x, (enum tls_model) log, false);
12966 if (GET_CODE (x) == CONST
12967 && GET_CODE (XEXP (x, 0)) == PLUS
12968 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
12969 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
12971 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
12972 (enum tls_model) log, false);
12973 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
12976 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
12978 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
12979 return legitimize_dllimport_symbol (x, true);
12980 if (GET_CODE (x) == CONST
12981 && GET_CODE (XEXP (x, 0)) == PLUS
12982 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
12983 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
12985 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
12986 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
12990 if (flag_pic && SYMBOLIC_CONST (x))
12991 return legitimize_pic_address (x, 0);
12994 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
12995 return machopic_indirect_data_reference (x, 0);
12998 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
12999 if (GET_CODE (x) == ASHIFT
13000 && CONST_INT_P (XEXP (x, 1))
13001 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
13004 log = INTVAL (XEXP (x, 1));
13005 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
13006 GEN_INT (1 << log));
13009 if (GET_CODE (x) == PLUS)
13011 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
13013 if (GET_CODE (XEXP (x, 0)) == ASHIFT
13014 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
13015 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
13018 log = INTVAL (XEXP (XEXP (x, 0), 1));
13019 XEXP (x, 0) = gen_rtx_MULT (Pmode,
13020 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
13021 GEN_INT (1 << log));
13024 if (GET_CODE (XEXP (x, 1)) == ASHIFT
13025 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
13026 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
13029 log = INTVAL (XEXP (XEXP (x, 1), 1));
13030 XEXP (x, 1) = gen_rtx_MULT (Pmode,
13031 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
13032 GEN_INT (1 << log));
13035 /* Put multiply first if it isn't already. */
13036 if (GET_CODE (XEXP (x, 1)) == MULT)
13038 rtx tmp = XEXP (x, 0);
13039 XEXP (x, 0) = XEXP (x, 1);
13044 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
13045 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
13046 created by virtual register instantiation, register elimination, and
13047 similar optimizations. */
13048 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
13051 x = gen_rtx_PLUS (Pmode,
13052 gen_rtx_PLUS (Pmode, XEXP (x, 0),
13053 XEXP (XEXP (x, 1), 0)),
13054 XEXP (XEXP (x, 1), 1));
13058 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
13059 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
13060 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
13061 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
13062 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
13063 && CONSTANT_P (XEXP (x, 1)))
13066 rtx other = NULL_RTX;
13068 if (CONST_INT_P (XEXP (x, 1)))
13070 constant = XEXP (x, 1);
13071 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
13073 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
13075 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
13076 other = XEXP (x, 1);
13084 x = gen_rtx_PLUS (Pmode,
13085 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
13086 XEXP (XEXP (XEXP (x, 0), 1), 0)),
13087 plus_constant (other, INTVAL (constant)));
13091 if (changed && ix86_legitimate_address_p (mode, x, false))
13094 if (GET_CODE (XEXP (x, 0)) == MULT)
13097 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
13100 if (GET_CODE (XEXP (x, 1)) == MULT)
13103 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
13107 && REG_P (XEXP (x, 1))
13108 && REG_P (XEXP (x, 0)))
13111 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
13114 x = legitimize_pic_address (x, 0);
13117 if (changed && ix86_legitimate_address_p (mode, x, false))
13120 if (REG_P (XEXP (x, 0)))
13122 rtx temp = gen_reg_rtx (Pmode);
13123 rtx val = force_operand (XEXP (x, 1), temp);
13125 emit_move_insn (temp, val);
13127 XEXP (x, 1) = temp;
13131 else if (REG_P (XEXP (x, 1)))
13133 rtx temp = gen_reg_rtx (Pmode);
13134 rtx val = force_operand (XEXP (x, 0), temp);
13136 emit_move_insn (temp, val);
13138 XEXP (x, 0) = temp;
13146 /* Print an integer constant expression in assembler syntax. Addition
13147 and subtraction are the only arithmetic that may appear in these
13148 expressions. FILE is the stdio stream to write to, X is the rtx, and
13149 CODE is the operand print code from the output string. */
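/* E.g. a (const (unspec [foo] UNSPEC_GOTOFF)) operand is printed as
   "foo@GOTOFF", and UNSPEC_GOTPCREL prints as "foo@GOTPCREL(%rip)"
   in AT&T syntax; see the unspec switch below.  */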
13152 output_pic_addr_const (FILE *file, rtx x, int code)
13156 switch (GET_CODE (x))
13159 gcc_assert (flag_pic);
13164 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
13165 output_addr_const (file, x);
13168 const char *name = XSTR (x, 0);
13170 /* Mark the decl as referenced so that cgraph will
13171 output the function. */
13172 if (SYMBOL_REF_DECL (x))
13173 mark_decl_referenced (SYMBOL_REF_DECL (x));
13176 if (MACHOPIC_INDIRECT
13177 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
13178 name = machopic_indirection_name (x, /*stub_p=*/true);
13180 assemble_name (file, name);
13182 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
13183 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
13184 fputs ("@PLT", file);
13191 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
13192 assemble_name (asm_out_file, buf);
13196 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
13200 /* This used to output parentheses around the expression,
13201 but that does not work on the 386 (either ATT or BSD assembler). */
13202 output_pic_addr_const (file, XEXP (x, 0), code);
13206 if (GET_MODE (x) == VOIDmode)
13208 /* We can use %d if the number is <32 bits and positive. */
13209 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
13210 fprintf (file, "0x%lx%08lx",
13211 (unsigned long) CONST_DOUBLE_HIGH (x),
13212 (unsigned long) CONST_DOUBLE_LOW (x));
13214 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
13217 /* We can't handle floating point constants;
13218 TARGET_PRINT_OPERAND must handle them. */
13219 output_operand_lossage ("floating constant misused");
13223 /* Some assemblers need integer constants to appear first. */
13224 if (CONST_INT_P (XEXP (x, 0)))
13226 output_pic_addr_const (file, XEXP (x, 0), code);
13228 output_pic_addr_const (file, XEXP (x, 1), code);
13232 gcc_assert (CONST_INT_P (XEXP (x, 1)));
13233 output_pic_addr_const (file, XEXP (x, 1), code);
13235 output_pic_addr_const (file, XEXP (x, 0), code);
13241 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
13242 output_pic_addr_const (file, XEXP (x, 0), code);
13244 output_pic_addr_const (file, XEXP (x, 1), code);
13246 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
13250 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
13252 bool f = i386_asm_output_addr_const_extra (file, x);
13257 gcc_assert (XVECLEN (x, 0) == 1);
13258 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
13259 switch (XINT (x, 1))
13262 fputs ("@GOT", file);
13264 case UNSPEC_GOTOFF:
13265 fputs ("@GOTOFF", file);
13267 case UNSPEC_PLTOFF:
13268 fputs ("@PLTOFF", file);
13271 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13272 "(%rip)" : "[rip]", file);
13274 case UNSPEC_GOTPCREL:
13275 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13276 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
13278 case UNSPEC_GOTTPOFF:
13279 /* FIXME: This might be @TPOFF in Sun ld too. */
13280 fputs ("@gottpoff", file);
13283 fputs ("@tpoff", file);
13285 case UNSPEC_NTPOFF:
13287 fputs ("@tpoff", file);
13289 fputs ("@ntpoff", file);
13291 case UNSPEC_DTPOFF:
13292 fputs ("@dtpoff", file);
13294 case UNSPEC_GOTNTPOFF:
13296 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13297 "@gottpoff(%rip)": "@gottpoff[rip]", file);
13299 fputs ("@gotntpoff", file);
13301 case UNSPEC_INDNTPOFF:
13302 fputs ("@indntpoff", file);
13305 case UNSPEC_MACHOPIC_OFFSET:
13307 machopic_output_function_base_name (file);
13311 output_operand_lossage ("invalid UNSPEC as operand");
13317 output_operand_lossage ("invalid expression as operand");
13321 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
13322 We need to emit DTP-relative relocations. */
13324 static void ATTRIBUTE_UNUSED
13325 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
13327 fputs (ASM_LONG, file);
13328 output_addr_const (file, x);
13329 fputs ("@dtpoff", file);
13335 fputs (", 0", file);
13338 gcc_unreachable ();
13342 /* Return true if X is a representation of the PIC register. This copes
13343 with calls from ix86_find_base_term, where the register might have
13344 been replaced by a cselib value. */
13347 ix86_pic_register_p (rtx x)
13349 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
13350 return (pic_offset_table_rtx
13351 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
13353 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
13356 /* Helper function for ix86_delegitimize_address.
13357 Attempt to delegitimize TLS local-exec accesses. */
13360 ix86_delegitimize_tls_address (rtx orig_x)
13362 rtx x = orig_x, unspec;
13363 struct ix86_address addr;
13365 if (!TARGET_TLS_DIRECT_SEG_REFS)
13369 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
13371 if (ix86_decompose_address (x, &addr) == 0
13372 || addr.seg != (TARGET_64BIT ? SEG_FS : SEG_GS)
13373 || addr.disp == NULL_RTX
13374 || GET_CODE (addr.disp) != CONST)
13376 unspec = XEXP (addr.disp, 0);
13377 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
13378 unspec = XEXP (unspec, 0);
13379 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
13381 x = XVECEXP (unspec, 0, 0);
13382 gcc_assert (GET_CODE (x) == SYMBOL_REF);
13383 if (unspec != XEXP (addr.disp, 0))
13384 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
13387 rtx idx = addr.index;
13388 if (addr.scale != 1)
13389 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
13390 x = gen_rtx_PLUS (Pmode, idx, x);
13393 x = gen_rtx_PLUS (Pmode, addr.base, x);
13394 if (MEM_P (orig_x))
13395 x = replace_equiv_address_nv (orig_x, x);
13399 /* In the name of slightly smaller debug output, and to cater to
13400 general assembler lossage, recognize PIC+GOTOFF and turn it back
13401 into a direct symbol reference.
13403 On Darwin, this is necessary to avoid a crash, because Darwin
13404 has a different PIC label for each routine but the DWARF debugging
13405 information is not associated with any particular routine, so it's
13406 necessary to remove references to the PIC label from RTL stored by
13407 the DWARF output code. */
13410 ix86_delegitimize_address (rtx x)
13412 rtx orig_x = delegitimize_mem_from_attrs (x);
13413 /* addend is NULL or some rtx if x is something+GOTOFF where
13414 something doesn't include the PIC register. */
13415 rtx addend = NULL_RTX;
13416 /* reg_addend is NULL or a multiple of some register. */
13417 rtx reg_addend = NULL_RTX;
13418 /* const_addend is NULL or a const_int. */
13419 rtx const_addend = NULL_RTX;
13420 /* This is the result, or NULL. */
13421 rtx result = NULL_RTX;
13430 if (GET_CODE (x) != CONST
13431 || GET_CODE (XEXP (x, 0)) != UNSPEC
13432 || (XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
13433 && XINT (XEXP (x, 0), 1) != UNSPEC_PCREL)
13434 || !MEM_P (orig_x))
13435 return ix86_delegitimize_tls_address (orig_x);
13436 x = XVECEXP (XEXP (x, 0), 0, 0);
13437 if (GET_MODE (orig_x) != Pmode)
13439 x = simplify_gen_subreg (GET_MODE (orig_x), x, Pmode, 0);
13446 if (GET_CODE (x) != PLUS
13447 || GET_CODE (XEXP (x, 1)) != CONST)
13448 return ix86_delegitimize_tls_address (orig_x);
13450 if (ix86_pic_register_p (XEXP (x, 0)))
13451 /* %ebx + GOT/GOTOFF */
13453 else if (GET_CODE (XEXP (x, 0)) == PLUS)
13455 /* %ebx + %reg * scale + GOT/GOTOFF */
13456 reg_addend = XEXP (x, 0);
13457 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
13458 reg_addend = XEXP (reg_addend, 1);
13459 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
13460 reg_addend = XEXP (reg_addend, 0);
13463 reg_addend = NULL_RTX;
13464 addend = XEXP (x, 0);
13468 addend = XEXP (x, 0);
13470 x = XEXP (XEXP (x, 1), 0);
13471 if (GET_CODE (x) == PLUS
13472 && CONST_INT_P (XEXP (x, 1)))
13474 const_addend = XEXP (x, 1);
13478 if (GET_CODE (x) == UNSPEC
13479 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
13480 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
13481 result = XVECEXP (x, 0, 0);
13483 if (TARGET_MACHO && darwin_local_data_pic (x)
13484 && !MEM_P (orig_x))
13485 result = XVECEXP (x, 0, 0);
13488 return ix86_delegitimize_tls_address (orig_x);
13491 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
13493 result = gen_rtx_PLUS (Pmode, reg_addend, result);
13496 /* If the rest of original X doesn't involve the PIC register, add
13497 addend and subtract pic_offset_table_rtx. This can happen e.g.
13499 leal (%ebx, %ecx, 4), %ecx
13501 movl foo@GOTOFF(%ecx), %edx
13502 in which case we return (%ecx - %ebx) + foo. */
13503 if (pic_offset_table_rtx)
13504 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
13505 pic_offset_table_rtx),
13510 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
13512 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
13513 if (result == NULL_RTX)
13519 /* If X is a machine specific address (i.e. a symbol or label being
13520 referenced as a displacement from the GOT implemented using an
13521 UNSPEC), then return the base term. Otherwise return X. */
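/* E.g. for (const (unspec [foo] UNSPEC_GOTPCREL)) the base term is
   the SYMBOL_REF foo itself, which alias analysis can then treat as
   the underlying object.  */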
13524 ix86_find_base_term (rtx x)
13530 if (GET_CODE (x) != CONST)
13532 term = XEXP (x, 0);
13533 if (GET_CODE (term) == PLUS
13534 && (CONST_INT_P (XEXP (term, 1))
13535 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
13536 term = XEXP (term, 0);
13537 if (GET_CODE (term) != UNSPEC
13538 || (XINT (term, 1) != UNSPEC_GOTPCREL
13539 && XINT (term, 1) != UNSPEC_PCREL))
13542 return XVECEXP (term, 0, 0);
13545 return ix86_delegitimize_address (x);
13549 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
13550 int fp, FILE *file)
13552 const char *suffix;
13554 if (mode == CCFPmode || mode == CCFPUmode)
13556 code = ix86_fp_compare_code_to_integer (code);
13560 code = reverse_condition (code);
13611 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
13615 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
13616 Those same assemblers have the same but opposite lossage on cmov. */
13617 if (mode == CCmode)
13618 suffix = fp ? "nbe" : "a";
13619 else if (mode == CCCmode)
13622 gcc_unreachable ();
13638 gcc_unreachable ();
13642 gcc_assert (mode == CCmode || mode == CCCmode);
13659 gcc_unreachable ();
13663 /* ??? As above. */
13664 gcc_assert (mode == CCmode || mode == CCCmode);
13665 suffix = fp ? "nb" : "ae";
13668 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
13672 /* ??? As above. */
13673 if (mode == CCmode)
13675 else if (mode == CCCmode)
13676 suffix = fp ? "nb" : "ae";
13678 gcc_unreachable ();
13681 suffix = fp ? "u" : "p";
13684 suffix = fp ? "nu" : "np";
13687 gcc_unreachable ();
13689 fputs (suffix, file);
13692 /* Print the name of register X to FILE based on its machine mode and number.
13693 If CODE is 'w', pretend the mode is HImode.
13694 If CODE is 'b', pretend the mode is QImode.
13695 If CODE is 'k', pretend the mode is SImode.
13696 If CODE is 'q', pretend the mode is DImode.
13697 If CODE is 'x', pretend the mode is V4SFmode.
13698 If CODE is 't', pretend the mode is V8SFmode.
13699 If CODE is 'h', pretend the reg is the 'high' byte register.
13700 If CODE is 'y', print "st(0)" instead of "st", if the reg is a stack op.
13701 If CODE is 'd', duplicate the operand for an AVX instruction.
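/* E.g. for an SImode general register whose natural name is "eax",
   code 'w' prints "ax", 'b' prints "al", 'h' prints "ah" and 'q'
   prints "rax" (the latter only when TARGET_64BIT), each with a "%"
   prefix in AT&T syntax.  */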
13705 print_reg (rtx x, int code, FILE *file)
13708 bool duplicated = code == 'd' && TARGET_AVX;
13710 gcc_assert (x == pc_rtx
13711 || (REGNO (x) != ARG_POINTER_REGNUM
13712 && REGNO (x) != FRAME_POINTER_REGNUM
13713 && REGNO (x) != FLAGS_REG
13714 && REGNO (x) != FPSR_REG
13715 && REGNO (x) != FPCR_REG));
13717 if (ASSEMBLER_DIALECT == ASM_ATT)
13722 gcc_assert (TARGET_64BIT);
13723 fputs ("rip", file);
13727 if (code == 'w' || MMX_REG_P (x))
13729 else if (code == 'b')
13731 else if (code == 'k')
13733 else if (code == 'q')
13735 else if (code == 'y')
13737 else if (code == 'h')
13739 else if (code == 'x')
13741 else if (code == 't')
13744 code = GET_MODE_SIZE (GET_MODE (x));
13746 /* Irritatingly, AMD extended registers use a different naming
13747 convention from the normal registers. */
13748 if (REX_INT_REG_P (x))
13750 gcc_assert (TARGET_64BIT);
13754 error ("extended registers have no high halves");
13757 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
13760 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
13763 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
13766 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
13769 error ("unsupported operand size for extended register");
13779 if (STACK_TOP_P (x))
13788 if (! ANY_FP_REG_P (x))
13789 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
13794 reg = hi_reg_name[REGNO (x)];
13797 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
13799 reg = qi_reg_name[REGNO (x)];
13802 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
13804 reg = qi_high_reg_name[REGNO (x)];
13809 gcc_assert (!duplicated);
13811 fputs (hi_reg_name[REGNO (x)] + 1, file);
13816 gcc_unreachable ();
13822 if (ASSEMBLER_DIALECT == ASM_ATT)
13823 fprintf (file, ", %%%s", reg);
13825 fprintf (file, ", %s", reg);
13829 /* Locate some local-dynamic symbol still in use by this function
13830 so that we can print its name in some tls_local_dynamic_base pattern. */
13834 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
13838 if (GET_CODE (x) == SYMBOL_REF
13839 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
13841 cfun->machine->some_ld_name = XSTR (x, 0);
13848 static const char *
13849 get_some_local_dynamic_name (void)
13853 if (cfun->machine->some_ld_name)
13854 return cfun->machine->some_ld_name;
13856 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
13857 if (NONDEBUG_INSN_P (insn)
13858 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
13859 return cfun->machine->some_ld_name;
13864 /* Meaning of CODE:
13865 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
13866 C -- print opcode suffix for set/cmov insn.
13867 c -- like C, but print reversed condition
13868 F,f -- likewise, but for floating-point.
13869 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
13871 R -- print the prefix for register names.
13872 z -- print the opcode suffix for the size of the current operand.
13873 Z -- likewise, with special suffixes for x87 instructions.
13874 * -- print a star (in certain assembler syntax)
13875 A -- print an absolute memory reference.
13876 w -- print the operand as if it's a "word" (HImode) even if it isn't.
13877 s -- print a shift double count, followed by the assembler's argument
13879 b -- print the QImode name of the register for the indicated operand.
13880 %b0 would print %al if operands[0] is reg 0.
13881 w -- likewise, print the HImode name of the register.
13882 k -- likewise, print the SImode name of the register.
13883 q -- likewise, print the DImode name of the register.
13884 x -- likewise, print the V4SFmode name of the register.
13885 t -- likewise, print the V8SFmode name of the register.
13886 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
13887 y -- print "st(0)" instead of "st" as a register.
13888 d -- print duplicated register operand for AVX instruction.
13889 D -- print condition for SSE cmp instruction.
13890 P -- if PIC, print an @PLT suffix.
13891 X -- don't print any sort of PIC '@' suffix for a symbol.
13892 & -- print some in-use local-dynamic symbol name.
13893 H -- print a memory address offset by 8; used for sse high-parts
13894 Y -- print condition for XOP pcom* instruction.
13895 + -- print a branch hint as 'cs' or 'ds' prefix
13896 ; -- print a semicolon (after prefixes due to a bug in older gas).
13897 @ -- print a segment register of thread base pointer load
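/* E.g. if operands[1] is the SImode register %eax, "%b1" in a
   template prints "%al", "%w1" prints "%ax" and "%k1" prints "%eax";
   "%z1" would emit the size suffix "l" in AT&T syntax.  */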
13901 ix86_print_operand (FILE *file, rtx x, int code)
13908 if (ASSEMBLER_DIALECT == ASM_ATT)
13914 const char *name = get_some_local_dynamic_name ();
13916 output_operand_lossage ("'%%&' used without any "
13917 "local dynamic TLS references");
13919 assemble_name (file, name);
13924 switch (ASSEMBLER_DIALECT)
13931 /* Intel syntax. For absolute addresses, registers should not
13932 be surrounded by braces. */
13936 ix86_print_operand (file, x, 0);
13943 gcc_unreachable ();
13946 ix86_print_operand (file, x, 0);
13951 if (ASSEMBLER_DIALECT == ASM_ATT)
13956 if (ASSEMBLER_DIALECT == ASM_ATT)
13961 if (ASSEMBLER_DIALECT == ASM_ATT)
13966 if (ASSEMBLER_DIALECT == ASM_ATT)
13971 if (ASSEMBLER_DIALECT == ASM_ATT)
13976 if (ASSEMBLER_DIALECT == ASM_ATT)
13981 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
13983 /* Opcodes don't get size suffixes if using Intel opcodes. */
13984 if (ASSEMBLER_DIALECT == ASM_INTEL)
13987 switch (GET_MODE_SIZE (GET_MODE (x)))
14006 output_operand_lossage
14007 ("invalid operand size for operand code '%c'", code);
14012 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14014 (0, "non-integer operand used with operand code '%c'", code);
14018 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
14019 if (ASSEMBLER_DIALECT == ASM_INTEL)
14022 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
14024 switch (GET_MODE_SIZE (GET_MODE (x)))
14027 #ifdef HAVE_AS_IX86_FILDS
14037 #ifdef HAVE_AS_IX86_FILDQ
14040 fputs ("ll", file);
14048 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14050 /* 387 opcodes don't get size suffixes
14051 if the operands are registers. */
14052 if (STACK_REG_P (x))
14055 switch (GET_MODE_SIZE (GET_MODE (x)))
14076 output_operand_lossage
14077 ("invalid operand type used with operand code '%c'", code);
14081 output_operand_lossage
14082 ("invalid operand size for operand code '%c'", code);
14099 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
14101 ix86_print_operand (file, x, 0);
14102 fputs (", ", file);
14107 /* A little bit of brain damage here. The SSE compare instructions
14108 use completely different names for the comparisons than the
14109 fp conditional moves do. */
14112 switch (GET_CODE (x))
14115 fputs ("eq", file);
14118 fputs ("eq_us", file);
14121 fputs ("lt", file);
14124 fputs ("nge", file);
14127 fputs ("le", file);
14130 fputs ("ngt", file);
14133 fputs ("unord", file);
14136 fputs ("neq", file);
14139 fputs ("neq_oq", file);
14142 fputs ("ge", file);
14145 fputs ("nlt", file);
14148 fputs ("gt", file);
14151 fputs ("nle", file);
14154 fputs ("ord", file);
14157 output_operand_lossage ("operand is not a condition code, "
14158 "invalid operand code 'D'");
14164 switch (GET_CODE (x))
14168 fputs ("eq", file);
14172 fputs ("lt", file);
14176 fputs ("le", file);
14179 fputs ("unord", file);
14183 fputs ("neq", file);
14187 fputs ("nlt", file);
14191 fputs ("nle", file);
14194 fputs ("ord", file);
14197 output_operand_lossage ("operand is not a condition code, "
14198 "invalid operand code 'D'");
14204 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14205 if (ASSEMBLER_DIALECT == ASM_ATT)
14207 switch (GET_MODE (x))
14209 case HImode: putc ('w', file); break;
14211 case SFmode: putc ('l', file); break;
14213 case DFmode: putc ('q', file); break;
14214 default: gcc_unreachable ();
14221 if (!COMPARISON_P (x))
14223 output_operand_lossage ("operand is neither a constant nor a "
14224 "condition code, invalid operand code "
14228 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
14231 if (!COMPARISON_P (x))
14233 output_operand_lossage ("operand is neither a constant nor a "
14234 "condition code, invalid operand code "
14238 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14239 if (ASSEMBLER_DIALECT == ASM_ATT)
14242 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
14245 /* Like above, but reverse condition */
14247 /* Check to see if argument to %c is really a constant
14248 and not a condition code which needs to be reversed. */
14249 if (!COMPARISON_P (x))
14251 output_operand_lossage ("operand is neither a constant nor a "
14252 "condition code, invalid operand "
14256 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
14259 if (!COMPARISON_P (x))
14261 output_operand_lossage ("operand is neither a constant nor a "
14262 "condition code, invalid operand "
14266 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14267 if (ASSEMBLER_DIALECT == ASM_ATT)
14270 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
14274 /* It doesn't actually matter what mode we use here, as we're
14275 only going to use this for printing. */
14276 x = adjust_address_nv (x, DImode, 8);
14284 || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS)
14287 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
14290 int pred_val = INTVAL (XEXP (x, 0));
14292 if (pred_val < REG_BR_PROB_BASE * 45 / 100
14293 || pred_val > REG_BR_PROB_BASE * 55 / 100)
14295 int taken = pred_val > REG_BR_PROB_BASE / 2;
14296 int cputaken = final_forward_branch_p (current_output_insn) == 0;
14298 /* Emit hints only in cases where the default branch prediction
14299 heuristics would fail. */
14300 if (taken != cputaken)
14302 /* We use 3e (DS) prefix for taken branches and
14303 2e (CS) prefix for not taken branches. */
14305 fputs ("ds ; ", file);
14307 fputs ("cs ; ", file);
14315 switch (GET_CODE (x))
14318 fputs ("neq", file);
14321 fputs ("eq", file);
14325 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
14329 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
14333 fputs ("le", file);
14337 fputs ("lt", file);
14340 fputs ("unord", file);
14343 fputs ("ord", file);
14346 fputs ("ueq", file);
14349 fputs ("nlt", file);
14352 fputs ("nle", file);
14355 fputs ("ule", file);
14358 fputs ("ult", file);
14361 fputs ("une", file);
14364 output_operand_lossage ("operand is not a condition code, "
14365 "invalid operand code 'Y'");
14371 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
14377 if (ASSEMBLER_DIALECT == ASM_ATT)
14380 /* The kernel uses a different segment register for performance
14381 reasons; this way a system call does not have to trash the
14382 userspace segment register, which would be expensive. */
14383 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
14384 fputs ("fs", file);
14386 fputs ("gs", file);
14390 output_operand_lossage ("invalid operand code '%c'", code);
14395 print_reg (x, code, file);
14397 else if (MEM_P (x))
14399 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
14400 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
14401 && GET_MODE (x) != BLKmode)
14404 switch (GET_MODE_SIZE (GET_MODE (x)))
14406 case 1: size = "BYTE"; break;
14407 case 2: size = "WORD"; break;
14408 case 4: size = "DWORD"; break;
14409 case 8: size = "QWORD"; break;
14410 case 12: size = "TBYTE"; break;
14412 if (GET_MODE (x) == XFmode)
14417 case 32: size = "YMMWORD"; break;
14419 gcc_unreachable ();
14422 /* Check for explicit size override (codes 'b', 'w' and 'k') */
14425 else if (code == 'w')
14427 else if (code == 'k')
14430 fputs (size, file);
14431 fputs (" PTR ", file);
14435 /* Avoid (%rip) for call operands. */
14436 if (CONSTANT_ADDRESS_P (x) && code == 'P'
14437 && !CONST_INT_P (x))
14438 output_addr_const (file, x);
14439 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
14440 output_operand_lossage ("invalid constraints for operand");
14442 output_address (x);
14445 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
14450 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
14451 REAL_VALUE_TO_TARGET_SINGLE (r, l);
14453 if (ASSEMBLER_DIALECT == ASM_ATT)
14455 /* Sign extend 32bit SFmode immediate to 8 bytes. */
14457 fprintf (file, "0x%08llx", (unsigned long long) (int) l);
14459 fprintf (file, "0x%08x", (unsigned int) l);
14462 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
14467 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
14468 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
14470 if (ASSEMBLER_DIALECT == ASM_ATT)
14472 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
14475 /* These float cases don't actually occur as immediate operands. */
14476 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
14480 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
14481 fputs (dstr, file);
14486 /* We have patterns that allow zero sets of memory, for instance.
14487 In 64-bit mode, we should probably support all 8-byte vectors,
14488 since we can in fact encode that into an immediate. */
14489 if (GET_CODE (x) == CONST_VECTOR)
14491 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
14497 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
14499 if (ASSEMBLER_DIALECT == ASM_ATT)
14502 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
14503 || GET_CODE (x) == LABEL_REF)
14505 if (ASSEMBLER_DIALECT == ASM_ATT)
14508 fputs ("OFFSET FLAT:", file);
14511 if (CONST_INT_P (x))
14512 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14513 else if (flag_pic || MACHOPIC_INDIRECT)
14514 output_pic_addr_const (file, x, code);
14516 output_addr_const (file, x);
14521 ix86_print_operand_punct_valid_p (unsigned char code)
14523 return (code == '@' || code == '*' || code == '+'
14524 || code == '&' || code == ';');
14527 /* Print a memory operand whose address is ADDR. */
14530 ix86_print_operand_address (FILE *file, rtx addr)
14532 struct ix86_address parts;
14533 rtx base, index, disp;
14535 int ok = ix86_decompose_address (addr, &parts);
14540 index = parts.index;
14542 scale = parts.scale;
14550 if (ASSEMBLER_DIALECT == ASM_ATT)
14552 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
14555 gcc_unreachable ();
14558 /* Use the one-byte-shorter RIP-relative addressing in 64bit mode. */
14559 if (TARGET_64BIT && !base && !index)
14563 if (GET_CODE (disp) == CONST
14564 && GET_CODE (XEXP (disp, 0)) == PLUS
14565 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
14566 symbol = XEXP (XEXP (disp, 0), 0);
14568 if (GET_CODE (symbol) == LABEL_REF
14569 || (GET_CODE (symbol) == SYMBOL_REF
14570 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
14573 if (!base && !index)
14575 /* A displacement-only address requires special attention. */
14577 if (CONST_INT_P (disp))
14579 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
14580 fputs ("ds:", file);
14581 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
14584 output_pic_addr_const (file, disp, 0);
14586 output_addr_const (file, disp);
14590 if (ASSEMBLER_DIALECT == ASM_ATT)
14595 output_pic_addr_const (file, disp, 0);
14596 else if (GET_CODE (disp) == LABEL_REF)
14597 output_asm_label (disp);
14599 output_addr_const (file, disp);
14604 print_reg (base, 0, file);
14608 print_reg (index, 0, file);
14610 fprintf (file, ",%d", scale);
14616 rtx offset = NULL_RTX;
14620 /* Pull out the offset of a symbol; print any symbol itself. */
14621 if (GET_CODE (disp) == CONST
14622 && GET_CODE (XEXP (disp, 0)) == PLUS
14623 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
14625 offset = XEXP (XEXP (disp, 0), 1);
14626 disp = gen_rtx_CONST (VOIDmode,
14627 XEXP (XEXP (disp, 0), 0));
14631 output_pic_addr_const (file, disp, 0);
14632 else if (GET_CODE (disp) == LABEL_REF)
14633 output_asm_label (disp);
14634 else if (CONST_INT_P (disp))
14637 output_addr_const (file, disp);
14643 print_reg (base, 0, file);
14646 if (INTVAL (offset) >= 0)
14648 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
14652 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
14659 print_reg (index, 0, file);
14661 fprintf (file, "*%d", scale);
14668 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
14671 i386_asm_output_addr_const_extra (FILE *file, rtx x)
14675 if (GET_CODE (x) != UNSPEC)
14678 op = XVECEXP (x, 0, 0);
14679 switch (XINT (x, 1))
14681 case UNSPEC_GOTTPOFF:
14682 output_addr_const (file, op);
14683 /* FIXME: This might be @TPOFF in Sun ld. */
14684 fputs ("@gottpoff", file);
14687 output_addr_const (file, op);
14688 fputs ("@tpoff", file);
14690 case UNSPEC_NTPOFF:
14691 output_addr_const (file, op);
14693 fputs ("@tpoff", file);
14695 fputs ("@ntpoff", file);
14697 case UNSPEC_DTPOFF:
14698 output_addr_const (file, op);
14699 fputs ("@dtpoff", file);
14701 case UNSPEC_GOTNTPOFF:
14702 output_addr_const (file, op);
14704 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14705 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
14707 fputs ("@gotntpoff", file);
14709 case UNSPEC_INDNTPOFF:
14710 output_addr_const (file, op);
14711 fputs ("@indntpoff", file);
14714 case UNSPEC_MACHOPIC_OFFSET:
14715 output_addr_const (file, op);
14717 machopic_output_function_base_name (file);
14721 case UNSPEC_STACK_CHECK:
14725 gcc_assert (flag_split_stack);
14727 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
14728 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
14730 gcc_unreachable ();
14733 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
14744 /* Split one or more double-mode RTL references into pairs of half-mode
14745 references. The RTL can be REG, offsettable MEM, integer constant, or
14746 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
14747 split and "num" is its length. lo_half and hi_half are output arrays
14748 that parallel "operands". */
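/* E.g. on ia32 a DImode value is split into two SImode halves: an
   offsettable (mem:DI addr) yields (mem:SI addr) and (mem:SI addr+4),
   and a register operand yields the corresponding low and high
   word subregs.  */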
14751 split_double_mode (enum machine_mode mode, rtx operands[],
14752 int num, rtx lo_half[], rtx hi_half[])
14754 enum machine_mode half_mode;
14760 half_mode = DImode;
14763 half_mode = SImode;
14766 gcc_unreachable ();
14769 byte = GET_MODE_SIZE (half_mode);
14773 rtx op = operands[num];
14775 /* simplify_subreg refuses to split volatile memory addresses,
14776 but we still have to handle them. */
14779 lo_half[num] = adjust_address (op, half_mode, 0);
14780 hi_half[num] = adjust_address (op, half_mode, byte);
14784 lo_half[num] = simplify_gen_subreg (half_mode, op,
14785 GET_MODE (op) == VOIDmode
14786 ? mode : GET_MODE (op), 0);
14787 hi_half[num] = simplify_gen_subreg (half_mode, op,
14788 GET_MODE (op) == VOIDmode
14789 ? mode : GET_MODE (op), byte);
14794 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
14795 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
14796 is the expression of the binary operation. The output may either be
14797 emitted here, or returned to the caller, like all output_* functions.
14799 There is no guarantee that the operands are the same mode, as they
14800 might be within FLOAT or FLOAT_EXTEND expressions. */
14802 #ifndef SYSV386_COMPAT
14803 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
14804 wants to fix the assemblers because that causes incompatibility
14805 with gcc. No-one wants to fix gcc because that causes
14806 incompatibility with assemblers... You can use the option of
14807 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
14808 #define SYSV386_COMPAT 1
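/* Concretely: for one and the same hardware operation the templates
   below emit, say, "fsubp" in AT&T syntax but "fsubrp" in Intel
   syntax (or vice versa), because AT&T-derived assemblers swap the
   meaning of fsub and fsubr when the destination is not st(0).  */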
14812 output_387_binary_op (rtx insn, rtx *operands)
14814 static char buf[40];
14817 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
14819 #ifdef ENABLE_CHECKING
14820 /* Even if we do not want to check the inputs, this documents the
14821 input constraints, which helps in understanding the following code. */
14822 if (STACK_REG_P (operands[0])
14823 && ((REG_P (operands[1])
14824 && REGNO (operands[0]) == REGNO (operands[1])
14825 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
14826 || (REG_P (operands[2])
14827 && REGNO (operands[0]) == REGNO (operands[2])
14828 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
14829 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
14832 gcc_assert (is_sse);
14835 switch (GET_CODE (operands[3]))
14838 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14839 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14847 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14848 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14856 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14857 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14865 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14866 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14874 gcc_unreachable ();
14881 strcpy (buf, ssep);
14882 if (GET_MODE (operands[0]) == SFmode)
14883 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
14885 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
14889 strcpy (buf, ssep + 1);
14890 if (GET_MODE (operands[0]) == SFmode)
14891 strcat (buf, "ss\t{%2, %0|%0, %2}");
14893 strcat (buf, "sd\t{%2, %0|%0, %2}");
14899 switch (GET_CODE (operands[3]))
14903 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
14905 rtx temp = operands[2];
14906 operands[2] = operands[1];
14907 operands[1] = temp;
14910 /* We know operands[0] == operands[1]. */
14912 if (MEM_P (operands[2]))
14918 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
14920 if (STACK_TOP_P (operands[0]))
14921 /* How is it that we are storing to a dead operand[2]?
14922 Well, presumably operands[1] is dead too. We can't
14923 store the result to st(0) as st(0) gets popped on this
14924 instruction. Instead store to operands[2] (which I
14925 think has to be st(1)). st(1) will be popped later.
14926 gcc <= 2.8.1 didn't have this check and generated
14927 assembly code that the Unixware assembler rejected. */
14928 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
14930 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
14934 if (STACK_TOP_P (operands[0]))
14935 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
14937 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
14942 if (MEM_P (operands[1]))
14948 if (MEM_P (operands[2]))
14954 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
14957 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
14958 derived assemblers, confusingly reverse the direction of
14959 the operation for fsub{r} and fdiv{r} when the
14960 destination register is not st(0). The Intel assembler
14961 doesn't have this brain damage. Read !SYSV386_COMPAT to
14962 figure out what the hardware really does. */
14963 if (STACK_TOP_P (operands[0]))
14964 p = "{p\t%0, %2|rp\t%2, %0}";
14966 p = "{rp\t%2, %0|p\t%0, %2}";
14968 if (STACK_TOP_P (operands[0]))
14969 /* As above for fmul/fadd, we can't store to st(0). */
14970 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
14972 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
14977 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
14980 if (STACK_TOP_P (operands[0]))
14981 p = "{rp\t%0, %1|p\t%1, %0}";
14983 p = "{p\t%1, %0|rp\t%0, %1}";
14985 if (STACK_TOP_P (operands[0]))
14986 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
14988 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
14993 if (STACK_TOP_P (operands[0]))
14995 if (STACK_TOP_P (operands[1]))
14996 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
14998 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
15001 else if (STACK_TOP_P (operands[1]))
15004 p = "{\t%1, %0|r\t%0, %1}";
15006 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
15012 p = "{r\t%2, %0|\t%0, %2}";
15014 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
15020 gcc_unreachable ();
15027 /* Return the mode needed for entity in the optimize_mode_switching pass. */
15030 ix86_mode_needed (int entity, rtx insn)
15032 enum attr_i387_cw mode;
15034 /* The mode UNINITIALIZED is used to store the control word after a
15035 function call or ASM pattern. The mode ANY specifies that the
15036 function has no requirements on the control word and makes no
15037 changes in the bits we are interested in. */
15040 || (NONJUMP_INSN_P (insn)
15041 && (asm_noperands (PATTERN (insn)) >= 0
15042 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
15043 return I387_CW_UNINITIALIZED;
15045 if (recog_memoized (insn) < 0)
15046 return I387_CW_ANY;
15048 mode = get_attr_i387_cw (insn);
15053 if (mode == I387_CW_TRUNC)
15058 if (mode == I387_CW_FLOOR)
15063 if (mode == I387_CW_CEIL)
15068 if (mode == I387_CW_MASK_PM)
15073 gcc_unreachable ();
15076 return I387_CW_ANY;
15079 /* Output code to initialize control word copies used by trunc?f?i and
15080 rounding patterns. CURRENT_MODE is set to the current control word,
15081 while NEW_MODE is set to the new control word. */
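/* For reference: bits 10-11 of the x87 control word select the
   rounding mode (00 nearest, 01 down/floor, 10 up/ceil, 11 toward
   zero/truncate), hence the 0x0400, 0x0800 and 0x0c00 values below;
   bit 5 (0x0020) masks the precision exception.  */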
15084 emit_i387_cw_initialization (int mode)
15086 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
15089 enum ix86_stack_slot slot;
15091 rtx reg = gen_reg_rtx (HImode);
15093 emit_insn (gen_x86_fnstcw_1 (stored_mode));
15094 emit_move_insn (reg, copy_rtx (stored_mode));
15096 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
15097 || optimize_function_for_size_p (cfun))
15101 case I387_CW_TRUNC:
15102 /* round toward zero (truncate) */
15103 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
15104 slot = SLOT_CW_TRUNC;
15107 case I387_CW_FLOOR:
15108 /* round down toward -oo */
15109 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
15110 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
15111 slot = SLOT_CW_FLOOR;
15115 /* round up toward +oo */
15116 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
15117 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
15118 slot = SLOT_CW_CEIL;
15121 case I387_CW_MASK_PM:
15122 /* mask precision exception for nearbyint() */
15123 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
15124 slot = SLOT_CW_MASK_PM;
15128 gcc_unreachable ();
15135 case I387_CW_TRUNC:
15136 /* round toward zero (truncate) */
15137 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
15138 slot = SLOT_CW_TRUNC;
15141 case I387_CW_FLOOR:
15142 /* round down toward -oo */
15143 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
15144 slot = SLOT_CW_FLOOR;
15148 /* round up toward +oo */
15149 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
15150 slot = SLOT_CW_CEIL;
15153 case I387_CW_MASK_PM:
15154 /* mask precision exception for nearbyint() */
15155 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
15156 slot = SLOT_CW_MASK_PM;
15160 gcc_unreachable ();
15164 gcc_assert (slot < MAX_386_STACK_LOCALS);
15166 new_mode = assign_386_stack_local (HImode, slot);
15167 emit_move_insn (new_mode, reg);
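/* As an illustration (a sketch, not part of the original code): for
   I387_CW_TRUNC on the or/and path above, the emitted insns correspond
   roughly to

	fnstcw	stored_cw		# save the current control word
	movzwl	stored_cw, %eax
	orw	$0x0c00, %ax		# RC field (bits 11:10) = 11b
	movw	%ax, new_cw

   with fldcw new_cw / fldcw stored_cw emitted later around the
   rounding insn.  The x87 RC encodings the constants rely on are:
   00 nearest, 01 down (floor), 10 up (ceil), 11 toward zero (trunc);
   0x0020 sets PM, the precision exception mask, for nearbyint.  */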
15170 /* Output code for INSN to convert a float to a signed int. OPERANDS
15171 are the insn operands. The output may be [HSD]Imode and the input
15172 operand may be [SDX]Fmode. */
15175 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
15177 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
15178 int dimode_p = GET_MODE (operands[0]) == DImode;
15179 int round_mode = get_attr_i387_cw (insn);
15181 /* Jump through a hoop or two for DImode, since the hardware has no
15182 non-popping instruction. We used to do this a different way, but
15183 that was somewhat fragile and broke with post-reload splitters. */
15184 if ((dimode_p || fisttp) && !stack_top_dies)
15185 output_asm_insn ("fld\t%y1", operands);
15187 gcc_assert (STACK_TOP_P (operands[1]));
15188 gcc_assert (MEM_P (operands[0]));
15189 gcc_assert (GET_MODE (operands[1]) != TFmode);
15192 output_asm_insn ("fisttp%Z0\t%0", operands);
15195 if (round_mode != I387_CW_ANY)
15196 output_asm_insn ("fldcw\t%3", operands);
15197 if (stack_top_dies || dimode_p)
15198 output_asm_insn ("fistp%Z0\t%0", operands);
15200 output_asm_insn ("fist%Z0\t%0", operands);
15201 if (round_mode != I387_CW_ANY)
15202 output_asm_insn ("fldcw\t%2", operands);
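/* For example (a sketch under the operand conventions used above,
   where %2 is the original control word slot and %3 the truncating
   one), a DImode fix without fisttp typically assembles to

	fld	%st(0)		# duplicate st(0): fistp pops, but the
				# input must survive
	fldcw	%3		# switch to round-toward-zero
	fistpll	%0		# popping store of the 64bit result
	fldcw	%2		# restore the caller's rounding mode  */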
15208 /* Output code for x87 ffreep insn. The OPNO argument, which may only
15209 have the values zero or one, indicates the ffreep insn's operand
15210 from the OPERANDS array. */
15212 static const char *
15213 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
15215 if (TARGET_USE_FFREEP)
15216 #ifdef HAVE_AS_IX86_FFREEP
15217 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
15220 static char retval[32];
15221 int regno = REGNO (operands[opno]);
15223 gcc_assert (FP_REGNO_P (regno));
15225 regno -= FIRST_STACK_REG;
15227 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
15232 return opno ? "fstp\t%y1" : "fstp\t%y0";
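/* A note on the fallback encoding above (an illustration, assuming a
   little-endian target assembler): ffreep %st(N) is the two bytes
   0xdf 0xc0+N, so for N == 2 the snprintf produces ASM_SHORT "0xc2df",
   and the 16bit value 0xc2df is laid down as the bytes df c2 --
   exactly the ffreep opcode -- when the assembler itself does not
   know the mnemonic.  */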
15236 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
15237 should be used. UNORDERED_P is true when fucom should be used. */
15240 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
15242 int stack_top_dies;
15243 rtx cmp_op0, cmp_op1;
15244 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
15248 cmp_op0 = operands[0];
15249 cmp_op1 = operands[1];
15253 cmp_op0 = operands[1];
15254 cmp_op1 = operands[2];
15259 static const char ucomiss[] = "vucomiss\t{%1, %0|%0, %1}";
15260 static const char ucomisd[] = "vucomisd\t{%1, %0|%0, %1}";
15261 static const char comiss[] = "vcomiss\t{%1, %0|%0, %1}";
15262 static const char comisd[] = "vcomisd\t{%1, %0|%0, %1}";
15264 if (GET_MODE (operands[0]) == SFmode)
15266 return &ucomiss[TARGET_AVX ? 0 : 1];
15268 return &comiss[TARGET_AVX ? 0 : 1];
15271 return &ucomisd[TARGET_AVX ? 0 : 1];
15273 return &comisd[TARGET_AVX ? 0 : 1];
15276 gcc_assert (STACK_TOP_P (cmp_op0));
15278 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
15280 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
15282 if (stack_top_dies)
15284 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
15285 return output_387_ffreep (operands, 1);
15288 return "ftst\n\tfnstsw\t%0";
15291 if (STACK_REG_P (cmp_op1)
15293 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
15294 && REGNO (cmp_op1) != FIRST_STACK_REG)
/* If both the top of the 387 stack and the other operand (also a
   stack register) die, then this must be a `fcompp' float
   compare.  */
15302 /* There is no double popping fcomi variant. Fortunately,
15303 eflags is immune from the fstp's cc clobbering. */
15305 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
15307 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
15308 return output_387_ffreep (operands, 0);
15313 return "fucompp\n\tfnstsw\t%0";
15315 return "fcompp\n\tfnstsw\t%0";
15320 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
15322 static const char * const alt[16] =
15324 "fcom%Z2\t%y2\n\tfnstsw\t%0",
15325 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
15326 "fucom%Z2\t%y2\n\tfnstsw\t%0",
15327 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
15329 "ficom%Z2\t%y2\n\tfnstsw\t%0",
15330 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
15334 "fcomi\t{%y1, %0|%0, %y1}",
15335 "fcomip\t{%y1, %0|%0, %y1}",
15336 "fucomi\t{%y1, %0|%0, %y1}",
15337 "fucomip\t{%y1, %0|%0, %y1}",
15348 mask = eflags_p << 3;
15349 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
15350 mask |= unordered_p << 1;
15351 mask |= stack_top_dies;
15353 gcc_assert (mask < 16);
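/* Worked example of the encoding (illustrative only): for fucomip
   with a dying stack top and an FP operand, eflags_p = 1, intmode = 0,
   unordered_p = 1 and stack_top_dies = 1, so mask = 8 + 0 + 2 + 1 = 11,
   selecting "fucomip\t{%y1, %0|%0, %y1}" from the alt[] table above.  */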
15362 ix86_output_addr_vec_elt (FILE *file, int value)
15364 const char *directive = ASM_LONG;
15368 directive = ASM_QUAD;
15370 gcc_assert (!TARGET_64BIT);
15373 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
15377 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
15379 const char *directive = ASM_LONG;
15382 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
15383 directive = ASM_QUAD;
15385 gcc_assert (!TARGET_64BIT);
15387 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
15388 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
15389 fprintf (file, "%s%s%d-%s%d\n",
15390 directive, LPREFIX, value, LPREFIX, rel);
15391 else if (HAVE_AS_GOTOFF_IN_DATA)
15392 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
15394 else if (TARGET_MACHO)
15396 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
15397 machopic_output_function_base_name (file);
15402 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
15403 GOT_SYMBOL_NAME, LPREFIX, value);
/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
   for the target.  */
15410 ix86_expand_clear (rtx dest)
15414 /* We play register width games, which are only valid after reload. */
15415 gcc_assert (reload_completed);
15417 /* Avoid HImode and its attendant prefix byte. */
15418 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
15419 dest = gen_rtx_REG (SImode, REGNO (dest));
15420 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
15422 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
15423 if (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ())
15425 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15426 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
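/* For reference (a sketch of the resulting RTL, not emitted text):
   clearing %eax ends up as

     (parallel [(set (reg:SI ax) (const_int 0))
		(clobber (reg:CC flags))])

   which assembles to "xorl %eax, %eax"; the explicit flags clobber is
   what distinguishes it from the mov $0 form.  */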
15432 /* X is an unchanging MEM. If it is a constant pool reference, return
15433 the constant pool rtx, else NULL. */
15436 maybe_get_pool_constant (rtx x)
15438 x = ix86_delegitimize_address (XEXP (x, 0));
15440 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
15441 return get_pool_constant (x);
15447 ix86_expand_move (enum machine_mode mode, rtx operands[])
15450 enum tls_model model;
15455 if (GET_CODE (op1) == SYMBOL_REF)
15457 model = SYMBOL_REF_TLS_MODEL (op1);
15460 op1 = legitimize_tls_address (op1, model, true);
15461 op1 = force_operand (op1, op0);
15465 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15466 && SYMBOL_REF_DLLIMPORT_P (op1))
15467 op1 = legitimize_dllimport_symbol (op1, false);
15469 else if (GET_CODE (op1) == CONST
15470 && GET_CODE (XEXP (op1, 0)) == PLUS
15471 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
15473 rtx addend = XEXP (XEXP (op1, 0), 1);
15474 rtx symbol = XEXP (XEXP (op1, 0), 0);
15477 model = SYMBOL_REF_TLS_MODEL (symbol);
15479 tmp = legitimize_tls_address (symbol, model, true);
15480 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15481 && SYMBOL_REF_DLLIMPORT_P (symbol))
15482 tmp = legitimize_dllimport_symbol (symbol, true);
15486 tmp = force_operand (tmp, NULL);
15487 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
15488 op0, 1, OPTAB_DIRECT);
15494 if ((flag_pic || MACHOPIC_INDIRECT)
15495 && mode == Pmode && symbolic_operand (op1, Pmode))
15497 if (TARGET_MACHO && !TARGET_64BIT)
15500 /* dynamic-no-pic */
15501 if (MACHOPIC_INDIRECT)
15503 rtx temp = ((reload_in_progress
15504 || ((op0 && REG_P (op0))
15506 ? op0 : gen_reg_rtx (Pmode));
15507 op1 = machopic_indirect_data_reference (op1, temp);
15509 op1 = machopic_legitimize_pic_address (op1, mode,
15510 temp == op1 ? 0 : temp);
15512 if (op0 != op1 && GET_CODE (op0) != MEM)
15514 rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
15518 if (GET_CODE (op0) == MEM)
15519 op1 = force_reg (Pmode, op1);
15523 if (GET_CODE (temp) != REG)
15524 temp = gen_reg_rtx (Pmode);
15525 temp = legitimize_pic_address (op1, temp);
15530 /* dynamic-no-pic */
15536 op1 = force_reg (Pmode, op1);
15537 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
15539 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
15540 op1 = legitimize_pic_address (op1, reg);
15549 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
15550 || !push_operand (op0, mode))
15552 op1 = force_reg (mode, op1);
15554 if (push_operand (op0, mode)
15555 && ! general_no_elim_operand (op1, mode))
15556 op1 = copy_to_mode_reg (mode, op1);
/* Force large constants in 64bit compilation into a register
   to get them CSEed.  */
15560 if (can_create_pseudo_p ()
15561 && (mode == DImode) && TARGET_64BIT
15562 && immediate_operand (op1, mode)
15563 && !x86_64_zext_immediate_operand (op1, VOIDmode)
15564 && !register_operand (op0, mode)
15566 op1 = copy_to_mode_reg (mode, op1);
15568 if (can_create_pseudo_p ()
15569 && FLOAT_MODE_P (mode)
15570 && GET_CODE (op1) == CONST_DOUBLE)
15572 /* If we are loading a floating point constant to a register,
15573 force the value to memory now, since we'll get better code
15574 out the back end. */
15576 op1 = validize_mem (force_const_mem (mode, op1));
15577 if (!register_operand (op0, mode))
15579 rtx temp = gen_reg_rtx (mode);
15580 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
15581 emit_move_insn (op0, temp);
15587 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
15591 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
15593 rtx op0 = operands[0], op1 = operands[1];
15594 unsigned int align = GET_MODE_ALIGNMENT (mode);
/* Force constants other than zero into memory.  We do not know how
   the instructions used to build constants modify the upper 64 bits
   of the register; once we have that information we may be able
   to handle some of them more efficiently.  */
15600 if (can_create_pseudo_p ()
15601 && register_operand (op0, mode)
15602 && (CONSTANT_P (op1)
15603 || (GET_CODE (op1) == SUBREG
15604 && CONSTANT_P (SUBREG_REG (op1))))
15605 && !standard_sse_constant_p (op1))
15606 op1 = validize_mem (force_const_mem (mode, op1));
/* We need to check memory alignment for SSE mode since attributes
   can make operands unaligned.  */
15610 if (can_create_pseudo_p ()
15611 && SSE_REG_MODE_P (mode)
15612 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
15613 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
15617 /* ix86_expand_vector_move_misalign() does not like constants ... */
15618 if (CONSTANT_P (op1)
15619 || (GET_CODE (op1) == SUBREG
15620 && CONSTANT_P (SUBREG_REG (op1))))
15621 op1 = validize_mem (force_const_mem (mode, op1));
15623 /* ... nor both arguments in memory. */
15624 if (!register_operand (op0, mode)
15625 && !register_operand (op1, mode))
15626 op1 = force_reg (mode, op1);
15628 tmp[0] = op0; tmp[1] = op1;
15629 ix86_expand_vector_move_misalign (mode, tmp);
15633 /* Make operand1 a register if it isn't already. */
15634 if (can_create_pseudo_p ()
15635 && !register_operand (op0, mode)
15636 && !register_operand (op1, mode))
15638 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
15642 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
15645 /* Split 32-byte AVX unaligned load and store if needed. */
15648 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
15651 rtx (*extract) (rtx, rtx, rtx);
15652 rtx (*move_unaligned) (rtx, rtx);
15653 enum machine_mode mode;
15655 switch (GET_MODE (op0))
15658 gcc_unreachable ();
15660 extract = gen_avx_vextractf128v32qi;
15661 move_unaligned = gen_avx_movdqu256;
15665 extract = gen_avx_vextractf128v8sf;
15666 move_unaligned = gen_avx_movups256;
15670 extract = gen_avx_vextractf128v4df;
15671 move_unaligned = gen_avx_movupd256;
15676 if (MEM_P (op1) && TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
15678 rtx r = gen_reg_rtx (mode);
15679 m = adjust_address (op1, mode, 0);
15680 emit_move_insn (r, m);
15681 m = adjust_address (op1, mode, 16);
15682 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
15683 emit_move_insn (op0, r);
15685 else if (MEM_P (op0) && TARGET_AVX256_SPLIT_UNALIGNED_STORE)
15687 m = adjust_address (op0, mode, 0);
15688 emit_insn (extract (m, op1, const0_rtx));
15689 m = adjust_address (op0, mode, 16);
15690 emit_insn (extract (m, op1, const1_rtx));
15693 emit_insn (move_unaligned (op0, op1));
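/* Illustration (assuming the usual AVX assembly for these patterns):
   with TARGET_AVX256_SPLIT_UNALIGNED_STORE a 32-byte store becomes two
   16-byte halves,

	vextractf128	$0x0, %ymm0, (mem)
	vextractf128	$0x1, %ymm0, 16(mem)

   while the split load path does a 16-byte load plus a VEC_CONCAT,
   typically a vmovup[sd]/vmovdqu followed by vinsertf128.  */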
15696 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
15697 straight to ix86_expand_vector_move. */
15698 /* Code generation for scalar reg-reg moves of single and double precision data:
if (x86_sse_partial_reg_dependency == true || x86_sse_split_regs == true)
15703 if (x86_sse_partial_reg_dependency == true)
15708 Code generation for scalar loads of double precision data:
15709 if (x86_sse_split_regs == true)
15710 movlpd mem, reg (gas syntax)
15714 Code generation for unaligned packed loads of single precision data
15715 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
15716 if (x86_sse_unaligned_move_optimal)
15719 if (x86_sse_partial_reg_dependency == true)
15731 Code generation for unaligned packed loads of double precision data
15732 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
15733 if (x86_sse_unaligned_move_optimal)
15736 if (x86_sse_split_regs == true)
15749 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
15758 switch (GET_MODE_CLASS (mode))
15760 case MODE_VECTOR_INT:
15762 switch (GET_MODE_SIZE (mode))
15765 /* If we're optimizing for size, movups is the smallest. */
15766 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15768 op0 = gen_lowpart (V4SFmode, op0);
15769 op1 = gen_lowpart (V4SFmode, op1);
15770 emit_insn (gen_sse_movups (op0, op1));
15773 op0 = gen_lowpart (V16QImode, op0);
15774 op1 = gen_lowpart (V16QImode, op1);
15775 emit_insn (gen_sse2_movdqu (op0, op1));
15778 op0 = gen_lowpart (V32QImode, op0);
15779 op1 = gen_lowpart (V32QImode, op1);
15780 ix86_avx256_split_vector_move_misalign (op0, op1);
15783 gcc_unreachable ();
15786 case MODE_VECTOR_FLOAT:
15787 op0 = gen_lowpart (mode, op0);
15788 op1 = gen_lowpart (mode, op1);
15793 emit_insn (gen_sse_movups (op0, op1));
15796 ix86_avx256_split_vector_move_misalign (op0, op1);
15799 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15801 op0 = gen_lowpart (V4SFmode, op0);
15802 op1 = gen_lowpart (V4SFmode, op1);
15803 emit_insn (gen_sse_movups (op0, op1));
15806 emit_insn (gen_sse2_movupd (op0, op1));
15809 ix86_avx256_split_vector_move_misalign (op0, op1);
15812 gcc_unreachable ();
15817 gcc_unreachable ();
15825 /* If we're optimizing for size, movups is the smallest. */
15826 if (optimize_insn_for_size_p ()
15827 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15829 op0 = gen_lowpart (V4SFmode, op0);
15830 op1 = gen_lowpart (V4SFmode, op1);
15831 emit_insn (gen_sse_movups (op0, op1));
/* ??? If we have typed data, then it would appear that using
   movdqu is the only way to get unaligned data loaded with
   integer type.  */
15838 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
15840 op0 = gen_lowpart (V16QImode, op0);
15841 op1 = gen_lowpart (V16QImode, op1);
15842 emit_insn (gen_sse2_movdqu (op0, op1));
15846 if (TARGET_SSE2 && mode == V2DFmode)
15850 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
15852 op0 = gen_lowpart (V2DFmode, op0);
15853 op1 = gen_lowpart (V2DFmode, op1);
15854 emit_insn (gen_sse2_movupd (op0, op1));
15858 /* When SSE registers are split into halves, we can avoid
15859 writing to the top half twice. */
15860 if (TARGET_SSE_SPLIT_REGS)
15862 emit_clobber (op0);
15867 /* ??? Not sure about the best option for the Intel chips.
15868 The following would seem to satisfy; the register is
15869 entirely cleared, breaking the dependency chain. We
15870 then store to the upper half, with a dependency depth
15871 of one. A rumor has it that Intel recommends two movsd
15872 followed by an unpacklpd, but this is unconfirmed. And
15873 given that the dependency depth of the unpacklpd would
15874 still be one, I'm not sure why this would be better. */
15875 zero = CONST0_RTX (V2DFmode);
15878 m = adjust_address (op1, DFmode, 0);
15879 emit_insn (gen_sse2_loadlpd (op0, zero, m));
15880 m = adjust_address (op1, DFmode, 8);
15881 emit_insn (gen_sse2_loadhpd (op0, op0, m));
15885 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
15887 op0 = gen_lowpart (V4SFmode, op0);
15888 op1 = gen_lowpart (V4SFmode, op1);
15889 emit_insn (gen_sse_movups (op0, op1));
15893 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
15894 emit_move_insn (op0, CONST0_RTX (mode));
15896 emit_clobber (op0);
15898 if (mode != V4SFmode)
15899 op0 = gen_lowpart (V4SFmode, op0);
15900 m = adjust_address (op1, V2SFmode, 0);
15901 emit_insn (gen_sse_loadlps (op0, op0, m));
15902 m = adjust_address (op1, V2SFmode, 8);
15903 emit_insn (gen_sse_loadhps (op0, op0, m));
15906 else if (MEM_P (op0))
15908 /* If we're optimizing for size, movups is the smallest. */
15909 if (optimize_insn_for_size_p ()
15910 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15912 op0 = gen_lowpart (V4SFmode, op0);
15913 op1 = gen_lowpart (V4SFmode, op1);
15914 emit_insn (gen_sse_movups (op0, op1));
/* ??? Similar to above, only less clear because of "typeless
   stores".  */
15920 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
15921 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
15923 op0 = gen_lowpart (V16QImode, op0);
15924 op1 = gen_lowpart (V16QImode, op1);
15925 emit_insn (gen_sse2_movdqu (op0, op1));
15929 if (TARGET_SSE2 && mode == V2DFmode)
15931 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
15933 op0 = gen_lowpart (V2DFmode, op0);
15934 op1 = gen_lowpart (V2DFmode, op1);
15935 emit_insn (gen_sse2_movupd (op0, op1));
15939 m = adjust_address (op0, DFmode, 0);
15940 emit_insn (gen_sse2_storelpd (m, op1));
15941 m = adjust_address (op0, DFmode, 8);
15942 emit_insn (gen_sse2_storehpd (m, op1));
15947 if (mode != V4SFmode)
15948 op1 = gen_lowpart (V4SFmode, op1);
15950 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
15952 op0 = gen_lowpart (V4SFmode, op0);
15953 emit_insn (gen_sse_movups (op0, op1));
15957 m = adjust_address (op0, V2SFmode, 0);
15958 emit_insn (gen_sse_storelps (m, op1));
15959 m = adjust_address (op0, V2SFmode, 8);
15960 emit_insn (gen_sse_storehps (m, op1));
15965 gcc_unreachable ();
15968 /* Expand a push in MODE. This is some mode for which we do not support
15969 proper push instructions, at least from the registers that we expect
15970 the value to live in. */
15973 ix86_expand_push (enum machine_mode mode, rtx x)
15977 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
15978 GEN_INT (-GET_MODE_SIZE (mode)),
15979 stack_pointer_rtx, 1, OPTAB_DIRECT);
15980 if (tmp != stack_pointer_rtx)
15981 emit_move_insn (stack_pointer_rtx, tmp);
15983 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
/* When we push an operand onto the stack, it has to be aligned at
   least at the function argument boundary.  However, since we don't
   have the argument type, we can't determine the actual argument
   boundary and must assume only the minimum alignment.  */
15989 emit_move_insn (tmp, x);
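/* For instance (a sketch for ia32; the source register is only an
   assumption for illustration), pushing a DFmode value from an SSE
   register through this path yields

	subl	$8, %esp
	movsd	%xmm0, (%esp)

   i.e. an explicit stack-pointer adjustment followed by an ordinary
   store, since no real push insn exists for that register class.  */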
15992 /* Helper function of ix86_fixup_binary_operands to canonicalize
15993 operand order. Returns true if the operands should be swapped. */
15996 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
15999 rtx dst = operands[0];
16000 rtx src1 = operands[1];
16001 rtx src2 = operands[2];
16003 /* If the operation is not commutative, we can't do anything. */
16004 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
16007 /* Highest priority is that src1 should match dst. */
16008 if (rtx_equal_p (dst, src1))
16010 if (rtx_equal_p (dst, src2))
16013 /* Next highest priority is that immediate constants come second. */
16014 if (immediate_operand (src2, mode))
16016 if (immediate_operand (src1, mode))
16019 /* Lowest priority is that memory references should come second. */
16029 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
16030 destination to use for the operation. If different from the true
16031 destination in operands[0], a copy operation will be required. */
16034 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
16037 rtx dst = operands[0];
16038 rtx src1 = operands[1];
16039 rtx src2 = operands[2];
16041 /* Canonicalize operand order. */
16042 if (ix86_swap_binary_operands_p (code, mode, operands))
16046 /* It is invalid to swap operands of different modes. */
16047 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
/* The two source operands cannot both be in memory.  */
16055 if (MEM_P (src1) && MEM_P (src2))
16057 /* Optimization: Only read from memory once. */
16058 if (rtx_equal_p (src1, src2))
16060 src2 = force_reg (mode, src2);
16064 src2 = force_reg (mode, src2);
16067 /* If the destination is memory, and we do not have matching source
16068 operands, do things in registers. */
16069 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
16070 dst = gen_reg_rtx (mode);
16072 /* Source 1 cannot be a constant. */
16073 if (CONSTANT_P (src1))
16074 src1 = force_reg (mode, src1);
16076 /* Source 1 cannot be a non-matching memory. */
16077 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
16078 src1 = force_reg (mode, src1);
16080 operands[1] = src1;
16081 operands[2] = src2;
16085 /* Similarly, but assume that the destination has already been
16086 set up properly. */
16089 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
16090 enum machine_mode mode, rtx operands[])
16092 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
16093 gcc_assert (dst == operands[0]);
/* Attempt to expand a binary operator.  Make the expansion closer to
   the actual machine than just general_operand, which will allow
   3 separate memory references (one output, two input) in a single
   insn.  */
16101 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
16104 rtx src1, src2, dst, op, clob;
16106 dst = ix86_fixup_binary_operands (code, mode, operands);
16107 src1 = operands[1];
16108 src2 = operands[2];
16110 /* Emit the instruction. */
16112 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
16113 if (reload_in_progress)
16115 /* Reload doesn't know about the flags register, and doesn't know that
16116 it doesn't want to clobber it. We can only do this with PLUS. */
16117 gcc_assert (code == PLUS);
16120 else if (reload_completed
16122 && !rtx_equal_p (dst, src1))
16124 /* This is going to be an LEA; avoid splitting it later. */
16129 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16130 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
16133 /* Fix up the destination if needed. */
16134 if (dst != operands[0])
16135 emit_move_insn (operands[0], dst);
16138 /* Return TRUE or FALSE depending on whether the binary operator meets the
16139 appropriate constraints. */
16142 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
16145 rtx dst = operands[0];
16146 rtx src1 = operands[1];
16147 rtx src2 = operands[2];
/* The two source operands cannot both be in memory.  */
16150 if (MEM_P (src1) && MEM_P (src2))
16153 /* Canonicalize operand order for commutative operators. */
16154 if (ix86_swap_binary_operands_p (code, mode, operands))
16161 /* If the destination is memory, we must have a matching source operand. */
16162 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
16165 /* Source 1 cannot be a constant. */
16166 if (CONSTANT_P (src1))
16169 /* Source 1 cannot be a non-matching memory. */
16170 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
16172 /* Support "andhi/andsi/anddi" as a zero-extending move. */
16173 return (code == AND
16176 || (TARGET_64BIT && mode == DImode))
16177 && CONST_INT_P (src2)
16178 && (INTVAL (src2) == 0xff
16179 || INTVAL (src2) == 0xffff));
/* Attempt to expand a unary operator.  Make the expansion closer to
   the actual machine than just general_operand, which will allow
   2 separate memory references (one output, one input) in a single
   insn.  */
16190 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
16193 int matching_memory;
16194 rtx src, dst, op, clob;
16199 /* If the destination is memory, and we do not have matching source
16200 operands, do things in registers. */
16201 matching_memory = 0;
16204 if (rtx_equal_p (dst, src))
16205 matching_memory = 1;
16207 dst = gen_reg_rtx (mode);
/* When the source operand is in memory, the destination must match.  */
16211 if (MEM_P (src) && !matching_memory)
16212 src = force_reg (mode, src);
16214 /* Emit the instruction. */
16216 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
16217 if (reload_in_progress || code == NOT)
16219 /* Reload doesn't know about the flags register, and doesn't know that
16220 it doesn't want to clobber it. */
16221 gcc_assert (code == NOT);
16226 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16227 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
16230 /* Fix up the destination if needed. */
16231 if (dst != operands[0])
16232 emit_move_insn (operands[0], dst);
16235 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
16236 divisor are within the range [0-255]. */
16239 ix86_split_idivmod (enum machine_mode mode, rtx operands[],
16242 rtx end_label, qimode_label;
16243 rtx insn, div, mod;
16244 rtx scratch, tmp0, tmp1, tmp2;
16245 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
16246 rtx (*gen_zero_extend) (rtx, rtx);
16247 rtx (*gen_test_ccno_1) (rtx, rtx);
16252 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
16253 gen_test_ccno_1 = gen_testsi_ccno_1;
16254 gen_zero_extend = gen_zero_extendqisi2;
16257 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
16258 gen_test_ccno_1 = gen_testdi_ccno_1;
16259 gen_zero_extend = gen_zero_extendqidi2;
16262 gcc_unreachable ();
16265 end_label = gen_label_rtx ();
16266 qimode_label = gen_label_rtx ();
16268 scratch = gen_reg_rtx (mode);
/* Use 8bit unsigned divmod if the dividend and divisor are within
   the range [0-255].  */
16272 emit_move_insn (scratch, operands[2]);
16273 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
16274 scratch, 1, OPTAB_DIRECT);
16275 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
16276 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
16277 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
16278 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
16279 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
16281 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
16282 predict_jump (REG_BR_PROB_BASE * 50 / 100);
16283 JUMP_LABEL (insn) = qimode_label;
/* Generate the original signed/unsigned divmod.  */
16286 div = gen_divmod4_1 (operands[0], operands[1],
16287 operands[2], operands[3]);
16290 /* Branch to the end. */
16291 emit_jump_insn (gen_jump (end_label));
16294 /* Generate 8bit unsigned divide. */
16295 emit_label (qimode_label);
/* Don't use operands[0] for the result of the 8bit divide since not
   all registers support QImode ZERO_EXTRACT.  */
16298 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
16299 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
16300 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
16301 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
16305 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
16306 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
16310 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
16311 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
16314 /* Extract remainder from AH. */
16315 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
16316 if (REG_P (operands[1]))
16317 insn = emit_move_insn (operands[1], tmp1);
/* Need a new scratch register since the old one holds the result
   of the 8bit divide.  */
16322 scratch = gen_reg_rtx (mode);
16323 emit_move_insn (scratch, tmp1);
16324 insn = emit_move_insn (operands[1], scratch);
16326 set_unique_reg_note (insn, REG_EQUAL, mod);
16328 /* Zero extend quotient from AL. */
16329 tmp1 = gen_lowpart (QImode, tmp0);
16330 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
16331 set_unique_reg_note (insn, REG_EQUAL, div);
16333 emit_label (end_label);
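/* Shape of the emitted code (an illustrative sketch for the unsigned
   SImode case; label names are invented):

	movl	dividend, scratch
	orl	divisor, scratch
	testl	$-0x100, scratch	# both values in [0-255]?
	je	.Lqimode
	divl	...			# full-width divide
	jmp	.Lend
   .Lqimode:
	divb	...			# AL = quotient, AH = remainder
   .Lend:

   The single or/test pair checks the upper bits of both operands at
   once.  */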
16336 #define LEA_SEARCH_THRESHOLD 12
/* Search backward for a non-agu definition of register number REGNO1
   or register number REGNO2 in INSN's basic block until we either
   1. pass LEA_SEARCH_THRESHOLD instructions, or
   2. reach the BB boundary, or
   3. reach an agu definition.
   Return the distance between the non-agu definition point and INSN.
   If there is no definition point, return -1.  */
16347 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
16350 basic_block bb = BLOCK_FOR_INSN (insn);
16353 enum attr_type insn_type;
16355 if (insn != BB_HEAD (bb))
16357 rtx prev = PREV_INSN (insn);
16358 while (prev && distance < LEA_SEARCH_THRESHOLD)
16360 if (NONDEBUG_INSN_P (prev))
16363 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
16364 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
16365 && !DF_REF_IS_ARTIFICIAL (*def_rec)
16366 && (regno1 == DF_REF_REGNO (*def_rec)
16367 || regno2 == DF_REF_REGNO (*def_rec)))
16369 insn_type = get_attr_type (prev);
16370 if (insn_type != TYPE_LEA)
16374 if (prev == BB_HEAD (bb))
16376 prev = PREV_INSN (prev);
16380 if (distance < LEA_SEARCH_THRESHOLD)
16384 bool simple_loop = false;
16386 FOR_EACH_EDGE (e, ei, bb->preds)
16389 simple_loop = true;
16395 rtx prev = BB_END (bb);
16398 && distance < LEA_SEARCH_THRESHOLD)
16400 if (NONDEBUG_INSN_P (prev))
16403 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
16404 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
16405 && !DF_REF_IS_ARTIFICIAL (*def_rec)
16406 && (regno1 == DF_REF_REGNO (*def_rec)
16407 || regno2 == DF_REF_REGNO (*def_rec)))
16409 insn_type = get_attr_type (prev);
16410 if (insn_type != TYPE_LEA)
16414 prev = PREV_INSN (prev);
16422 /* get_attr_type may modify recog data. We want to make sure
16423 that recog data is valid for instruction INSN, on which
16424 distance_non_agu_define is called. INSN is unchanged here. */
16425 extract_insn_cached (insn);
/* Return the distance between INSN and the next insn that uses
   register number REGNO0 in a memory address.  Return -1 if no such
   use is found within LEA_SEARCH_THRESHOLD or if REGNO0 is set
   first.  */
16434 distance_agu_use (unsigned int regno0, rtx insn)
16436 basic_block bb = BLOCK_FOR_INSN (insn);
16441 if (insn != BB_END (bb))
16443 rtx next = NEXT_INSN (insn);
16444 while (next && distance < LEA_SEARCH_THRESHOLD)
16446 if (NONDEBUG_INSN_P (next))
16450 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
16451 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
16452 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
16453 && regno0 == DF_REF_REGNO (*use_rec))
16455 /* Return DISTANCE if OP0 is used in memory
16456 address in NEXT. */
16460 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
16461 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
16462 && !DF_REF_IS_ARTIFICIAL (*def_rec)
16463 && regno0 == DF_REF_REGNO (*def_rec))
16465 /* Return -1 if OP0 is set in NEXT. */
16469 if (next == BB_END (bb))
16471 next = NEXT_INSN (next);
16475 if (distance < LEA_SEARCH_THRESHOLD)
16479 bool simple_loop = false;
16481 FOR_EACH_EDGE (e, ei, bb->succs)
16484 simple_loop = true;
16490 rtx next = BB_HEAD (bb);
16493 && distance < LEA_SEARCH_THRESHOLD)
16495 if (NONDEBUG_INSN_P (next))
16499 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
16500 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
16501 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
16502 && regno0 == DF_REF_REGNO (*use_rec))
16504 /* Return DISTANCE if OP0 is used in memory
16505 address in NEXT. */
16509 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
16510 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
16511 && !DF_REF_IS_ARTIFICIAL (*def_rec)
16512 && regno0 == DF_REF_REGNO (*def_rec))
16514 /* Return -1 if OP0 is set in NEXT. */
16519 next = NEXT_INSN (next);
/* Define this macro to tune LEA priority vs ADD; it takes effect when
   there is a choice between LEA and ADD.
   Negative value: ADD is preferred over LEA.
   Positive value: LEA is preferred over ADD.  */
16532 #define IX86_LEA_PRIORITY 2
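/* Example of the effect (illustrative numbers): with the value 2, an
   add whose inputs were defined by non-agu insns 3 insns back and
   whose result feeds a memory address 6 insns ahead is turned into an
   lea, since 3 + IX86_LEA_PRIORITY < 6; see ix86_lea_for_add_ok
   below.  */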
/* Return true if it is ok to optimize an ADD operation to LEA
   operation to avoid flag register consumption.  For most processors,
   ADD is faster than LEA.  For processors like ATOM, if the
   destination register of the LEA holds an actual address which will
   be used soon, LEA is better; otherwise ADD is better.  */
16541 ix86_lea_for_add_ok (rtx insn, rtx operands[])
16543 unsigned int regno0 = true_regnum (operands[0]);
16544 unsigned int regno1 = true_regnum (operands[1]);
16545 unsigned int regno2 = true_regnum (operands[2]);
/* If a = b + c and (a != b && a != c), we must use the lea form.  */
16548 if (regno0 != regno1 && regno0 != regno2)
16551 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
16555 int dist_define, dist_use;
16557 /* Return false if REGNO0 isn't used in memory address. */
16558 dist_use = distance_agu_use (regno0, insn);
16562 dist_define = distance_non_agu_define (regno1, regno2, insn);
16563 if (dist_define <= 0)
/* If this insn has both a backward non-agu dependence and a forward
   agu dependence, the one with the shorter distance takes effect.  */
16568 if ((dist_define + IX86_LEA_PRIORITY) < dist_use)
/* Return true if the destination reg of SET_BODY is the shift count
   of USE_BODY.  */
16579 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
16585 /* Retrieve destination of SET_BODY. */
16586 switch (GET_CODE (set_body))
16589 set_dest = SET_DEST (set_body);
16590 if (!set_dest || !REG_P (set_dest))
16594 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
16595 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
16603 /* Retrieve shift count of USE_BODY. */
16604 switch (GET_CODE (use_body))
16607 shift_rtx = XEXP (use_body, 1);
16610 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
16611 if (ix86_dep_by_shift_count_body (set_body,
16612 XVECEXP (use_body, 0, i)))
16620 && (GET_CODE (shift_rtx) == ASHIFT
16621 || GET_CODE (shift_rtx) == LSHIFTRT
16622 || GET_CODE (shift_rtx) == ASHIFTRT
16623 || GET_CODE (shift_rtx) == ROTATE
16624 || GET_CODE (shift_rtx) == ROTATERT))
16626 rtx shift_count = XEXP (shift_rtx, 1);
16628 /* Return true if shift count is dest of SET_BODY. */
16629 if (REG_P (shift_count)
16630 && true_regnum (set_dest) == true_regnum (shift_count))
/* Return true if the destination reg of SET_INSN is the shift count
   of USE_INSN.  */
16641 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
16643 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
16644 PATTERN (use_insn));
16647 /* Return TRUE or FALSE depending on whether the unary operator meets the
16648 appropriate constraints. */
16651 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
16652 enum machine_mode mode ATTRIBUTE_UNUSED,
16653 rtx operands[2] ATTRIBUTE_UNUSED)
/* If one of the operands is in memory, source and destination must
   match.  */
16656 if ((MEM_P (operands[0])
16657 || MEM_P (operands[1]))
16658 && ! rtx_equal_p (operands[0], operands[1]))
16663 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
16664 are ok, keeping in mind the possible movddup alternative. */
16667 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
16669 if (MEM_P (operands[0]))
16670 return rtx_equal_p (operands[0], operands[1 + high]);
16671 if (MEM_P (operands[1]) && MEM_P (operands[2]))
16672 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
16676 /* Post-reload splitter for converting an SF or DFmode value in an
16677 SSE register into an unsigned SImode. */
16680 ix86_split_convert_uns_si_sse (rtx operands[])
16682 enum machine_mode vecmode;
16683 rtx value, large, zero_or_two31, input, two31, x;
16685 large = operands[1];
16686 zero_or_two31 = operands[2];
16687 input = operands[3];
16688 two31 = operands[4];
16689 vecmode = GET_MODE (large);
16690 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
16692 /* Load up the value into the low element. We must ensure that the other
16693 elements are valid floats -- zero is the easiest such value. */
16696 if (vecmode == V4SFmode)
16697 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
16699 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
16703 input = gen_rtx_REG (vecmode, REGNO (input));
16704 emit_move_insn (value, CONST0_RTX (vecmode));
16705 if (vecmode == V4SFmode)
16706 emit_insn (gen_sse_movss (value, value, input));
16708 emit_insn (gen_sse2_movsd (value, value, input));
16711 emit_move_insn (large, two31);
16712 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
16714 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
16715 emit_insn (gen_rtx_SET (VOIDmode, large, x));
16717 x = gen_rtx_AND (vecmode, zero_or_two31, large);
16718 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
16720 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
16721 emit_insn (gen_rtx_SET (VOIDmode, value, x));
16723 large = gen_rtx_REG (V4SImode, REGNO (large));
16724 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
16726 x = gen_rtx_REG (V4SImode, REGNO (value));
16727 if (vecmode == V4SFmode)
16728 emit_insn (gen_sse2_cvttps2dq (x, value));
16730 emit_insn (gen_sse2_cvttpd2dq (x, value));
16733 emit_insn (gen_xorv4si3 (value, value, large));
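/* Worked example (illustrative): converting 3e9 from SFmode.  Since
   two31 <= value, LARGE becomes all-ones, ZERO_OR_TWO31 stays 2**31,
   and the subtraction leaves 852516352.0, which cvttps2dq converts
   exactly; LARGE shifted left by 31 is 0x80000000, and the final xor
   restores 3000000000 as the unsigned result.  */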
16736 /* Convert an unsigned DImode value into a DFmode, using only SSE.
16737 Expects the 64-bit DImode to be supplied in a pair of integral
16738 registers. Requires SSE2; will use SSE3 if available. For x86_32,
16739 -mfpmath=sse, !optimize_size only. */
16742 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
16744 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
16745 rtx int_xmm, fp_xmm;
16746 rtx biases, exponents;
16749 int_xmm = gen_reg_rtx (V4SImode);
16750 if (TARGET_INTER_UNIT_MOVES)
16751 emit_insn (gen_movdi_to_sse (int_xmm, input));
16752 else if (TARGET_SSE_SPLIT_REGS)
16754 emit_clobber (int_xmm);
16755 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
16759 x = gen_reg_rtx (V2DImode);
16760 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
16761 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
16764 x = gen_rtx_CONST_VECTOR (V4SImode,
16765 gen_rtvec (4, GEN_INT (0x43300000UL),
16766 GEN_INT (0x45300000UL),
16767 const0_rtx, const0_rtx));
16768 exponents = validize_mem (force_const_mem (V4SImode, x));
16770 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
16771 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
16773 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
16774 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
16775 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
16776 (0x1.0p84 + double(fp_value_hi_xmm)).
16777 Note these exponents differ by 32. */
16779 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
16781 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
16782 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
16783 real_ldexp (&bias_lo_rvt, &dconst1, 52);
16784 real_ldexp (&bias_hi_rvt, &dconst1, 84);
16785 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
16786 x = const_double_from_real_value (bias_hi_rvt, DFmode);
16787 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
16788 biases = validize_mem (force_const_mem (V2DFmode, biases));
16789 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
16791 /* Add the upper and lower DFmode values together. */
16793 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
16796 x = copy_to_mode_reg (V2DFmode, fp_xmm);
16797 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
16798 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
16801 ix86_expand_vector_extract (false, target, fp_xmm, 0);
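/* Worked example (illustrative): input = 2**40 + 5, so lo = 5 and
   hi = 0x100.  After the interleave, the two doubles have the bit
   patterns 0x4330000000000005 = 0x1.0p52 + 5 and 0x4530000000000100
   = 0x1.0p84 + 0x1.0p40; subtracting the biases leaves 5.0 and
   0x1.0p40, and the final add produces 1099511627781.0 exactly.  */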
16804 /* Not used, but eases macroization of patterns. */
16806 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
16807 rtx input ATTRIBUTE_UNUSED)
16809 gcc_unreachable ();
16812 /* Convert an unsigned SImode value into a DFmode. Only currently used
16813 for SSE, but applicable anywhere. */
16816 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
16818 REAL_VALUE_TYPE TWO31r;
16821 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
16822 NULL, 1, OPTAB_DIRECT);
16824 fp = gen_reg_rtx (DFmode);
16825 emit_insn (gen_floatsidf2 (fp, x));
16827 real_ldexp (&TWO31r, &dconst1, 31);
16828 x = const_double_from_real_value (TWO31r, DFmode);
16830 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
16832 emit_move_insn (target, x);
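/* Worked example (illustrative): input = 3000000000.  The PLUS of
   -2**31 wraps to the signed value 852516352, floatsidf converts it
   exactly, and adding back 0x1.0p31 yields 3000000000.0; the bias
   trick thus reuses the signed converter for the full unsigned
   range.  */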
16835 /* Convert a signed DImode value into a DFmode. Only used for SSE in
16836 32-bit mode; otherwise we have a direct convert instruction. */
16839 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
16841 REAL_VALUE_TYPE TWO32r;
16842 rtx fp_lo, fp_hi, x;
16844 fp_lo = gen_reg_rtx (DFmode);
16845 fp_hi = gen_reg_rtx (DFmode);
16847 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
16849 real_ldexp (&TWO32r, &dconst1, 32);
16850 x = const_double_from_real_value (TWO32r, DFmode);
16851 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
16853 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
16855 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
16858 emit_move_insn (target, x);
16861 /* Convert an unsigned SImode value into a SFmode, using only SSE.
16862 For x86_32, -mfpmath=sse, !optimize_size only. */
16864 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
16866 REAL_VALUE_TYPE ONE16r;
16867 rtx fp_hi, fp_lo, int_hi, int_lo, x;
16869 real_ldexp (&ONE16r, &dconst1, 16);
16870 x = const_double_from_real_value (ONE16r, SFmode);
16871 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
16872 NULL, 0, OPTAB_DIRECT);
16873 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
16874 NULL, 0, OPTAB_DIRECT);
16875 fp_hi = gen_reg_rtx (SFmode);
16876 fp_lo = gen_reg_rtx (SFmode);
16877 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
16878 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
16879 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
16881 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
16883 if (!rtx_equal_p (target, fp_hi))
16884 emit_move_insn (target, fp_hi);
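/* In effect this computes (illustrative, up to SFmode rounding of
   the final multiply and add):

     result = (float) (input >> 16) * 0x1.0p16f
	      + (float) (input & 0xffff)

   where both halves are at most 0xffff and therefore convert to
   SFmode exactly; e.g. 0xdeadbeef splits into 57005 and 48879,
   giving 57005.0f * 65536.0f + 48879.0f.  */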
/* A subroutine of ix86_build_signbit_mask.  If VECT is true,
   then replicate the value for all elements of the vector
   register.  */
16899 v = gen_rtvec (4, value, value, value, value);
16900 return gen_rtx_CONST_VECTOR (V4SImode, v);
16904 v = gen_rtvec (2, value, value);
16905 return gen_rtx_CONST_VECTOR (V2DImode, v);
16909 v = gen_rtvec (8, value, value, value, value,
16910 value, value, value, value);
16912 v = gen_rtvec (8, value, CONST0_RTX (SFmode),
16913 CONST0_RTX (SFmode), CONST0_RTX (SFmode),
16914 CONST0_RTX (SFmode), CONST0_RTX (SFmode),
16915 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
16916 return gen_rtx_CONST_VECTOR (V8SFmode, v);
16920 v = gen_rtvec (4, value, value, value, value);
16922 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
16923 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
16924 return gen_rtx_CONST_VECTOR (V4SFmode, v);
16928 v = gen_rtvec (4, value, value, value, value);
16930 v = gen_rtvec (4, value, CONST0_RTX (DFmode),
16931 CONST0_RTX (DFmode), CONST0_RTX (DFmode));
16932 return gen_rtx_CONST_VECTOR (V4DFmode, v);
16936 v = gen_rtvec (2, value, value);
16938 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
16939 return gen_rtx_CONST_VECTOR (V2DFmode, v);
16942 gcc_unreachable ();
16946 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
16947 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
16948 for an SSE register. If VECT is true, then replicate the mask for
16949 all elements of the vector register. If INVERT is true, then create
16950 a mask excluding the sign bit. */
16953 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
16955 enum machine_mode vec_mode, imode;
16956 HOST_WIDE_INT hi, lo;
16961 /* Find the sign bit, sign extended to 2*HWI. */
16968 mode = GET_MODE_INNER (mode);
16970 lo = 0x80000000, hi = lo < 0;
16977 mode = GET_MODE_INNER (mode);
16979 if (HOST_BITS_PER_WIDE_INT >= 64)
16980 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
16982 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
16987 vec_mode = VOIDmode;
16988 if (HOST_BITS_PER_WIDE_INT >= 64)
16991 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
16998 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
17002 lo = ~lo, hi = ~hi;
17008 mask = immed_double_const (lo, hi, imode);
17010 vec = gen_rtvec (2, v, mask);
17011 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
17012 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
17019 gcc_unreachable ();
17023 lo = ~lo, hi = ~hi;
17025 /* Force this value into the low part of a fp vector constant. */
17026 mask = immed_double_const (lo, hi, imode);
17027 mask = gen_lowpart (mode, mask);
17029 if (vec_mode == VOIDmode)
17030 return force_reg (mode, mask);
17032 v = ix86_build_const_vector (vec_mode, vect, mask);
17033 return force_reg (vec_mode, v);
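/* Examples of the masks produced (illustrative): for DFmode the sign
   bit constant is 0x8000000000000000 (0x7fffffffffffffff when INVERT),
   and for SFmode 0x80000000 resp. 0x7fffffff; with VECT the constant
   is replicated into every element, otherwise only the low element
   is nonzero.  */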
17036 /* Generate code for floating point ABS or NEG. */
17039 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
17042 rtx mask, set, dst, src;
17043 bool use_sse = false;
17044 bool vector_mode = VECTOR_MODE_P (mode);
17045 enum machine_mode vmode = mode;
17049 else if (mode == TFmode)
17051 else if (TARGET_SSE_MATH)
17053 use_sse = SSE_FLOAT_MODE_P (mode);
17054 if (mode == SFmode)
17056 else if (mode == DFmode)
17060 /* NEG and ABS performed with SSE use bitwise mask operations.
17061 Create the appropriate mask now. */
17063 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
17070 set = gen_rtx_fmt_e (code, mode, src);
17071 set = gen_rtx_SET (VOIDmode, dst, set);
17078 use = gen_rtx_USE (VOIDmode, mask);
17080 par = gen_rtvec (2, set, use);
17083 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17084 par = gen_rtvec (3, set, use, clob);
17086 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
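/* The masks make the operations pure bit manipulation (illustrative
   SFmode/SSE forms; the label names are invented): NEG becomes an XOR
   with the 0x80000000 lanes and ABS an AND with the inverted
   0x7fffffff mask, e.g.

	xorps	.LC_signbit, %xmm0	# NEG
	andps	.LC_absmask, %xmm0	# ABS

   The actual logic insn is chosen later, when the pattern using MASK
   is split.  */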
17092 /* Expand a copysign operation. Special case operand 0 being a constant. */
17095 ix86_expand_copysign (rtx operands[])
17097 enum machine_mode mode, vmode;
17098 rtx dest, op0, op1, mask, nmask;
17100 dest = operands[0];
17104 mode = GET_MODE (dest);
17106 if (mode == SFmode)
17108 else if (mode == DFmode)
17113 if (GET_CODE (op0) == CONST_DOUBLE)
17115 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
17117 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
17118 op0 = simplify_unary_operation (ABS, mode, op0, mode);
17120 if (mode == SFmode || mode == DFmode)
17122 if (op0 == CONST0_RTX (mode))
17123 op0 = CONST0_RTX (vmode);
17126 rtx v = ix86_build_const_vector (vmode, false, op0);
17128 op0 = force_reg (vmode, v);
17131 else if (op0 != CONST0_RTX (mode))
17132 op0 = force_reg (mode, op0);
17134 mask = ix86_build_signbit_mask (vmode, 0, 0);
17136 if (mode == SFmode)
17137 copysign_insn = gen_copysignsf3_const;
17138 else if (mode == DFmode)
17139 copysign_insn = gen_copysigndf3_const;
17141 copysign_insn = gen_copysigntf3_const;
17143 emit_insn (copysign_insn (dest, op0, op1, mask));
17147 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
17149 nmask = ix86_build_signbit_mask (vmode, 0, 1);
17150 mask = ix86_build_signbit_mask (vmode, 0, 0);
17152 if (mode == SFmode)
17153 copysign_insn = gen_copysignsf3_var;
17154 else if (mode == DFmode)
17155 copysign_insn = gen_copysigndf3_var;
17157 copysign_insn = gen_copysigntf3_var;
17159 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
17163 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
17164 be a constant, and so has already been expanded into a vector constant. */
17167 ix86_split_copysign_const (rtx operands[])
17169 enum machine_mode mode, vmode;
17170 rtx dest, op0, mask, x;
17172 dest = operands[0];
17174 mask = operands[3];
17176 mode = GET_MODE (dest);
17177 vmode = GET_MODE (mask);
17179 dest = simplify_gen_subreg (vmode, dest, mode, 0);
17180 x = gen_rtx_AND (vmode, dest, mask);
17181 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17183 if (op0 != CONST0_RTX (vmode))
17185 x = gen_rtx_IOR (vmode, dest, op0);
17186 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17190 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
17191 so we have to do two masks. */
17194 ix86_split_copysign_var (rtx operands[])
17196 enum machine_mode mode, vmode;
17197 rtx dest, scratch, op0, op1, mask, nmask, x;
17199 dest = operands[0];
17200 scratch = operands[1];
17203 nmask = operands[4];
17204 mask = operands[5];
17206 mode = GET_MODE (dest);
17207 vmode = GET_MODE (mask);
17209 if (rtx_equal_p (op0, op1))
17211 /* Shouldn't happen often (it's useless, obviously), but when it does
17212 we'd generate incorrect code if we continue below. */
17213 emit_move_insn (dest, op0);
17217 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
17219 gcc_assert (REGNO (op1) == REGNO (scratch));
17221 x = gen_rtx_AND (vmode, scratch, mask);
17222 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
17225 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
17226 x = gen_rtx_NOT (vmode, dest);
17227 x = gen_rtx_AND (vmode, x, op0);
17228 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17232 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
17234 x = gen_rtx_AND (vmode, scratch, mask);
17236 else /* alternative 2,4 */
17238 gcc_assert (REGNO (mask) == REGNO (scratch));
17239 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
17240 x = gen_rtx_AND (vmode, scratch, op1);
17242 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
17244 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
17246 dest = simplify_gen_subreg (vmode, op0, mode, 0);
17247 x = gen_rtx_AND (vmode, dest, nmask);
17249 else /* alternative 3,4 */
17251 gcc_assert (REGNO (nmask) == REGNO (dest));
17253 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
17254 x = gen_rtx_AND (vmode, dest, op0);
17256 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17259 x = gen_rtx_IOR (vmode, dest, scratch);
17260 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* Return TRUE or FALSE depending on whether the first SET in INSN
   has source and destination with matching CC modes and whether the
   CC mode is at least as constrained as REQ_MODE.  */
17268 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
17271 enum machine_mode set_mode;
17273 set = PATTERN (insn);
17274 if (GET_CODE (set) == PARALLEL)
17275 set = XVECEXP (set, 0, 0);
17276 gcc_assert (GET_CODE (set) == SET);
17277 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
17279 set_mode = GET_MODE (SET_DEST (set));
17283 if (req_mode != CCNOmode
17284 && (req_mode != CCmode
17285 || XEXP (SET_SRC (set), 1) != const0_rtx))
17289 if (req_mode == CCGCmode)
17293 if (req_mode == CCGOCmode || req_mode == CCNOmode)
17297 if (req_mode == CCZmode)
17307 if (set_mode != req_mode)
17312 gcc_unreachable ();
17315 return GET_MODE (SET_SRC (set)) == set_mode;
17318 /* Generate insn patterns to do an integer compare of OPERANDS. */
17321 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
17323 enum machine_mode cmpmode;
17326 cmpmode = SELECT_CC_MODE (code, op0, op1);
17327 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
17329 /* This is very simple, but making the interface the same as in the
17330 FP case makes the rest of the code easier. */
17331 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
17332 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
17334 /* Return the test that should be put into the flags user, i.e.
17335 the bcc, scc, or cmov instruction. */
17336 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
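/* For example (a sketch of the RTL): for (GT, a, b) this emits

     (set (reg:CC flags) (compare:CC a b))

   and hands back (gt (reg:CC flags) (const_int 0)) for the branch,
   set or cmov expander to consume.  */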
17339 /* Figure out whether to use ordered or unordered fp comparisons.
17340 Return the appropriate mode to use. */
17343 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
/* ??? In order to make all comparisons reversible, we do all
   comparisons non-trapping when compiling for IEEE.  Once gcc is
   able to distinguish all forms of trapping and nontrapping
   comparisons, we can make inequality comparisons trapping again,
   since it results in better code when using FCOM based compares.  */
17350 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
17354 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
17356 enum machine_mode mode = GET_MODE (op0);
17358 if (SCALAR_FLOAT_MODE_P (mode))
17360 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
17361 return ix86_fp_compare_mode (code);
17366 /* Only zero flag is needed. */
17367 case EQ: /* ZF=0 */
17368 case NE: /* ZF!=0 */
17370 /* Codes needing carry flag. */
17371 case GEU: /* CF=0 */
17372 case LTU: /* CF=1 */
17373 /* Detect overflow checks. They need just the carry flag. */
17374 if (GET_CODE (op0) == PLUS
17375 && rtx_equal_p (op1, XEXP (op0, 0)))
17379 case GTU: /* CF=0 & ZF=0 */
17380 case LEU: /* CF=1 | ZF=1 */
17381 /* Detect overflow checks. They need just the carry flag. */
17382 if (GET_CODE (op0) == MINUS
17383 && rtx_equal_p (op1, XEXP (op0, 0)))
17387 /* Codes possibly doable only with sign flag when
17388 comparing against zero. */
17389 case GE: /* SF=OF or SF=0 */
17390 case LT: /* SF<>OF or SF=1 */
17391 if (op1 == const0_rtx)
/* For the other cases the carry flag is not required.  */
/* Codes doable only with the sign flag when comparing
   against zero, but for which we lack a jump instruction,
   so we must use relational tests against overflow, which
   therefore needs to be zero.  */
17400 case GT: /* ZF=0 & SF=OF */
17401 case LE: /* ZF=1 | SF<>OF */
17402 if (op1 == const0_rtx)
/* The strcmp pattern does (use flags), and combine may ask us for
   a proper mode.  */
17411 gcc_unreachable ();
17415 /* Return the fixed registers used for condition codes. */
17418 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
/* If two condition code modes are compatible, return a condition
   code mode which is compatible with both.  Otherwise, return
   VOIDmode.  */
17429 static enum machine_mode
17430 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
17435 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
17438 if ((m1 == CCGCmode && m2 == CCGOCmode)
17439 || (m1 == CCGOCmode && m2 == CCGCmode))
17445 gcc_unreachable ();
/* These are only compatible with themselves, which we already
   know.  */
/* Return a comparison we can do that is equivalent to
   swap_condition (code), except possibly for orderedness.
   Never change orderedness if TARGET_IEEE_FP, returning
   UNKNOWN in that case if necessary.  */
17487 static enum rtx_code
17488 ix86_fp_swap_condition (enum rtx_code code)
17492 case GT: /* GTU - CF=0 & ZF=0 */
17493 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
17494 case GE: /* GEU - CF=0 */
17495 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
17496 case UNLT: /* LTU - CF=1 */
17497 return TARGET_IEEE_FP ? UNKNOWN : GT;
17498 case UNLE: /* LEU - CF=1 | ZF=1 */
17499 return TARGET_IEEE_FP ? UNKNOWN : GE;
17501 return swap_condition (code);
/* Return the cost of comparison CODE using the best strategy for
   performance.  All of the following functions use the number of
   instructions as a cost metric.  In the future this should be
   tweaked to compute bytes for optimize_size and to take into
   account the performance of various instructions on various
   CPUs.  */
17511 ix86_fp_comparison_cost (enum rtx_code code)
17515 /* The cost of code using bit-twiddling on %ah. */
17532 arith_cost = TARGET_IEEE_FP ? 5 : 4;
17536 arith_cost = TARGET_IEEE_FP ? 6 : 4;
17539 gcc_unreachable ();
17542 switch (ix86_fp_comparison_strategy (code))
17544 case IX86_FPCMP_COMI:
17545 return arith_cost > 4 ? 3 : 2;
17546 case IX86_FPCMP_SAHF:
17547 return arith_cost > 4 ? 4 : 3;
/* Return the strategy to use for a floating-point comparison.  We
   assume that fcomi is always preferable where available, since that
   is also true when looking at size (2 bytes, vs. 3 for fnstsw+sahf
   and at least 5 for fnstsw+test).  */
17557 enum ix86_fpcmp_strategy
17558 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
17560 /* Do fcomi/sahf based test when profitable. */
17563 return IX86_FPCMP_COMI;
17565 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_function_for_size_p (cfun)))
17566 return IX86_FPCMP_SAHF;
17568 return IX86_FPCMP_ARITH;
17571 /* Swap, force into registers, or otherwise massage the two operands
17572 to a fp comparison. The operands are updated in place; the new
17573 comparison code is returned. */
17575 static enum rtx_code
17576 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
17578 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
17579 rtx op0 = *pop0, op1 = *pop1;
17580 enum machine_mode op_mode = GET_MODE (op0);
17581 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
17583 /* All of the unordered compare instructions only work on registers.
17584 The same is true of the fcomi compare instructions. The XFmode
17585 compare instructions require registers except when comparing
17586 against zero or when converting operand 1 from fixed point to
17587 floating point. */
17590 && (fpcmp_mode == CCFPUmode
17591 || (op_mode == XFmode
17592 && ! (standard_80387_constant_p (op0) == 1
17593 || standard_80387_constant_p (op1) == 1)
17594 && GET_CODE (op1) != FLOAT)
17595 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
17597 op0 = force_reg (op_mode, op0);
17598 op1 = force_reg (op_mode, op1);
17602 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
17603 things around if they appear profitable, otherwise force op0
17604 into a register. */
17606 if (standard_80387_constant_p (op0) == 0
17608 && ! (standard_80387_constant_p (op1) == 0
17611 enum rtx_code new_code = ix86_fp_swap_condition (code);
17612 if (new_code != UNKNOWN)
17615 tmp = op0, op0 = op1, op1 = tmp;
17621 op0 = force_reg (op_mode, op0);
17623 if (CONSTANT_P (op1))
17625 int tmp = standard_80387_constant_p (op1);
17627 op1 = validize_mem (force_const_mem (op_mode, op1));
17631 op1 = force_reg (op_mode, op1);
17634 op1 = force_reg (op_mode, op1);
17638 /* Try to rearrange the comparison to make it cheaper. */
17639 if (ix86_fp_comparison_cost (code)
17640 > ix86_fp_comparison_cost (swap_condition (code))
17641 && (REG_P (op1) || can_create_pseudo_p ()))
17644 tmp = op0, op0 = op1, op1 = tmp;
17645 code = swap_condition (code);
17647 op0 = force_reg (op_mode, op0);
17655 /* Convert comparison codes we use to represent FP comparison to integer
17656 code that will result in a proper branch. Return UNKNOWN if no such code
17657 is available. */
17660 ix86_fp_compare_code_to_integer (enum rtx_code code)
17689 /* Generate insn patterns to do a floating point compare of OPERANDS. */
17692 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
17694 enum machine_mode fpcmp_mode, intcmp_mode;
17697 fpcmp_mode = ix86_fp_compare_mode (code);
17698 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
17700 /* Do fcomi/sahf based test when profitable. */
17701 switch (ix86_fp_comparison_strategy (code))
17703 case IX86_FPCMP_COMI:
17704 intcmp_mode = fpcmp_mode;
17705 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
17706 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
17711 case IX86_FPCMP_SAHF:
17712 intcmp_mode = fpcmp_mode;
17713 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
17714 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
17718 scratch = gen_reg_rtx (HImode);
17719 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
17720 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
17723 case IX86_FPCMP_ARITH:
17724 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
17725 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
17726 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
17728 scratch = gen_reg_rtx (HImode);
17729 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
17731 /* In the unordered case, we have to check C2 for NaNs, which
17732 doesn't happen to work out to anything nice combination-wise.
17733 So do some bit twiddling on the value we've got in AH to come
17734 up with an appropriate set of condition codes. */
17736 intcmp_mode = CCNOmode;
17741 if (code == GT || !TARGET_IEEE_FP)
17743 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
17748 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17749 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
17750 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
17751 intcmp_mode = CCmode;
17757 if (code == LT && TARGET_IEEE_FP)
17759 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17760 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
17761 intcmp_mode = CCmode;
17766 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
17772 if (code == GE || !TARGET_IEEE_FP)
17774 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
17779 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17780 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
17786 if (code == LE && TARGET_IEEE_FP)
17788 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17789 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
17790 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
17791 intcmp_mode = CCmode;
17796 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
17802 if (code == EQ && TARGET_IEEE_FP)
17804 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17805 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
17806 intcmp_mode = CCmode;
17811 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
17817 if (code == NE && TARGET_IEEE_FP)
17819 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17820 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
17826 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
17832 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
17836 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
17841 gcc_unreachable ();
17849 /* Return the test that should be put into the flags user, i.e.
17850 the bcc, scc, or cmov instruction. */
17851 return gen_rtx_fmt_ee (code, VOIDmode,
17852 gen_rtx_REG (intcmp_mode, FLAGS_REG),
17857 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
17861 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
17862 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
17864 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
17866 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
17867 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
17870 ret = ix86_expand_int_compare (code, op0, op1);
17876 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
17878 enum machine_mode mode = GET_MODE (op0);
17890 tmp = ix86_expand_compare (code, op0, op1);
17891 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
17892 gen_rtx_LABEL_REF (VOIDmode, label),
17894 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
17901 /* Expand a double-word (DImode or TImode) branch into multiple compare+branch. */
17903 rtx lo[2], hi[2], label2;
17904 enum rtx_code code1, code2, code3;
17905 enum machine_mode submode;
17907 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
17909 tmp = op0, op0 = op1, op1 = tmp;
17910 code = swap_condition (code);
17913 split_double_mode (mode, &op0, 1, lo+0, hi+0);
17914 split_double_mode (mode, &op1, 1, lo+1, hi+1);
17916 submode = mode == DImode ? SImode : DImode;
17918 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
17919 avoid two branches. This costs one extra insn, so disable when
17920 optimizing for size. */
17922 if ((code == EQ || code == NE)
17923 && (!optimize_insn_for_size_p ()
17924 || hi[1] == const0_rtx || lo[1] == const0_rtx))
17929 if (hi[1] != const0_rtx)
17930 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
17931 NULL_RTX, 0, OPTAB_WIDEN);
17934 if (lo[1] != const0_rtx)
17935 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
17936 NULL_RTX, 0, OPTAB_WIDEN);
17938 tmp = expand_binop (submode, ior_optab, xor1, xor0,
17939 NULL_RTX, 0, OPTAB_WIDEN);
17941 ix86_expand_branch (code, tmp, const0_rtx, label);
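/* E.g. for DImode (a == b) on a 32-bit target this emits, roughly:
   xor of the high words, xor of the low words, an or of the two
   results, and a single conditional jump on the zero flag.  */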
17945 /* Otherwise, if we are doing less-than or greater-or-equal-than,
17946 op1 is a constant, and the low word is zero, then we can just
17947 examine the high word. Similarly for low word -1 and
17948 less-or-equal-than or greater-than. */
17950 if (CONST_INT_P (hi[1]))
17953 case LT: case LTU: case GE: case GEU:
17954 if (lo[1] == const0_rtx)
17956 ix86_expand_branch (code, hi[0], hi[1], label);
17960 case LE: case LEU: case GT: case GTU:
17961 if (lo[1] == constm1_rtx)
17963 ix86_expand_branch (code, hi[0], hi[1], label);
17971 /* Otherwise, we need two or three jumps. */
17973 label2 = gen_label_rtx ();
17976 code2 = swap_condition (code);
17977 code3 = unsigned_condition (code);
17981 case LT: case GT: case LTU: case GTU:
17984 case LE: code1 = LT; code2 = GT; break;
17985 case GE: code1 = GT; code2 = LT; break;
17986 case LEU: code1 = LTU; code2 = GTU; break;
17987 case GEU: code1 = GTU; code2 = LTU; break;
17989 case EQ: code1 = UNKNOWN; code2 = NE; break;
17990 case NE: code2 = UNKNOWN; break;
17993 gcc_unreachable ();
17998 * if (hi(a) < hi(b)) goto true;
17999 * if (hi(a) > hi(b)) goto false;
18000 * if (lo(a) < lo(b)) goto true;
18004 if (code1 != UNKNOWN)
18005 ix86_expand_branch (code1, hi[0], hi[1], label);
18006 if (code2 != UNKNOWN)
18007 ix86_expand_branch (code2, hi[0], hi[1], label2);
18009 ix86_expand_branch (code3, lo[0], lo[1], label);
18011 if (code2 != UNKNOWN)
18012 emit_label (label2);
18017 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
18022 /* Split branch based on floating point condition. */
18024 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
18025 rtx target1, rtx target2, rtx tmp, rtx pushed)
18030 if (target2 != pc_rtx)
18033 code = reverse_condition_maybe_unordered (code);
18038 condition = ix86_expand_fp_compare (code, op1, op2,
18041 /* Remove pushed operand from stack. */
18043 ix86_free_from_memory (GET_MODE (pushed));
18045 i = emit_jump_insn (gen_rtx_SET
18047 gen_rtx_IF_THEN_ELSE (VOIDmode,
18048 condition, target1, target2)));
18049 if (split_branch_probability >= 0)
18050 add_reg_note (i, REG_BR_PROB, GEN_INT (split_branch_probability));
18054 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
18058 gcc_assert (GET_MODE (dest) == QImode);
18060 ret = ix86_expand_compare (code, op0, op1);
18061 PUT_MODE (ret, QImode);
18062 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
18065 /* Expand a comparison setting or clearing the carry flag. Return true when
18066 successful, and set *POP to the comparison for the operation. */
18068 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
18070 enum machine_mode mode =
18071 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
18073 /* Do not handle double-mode compares, which go through a special path. */
18074 if (mode == (TARGET_64BIT ? TImode : DImode))
18077 if (SCALAR_FLOAT_MODE_P (mode))
18079 rtx compare_op, compare_seq;
18081 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
18083 /* Shortcut: the following common codes never translate
18084 into carry-flag compares. */
18085 if (code == EQ || code == NE || code == UNEQ || code == LTGT
18086 || code == ORDERED || code == UNORDERED)
18089 /* These comparisons require the zero flag; swap operands so they won't. */
18090 if ((code == GT || code == UNLE || code == LE || code == UNGT)
18091 && !TARGET_IEEE_FP)
18096 code = swap_condition (code);
18099 /* Try to expand the comparison and verify that we end up with
18100 a carry-flag-based comparison. This fails to be true only when
18101 we decide to expand the comparison using arithmetic, which is
18102 not a common scenario. */
18104 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
18105 compare_seq = get_insns ();
18108 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
18109 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
18110 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
18112 code = GET_CODE (compare_op);
18114 if (code != LTU && code != GEU)
18117 emit_insn (compare_seq);
18122 if (!INTEGRAL_MODE_P (mode))
18131 /* Convert a==0 into (unsigned)a<1. */
18134 if (op1 != const0_rtx)
18137 code = (code == EQ ? LTU : GEU);
18140 /* Convert a>b into b<a or a>=b+1. */
18143 if (CONST_INT_P (op1))
18145 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
18146 /* Bail out on overflow. We could still swap the operands, but that
18147 would force loading of the constant into a register. */
18148 if (op1 == const0_rtx
18149 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
18151 code = (code == GTU ? GEU : LTU);
18158 code = (code == GTU ? LTU : GEU);
18162 /* Convert a>=0 into (unsigned)a<0x80000000. */
18165 if (mode == DImode || op1 != const0_rtx)
18167 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
18168 code = (code == LT ? GEU : LTU);
18172 if (mode == DImode || op1 != constm1_rtx)
18174 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
18175 code = (code == LE ? GEU : LTU);
18181 /* Swapping operands may cause a constant to appear as the first operand. */
18182 if (!nonimmediate_operand (op0, VOIDmode))
18184 if (!can_create_pseudo_p ())
18186 op0 = force_reg (mode, op0);
18188 *pop = ix86_expand_compare (code, op0, op1);
18189 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
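/* E.g. "a == 0" was rewritten above into "(unsigned) a < 1", which
   the hardware answers with the carry flag alone.  */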
18194 ix86_expand_int_movcc (rtx operands[])
18196 enum rtx_code code = GET_CODE (operands[1]), compare_code;
18197 rtx compare_seq, compare_op;
18198 enum machine_mode mode = GET_MODE (operands[0]);
18199 bool sign_bit_compare_p = false;
18200 rtx op0 = XEXP (operands[1], 0);
18201 rtx op1 = XEXP (operands[1], 1);
18204 compare_op = ix86_expand_compare (code, op0, op1);
18205 compare_seq = get_insns ();
18208 compare_code = GET_CODE (compare_op);
18210 if ((op1 == const0_rtx && (code == GE || code == LT))
18211 || (op1 == constm1_rtx && (code == GT || code == LE)))
18212 sign_bit_compare_p = true;
18214 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
18215 HImode insns, we'd be swallowed in word prefix ops. */
18217 if ((mode != HImode || TARGET_FAST_PREFIX)
18218 && (mode != (TARGET_64BIT ? TImode : DImode))
18219 && CONST_INT_P (operands[2])
18220 && CONST_INT_P (operands[3]))
18222 rtx out = operands[0];
18223 HOST_WIDE_INT ct = INTVAL (operands[2]);
18224 HOST_WIDE_INT cf = INTVAL (operands[3]);
18225 HOST_WIDE_INT diff;
18228 /* Sign bit compares are better done using shifts than by
18229 using sbb. */
18230 if (sign_bit_compare_p
18231 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
18233 /* Detect overlap between destination and compare sources. */
18236 if (!sign_bit_compare_p)
18239 bool fpcmp = false;
18241 compare_code = GET_CODE (compare_op);
18243 flags = XEXP (compare_op, 0);
18245 if (GET_MODE (flags) == CCFPmode
18246 || GET_MODE (flags) == CCFPUmode)
18250 = ix86_fp_compare_code_to_integer (compare_code);
18253 /* To simplify the rest of the code, restrict to the GEU case. */
18254 if (compare_code == LTU)
18256 HOST_WIDE_INT tmp = ct;
18259 compare_code = reverse_condition (compare_code);
18260 code = reverse_condition (code);
18265 PUT_CODE (compare_op,
18266 reverse_condition_maybe_unordered
18267 (GET_CODE (compare_op)));
18269 PUT_CODE (compare_op,
18270 reverse_condition (GET_CODE (compare_op)));
18274 if (reg_overlap_mentioned_p (out, op0)
18275 || reg_overlap_mentioned_p (out, op1))
18276 tmp = gen_reg_rtx (mode);
18278 if (mode == DImode)
18279 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
18281 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
18282 flags, compare_op));
18286 if (code == GT || code == GE)
18287 code = reverse_condition (code);
18290 HOST_WIDE_INT tmp = ct;
18295 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
18308 tmp = expand_simple_binop (mode, PLUS,
18310 copy_rtx (tmp), 1, OPTAB_DIRECT);
18321 tmp = expand_simple_binop (mode, IOR,
18323 copy_rtx (tmp), 1, OPTAB_DIRECT);
18325 else if (diff == -1 && ct)
18335 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
18337 tmp = expand_simple_binop (mode, PLUS,
18338 copy_rtx (tmp), GEN_INT (cf),
18339 copy_rtx (tmp), 1, OPTAB_DIRECT);
18347 * andl cf - ct, dest
18357 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
18360 tmp = expand_simple_binop (mode, AND,
18362 gen_int_mode (cf - ct, mode),
18363 copy_rtx (tmp), 1, OPTAB_DIRECT);
18365 tmp = expand_simple_binop (mode, PLUS,
18366 copy_rtx (tmp), GEN_INT (ct),
18367 copy_rtx (tmp), 1, OPTAB_DIRECT);
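/* With the mask in {0, -1}, this computes (mask & (cf - ct)) + ct:
   ct when the mask is 0 and cf when the mask is -1, branch-free.  */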
18370 if (!rtx_equal_p (tmp, out))
18371 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
18378 enum machine_mode cmp_mode = GET_MODE (op0);
18381 tmp = ct, ct = cf, cf = tmp;
18384 if (SCALAR_FLOAT_MODE_P (cmp_mode))
18386 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
18388 /* We may be reversing an unordered compare to a normal compare, which
18389 is not valid in general (we may convert a non-trapping condition
18390 to a trapping one), however on i386 we currently emit all
18391 comparisons unordered. */
18392 compare_code = reverse_condition_maybe_unordered (compare_code);
18393 code = reverse_condition_maybe_unordered (code);
18397 compare_code = reverse_condition (compare_code);
18398 code = reverse_condition (code);
18402 compare_code = UNKNOWN;
18403 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
18404 && CONST_INT_P (op1))
18406 if (op1 == const0_rtx
18407 && (code == LT || code == GE))
18408 compare_code = code;
18409 else if (op1 == constm1_rtx)
18413 else if (code == GT)
18418 /* Optimize dest = (op0 < 0) ? -1 : cf. */
18419 if (compare_code != UNKNOWN
18420 && GET_MODE (op0) == GET_MODE (out)
18421 && (cf == -1 || ct == -1))
18423 /* If the lea code below could be used, only optimize
18424 if it results in a 2-insn sequence. */
18426 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
18427 || diff == 3 || diff == 5 || diff == 9)
18428 || (compare_code == LT && ct == -1)
18429 || (compare_code == GE && cf == -1))
18432 * notl op1 (if necessary)
18440 code = reverse_condition (code);
18443 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
18445 out = expand_simple_binop (mode, IOR,
18447 out, 1, OPTAB_DIRECT);
18448 if (out != operands[0])
18449 emit_move_insn (operands[0], out);
18456 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
18457 || diff == 3 || diff == 5 || diff == 9)
18458 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
18460 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
18466 * lea cf(dest*(ct-cf)),dest
18470 * This also catches the degenerate setcc-only case.
18476 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
18479 /* On x86_64 the lea instruction operates on Pmode, so we need
18480 to get the arithmetic done in the proper mode to match. */
18482 tmp = copy_rtx (out);
18486 out1 = copy_rtx (out);
18487 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
18491 tmp = gen_rtx_PLUS (mode, tmp, out1);
18497 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
18500 if (!rtx_equal_p (tmp, out))
18503 out = force_operand (tmp, copy_rtx (out));
18505 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
18507 if (!rtx_equal_p (out, operands[0]))
18508 emit_move_insn (operands[0], copy_rtx (out));
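/* E.g. with diff == 5 and cf == 2, the 0/1 setcc result x feeds a
   single lea computing x*4 + x + 2, i.e. cf when the condition is
   false and ct (= cf + diff = 7) when it is true, with no branch.  */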
18514 * General case: Jumpful:
18515 * xorl dest,dest cmpl op1, op2
18516 * cmpl op1, op2 movl ct, dest
18517 * setcc dest jcc 1f
18518 * decl dest movl cf, dest
18519 * andl (cf-ct),dest 1:
18522 * Size 20. Size 14.
18524 * This is reasonably steep, but branch mispredict costs are
18525 * high on modern cpus, so consider failing only if optimizing
18526 * for size.
18527 */
18529 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
18530 && BRANCH_COST (optimize_insn_for_speed_p (),
18535 enum machine_mode cmp_mode = GET_MODE (op0);
18540 if (SCALAR_FLOAT_MODE_P (cmp_mode))
18542 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
18544 /* We may be reversing an unordered compare to a normal compare,
18545 which is not valid in general (we may convert a non-trapping
18546 condition to a trapping one), however on i386 we currently
18547 emit all comparisons unordered. */
18548 code = reverse_condition_maybe_unordered (code);
18552 code = reverse_condition (code);
18553 if (compare_code != UNKNOWN)
18554 compare_code = reverse_condition (compare_code);
18558 if (compare_code != UNKNOWN)
18560 /* notl op1 (if needed)
18565 For x < 0 (resp. x <= -1) there will be no notl,
18566 so if possible swap the constants to get rid of the
18567 complement.
18568 True/false will be -1/0 while the code below (store flag
18569 followed by decrement) is 0/-1, so the constants need
18570 to be exchanged once more. */
18572 if (compare_code == GE || !cf)
18574 code = reverse_condition (code);
18579 HOST_WIDE_INT tmp = cf;
18584 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
18588 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
18590 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
18592 copy_rtx (out), 1, OPTAB_DIRECT);
18595 out = expand_simple_binop (mode, AND, copy_rtx (out),
18596 gen_int_mode (cf - ct, mode),
18597 copy_rtx (out), 1, OPTAB_DIRECT);
18599 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
18600 copy_rtx (out), 1, OPTAB_DIRECT);
18601 if (!rtx_equal_p (out, operands[0]))
18602 emit_move_insn (operands[0], copy_rtx (out));
18608 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
18610 /* Try a few things more with specific constants and a variable. */
18613 rtx var, orig_out, out, tmp;
18615 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
18618 /* If one of the two operands is an interesting constant, load a
18619 constant with the above and mask it in with a logical operation. */
18621 if (CONST_INT_P (operands[2]))
18624 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
18625 operands[3] = constm1_rtx, op = and_optab;
18626 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
18627 operands[3] = const0_rtx, op = ior_optab;
18631 else if (CONST_INT_P (operands[3]))
18634 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
18635 operands[2] = constm1_rtx, op = and_optab;
18636 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
18637 operands[2] = const0_rtx, op = ior_optab;
18644 orig_out = operands[0];
18645 tmp = gen_reg_rtx (mode);
18648 /* Recurse to get the constant loaded. */
18649 if (ix86_expand_int_movcc (operands) == 0)
18652 /* Mask in the interesting variable. */
18653 out = expand_binop (mode, op, var, tmp, orig_out, 0,
18655 if (!rtx_equal_p (out, orig_out))
18656 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
18662 * For comparison with above,
18672 if (! nonimmediate_operand (operands[2], mode))
18673 operands[2] = force_reg (mode, operands[2]);
18674 if (! nonimmediate_operand (operands[3], mode))
18675 operands[3] = force_reg (mode, operands[3]);
18677 if (! register_operand (operands[2], VOIDmode)
18679 || ! register_operand (operands[3], VOIDmode)))
18680 operands[2] = force_reg (mode, operands[2]);
18683 && ! register_operand (operands[3], VOIDmode))
18684 operands[3] = force_reg (mode, operands[3]);
18686 emit_insn (compare_seq);
18687 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
18688 gen_rtx_IF_THEN_ELSE (mode,
18689 compare_op, operands[2],
18694 /* Swap, force into registers, or otherwise massage the two operands
18695 to an sse comparison with a mask result. Thus we differ a bit from
18696 ix86_prepare_fp_compare_args which expects to produce a flags result.
18698 The DEST operand exists to help determine whether to commute commutative
18699 operators. The POP0/POP1 operands are updated in place. The new
18700 comparison code is returned, or UNKNOWN if not implementable. */
18702 static enum rtx_code
18703 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
18704 rtx *pop0, rtx *pop1)
18712 /* We have no LTGT as an operator. We could implement it with
18713 NE & ORDERED, but this requires an extra temporary. It's
18714 not clear that it's worth it. */
18721 /* These are supported directly. */
18728 /* For commutative operators, try to canonicalize the destination
18729 operand to be first in the comparison - this helps reload to
18730 avoid extra moves. */
18731 if (!dest || !rtx_equal_p (dest, *pop1))
18739 /* These are not supported directly. Swap the comparison operands
18740 to transform into something that is supported. */
18744 code = swap_condition (code);
18748 gcc_unreachable ();
18754 /* Detect conditional moves that exactly match min/max operational
18755 semantics. Note that this is IEEE safe, as long as we don't
18756 interchange the operands.
18758 Returns FALSE if this conditional move doesn't match a MIN/MAX,
18759 and TRUE if the operation is successful and instructions are emitted. */
18762 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
18763 rtx cmp_op1, rtx if_true, rtx if_false)
18765 enum machine_mode mode;
18771 else if (code == UNGE)
18774 if_true = if_false;
18780 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
18782 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
18787 mode = GET_MODE (dest);
18789 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
18790 but MODE may be a vector mode and thus not appropriate. */
18791 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
18793 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
18796 if_true = force_reg (mode, if_true);
18797 v = gen_rtvec (2, if_true, if_false);
18798 tmp = gen_rtx_UNSPEC (mode, v, u);
18802 code = is_min ? SMIN : SMAX;
18803 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
18806 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
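/* Note that the SSE min/max instructions return their second
   operand when the operands compare unordered, which is why the
   operand order established above must not be disturbed under IEEE
   rules.  */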
18810 /* Expand an sse vector comparison. Return the register with the result. */
18813 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
18814 rtx op_true, rtx op_false)
18816 enum machine_mode mode = GET_MODE (dest);
18819 cmp_op0 = force_reg (mode, cmp_op0);
18820 if (!nonimmediate_operand (cmp_op1, mode))
18821 cmp_op1 = force_reg (mode, cmp_op1);
18824 || reg_overlap_mentioned_p (dest, op_true)
18825 || reg_overlap_mentioned_p (dest, op_false))
18826 dest = gen_reg_rtx (mode);
18828 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
18829 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18834 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
18835 operations. This is used for both scalar and vector conditional moves. */
18838 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
18840 enum machine_mode mode = GET_MODE (dest);
18843 if (op_false == CONST0_RTX (mode))
18845 op_true = force_reg (mode, op_true);
18846 x = gen_rtx_AND (mode, cmp, op_true);
18847 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18849 else if (op_true == CONST0_RTX (mode))
18851 op_false = force_reg (mode, op_false);
18852 x = gen_rtx_NOT (mode, cmp);
18853 x = gen_rtx_AND (mode, x, op_false);
18854 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18856 else if (TARGET_XOP)
18858 rtx pcmov = gen_rtx_SET (mode, dest,
18859 gen_rtx_IF_THEN_ELSE (mode, cmp,
18866 op_true = force_reg (mode, op_true);
18867 op_false = force_reg (mode, op_false);
18869 t2 = gen_reg_rtx (mode);
18871 t3 = gen_reg_rtx (mode);
18875 x = gen_rtx_AND (mode, op_true, cmp);
18876 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
18878 x = gen_rtx_NOT (mode, cmp);
18879 x = gen_rtx_AND (mode, x, op_false);
18880 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
18882 x = gen_rtx_IOR (mode, t3, t2);
18883 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
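/* The general case is the classic branch-free select:
   dest = (cmp & op_true) | (~cmp & op_false), with CMP an
   all-ones / all-zeros mask per element.  */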
18887 /* Expand a floating-point conditional move. Return true if successful. */
18890 ix86_expand_fp_movcc (rtx operands[])
18892 enum machine_mode mode = GET_MODE (operands[0]);
18893 enum rtx_code code = GET_CODE (operands[1]);
18894 rtx tmp, compare_op;
18895 rtx op0 = XEXP (operands[1], 0);
18896 rtx op1 = XEXP (operands[1], 1);
18898 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
18900 enum machine_mode cmode;
18902 /* Since we have no cmove for sse registers, don't force bad register
18903 allocation just to gain access to it. Deny movcc when the
18904 comparison mode doesn't match the move mode. */
18905 cmode = GET_MODE (op0);
18906 if (cmode == VOIDmode)
18907 cmode = GET_MODE (op1);
18911 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
18912 if (code == UNKNOWN)
18915 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
18916 operands[2], operands[3]))
18919 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
18920 operands[2], operands[3]);
18921 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
18925 /* The floating point conditional move instructions don't directly
18926 support conditions resulting from a signed integer comparison. */
18928 compare_op = ix86_expand_compare (code, op0, op1);
18929 if (!fcmov_comparison_operator (compare_op, VOIDmode))
18931 tmp = gen_reg_rtx (QImode);
18932 ix86_expand_setcc (tmp, code, op0, op1);
18934 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
18937 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
18938 gen_rtx_IF_THEN_ELSE (mode, compare_op,
18939 operands[2], operands[3])));
18944 /* Expand a floating-point vector conditional move; a vcond operation
18945 rather than a movcc operation. */
18948 ix86_expand_fp_vcond (rtx operands[])
18950 enum rtx_code code = GET_CODE (operands[3]);
18953 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
18954 &operands[4], &operands[5]);
18955 if (code == UNKNOWN)
18958 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
18959 operands[5], operands[1], operands[2]))
18962 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
18963 operands[1], operands[2]);
18964 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
18968 /* Expand a signed/unsigned integral vector conditional move. */
18971 ix86_expand_int_vcond (rtx operands[])
18973 enum machine_mode mode = GET_MODE (operands[0]);
18974 enum rtx_code code = GET_CODE (operands[3]);
18975 bool negate = false;
18978 cop0 = operands[4];
18979 cop1 = operands[5];
18981 /* XOP supports all of the comparisons on all vector int types. */
18984 /* Canonicalize the comparison to EQ, GT, GTU. */
18995 code = reverse_condition (code);
19001 code = reverse_condition (code);
19007 code = swap_condition (code);
19008 x = cop0, cop0 = cop1, cop1 = x;
19012 gcc_unreachable ();
19015 /* Only SSE4.1/SSE4.2 supports V2DImode. */
19016 if (mode == V2DImode)
19021 /* SSE4.1 supports EQ. */
19022 if (!TARGET_SSE4_1)
19028 /* SSE4.2 supports GT/GTU. */
19029 if (!TARGET_SSE4_2)
19034 gcc_unreachable ();
19038 /* Unsigned parallel compare is not supported by the hardware.
19039 Play some tricks to turn this into a signed comparison
19040 against 0. */
19043 cop0 = force_reg (mode, cop0);
19051 rtx (*gen_sub3) (rtx, rtx, rtx);
19053 /* Subtract (-(INT MAX) - 1) from both operands to make
19054 them signed. */
19055 mask = ix86_build_signbit_mask (mode, true, false);
19056 gen_sub3 = (mode == V4SImode
19057 ? gen_subv4si3 : gen_subv2di3);
19058 t1 = gen_reg_rtx (mode);
19059 emit_insn (gen_sub3 (t1, cop0, mask));
19061 t2 = gen_reg_rtx (mode);
19062 emit_insn (gen_sub3 (t2, cop1, mask));
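/* Subtracting the sign-bit constant maps unsigned order onto signed
   order: per V4SImode element, 0xffffffff becomes 0x7fffffff
   (INT_MAX) and 0 becomes 0x80000000 (INT_MIN), so GTU on the
   originals equals GT on the biased values.  */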
19072 /* Perform a parallel unsigned saturating subtraction. */
19073 x = gen_reg_rtx (mode);
19074 emit_insn (gen_rtx_SET (VOIDmode, x,
19075 gen_rtx_US_MINUS (mode, cop0, cop1)));
19078 cop1 = CONST0_RTX (mode);
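/* us_minus (a, b) is nonzero exactly when a > b unsigned, so the
   GTU test reduces to an (inverted) equality test against zero.  */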
19084 gcc_unreachable ();
19089 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
19090 operands[1+negate], operands[2-negate]);
19092 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
19093 operands[2-negate]);
19097 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
19098 true if we should do zero extension, else sign extension. HIGH_P is
19099 true if we want the N/2 high elements, else the low elements. */
19102 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
19104 enum machine_mode imode = GET_MODE (operands[1]);
19109 rtx (*unpack)(rtx, rtx);
19115 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
19117 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
19121 unpack = gen_sse4_1_zero_extendv4hiv4si2;
19123 unpack = gen_sse4_1_sign_extendv4hiv4si2;
19127 unpack = gen_sse4_1_zero_extendv2siv2di2;
19129 unpack = gen_sse4_1_sign_extendv2siv2di2;
19132 gcc_unreachable ();
19137 /* Shift higher 8 bytes to lower 8 bytes. */
19138 tmp = gen_reg_rtx (imode);
19139 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, tmp),
19140 gen_lowpart (V1TImode, operands[1]),
19146 emit_insn (unpack (operands[0], tmp));
19150 rtx (*unpack)(rtx, rtx, rtx);
19156 unpack = gen_vec_interleave_highv16qi;
19158 unpack = gen_vec_interleave_lowv16qi;
19162 unpack = gen_vec_interleave_highv8hi;
19164 unpack = gen_vec_interleave_lowv8hi;
19168 unpack = gen_vec_interleave_highv4si;
19170 unpack = gen_vec_interleave_lowv4si;
19173 gcc_unreachable ();
19176 dest = gen_lowpart (imode, operands[0]);
19179 tmp = force_reg (imode, CONST0_RTX (imode));
19181 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
19182 operands[1], pc_rtx, pc_rtx);
19184 emit_insn (unpack (dest, operands[1], tmp));
19188 /* Expand conditional increment or decrement using adc/sbb instructions.
19189 The default case using setcc followed by the conditional move can be
19190 done by generic code. */
19192 ix86_expand_int_addcc (rtx operands[])
19194 enum rtx_code code = GET_CODE (operands[1]);
19196 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
19198 rtx val = const0_rtx;
19199 bool fpcmp = false;
19200 enum machine_mode mode;
19201 rtx op0 = XEXP (operands[1], 0);
19202 rtx op1 = XEXP (operands[1], 1);
19204 if (operands[3] != const1_rtx
19205 && operands[3] != constm1_rtx)
19207 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
19209 code = GET_CODE (compare_op);
19211 flags = XEXP (compare_op, 0);
19213 if (GET_MODE (flags) == CCFPmode
19214 || GET_MODE (flags) == CCFPUmode)
19217 code = ix86_fp_compare_code_to_integer (code);
19224 PUT_CODE (compare_op,
19225 reverse_condition_maybe_unordered
19226 (GET_CODE (compare_op)));
19228 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
19231 mode = GET_MODE (operands[0]);
19233 /* Construct either adc or sbb insn. */
19234 if ((code == LTU) == (operands[3] == constm1_rtx))
19239 insn = gen_subqi3_carry;
19242 insn = gen_subhi3_carry;
19245 insn = gen_subsi3_carry;
19248 insn = gen_subdi3_carry;
19251 gcc_unreachable ();
19259 insn = gen_addqi3_carry;
19262 insn = gen_addhi3_carry;
19265 insn = gen_addsi3_carry;
19268 insn = gen_adddi3_carry;
19271 gcc_unreachable ();
19274 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
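/* E.g. the unsigned "x = x + (a < b)" becomes roughly
   "cmp a, b; adc $0, x": the carry flag carries the comparison
   result straight into the addition.  */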
19280 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
19281 but works for floating point parameters and nonoffsettable memories.
19282 For pushes, it returns just stack offsets; the values will be saved
19283 in the right order. Maximally four parts are generated. */
19286 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
19291 size = mode == XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
19293 size = (GET_MODE_SIZE (mode) + 4) / 8;
19295 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
19296 gcc_assert (size >= 2 && size <= 4);
19298 /* Optimize constant pool references to immediates. This is used by fp
19299 moves, which force all constants to memory to allow combining. */
19300 if (MEM_P (operand) && MEM_READONLY_P (operand))
19302 rtx tmp = maybe_get_pool_constant (operand);
19307 if (MEM_P (operand) && !offsettable_memref_p (operand))
19309 /* The only non-offsettable memories we handle are pushes. */
19310 int ok = push_operand (operand, VOIDmode);
19314 operand = copy_rtx (operand);
19315 PUT_MODE (operand, Pmode);
19316 parts[0] = parts[1] = parts[2] = parts[3] = operand;
19320 if (GET_CODE (operand) == CONST_VECTOR)
19322 enum machine_mode imode = int_mode_for_mode (mode);
19323 /* Caution: if we looked through a constant pool memory above,
19324 the operand may actually have a different mode now. That's
19325 ok, since we want to pun this all the way back to an integer. */
19326 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
19327 gcc_assert (operand != NULL);
19333 if (mode == DImode)
19334 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
19339 if (REG_P (operand))
19341 gcc_assert (reload_completed);
19342 for (i = 0; i < size; i++)
19343 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
19345 else if (offsettable_memref_p (operand))
19347 operand = adjust_address (operand, SImode, 0);
19348 parts[0] = operand;
19349 for (i = 1; i < size; i++)
19350 parts[i] = adjust_address (operand, SImode, 4 * i);
19352 else if (GET_CODE (operand) == CONST_DOUBLE)
19357 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
19361 real_to_target (l, &r, mode);
19362 parts[3] = gen_int_mode (l[3], SImode);
19363 parts[2] = gen_int_mode (l[2], SImode);
19366 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
19367 parts[2] = gen_int_mode (l[2], SImode);
19370 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
19373 gcc_unreachable ();
19375 parts[1] = gen_int_mode (l[1], SImode);
19376 parts[0] = gen_int_mode (l[0], SImode);
19379 gcc_unreachable ();
19384 if (mode == TImode)
19385 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
19386 if (mode == XFmode || mode == TFmode)
19388 enum machine_mode upper_mode = mode == XFmode ? SImode : DImode;
19389 if (REG_P (operand))
19391 gcc_assert (reload_completed);
19392 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
19393 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
19395 else if (offsettable_memref_p (operand))
19397 operand = adjust_address (operand, DImode, 0);
19398 parts[0] = operand;
19399 parts[1] = adjust_address (operand, upper_mode, 8);
19401 else if (GET_CODE (operand) == CONST_DOUBLE)
19406 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
19407 real_to_target (l, &r, mode);
19409 /* Do not use a shift by 32, to avoid a warning on 32-bit systems. */
19410 if (HOST_BITS_PER_WIDE_INT >= 64)
19413 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
19414 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
19417 parts[0] = immed_double_const (l[0], l[1], DImode);
19419 if (upper_mode == SImode)
19420 parts[1] = gen_int_mode (l[2], SImode);
19421 else if (HOST_BITS_PER_WIDE_INT >= 64)
19424 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
19425 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
19428 parts[1] = immed_double_const (l[2], l[3], DImode);
19431 gcc_unreachable ();
19438 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
19439 Return false when normal moves are needed; true when all required
19440 insns have been emitted. Operands 2-5 contain the destination parts
19441 in the correct order; operands 6-9 contain the source parts. */
19444 ix86_split_long_move (rtx operands[])
19449 int collisions = 0;
19450 enum machine_mode mode = GET_MODE (operands[0]);
19451 bool collisionparts[4];
19453 /* The DFmode expanders may ask us to move a double.
19454 For a 64-bit target this is a single move. By hiding the fact
19455 here we simplify the i386.md splitters. */
19456 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
19458 /* Optimize constant pool references to immediates. This is used by
19459 fp moves, which force all constants to memory to allow combining. */
19461 if (MEM_P (operands[1])
19462 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
19463 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
19464 operands[1] = get_pool_constant (XEXP (operands[1], 0));
19465 if (push_operand (operands[0], VOIDmode))
19467 operands[0] = copy_rtx (operands[0]);
19468 PUT_MODE (operands[0], Pmode);
19471 operands[0] = gen_lowpart (DImode, operands[0]);
19472 operands[1] = gen_lowpart (DImode, operands[1]);
19473 emit_move_insn (operands[0], operands[1]);
19477 /* The only non-offsettable memory we handle is a push. */
19478 if (push_operand (operands[0], VOIDmode))
19481 gcc_assert (!MEM_P (operands[0])
19482 || offsettable_memref_p (operands[0]));
19484 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
19485 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
19487 /* When emitting a push, take care of source operands on the stack. */
19488 if (push && MEM_P (operands[1])
19489 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
19491 rtx src_base = XEXP (part[1][nparts - 1], 0);
19493 /* Compensate for the stack decrement by 4. */
19494 if (!TARGET_64BIT && nparts == 3
19495 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
19496 src_base = plus_constant (src_base, 4);
19498 /* src_base refers to the stack pointer and is
19499 automatically decremented by each emitted push. */
19500 for (i = 0; i < nparts; i++)
19501 part[1][i] = change_address (part[1][i],
19502 GET_MODE (part[1][i]), src_base);
19505 /* We need to do the copy in the right order in case an address register
19506 of the source overlaps the destination. */
19507 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
19511 for (i = 0; i < nparts; i++)
19514 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
19515 if (collisionparts[i])
19519 /* Collision in the middle part can be handled by reordering. */
19520 if (collisions == 1 && nparts == 3 && collisionparts [1])
19522 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
19523 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
19525 else if (collisions == 1
19527 && (collisionparts [1] || collisionparts [2]))
19529 if (collisionparts [1])
19531 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
19532 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
19536 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
19537 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
19541 /* If there are more collisions, we can't handle them by reordering.
19542 Do an lea to the last part and use only one colliding move. */
19543 else if (collisions > 1)
19549 base = part[0][nparts - 1];
19551 /* Handle the case when the last part isn't valid for lea.
19552 This happens in 64-bit mode when storing the 12-byte XFmode. */
19553 if (GET_MODE (base) != Pmode)
19554 base = gen_rtx_REG (Pmode, REGNO (base));
19556 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
19557 part[1][0] = replace_equiv_address (part[1][0], base);
19558 for (i = 1; i < nparts; i++)
19560 tmp = plus_constant (base, UNITS_PER_WORD * i);
19561 part[1][i] = replace_equiv_address (part[1][i], tmp);
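/* I.e. the source address is computed once into the last
   destination part (which is not written until the final move) and
   all source words are then addressed relative to that base.  */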
19572 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
19573 emit_insn (gen_addsi3 (stack_pointer_rtx,
19574 stack_pointer_rtx, GEN_INT (-4)));
19575 emit_move_insn (part[0][2], part[1][2]);
19577 else if (nparts == 4)
19579 emit_move_insn (part[0][3], part[1][3]);
19580 emit_move_insn (part[0][2], part[1][2]);
19585 /* In 64-bit mode we don't have a 32-bit push available. In case this is a
19586 register, it is OK - we will just use the larger counterpart. We also
19587 retype memory - this comes from an attempt to avoid a REX prefix on
19588 moving the second half of a TFmode value. */
19589 if (GET_MODE (part[1][1]) == SImode)
19591 switch (GET_CODE (part[1][1]))
19594 part[1][1] = adjust_address (part[1][1], DImode, 0);
19598 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
19602 gcc_unreachable ();
19605 if (GET_MODE (part[1][0]) == SImode)
19606 part[1][0] = part[1][1];
19609 emit_move_insn (part[0][1], part[1][1]);
19610 emit_move_insn (part[0][0], part[1][0]);
19614 /* Choose the correct order so as not to overwrite the source before it is copied. */
19615 if ((REG_P (part[0][0])
19616 && REG_P (part[1][1])
19617 && (REGNO (part[0][0]) == REGNO (part[1][1])
19619 && REGNO (part[0][0]) == REGNO (part[1][2]))
19621 && REGNO (part[0][0]) == REGNO (part[1][3]))))
19623 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
19625 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
19627 operands[2 + i] = part[0][j];
19628 operands[6 + i] = part[1][j];
19633 for (i = 0; i < nparts; i++)
19635 operands[2 + i] = part[0][i];
19636 operands[6 + i] = part[1][i];
19640 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
19641 if (optimize_insn_for_size_p ())
19643 for (j = 0; j < nparts - 1; j++)
19644 if (CONST_INT_P (operands[6 + j])
19645 && operands[6 + j] != const0_rtx
19646 && REG_P (operands[2 + j]))
19647 for (i = j; i < nparts - 1; i++)
19648 if (CONST_INT_P (operands[7 + i])
19649 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
19650 operands[7 + i] = operands[2 + j];
19653 for (i = 0; i < nparts; i++)
19654 emit_move_insn (operands[2 + i], operands[6 + i]);
19659 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
19660 left shift by a constant, either using a single shift or
19661 a sequence of add instructions. */
19664 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
19666 rtx (*insn)(rtx, rtx, rtx);
19669 || (count * ix86_cost->add <= ix86_cost->shift_const
19670 && !optimize_insn_for_size_p ()))
19672 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
19673 while (count-- > 0)
19674 emit_insn (insn (operand, operand, operand));
19678 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
19679 emit_insn (insn (operand, operand, GEN_INT (count)));
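/* E.g. when two adds are cheaper than one shift-by-constant, a left
   shift by 2 is emitted as "add reg, reg" twice, since adding a
   register to itself doubles it.  */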
19684 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
19686 rtx (*gen_ashl3)(rtx, rtx, rtx);
19687 rtx (*gen_shld)(rtx, rtx, rtx);
19688 int half_width = GET_MODE_BITSIZE (mode) >> 1;
19690 rtx low[2], high[2];
19693 if (CONST_INT_P (operands[2]))
19695 split_double_mode (mode, operands, 2, low, high);
19696 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
19698 if (count >= half_width)
19700 emit_move_insn (high[0], low[1]);
19701 emit_move_insn (low[0], const0_rtx);
19703 if (count > half_width)
19704 ix86_expand_ashl_const (high[0], count - half_width, mode);
19708 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
19710 if (!rtx_equal_p (operands[0], operands[1]))
19711 emit_move_insn (operands[0], operands[1]);
19713 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
19714 ix86_expand_ashl_const (low[0], count, mode);
19719 split_double_mode (mode, operands, 1, low, high);
19721 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
19723 if (operands[1] == const1_rtx)
19725 /* Assuming we've chosen QImode-capable registers, 1 << N
19726 can be done with two 32/64-bit shifts, no branches, no cmoves. */
19727 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
19729 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
19731 ix86_expand_clear (low[0]);
19732 ix86_expand_clear (high[0]);
19733 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
19735 d = gen_lowpart (QImode, low[0]);
19736 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
19737 s = gen_rtx_EQ (QImode, flags, const0_rtx);
19738 emit_insn (gen_rtx_SET (VOIDmode, d, s));
19740 d = gen_lowpart (QImode, high[0]);
19741 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
19742 s = gen_rtx_NE (QImode, flags, const0_rtx);
19743 emit_insn (gen_rtx_SET (VOIDmode, d, s));
19746 /* Otherwise, we can get the same results by manually performing
19747 a bit extract operation on bit 5/6, and then performing the two
19748 shifts. The two methods of getting 0/1 into low/high are exactly
19749 the same size. Avoiding the shift in the bit extract case helps
19750 pentium4 a bit; no one else seems to care much either way. */
19753 enum machine_mode half_mode;
19754 rtx (*gen_lshr3)(rtx, rtx, rtx);
19755 rtx (*gen_and3)(rtx, rtx, rtx);
19756 rtx (*gen_xor3)(rtx, rtx, rtx);
19757 HOST_WIDE_INT bits;
19760 if (mode == DImode)
19762 half_mode = SImode;
19763 gen_lshr3 = gen_lshrsi3;
19764 gen_and3 = gen_andsi3;
19765 gen_xor3 = gen_xorsi3;
19770 half_mode = DImode;
19771 gen_lshr3 = gen_lshrdi3;
19772 gen_and3 = gen_anddi3;
19773 gen_xor3 = gen_xordi3;
19777 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
19778 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
19780 x = gen_lowpart (half_mode, operands[2]);
19781 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
19783 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
19784 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
19785 emit_move_insn (low[0], high[0]);
19786 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
19789 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
19790 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
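/* Either way, 1 << N is formed by placing the 1 in the half selected
   by bit 5 of N (bit 6 for 64-bit halves) and then shifting both
   halves by N; the hardware masks the shift count to the half width,
   so the occupied half ends up holding 1 << (N % half_width).  */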
19794 if (operands[1] == constm1_rtx)
19796 /* For -1 << N, we can avoid the shld instruction, because we
19797 know that we're shifting 0...31/63 ones into a -1. */
19798 emit_move_insn (low[0], constm1_rtx);
19799 if (optimize_insn_for_size_p ())
19800 emit_move_insn (high[0], low[0]);
19802 emit_move_insn (high[0], constm1_rtx);
19806 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
19808 if (!rtx_equal_p (operands[0], operands[1]))
19809 emit_move_insn (operands[0], operands[1]);
19811 split_double_mode (mode, operands, 1, low, high);
19812 emit_insn (gen_shld (high[0], low[0], operands[2]));
19815 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
19817 if (TARGET_CMOVE && scratch)
19819 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
19820 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
19822 ix86_expand_clear (scratch);
19823 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
19827 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
19828 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
19830 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
19835 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
19837 rtx (*gen_ashr3)(rtx, rtx, rtx)
19838 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
19839 rtx (*gen_shrd)(rtx, rtx, rtx);
19840 int half_width = GET_MODE_BITSIZE (mode) >> 1;
19842 rtx low[2], high[2];
19845 if (CONST_INT_P (operands[2]))
19847 split_double_mode (mode, operands, 2, low, high);
19848 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
19850 if (count == GET_MODE_BITSIZE (mode) - 1)
19852 emit_move_insn (high[0], high[1]);
19853 emit_insn (gen_ashr3 (high[0], high[0],
19854 GEN_INT (half_width - 1)));
19855 emit_move_insn (low[0], high[0]);
19858 else if (count >= half_width)
19860 emit_move_insn (low[0], high[1]);
19861 emit_move_insn (high[0], low[0]);
19862 emit_insn (gen_ashr3 (high[0], high[0],
19863 GEN_INT (half_width - 1)));
19865 if (count > half_width)
19866 emit_insn (gen_ashr3 (low[0], low[0],
19867 GEN_INT (count - half_width)));
19871 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19873 if (!rtx_equal_p (operands[0], operands[1]))
19874 emit_move_insn (operands[0], operands[1]);
19876 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
19877 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
19882 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19884 if (!rtx_equal_p (operands[0], operands[1]))
19885 emit_move_insn (operands[0], operands[1]);
19887 split_double_mode (mode, operands, 1, low, high);
19889 emit_insn (gen_shrd (low[0], high[0], operands[2]));
19890 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
19892 if (TARGET_CMOVE && scratch)
19894 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
19895 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
19897 emit_move_insn (scratch, high[0]);
19898 emit_insn (gen_ashr3 (scratch, scratch,
19899 GEN_INT (half_width - 1)));
19900 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
19905 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
19906 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
19908 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
19914 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
19916 rtx (*gen_lshr3)(rtx, rtx, rtx)
19917 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
19918 rtx (*gen_shrd)(rtx, rtx, rtx);
19919 int half_width = GET_MODE_BITSIZE (mode) >> 1;
19921 rtx low[2], high[2];
19924 if (CONST_INT_P (operands[2]))
19926 split_double_mode (mode, operands, 2, low, high);
19927 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
19929 if (count >= half_width)
19931 emit_move_insn (low[0], high[1]);
19932 ix86_expand_clear (high[0]);
19934 if (count > half_width)
19935 emit_insn (gen_lshr3 (low[0], low[0],
19936 GEN_INT (count - half_width)));
19940 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19942 if (!rtx_equal_p (operands[0], operands[1]))
19943 emit_move_insn (operands[0], operands[1]);
19945 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
19946 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
19951 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19953 if (!rtx_equal_p (operands[0], operands[1]))
19954 emit_move_insn (operands[0], operands[1]);
19956 split_double_mode (mode, operands, 1, low, high);
19958 emit_insn (gen_shrd (low[0], high[0], operands[2]));
19959 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
19961 if (TARGET_CMOVE && scratch)
19963 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
19964 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
19966 ix86_expand_clear (scratch);
19967 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
19972 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
19973 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
19975 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
19980 /* Predict the just-emitted jump instruction to be taken with probability PROB. */
19982 predict_jump (int prob)
19984 rtx insn = get_last_insn ();
19985 gcc_assert (JUMP_P (insn));
19986 add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
19989 /* Helper function for the string operations below. Test VARIABLE for
19990 whether it is aligned to VALUE bytes. If so, jump to the label. */
19992 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
19994 rtx label = gen_label_rtx ();
19995 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
19996 if (GET_MODE (variable) == DImode)
19997 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
19999 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
20000 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
20003 predict_jump (REG_BR_PROB_BASE * 50 / 100);
20005 predict_jump (REG_BR_PROB_BASE * 90 / 100);
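/* I.e. this emits "tmp = VARIABLE & VALUE; if (tmp == 0) goto
   label" -- a single-bit alignment test, with the branch
   probability chosen according to EPILOGUE.  */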
20009 /* Adjust COUNTREG by VALUE. */
20011 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
20013 rtx (*gen_add)(rtx, rtx, rtx)
20014 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
20016 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
20019 /* Zero-extend the possibly-SImode EXP to a Pmode register. */
20021 ix86_zero_extend_to_Pmode (rtx exp)
20024 if (GET_MODE (exp) == VOIDmode)
20025 return force_reg (Pmode, exp);
20026 if (GET_MODE (exp) == Pmode)
20027 return copy_to_mode_reg (Pmode, exp);
20028 r = gen_reg_rtx (Pmode);
20029 emit_insn (gen_zero_extendsidi2 (r, exp));
20033 /* Divide COUNTREG by SCALE. */
20035 scale_counter (rtx countreg, int scale)
20041 if (CONST_INT_P (countreg))
20042 return GEN_INT (INTVAL (countreg) / scale);
20043 gcc_assert (REG_P (countreg));
20045 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
20046 GEN_INT (exact_log2 (scale)),
20047 NULL, 1, OPTAB_DIRECT);
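/* E.g. counting in DImode chunks divides the byte count by 8:
   a constant count is folded directly, a register count is
   shifted right by 3.  */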
20051 /* Return the mode for the memcpy/memset loop counter. Prefer SImode over
20052 DImode for constant loop counts. */
20054 static enum machine_mode
20055 counter_mode (rtx count_exp)
20057 if (GET_MODE (count_exp) != VOIDmode)
20058 return GET_MODE (count_exp);
20059 if (!CONST_INT_P (count_exp))
20061 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
20066 /* When SRCPTR is non-NULL, output a simple loop that moves memory
20067 pointed to by SRCPTR to DESTPTR in chunks of MODE, unrolled UNROLL times;
20068 the overall size is COUNT, specified in bytes. When SRCPTR is NULL, output the
20069 equivalent loop to set memory to VALUE (supposed to be in MODE).
20071 The size is rounded down to a whole number of chunks moved at once.
20072 SRCMEM and DESTMEM provide MEM rtxes to feed proper aliasing info. */
20076 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
20077 rtx destptr, rtx srcptr, rtx value,
20078 rtx count, enum machine_mode mode, int unroll,
20081 rtx out_label, top_label, iter, tmp;
20082 enum machine_mode iter_mode = counter_mode (count);
20083 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
20084 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
20090 top_label = gen_label_rtx ();
20091 out_label = gen_label_rtx ();
20092 iter = gen_reg_rtx (iter_mode);
20094 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
20095 NULL, 1, OPTAB_DIRECT);
20096 /* Those two should combine. */
20097 if (piece_size == const1_rtx)
20099 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
20101 predict_jump (REG_BR_PROB_BASE * 10 / 100);
20103 emit_move_insn (iter, const0_rtx);
20105 emit_label (top_label);
20107 tmp = convert_modes (Pmode, iter_mode, iter, true);
20108 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
20109 destmem = change_address (destmem, mode, x_addr);
20113 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
20114 srcmem = change_address (srcmem, mode, y_addr);
      /* When unrolling for chips that reorder memory reads and writes,
	 we can save registers by using a single temporary.  Using four
	 temporaries is also overkill in 32-bit mode.  */
      if (!TARGET_64BIT && 0)
20121 for (i = 0; i < unroll; i++)
20126 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
20128 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
20130 emit_move_insn (destmem, srcmem);
20136 gcc_assert (unroll <= 4);
20137 for (i = 0; i < unroll; i++)
20139 tmpreg[i] = gen_reg_rtx (mode);
20143 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
20145 emit_move_insn (tmpreg[i], srcmem);
20147 for (i = 0; i < unroll; i++)
20152 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
20154 emit_move_insn (destmem, tmpreg[i]);
20159 for (i = 0; i < unroll; i++)
20163 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
20164 emit_move_insn (destmem, value);
20167 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
20168 true, OPTAB_LIB_WIDEN);
20170 emit_move_insn (iter, tmp);
20172 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
20174 if (expected_size != -1)
20176 expected_size /= GET_MODE_SIZE (mode) * unroll;
20177 if (expected_size == 0)
20179 else if (expected_size > REG_BR_PROB_BASE)
20180 predict_jump (REG_BR_PROB_BASE - 1);
20182 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
20185 predict_jump (REG_BR_PROB_BASE * 80 / 100);
20186 iter = ix86_zero_extend_to_Pmode (iter);
20187 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
20188 true, OPTAB_LIB_WIDEN);
20189 if (tmp != destptr)
20190 emit_move_insn (destptr, tmp);
20193 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
20194 true, OPTAB_LIB_WIDEN);
20196 emit_move_insn (srcptr, tmp);
20198 emit_label (out_label);
/* Output "rep; mov" instruction.
   Arguments have the same meaning as for the previous function.  */
static void
expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
			   rtx destptr, rtx srcptr, rtx count,
			   enum machine_mode mode)
{
  rtx destexp, srcexp, countreg;
20213 /* If the size is known, it is shorter to use rep movs. */
20214 if (mode == QImode && CONST_INT_P (count)
20215 && !(INTVAL (count) & 3))
20218 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
20219 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
20220 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
20221 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
20222 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
20223 if (mode != QImode)
20225 destexp = gen_rtx_ASHIFT (Pmode, countreg,
20226 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
20227 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
20228 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
20229 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
20230 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
20234 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
20235 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
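      /* In either case DESTEXP and SRCEXP evaluate to the first address past
	 the block (e.g. for SImode chunks COUNTREG holds COUNT >> 2 and
	 DESTEXP is DESTPTR + (COUNTREG << 2)); the rep_mov pattern uses them
	 to describe the final pointer values left by "rep; movs".  */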
20237 if (CONST_INT_P (count))
20239 count = GEN_INT (INTVAL (count)
20240 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
20241 destmem = shallow_copy_rtx (destmem);
20242 srcmem = shallow_copy_rtx (srcmem);
20243 set_mem_size (destmem, count);
20244 set_mem_size (srcmem, count);
20248 if (MEM_SIZE (destmem))
20249 set_mem_size (destmem, NULL_RTX);
20250 if (MEM_SIZE (srcmem))
20251 set_mem_size (srcmem, NULL_RTX);
20253 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
/* Output "rep; stos" instruction.
   Arguments have the same meaning as for the previous function.  */
static void
expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
			    rtx count, enum machine_mode mode,
			    rtx orig_value)
{
  rtx destexp, countreg;
20267 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
20268 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
20269 value = force_reg (mode, gen_lowpart (mode, value));
20270 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
20271 if (mode != QImode)
20273 destexp = gen_rtx_ASHIFT (Pmode, countreg,
20274 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
20275 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
20278 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
20279 if (orig_value == const0_rtx && CONST_INT_P (count))
20281 count = GEN_INT (INTVAL (count)
20282 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
20283 destmem = shallow_copy_rtx (destmem);
20284 set_mem_size (destmem, count);
20286 else if (MEM_SIZE (destmem))
20287 set_mem_size (destmem, NULL_RTX);
20288 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
static void
emit_strmov (rtx destmem, rtx srcmem,
	     rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
{
20295 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
20296 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
20297 emit_insn (gen_strmov (destptr, dest, srcptr, src));
/* Output code to copy at most count & (max_size - 1) bytes from SRC to
   DEST.  */
static void
expand_movmem_epilogue (rtx destmem, rtx srcmem,
			rtx destptr, rtx srcptr, rtx count, int max_size)
{
20306 if (CONST_INT_P (count))
20308 HOST_WIDE_INT countval = INTVAL (count);
20311 if ((countval & 0x10) && max_size > 16)
20315 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
20316 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
20319 gcc_unreachable ();
20322 if ((countval & 0x08) && max_size > 8)
20325 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
20328 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
20329 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
20333 if ((countval & 0x04) && max_size > 4)
20335 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
20338 if ((countval & 0x02) && max_size > 2)
20340 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
20343 if ((countval & 0x01) && max_size > 1)
20345 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
20352 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
20353 count, 1, OPTAB_DIRECT);
20354 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
20355 count, QImode, 1, 4);
  /* When there are stringops, we can cheaply increase dest and src pointers.
     Otherwise we save code size by maintaining offset (zero is readily
     available from preceding rep operation) and using x86 addressing modes.  */
  if (TARGET_SINGLE_STRINGOP)
20367 rtx label = ix86_expand_aligntest (count, 4, true);
20368 src = change_address (srcmem, SImode, srcptr);
20369 dest = change_address (destmem, SImode, destptr);
20370 emit_insn (gen_strmov (destptr, dest, srcptr, src));
20371 emit_label (label);
20372 LABEL_NUSES (label) = 1;
20376 rtx label = ix86_expand_aligntest (count, 2, true);
20377 src = change_address (srcmem, HImode, srcptr);
20378 dest = change_address (destmem, HImode, destptr);
20379 emit_insn (gen_strmov (destptr, dest, srcptr, src));
20380 emit_label (label);
20381 LABEL_NUSES (label) = 1;
20385 rtx label = ix86_expand_aligntest (count, 1, true);
20386 src = change_address (srcmem, QImode, srcptr);
20387 dest = change_address (destmem, QImode, destptr);
20388 emit_insn (gen_strmov (destptr, dest, srcptr, src));
20389 emit_label (label);
20390 LABEL_NUSES (label) = 1;
20395 rtx offset = force_reg (Pmode, const0_rtx);
20400 rtx label = ix86_expand_aligntest (count, 4, true);
20401 src = change_address (srcmem, SImode, srcptr);
20402 dest = change_address (destmem, SImode, destptr);
20403 emit_move_insn (dest, src);
20404 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
20405 true, OPTAB_LIB_WIDEN);
20407 emit_move_insn (offset, tmp);
20408 emit_label (label);
20409 LABEL_NUSES (label) = 1;
20413 rtx label = ix86_expand_aligntest (count, 2, true);
20414 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
20415 src = change_address (srcmem, HImode, tmp);
20416 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
20417 dest = change_address (destmem, HImode, tmp);
20418 emit_move_insn (dest, src);
20419 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
20420 true, OPTAB_LIB_WIDEN);
20422 emit_move_insn (offset, tmp);
20423 emit_label (label);
20424 LABEL_NUSES (label) = 1;
20428 rtx label = ix86_expand_aligntest (count, 1, true);
20429 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
20430 src = change_address (srcmem, QImode, tmp);
20431 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
20432 dest = change_address (destmem, QImode, tmp);
20433 emit_move_insn (dest, src);
20434 emit_label (label);
20435 LABEL_NUSES (label) = 1;
/* Output code to set at most count & (max_size - 1) bytes starting at
   DEST.  */
static void
expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
				 rtx count, int max_size)
{
  count =
20446 expand_simple_binop (counter_mode (count), AND, count,
20447 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
20448 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
20449 gen_lowpart (QImode, value), count, QImode,
/* Output code to set at most count & (max_size - 1) bytes starting at
   DEST.  */
static void
expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
{
20459 if (CONST_INT_P (count))
20461 HOST_WIDE_INT countval = INTVAL (count);
20464 if ((countval & 0x10) && max_size > 16)
20468 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
20469 emit_insn (gen_strset (destptr, dest, value));
20470 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
20471 emit_insn (gen_strset (destptr, dest, value));
20474 gcc_unreachable ();
20477 if ((countval & 0x08) && max_size > 8)
20481 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
20482 emit_insn (gen_strset (destptr, dest, value));
20486 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
20487 emit_insn (gen_strset (destptr, dest, value));
20488 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
20489 emit_insn (gen_strset (destptr, dest, value));
20493 if ((countval & 0x04) && max_size > 4)
20495 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
20496 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
20499 if ((countval & 0x02) && max_size > 2)
20501 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
20502 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
20505 if ((countval & 0x01) && max_size > 1)
20507 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
20508 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
20515 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
20520 rtx label = ix86_expand_aligntest (count, 16, true);
20523 dest = change_address (destmem, DImode, destptr);
20524 emit_insn (gen_strset (destptr, dest, value));
20525 emit_insn (gen_strset (destptr, dest, value));
20529 dest = change_address (destmem, SImode, destptr);
20530 emit_insn (gen_strset (destptr, dest, value));
20531 emit_insn (gen_strset (destptr, dest, value));
20532 emit_insn (gen_strset (destptr, dest, value));
20533 emit_insn (gen_strset (destptr, dest, value));
20535 emit_label (label);
20536 LABEL_NUSES (label) = 1;
20540 rtx label = ix86_expand_aligntest (count, 8, true);
20543 dest = change_address (destmem, DImode, destptr);
20544 emit_insn (gen_strset (destptr, dest, value));
20548 dest = change_address (destmem, SImode, destptr);
20549 emit_insn (gen_strset (destptr, dest, value));
20550 emit_insn (gen_strset (destptr, dest, value));
20552 emit_label (label);
20553 LABEL_NUSES (label) = 1;
20557 rtx label = ix86_expand_aligntest (count, 4, true);
20558 dest = change_address (destmem, SImode, destptr);
20559 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
20560 emit_label (label);
20561 LABEL_NUSES (label) = 1;
20565 rtx label = ix86_expand_aligntest (count, 2, true);
20566 dest = change_address (destmem, HImode, destptr);
20567 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
20568 emit_label (label);
20569 LABEL_NUSES (label) = 1;
20573 rtx label = ix86_expand_aligntest (count, 1, true);
20574 dest = change_address (destmem, QImode, destptr);
20575 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
20576 emit_label (label);
20577 LABEL_NUSES (label) = 1;
/* Copy enough from SRC to DEST to align DEST, known to be aligned by ALIGN,
   to DESIRED_ALIGNMENT.  */
static void
expand_movmem_prologue (rtx destmem, rtx srcmem,
			rtx destptr, rtx srcptr, rtx count,
			int align, int desired_alignment)
{
20588 if (align <= 1 && desired_alignment > 1)
20590 rtx label = ix86_expand_aligntest (destptr, 1, false);
20591 srcmem = change_address (srcmem, QImode, srcptr);
20592 destmem = change_address (destmem, QImode, destptr);
20593 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
20594 ix86_adjust_counter (count, 1);
20595 emit_label (label);
20596 LABEL_NUSES (label) = 1;
20598 if (align <= 2 && desired_alignment > 2)
20600 rtx label = ix86_expand_aligntest (destptr, 2, false);
20601 srcmem = change_address (srcmem, HImode, srcptr);
20602 destmem = change_address (destmem, HImode, destptr);
20603 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
20604 ix86_adjust_counter (count, 2);
20605 emit_label (label);
20606 LABEL_NUSES (label) = 1;
20608 if (align <= 4 && desired_alignment > 4)
20610 rtx label = ix86_expand_aligntest (destptr, 4, false);
20611 srcmem = change_address (srcmem, SImode, srcptr);
20612 destmem = change_address (destmem, SImode, destptr);
20613 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
20614 ix86_adjust_counter (count, 4);
20615 emit_label (label);
20616 LABEL_NUSES (label) = 1;
20618 gcc_assert (desired_alignment <= 8);
/* Copy enough from SRC to DST to align DST to DESIRED_ALIGN.
   ALIGN_BYTES is how many bytes need to be copied.  */
static rtx
expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
				 int desired_align, int align_bytes)
{
  rtx src = *srcp;
  rtx src_size, dst_size;
  int off = 0;
20630 int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
20631 if (src_align_bytes >= 0)
20632 src_align_bytes = desired_align - src_align_bytes;
20633 src_size = MEM_SIZE (src);
20634 dst_size = MEM_SIZE (dst);
20635 if (align_bytes & 1)
20637 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
20638 src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
20640 emit_insn (gen_strmov (destreg, dst, srcreg, src));
20642 if (align_bytes & 2)
20644 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
20645 src = adjust_automodify_address_nv (src, HImode, srcreg, off);
20646 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
20647 set_mem_align (dst, 2 * BITS_PER_UNIT);
20648 if (src_align_bytes >= 0
20649 && (src_align_bytes & 1) == (align_bytes & 1)
20650 && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
20651 set_mem_align (src, 2 * BITS_PER_UNIT);
20653 emit_insn (gen_strmov (destreg, dst, srcreg, src));
20655 if (align_bytes & 4)
20657 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
20658 src = adjust_automodify_address_nv (src, SImode, srcreg, off);
20659 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
20660 set_mem_align (dst, 4 * BITS_PER_UNIT);
20661 if (src_align_bytes >= 0)
20663 unsigned int src_align = 0;
20664 if ((src_align_bytes & 3) == (align_bytes & 3))
20666 else if ((src_align_bytes & 1) == (align_bytes & 1))
20668 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
20669 set_mem_align (src, src_align * BITS_PER_UNIT);
20672 emit_insn (gen_strmov (destreg, dst, srcreg, src));
20674 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
20675 src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
20676 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
20677 set_mem_align (dst, desired_align * BITS_PER_UNIT);
20678 if (src_align_bytes >= 0)
20680 unsigned int src_align = 0;
20681 if ((src_align_bytes & 7) == (align_bytes & 7))
20683 else if ((src_align_bytes & 3) == (align_bytes & 3))
20685 else if ((src_align_bytes & 1) == (align_bytes & 1))
20687 if (src_align > (unsigned int) desired_align)
20688 src_align = desired_align;
20689 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
20690 set_mem_align (src, src_align * BITS_PER_UNIT);
20693 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
20695 set_mem_size (dst, GEN_INT (INTVAL (src_size) - align_bytes));
/* Store enough into DEST to align DEST, known to be aligned by ALIGN,
   to DESIRED_ALIGNMENT.  */
static void
expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
			int align, int desired_alignment)
{
20706 if (align <= 1 && desired_alignment > 1)
20708 rtx label = ix86_expand_aligntest (destptr, 1, false);
20709 destmem = change_address (destmem, QImode, destptr);
20710 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
20711 ix86_adjust_counter (count, 1);
20712 emit_label (label);
20713 LABEL_NUSES (label) = 1;
20715 if (align <= 2 && desired_alignment > 2)
20717 rtx label = ix86_expand_aligntest (destptr, 2, false);
20718 destmem = change_address (destmem, HImode, destptr);
20719 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
20720 ix86_adjust_counter (count, 2);
20721 emit_label (label);
20722 LABEL_NUSES (label) = 1;
20724 if (align <= 4 && desired_alignment > 4)
20726 rtx label = ix86_expand_aligntest (destptr, 4, false);
20727 destmem = change_address (destmem, SImode, destptr);
20728 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
20729 ix86_adjust_counter (count, 4);
20730 emit_label (label);
20731 LABEL_NUSES (label) = 1;
20733 gcc_assert (desired_alignment <= 8);
/* Store enough into DST to align DST, known to be aligned by ALIGN, to
   DESIRED_ALIGN.  ALIGN_BYTES is how many bytes need to be stored.  */
static rtx
expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
				 int desired_align, int align_bytes)
{
  int off = 0;
  rtx dst_size = MEM_SIZE (dst);
20744 if (align_bytes & 1)
20746 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
20748 emit_insn (gen_strset (destreg, dst,
20749 gen_lowpart (QImode, value)));
20751 if (align_bytes & 2)
20753 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
20754 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
20755 set_mem_align (dst, 2 * BITS_PER_UNIT);
20757 emit_insn (gen_strset (destreg, dst,
20758 gen_lowpart (HImode, value)));
20760 if (align_bytes & 4)
20762 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
20763 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
20764 set_mem_align (dst, 4 * BITS_PER_UNIT);
20766 emit_insn (gen_strset (destreg, dst,
20767 gen_lowpart (SImode, value)));
20769 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
20770 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
20771 set_mem_align (dst, desired_align * BITS_PER_UNIT);
20773 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
20777 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
20778 static enum stringop_alg
20779 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
	    int *dynamic_check)
{
20782 const struct stringop_algs * algs;
20783 bool optimize_for_speed;
20784 /* Algorithms using the rep prefix want at least edi and ecx;
20785 additionally, memset wants eax and memcpy wants esi. Don't
20786 consider such algorithms if the user has appropriated those
20787 registers for their own purposes. */
20788 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
20790 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
20792 #define ALG_USABLE_P(alg) (rep_prefix_usable \
20793 || (alg != rep_prefix_1_byte \
20794 && alg != rep_prefix_4_byte \
20795 && alg != rep_prefix_8_byte))
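
/* For example, compiling with -ffixed-ecx or -ffixed-edi makes
   rep_prefix_usable false, so ALG_USABLE_P rejects every rep_prefix_*
   algorithm and only the loop variants and libcall remain candidates.  */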
20796 const struct processor_costs *cost;
20798 /* Even if the string operation call is cold, we still might spend a lot
20799 of time processing large blocks. */
20800 if (optimize_function_for_size_p (cfun)
20801 || (optimize_insn_for_size_p ()
20802 && expected_size != -1 && expected_size < 256))
20803 optimize_for_speed = false;
20805 optimize_for_speed = true;
20807 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
20809 *dynamic_check = -1;
20811 algs = &cost->memset[TARGET_64BIT != 0];
20813 algs = &cost->memcpy[TARGET_64BIT != 0];
20814 if (ix86_stringop_alg != no_stringop && ALG_USABLE_P (ix86_stringop_alg))
20815 return ix86_stringop_alg;
20816 /* rep; movq or rep; movl is the smallest variant. */
20817 else if (!optimize_for_speed)
20819 if (!count || (count & 3))
20820 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
20822 return rep_prefix_usable ? rep_prefix_4_byte : loop;
  /* Very tiny blocks are best handled via the loop; REP is expensive to
     set up.  */
20826 else if (expected_size != -1 && expected_size < 4)
20827 return loop_1_byte;
20828 else if (expected_size != -1)
20831 enum stringop_alg alg = libcall;
20832 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
20834 /* We get here if the algorithms that were not libcall-based
20835 were rep-prefix based and we are unable to use rep prefixes
20836 based on global register usage. Break out of the loop and
20837 use the heuristic below. */
20838 if (algs->size[i].max == 0)
20840 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
20842 enum stringop_alg candidate = algs->size[i].alg;
20844 if (candidate != libcall && ALG_USABLE_P (candidate))
20846 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
20847 last non-libcall inline algorithm. */
20848 if (TARGET_INLINE_ALL_STRINGOPS)
	      /* When the current size is best copied by a libcall, but we
		 are still forced to inline, run the heuristic below that
		 picks code for medium-sized blocks.  */
20853 if (alg != libcall)
20857 else if (ALG_USABLE_P (candidate))
20861 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
  /* When asked to inline the call anyway, try to pick a meaningful choice.
     We look for the maximal size of a block that is faster to copy by hand
     and take blocks of at most that size, guessing that the average size
     will be roughly half of the block.

     If this turns out to be bad, we might simply specify the preferred
     choice in ix86_costs.  */
20870 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
20871 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
20874 enum stringop_alg alg;
20876 bool any_alg_usable_p = true;
20878 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
20880 enum stringop_alg candidate = algs->size[i].alg;
20881 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
20883 if (candidate != libcall && candidate
20884 && ALG_USABLE_P (candidate))
20885 max = algs->size[i].max;
20887 /* If there aren't any usable algorithms, then recursing on
20888 smaller sizes isn't going to find anything. Just return the
20889 simple byte-at-a-time copy loop. */
20890 if (!any_alg_usable_p)
20892 /* Pick something reasonable. */
20893 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
20894 *dynamic_check = 128;
20895 return loop_1_byte;
20899 alg = decide_alg (count, max / 2, memset, dynamic_check);
20900 gcc_assert (*dynamic_check == -1);
20901 gcc_assert (alg != libcall);
20902 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
20903 *dynamic_check = max;
20906 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
20907 #undef ALG_USABLE_P
20910 /* Decide on alignment. We know that the operand is already aligned to ALIGN
20911 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
static int
decide_alignment (int align,
		  enum stringop_alg alg,
		  int expected_size)
{
  int desired_align = 0;
20921 gcc_unreachable ();
20923 case unrolled_loop:
20924 desired_align = GET_MODE_SIZE (Pmode);
20926 case rep_prefix_8_byte:
20929 case rep_prefix_4_byte:
      /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
	 copying a whole cache line at once.  */
20932 if (TARGET_PENTIUMPRO)
20937 case rep_prefix_1_byte:
      /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
	 copying a whole cache line at once.  */
20940 if (TARGET_PENTIUMPRO)
20954 if (desired_align < align)
20955 desired_align = align;
20956 if (expected_size != -1 && expected_size < 4)
20957 desired_align = align;
20958 return desired_align;
/* Return the smallest power of 2 greater than VAL.  */
static int
smallest_pow2_greater_than (int val)
{
  int ret = 1;
  while (ret <= val)
    ret <<= 1;
  return ret;
}
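
/* For example, smallest_pow2_greater_than (7) is 8 and
   smallest_pow2_greater_than (8) is 16; note the result is strictly
   greater than VAL even when VAL is already a power of two.  */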
/* Expand string move (memcpy) operation.  Use i386 string operations
   when profitable.  expand_setmem contains similar code.  The code
   depends upon architecture, block size and alignment, but always has
   the same overall structure:

   1) Prologue guard: a conditional that jumps down to the epilogue for
      small blocks that can be handled by the epilogue alone.  This is
      faster, but also needed for correctness, since the prologue assumes
      the block is larger than the desired alignment.

      With -minline-stringops-dynamically, an optional dynamic check for
      size, falling back to a libcall for large blocks, is emitted here
      too.

   2) Prologue: copy the first few bytes in order to get the destination
      aligned to DESIRED_ALIGN.  It is emitted only when ALIGN is less
      than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
      copied.  We emit either a jump tree for power-of-two-sized blocks,
      or a byte loop.

   3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
      with the specified algorithm.

   4) Epilogue: code copying the tail of the block that is too small to
      be handled by the main body (or up to the size guarded by the
      prologue guard).  */
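
/* Illustrative sketch of the shape produced for a rep_prefix_4_byte copy
   (not emitted literally; the real code depends on target tuning,
   alignment and sizes):

	cmp	$epilogue_size_needed, count	# 1) prologue guard
	jb	.Lepilogue
	...byte/word moves...			# 2) align destination
	mov	count, %ecx
	shr	$2, %ecx
	rep movsl				# 3) main body
   .Lepilogue:
	...jump tree copying count & 3 bytes	# 4) epilogue  */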
bool
ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
		    rtx expected_align_exp, rtx expected_size_exp)
{
  rtx destreg;
  rtx srcreg;
  rtx label = NULL;
  rtx tmp;
21004 rtx jump_around_label = NULL;
21005 HOST_WIDE_INT align = 1;
21006 unsigned HOST_WIDE_INT count = 0;
21007 HOST_WIDE_INT expected_size = -1;
21008 int size_needed = 0, epilogue_size_needed;
21009 int desired_align = 0, align_bytes = 0;
21010 enum stringop_alg alg;
21012 bool need_zero_guard = false;
21014 if (CONST_INT_P (align_exp))
21015 align = INTVAL (align_exp);
  /* i386 can do misaligned access at a reasonably increased cost.  */
21017 if (CONST_INT_P (expected_align_exp)
21018 && INTVAL (expected_align_exp) > align)
21019 align = INTVAL (expected_align_exp);
21020 /* ALIGN is the minimum of destination and source alignment, but we care here
21021 just about destination alignment. */
21022 else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
21023 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
21025 if (CONST_INT_P (count_exp))
21026 count = expected_size = INTVAL (count_exp);
21027 if (CONST_INT_P (expected_size_exp) && count == 0)
21028 expected_size = INTVAL (expected_size_exp);
21030 /* Make sure we don't need to care about overflow later on. */
21031 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
21034 /* Step 0: Decide on preferred algorithm, desired alignment and
21035 size of chunks to be copied by main loop. */
21037 alg = decide_alg (count, expected_size, false, &dynamic_check);
21038 desired_align = decide_alignment (align, alg, expected_size);
21040 if (!TARGET_ALIGN_STRINGOPS)
21041 align = desired_align;
21043 if (alg == libcall)
21045 gcc_assert (alg != no_stringop);
21047 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
21048 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
21049 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
21054 gcc_unreachable ();
21056 need_zero_guard = true;
21057 size_needed = GET_MODE_SIZE (Pmode);
21059 case unrolled_loop:
21060 need_zero_guard = true;
21061 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
21063 case rep_prefix_8_byte:
21066 case rep_prefix_4_byte:
21069 case rep_prefix_1_byte:
21073 need_zero_guard = true;
21078 epilogue_size_needed = size_needed;
21080 /* Step 1: Prologue guard. */
21082 /* Alignment code needs count to be in register. */
21083 if (CONST_INT_P (count_exp) && desired_align > align)
21085 if (INTVAL (count_exp) > desired_align
21086 && INTVAL (count_exp) > size_needed)
21089 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
21090 if (align_bytes <= 0)
21093 align_bytes = desired_align - align_bytes;
21095 if (align_bytes == 0)
21096 count_exp = force_reg (counter_mode (count_exp), count_exp);
21098 gcc_assert (desired_align >= 1 && align >= 1);
21100 /* Ensure that alignment prologue won't copy past end of block. */
21101 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
21103 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
      /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
	 Make sure it is a power of 2.  */
21106 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
21110 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
21112 /* If main algorithm works on QImode, no epilogue is needed.
21113 For small sizes just don't align anything. */
21114 if (size_needed == 1)
21115 desired_align = align;
21122 label = gen_label_rtx ();
21123 emit_cmp_and_jump_insns (count_exp,
21124 GEN_INT (epilogue_size_needed),
21125 LTU, 0, counter_mode (count_exp), 1, label);
21126 if (expected_size == -1 || expected_size < epilogue_size_needed)
21127 predict_jump (REG_BR_PROB_BASE * 60 / 100);
21129 predict_jump (REG_BR_PROB_BASE * 20 / 100);
  /* Emit code to decide at runtime whether the library call or the inline
     code should be used.  */
21135 if (dynamic_check != -1)
21137 if (CONST_INT_P (count_exp))
21139 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
21141 emit_block_move_via_libcall (dst, src, count_exp, false);
21142 count_exp = const0_rtx;
21148 rtx hot_label = gen_label_rtx ();
21149 jump_around_label = gen_label_rtx ();
21150 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
21151 LEU, 0, GET_MODE (count_exp), 1, hot_label);
21152 predict_jump (REG_BR_PROB_BASE * 90 / 100);
21153 emit_block_move_via_libcall (dst, src, count_exp, false);
21154 emit_jump (jump_around_label);
21155 emit_label (hot_label);
21159 /* Step 2: Alignment prologue. */
21161 if (desired_align > align)
21163 if (align_bytes == 0)
	  /* Except for the first move in the epilogue, we no longer know
	     the constant offset in the aliasing info.  It doesn't seem worth
	     the pain to maintain it for the first move, so throw away
	     the info early.  */
21169 src = change_address (src, BLKmode, srcreg);
21170 dst = change_address (dst, BLKmode, destreg);
21171 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
21176 /* If we know how many bytes need to be stored before dst is
21177 sufficiently aligned, maintain aliasing info accurately. */
21178 dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
21179 desired_align, align_bytes);
21180 count_exp = plus_constant (count_exp, -align_bytes);
21181 count -= align_bytes;
21183 if (need_zero_guard
21184 && (count < (unsigned HOST_WIDE_INT) size_needed
21185 || (align_bytes == 0
21186 && count < ((unsigned HOST_WIDE_INT) size_needed
21187 + desired_align - align))))
	  /* It is possible that we copied enough so the main loop will not
	     execute.  */
	  gcc_assert (size_needed > 1);
21192 if (label == NULL_RTX)
21193 label = gen_label_rtx ();
21194 emit_cmp_and_jump_insns (count_exp,
21195 GEN_INT (size_needed),
21196 LTU, 0, counter_mode (count_exp), 1, label);
21197 if (expected_size == -1
21198 || expected_size < (desired_align - align) / 2 + size_needed)
21199 predict_jump (REG_BR_PROB_BASE * 20 / 100);
21201 predict_jump (REG_BR_PROB_BASE * 60 / 100);
21204 if (label && size_needed == 1)
21206 emit_label (label);
21207 LABEL_NUSES (label) = 1;
21209 epilogue_size_needed = 1;
21211 else if (label == NULL_RTX)
21212 epilogue_size_needed = size_needed;
21214 /* Step 3: Main loop. */
21220 gcc_unreachable ();
21222 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
21223 count_exp, QImode, 1, expected_size);
21226 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
21227 count_exp, Pmode, 1, expected_size);
21229 case unrolled_loop:
      /* Unroll only by a factor of 2 in 32-bit mode, since we don't have
	 enough registers for 4 temporaries anyway.  */
21232 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
21233 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
21236 case rep_prefix_8_byte:
21237 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
21240 case rep_prefix_4_byte:
21241 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
21244 case rep_prefix_1_byte:
21245 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
  /* Properly adjust the offsets of src and dest memory for aliasing.  */
21250 if (CONST_INT_P (count_exp))
21252 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
21253 (count / size_needed) * size_needed);
21254 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
21255 (count / size_needed) * size_needed);
21259 src = change_address (src, BLKmode, srcreg);
21260 dst = change_address (dst, BLKmode, destreg);
21263 /* Step 4: Epilogue to copy the remaining bytes. */
      /* When the main loop is done, COUNT_EXP might hold the original count,
	 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
	 Epilogue code will actually copy
	 COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.  Compensate if
	 needed.  */
21272 if (size_needed < epilogue_size_needed)
21275 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
21276 GEN_INT (size_needed - 1), count_exp, 1,
21278 if (tmp != count_exp)
21279 emit_move_insn (count_exp, tmp);
21281 emit_label (label);
21282 LABEL_NUSES (label) = 1;
21285 if (count_exp != const0_rtx && epilogue_size_needed > 1)
21286 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
21287 epilogue_size_needed);
21288 if (jump_around_label)
21289 emit_label (jump_around_label);
/* Helper function for memset.  For a QImode value 0xXY produce
   0xXYXYXYXY of the width specified by MODE.  This is essentially
   a * 0x10101010, but we can do slightly better than synth_mult
   by unwinding the sequence by hand on CPUs with slow multiply.  */
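
/* For example, with VAL = 0xAB and MODE = SImode the unwound sequence
   computes t = (0xAB << 8) | 0xAB = 0xABAB and then (t << 16) | t =
   0xABABABAB, i.e. two shift/or steps instead of a multiply.  */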
static rtx
promote_duplicated_reg (enum machine_mode mode, rtx val)
{
  enum machine_mode valmode = GET_MODE (val);
  rtx tmp;
  int nops = mode == DImode ? 3 : 2;
21305 gcc_assert (mode == SImode || mode == DImode);
21306 if (val == const0_rtx)
21307 return copy_to_mode_reg (mode, const0_rtx);
21308 if (CONST_INT_P (val))
21310 HOST_WIDE_INT v = INTVAL (val) & 255;
21314 if (mode == DImode)
21315 v |= (v << 16) << 16;
21316 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
21319 if (valmode == VOIDmode)
21321 if (valmode != QImode)
21322 val = gen_lowpart (QImode, val);
21323 if (mode == QImode)
21325 if (!TARGET_PARTIAL_REG_STALL)
21327 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
21328 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
21329 <= (ix86_cost->shift_const + ix86_cost->add) * nops
21330 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
21332 rtx reg = convert_modes (mode, QImode, val, true);
21333 tmp = promote_duplicated_reg (mode, const1_rtx);
21334 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
21339 rtx reg = convert_modes (mode, QImode, val, true);
21341 if (!TARGET_PARTIAL_REG_STALL)
21342 if (mode == SImode)
21343 emit_insn (gen_movsi_insv_1 (reg, reg));
21345 emit_insn (gen_movdi_insv_1 (reg, reg));
21348 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
21349 NULL, 1, OPTAB_DIRECT);
21351 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
21353 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
21354 NULL, 1, OPTAB_DIRECT);
21355 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
21356 if (mode == SImode)
21358 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
21359 NULL, 1, OPTAB_DIRECT);
21360 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
/* Duplicate value VAL using promote_duplicated_reg into the maximal size
   that will be needed by the main loop copying SIZE_NEEDED chunks and by
   the prologue getting the alignment from ALIGN to DESIRED_ALIGN.  */
static rtx
promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
{
  rtx promoted_val;

  if (TARGET_64BIT
21374 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
21375 promoted_val = promote_duplicated_reg (DImode, val);
21376 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
21377 promoted_val = promote_duplicated_reg (SImode, val);
21378 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
21379 promoted_val = promote_duplicated_reg (HImode, val);
21381 promoted_val = val;
21383 return promoted_val;
/* Expand string clear operation (bzero).  Use i386 string operations
   when profitable.  See the expand_movmem comment for an explanation of
   the individual steps performed.  */
bool
ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
		    rtx expected_align_exp, rtx expected_size_exp)
{
  rtx destreg;
  rtx label = NULL;
  rtx tmp;
21396 rtx jump_around_label = NULL;
21397 HOST_WIDE_INT align = 1;
21398 unsigned HOST_WIDE_INT count = 0;
21399 HOST_WIDE_INT expected_size = -1;
21400 int size_needed = 0, epilogue_size_needed;
21401 int desired_align = 0, align_bytes = 0;
21402 enum stringop_alg alg;
21403 rtx promoted_val = NULL;
21404 bool force_loopy_epilogue = false;
21406 bool need_zero_guard = false;
21408 if (CONST_INT_P (align_exp))
21409 align = INTVAL (align_exp);
  /* i386 can do misaligned access at a reasonably increased cost.  */
21411 if (CONST_INT_P (expected_align_exp)
21412 && INTVAL (expected_align_exp) > align)
21413 align = INTVAL (expected_align_exp);
21414 if (CONST_INT_P (count_exp))
21415 count = expected_size = INTVAL (count_exp);
21416 if (CONST_INT_P (expected_size_exp) && count == 0)
21417 expected_size = INTVAL (expected_size_exp);
21419 /* Make sure we don't need to care about overflow later on. */
21420 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
21423 /* Step 0: Decide on preferred algorithm, desired alignment and
21424 size of chunks to be copied by main loop. */
21426 alg = decide_alg (count, expected_size, true, &dynamic_check);
21427 desired_align = decide_alignment (align, alg, expected_size);
21429 if (!TARGET_ALIGN_STRINGOPS)
21430 align = desired_align;
21432 if (alg == libcall)
21434 gcc_assert (alg != no_stringop);
21436 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
21437 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
21442 gcc_unreachable ();
21444 need_zero_guard = true;
21445 size_needed = GET_MODE_SIZE (Pmode);
21447 case unrolled_loop:
21448 need_zero_guard = true;
21449 size_needed = GET_MODE_SIZE (Pmode) * 4;
21451 case rep_prefix_8_byte:
21454 case rep_prefix_4_byte:
21457 case rep_prefix_1_byte:
21461 need_zero_guard = true;
21465 epilogue_size_needed = size_needed;
21467 /* Step 1: Prologue guard. */
21469 /* Alignment code needs count to be in register. */
21470 if (CONST_INT_P (count_exp) && desired_align > align)
21472 if (INTVAL (count_exp) > desired_align
21473 && INTVAL (count_exp) > size_needed)
21476 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
21477 if (align_bytes <= 0)
21480 align_bytes = desired_align - align_bytes;
21482 if (align_bytes == 0)
21484 enum machine_mode mode = SImode;
21485 if (TARGET_64BIT && (count & ~0xffffffff))
21487 count_exp = force_reg (mode, count_exp);
  /* Do the cheap promotion to allow better CSE across the
     main loop and epilogue (i.e. one load of the big constant in
     front of all the code).  */
21493 if (CONST_INT_P (val_exp))
21494 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
21495 desired_align, align);
21496 /* Ensure that alignment prologue won't copy past end of block. */
21497 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
21499 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
21500 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
21501 Make sure it is power of 2. */
21502 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
  /* To improve performance of small blocks, we jump around the VAL
     promoting mode.  This means that if the promoted VAL is not constant,
     we might not use it in the epilogue and have to use the byte loop
     variant.  */
  if (epilogue_size_needed > 2 && !promoted_val)
21509 force_loopy_epilogue = true;
21512 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
21514 /* If main algorithm works on QImode, no epilogue is needed.
21515 For small sizes just don't align anything. */
21516 if (size_needed == 1)
21517 desired_align = align;
21524 label = gen_label_rtx ();
21525 emit_cmp_and_jump_insns (count_exp,
21526 GEN_INT (epilogue_size_needed),
21527 LTU, 0, counter_mode (count_exp), 1, label);
21528 if (expected_size == -1 || expected_size <= epilogue_size_needed)
21529 predict_jump (REG_BR_PROB_BASE * 60 / 100);
21531 predict_jump (REG_BR_PROB_BASE * 20 / 100);
21534 if (dynamic_check != -1)
21536 rtx hot_label = gen_label_rtx ();
21537 jump_around_label = gen_label_rtx ();
21538 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
21539 LEU, 0, counter_mode (count_exp), 1, hot_label);
21540 predict_jump (REG_BR_PROB_BASE * 90 / 100);
21541 set_storage_via_libcall (dst, count_exp, val_exp, false);
21542 emit_jump (jump_around_label);
21543 emit_label (hot_label);
21546 /* Step 2: Alignment prologue. */
21548 /* Do the expensive promotion once we branched off the small blocks. */
21550 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
21551 desired_align, align);
21552 gcc_assert (desired_align >= 1 && align >= 1);
21554 if (desired_align > align)
21556 if (align_bytes == 0)
	  /* Except for the first move in the epilogue, we no longer know
	     the constant offset in the aliasing info.  It doesn't seem worth
	     the pain to maintain it for the first move, so throw away
	     the info early.  */
21562 dst = change_address (dst, BLKmode, destreg);
21563 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
21568 /* If we know how many bytes need to be stored before dst is
21569 sufficiently aligned, maintain aliasing info accurately. */
21570 dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
21571 desired_align, align_bytes);
21572 count_exp = plus_constant (count_exp, -align_bytes);
21573 count -= align_bytes;
21575 if (need_zero_guard
21576 && (count < (unsigned HOST_WIDE_INT) size_needed
21577 || (align_bytes == 0
21578 && count < ((unsigned HOST_WIDE_INT) size_needed
21579 + desired_align - align))))
	  /* It is possible that we copied enough so the main loop will not
	     execute.  */
	  gcc_assert (size_needed > 1);
21584 if (label == NULL_RTX)
21585 label = gen_label_rtx ();
21586 emit_cmp_and_jump_insns (count_exp,
21587 GEN_INT (size_needed),
21588 LTU, 0, counter_mode (count_exp), 1, label);
21589 if (expected_size == -1
21590 || expected_size < (desired_align - align) / 2 + size_needed)
21591 predict_jump (REG_BR_PROB_BASE * 20 / 100);
21593 predict_jump (REG_BR_PROB_BASE * 60 / 100);
21596 if (label && size_needed == 1)
21598 emit_label (label);
21599 LABEL_NUSES (label) = 1;
21601 promoted_val = val_exp;
21602 epilogue_size_needed = 1;
21604 else if (label == NULL_RTX)
21605 epilogue_size_needed = size_needed;
21607 /* Step 3: Main loop. */
21613 gcc_unreachable ();
21615 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
21616 count_exp, QImode, 1, expected_size);
21619 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
21620 count_exp, Pmode, 1, expected_size);
21622 case unrolled_loop:
21623 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
21624 count_exp, Pmode, 4, expected_size);
21626 case rep_prefix_8_byte:
21627 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
21630 case rep_prefix_4_byte:
21631 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
21634 case rep_prefix_1_byte:
21635 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
  /* Properly adjust the offset of dst memory for aliasing.  */
21640 if (CONST_INT_P (count_exp))
21641 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
21642 (count / size_needed) * size_needed);
21644 dst = change_address (dst, BLKmode, destreg);
21646 /* Step 4: Epilogue to copy the remaining bytes. */
      /* When the main loop is done, COUNT_EXP might hold the original count,
	 while we want to store only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
	 Epilogue code will actually store
	 COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.  Compensate if
	 needed.  */
21655 if (size_needed < epilogue_size_needed)
21658 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
21659 GEN_INT (size_needed - 1), count_exp, 1,
21661 if (tmp != count_exp)
21662 emit_move_insn (count_exp, tmp);
21664 emit_label (label);
21665 LABEL_NUSES (label) = 1;
21668 if (count_exp != const0_rtx && epilogue_size_needed > 1)
21670 if (force_loopy_epilogue)
21671 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
21672 epilogue_size_needed);
21674 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
21675 epilogue_size_needed);
21677 if (jump_around_label)
21678 emit_label (jump_around_label);
/* Expand the appropriate insns for doing strlen if not just doing
   repnz; scasb

   out = result, initialized with the start address
   align_rtx = alignment of the address.
   scratch = scratch register, initialized with the start address when
	     not aligned, otherwise undefined

   This is just the body.  It needs the initializations mentioned above and
   some address computing at the end.  These things are done in i386.md.  */

static void
ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
{
  int align;
  rtx tmp;
21698 rtx align_2_label = NULL_RTX;
21699 rtx align_3_label = NULL_RTX;
21700 rtx align_4_label = gen_label_rtx ();
21701 rtx end_0_label = gen_label_rtx ();
21703 rtx tmpreg = gen_reg_rtx (SImode);
21704 rtx scratch = gen_reg_rtx (SImode);
21708 if (CONST_INT_P (align_rtx))
21709 align = INTVAL (align_rtx);
21711 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
21713 /* Is there a known alignment and is it less than 4? */
21716 rtx scratch1 = gen_reg_rtx (Pmode);
21717 emit_move_insn (scratch1, out);
21718 /* Is there a known alignment and is it not 2? */
21721 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
21722 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
21724 /* Leave just the 3 lower bits. */
21725 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
21726 NULL_RTX, 0, OPTAB_WIDEN);
21728 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
21729 Pmode, 1, align_4_label);
21730 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
21731 Pmode, 1, align_2_label);
21732 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
21733 Pmode, 1, align_3_label);
	  /* Since the alignment is 2, we have to check 2 or 0 bytes;
	     check whether it is aligned to a 4-byte boundary.  */
21740 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
21741 NULL_RTX, 0, OPTAB_WIDEN);
21743 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
21744 Pmode, 1, align_4_label);
21747 mem = change_address (src, QImode, out);
21749 /* Now compare the bytes. */
  /* Compare the first n unaligned bytes on a byte-per-byte basis.  */
21752 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
21753 QImode, 1, end_0_label);
21755 /* Increment the address. */
21756 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
21758 /* Not needed with an alignment of 2 */
21761 emit_label (align_2_label);
21763 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
21766 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
21768 emit_label (align_3_label);
21771 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
21774 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
  /* Generate the loop to check 4 bytes at a time.  It is not a good idea
     to align this loop; it only produces huge programs and does not help
     to converge.  */
  emit_label (align_4_label);
21782 mem = change_address (src, SImode, out);
21783 emit_move_insn (scratch, mem);
21784 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
  /* This formula yields a nonzero result iff one of the bytes is zero.
     This saves three branches inside the loop and many cycles.  */
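
  /* Worked example: SCRATCH = 0x41004242 contains a zero byte.
     0x41004242 - 0x01010101 == 0x3FFF4141, ~0x41004242 == 0xBEFFBDBD, and
     0x3FFF4141 & 0xBEFFBDBD & 0x80808080 == 0x00800000, which is nonzero;
     the set 0x80 bit marks the byte that was zero.  A C sketch of the test
     emitted below (illustrative only, not part of the build):

	static int contains_zero_byte (unsigned int v)
	{
	  return ((v - 0x01010101U) & ~v & 0x80808080U) != 0;
	}  */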
21789 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
21790 emit_insn (gen_one_cmplsi2 (scratch, scratch));
21791 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
21792 emit_insn (gen_andsi3 (tmpreg, tmpreg,
21793 gen_int_mode (0x80808080, SImode)));
21794 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
21799 rtx reg = gen_reg_rtx (SImode);
21800 rtx reg2 = gen_reg_rtx (Pmode);
21801 emit_move_insn (reg, tmpreg);
21802 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
21804 /* If zero is not in the first two bytes, move two bytes forward. */
21805 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
21806 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
21807 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
21808 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
21809 gen_rtx_IF_THEN_ELSE (SImode, tmp,
      /* Emit the LEA manually to avoid clobbering the flags.  */
21813 emit_insn (gen_rtx_SET (SImode, reg2,
21814 gen_rtx_PLUS (Pmode, out, const2_rtx)));
21816 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
21817 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
21818 emit_insn (gen_rtx_SET (VOIDmode, out,
21819 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
21825 rtx end_2_label = gen_label_rtx ();
21826 /* Is zero in the first two bytes? */
21828 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
21829 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
21830 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
21831 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
21832 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
21834 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
21835 JUMP_LABEL (tmp) = end_2_label;
21837 /* Not in the first two. Move two bytes forward. */
21838 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
21839 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
21841 emit_label (end_2_label);
21845 /* Avoid branch in fixing the byte. */
21846 tmpreg = gen_lowpart (QImode, tmpreg);
21847 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
21848 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
21849 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
21850 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
21852 emit_label (end_0_label);
/* Expand strlen.  */

int
ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
{
21860 rtx addr, scratch1, scratch2, scratch3, scratch4;
  /* The generic case of the strlen expander is long.  Avoid expanding it
     unless TARGET_INLINE_ALL_STRINGOPS.  */
21865 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
21866 && !TARGET_INLINE_ALL_STRINGOPS
21867 && !optimize_insn_for_size_p ()
21868 && (!CONST_INT_P (align) || INTVAL (align) < 4))
21871 addr = force_reg (Pmode, XEXP (src, 0));
21872 scratch1 = gen_reg_rtx (Pmode);
21874 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
21875 && !optimize_insn_for_size_p ())
      /* Well, it seems that some optimizers do not combine a call like
	 foo (strlen (bar), strlen (bar));
	 when the move and the subtraction are done here.  The length is
	 calculated just once when these instructions are done inside
	 output_strlen_unroll ().  But since &bar[strlen (bar)] is often used
	 and one fewer register is live for the lifetime of
	 output_strlen_unroll (), this is better.  */
21885 emit_move_insn (out, addr);
21887 ix86_expand_strlensi_unroll_1 (out, src, align);
21889 /* strlensi_unroll_1 returns the address of the zero at the end of
21890 the string, like memchr(), so compute the length by subtracting
21891 the start address. */
21892 emit_insn (ix86_gen_sub3 (out, out, addr));
21898 /* Can't use this if the user has appropriated eax, ecx, or edi. */
21899 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
21902 scratch2 = gen_reg_rtx (Pmode);
21903 scratch3 = gen_reg_rtx (Pmode);
21904 scratch4 = force_reg (Pmode, constm1_rtx);
21906 emit_move_insn (scratch3, addr);
21907 eoschar = force_reg (QImode, eoschar);
21909 src = replace_equiv_address_nv (src, scratch3);
21911 /* If .md starts supporting :P, this can be done in .md. */
21912 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
21913 scratch4), UNSPEC_SCAS);
21914 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
21915 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
21916 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
/* For a given symbol (function), construct code to compute the address
   of its PLT entry in the large x86-64 PIC model.  */
static rtx
construct_plt_address (rtx symbol)
{
21926 rtx tmp = gen_reg_rtx (Pmode);
21927 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
21929 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
21930 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
21932 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
  emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
  return tmp;
}
void
ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
		  rtx callarg2,
		  rtx pop, int sibcall)
{
21942 rtx use = NULL, call;
21944 if (pop == const0_rtx)
21946 gcc_assert (!TARGET_64BIT || !pop);
21948 if (TARGET_MACHO && !TARGET_64BIT)
21951 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
21952 fnaddr = machopic_indirect_call_target (fnaddr);
21957 /* Static functions and indirect calls don't need the pic register. */
21958 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
21959 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
21960 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
21961 use_reg (&use, pic_offset_table_rtx);
21964 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
21966 rtx al = gen_rtx_REG (QImode, AX_REG);
21967 emit_move_insn (al, callarg2);
21968 use_reg (&use, al);
21971 if (ix86_cmodel == CM_LARGE_PIC
21973 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
21974 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
21975 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
21977 ? !sibcall_insn_operand (XEXP (fnaddr, 0), Pmode)
21978 : !call_insn_operand (XEXP (fnaddr, 0), Pmode))
21980 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
21981 fnaddr = gen_rtx_MEM (QImode, fnaddr);
21984 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
21986 call = gen_rtx_SET (VOIDmode, retval, call);
21989 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
21990 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
21991 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
21993 if (TARGET_64BIT_MS_ABI
21994 && (!callarg2 || INTVAL (callarg2) != -2))
      /* We need to represent that SI and DI registers are clobbered
	 by the call.  */
      static int clobbered_registers[] = {
21999 XMM6_REG, XMM7_REG, XMM8_REG,
22000 XMM9_REG, XMM10_REG, XMM11_REG,
22001 XMM12_REG, XMM13_REG, XMM14_REG,
22002 XMM15_REG, SI_REG, DI_REG
22005 rtx vec[ARRAY_SIZE (clobbered_registers) + 2];
22006 rtx unspec = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
22007 UNSPEC_MS_TO_SYSV_CALL);
22011 for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
22012 vec[i + 2] = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
22015 (SSE_REGNO_P (clobbered_registers[i])
22017 clobbered_registers[i]));
22019 call = gen_rtx_PARALLEL (VOIDmode,
22020 gen_rtvec_v (ARRAY_SIZE (clobbered_registers)
22024 /* Add UNSPEC_CALL_NEEDS_VZEROUPPER decoration. */
22025 if (TARGET_VZEROUPPER)
22030 if (cfun->machine->callee_pass_avx256_p)
22032 if (cfun->machine->callee_return_avx256_p)
22033 avx256 = callee_return_pass_avx256;
22035 avx256 = callee_pass_avx256;
22037 else if (cfun->machine->callee_return_avx256_p)
22038 avx256 = callee_return_avx256;
22040 avx256 = call_no_avx256;
22042 if (reload_completed)
22043 emit_insn (gen_avx_vzeroupper (GEN_INT (avx256)));
22046 unspec = gen_rtx_UNSPEC (VOIDmode,
22047 gen_rtvec (1, GEN_INT (avx256)),
22048 UNSPEC_CALL_NEEDS_VZEROUPPER);
22049 call = gen_rtx_PARALLEL (VOIDmode,
22050 gen_rtvec (2, call, unspec));
22054 call = emit_call_insn (call);
  if (use)
    CALL_INSN_FUNCTION_USAGE (call) = use;
void
ix86_split_call_vzeroupper (rtx insn, rtx vzeroupper)
{
  rtx call = XVECEXP (PATTERN (insn), 0, 0);
  emit_insn (gen_avx_vzeroupper (vzeroupper));
  emit_call_insn (call);
}
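/* Sketch of the transformation performed here: a decorated call such as

	(parallel [(call ...)
		   (unspec [(const_int avx256)]
			   UNSPEC_CALL_NEEDS_VZEROUPPER)])

   is split into an explicit vzeroupper followed by the plain call, so the
   upper 128 bits of the AVX registers are cleared before crossing the
   (possibly SSE-only) call boundary.  */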
22069 /* Output the assembly for a call instruction. */
const char *
ix86_output_call_insn (rtx insn, rtx call_op, int addr_op)
{
22074 bool direct_p = constant_call_address_operand (call_op, Pmode);
22075 bool seh_nop_p = false;
22077 gcc_assert (addr_op == 0 || addr_op == 1);
  if (SIBLING_CALL_P (insn))
    {
      if (direct_p)
	return addr_op ? "jmp\t%P1" : "jmp\t%P0";
      /* SEH epilogue detection requires the indirect branch case
	 to include REX.W.  */
      else if (TARGET_SEH)
	return addr_op ? "rex.W jmp %A1" : "rex.W jmp %A0";
      else
	return addr_op ? "jmp\t%A1" : "jmp\t%A0";
    }
22091 /* SEH unwinding can require an extra nop to be emitted in several
     circumstances.  Determine if we have one of those.  */
  if (TARGET_SEH)
    {
      rtx i;

      for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
	{
22099 /* If we get to another real insn, we don't need the nop. */
22103 /* If we get to the epilogue note, prevent a catch region from
22104 being adjacent to the standard epilogue sequence. If non-
22105 call-exceptions, we'll have done this during epilogue emission. */
22106 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
22107 && !flag_non_call_exceptions
	      && !can_throw_internal (insn))
	    {
	      seh_nop_p = true;
	      break;
	    }
22115 /* If we didn't find a real insn following the call, prevent the
	 unwinder from looking into the next function.  */
      if (i == NULL)
	seh_nop_p = true;
    }

  if (direct_p)
    return seh_nop_p
	   ? (addr_op ? "call\t%P1\n\tnop" : "call\t%P0\n\tnop")
	   : (addr_op ? "call\t%P1" : "call\t%P0");
  else
    return seh_nop_p
	   ? (addr_op ? "call\t%A1\n\tnop" : "call\t%A0\n\tnop")
	   : (addr_op ? "call\t%A1" : "call\t%A0");
}
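/* Operand-modifier cheat sheet for the templates above (i386 conventions):
   %P prints a direct call target, %A prints an indirect one with the
   leading '*'.  For example, a symbolic target comes out as "call\tfoo",
   while an indirect call through %rax comes out as "call\t*%rax".  The
   trailing "nop" keeps the return address inside the current function for
   the Windows SEH unwinder when the call is the last real insn.  */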
22137 /* Clear stack slot assignments remembered from previous functions.
   This is called from INIT_EXPANDERS once before RTL is emitted for each
   function.  */
22141 static struct machine_function *
22142 ix86_init_machine_status (void)
22144 struct machine_function *f;
22146 f = ggc_alloc_cleared_machine_function ();
22147 f->use_fast_prologue_epilogue_nregs = -1;
22148 f->tls_descriptor_call_expanded_p = 0;
  f->call_abi = ix86_abi;

  return f;
}
22154 /* Return a MEM corresponding to a stack slot with mode MODE.
22155 Allocate a new slot if necessary.
22157 The RTL for a function can have several slots available: N is
22158 which slot to use. */
static rtx
assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
{
22163 struct stack_local_entry *s;
22165 gcc_assert (n < MAX_386_STACK_LOCALS);
22167 /* Virtual slot is valid only before vregs are instantiated. */
22168 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
22170 for (s = ix86_stack_locals; s; s = s->next)
22171 if (s->mode == mode && s->n == n)
22172 return copy_rtx (s->rtl);
  s = ggc_alloc_stack_local_entry ();
  s->n = n;
  s->mode = mode;
22177 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
22179 s->next = ix86_stack_locals;
  ix86_stack_locals = s;

  return s->rtl;
}
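/* Usage sketch: assign_386_stack_local (SImode, SLOT_TEMP) hands back a MEM
   for a per-function scratch slot; a later request with the same mode and
   slot number returns (a copy of) the same RTL instead of allocating a new
   stack slot, which is why the list above is searched first.  */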
22184 /* Calculate the length of the memory address in the instruction
22185 encoding. Does not include the one-byte modrm, opcode, or prefix. */
int
memory_address_length (rtx addr)
{
  struct ix86_address parts;
  rtx base, index, disp;
  int len;
  int ok;
22195 if (GET_CODE (addr) == PRE_DEC
22196 || GET_CODE (addr) == POST_INC
22197 || GET_CODE (addr) == PRE_MODIFY
      || GET_CODE (addr) == POST_MODIFY)
    return 0;

  ok = ix86_decompose_address (addr, &parts);
  gcc_assert (ok);
22204 if (parts.base && GET_CODE (parts.base) == SUBREG)
22205 parts.base = SUBREG_REG (parts.base);
22206 if (parts.index && GET_CODE (parts.index) == SUBREG)
22207 parts.index = SUBREG_REG (parts.index);
  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  len = 0;

  /* Rule of thumb:
22215 - esp as the base always wants an index,
22216 - ebp as the base always wants a displacement,
22217 - r12 as the base always wants an index,
22218 - r13 as the base always wants a displacement. */
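/* Worked examples of the quirks above (lengths exclude the modrm byte
   itself, matching this function's convention):

	(%eax)		-> 0	plain register-indirect modrm
	(%esp)		-> 1	needs a SIB byte
	(%ebp)		-> 1	mod 00 r/m 101 means disp32, so a zero
				disp8 must be encoded instead
	8(%ebx)		-> 1	disp8
	8(%ebx,%ecx,4)	-> 2	SIB byte plus disp8

   %r12 and %r13 inherit the %esp/%ebp special cases in 64-bit mode because
   they share the same low three register-number bits.  */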
22220 /* Register Indirect. */
22221 if (base && !index && !disp)
22223 /* esp (for its index) and ebp (for its displacement) need
	 the two-byte modrm form.  Similarly for r12 and r13 in 64-bit
	 mode.  */
      if (REG_P (addr)
22227 && (addr == arg_pointer_rtx
22228 || addr == frame_pointer_rtx
22229 || REGNO (addr) == SP_REG
22230 || REGNO (addr) == BP_REG
22231 || REGNO (addr) == R12_REG
	      || REGNO (addr) == R13_REG))
	len = 1;
    }
22236 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
22237 is not disp32, but disp32(%rip), so for disp32
22238 SIB byte is needed, unless print_operand_address
     optimizes it into disp32(%rip) or (%rip) is implied
     by UNSPEC.  */
  else if (disp && !base && !index)
    {
      len = 4;
      if (TARGET_64BIT)
	{
	  rtx symbol = disp;

	  if (GET_CODE (disp) == CONST)
	    symbol = XEXP (disp, 0);
22249 symbol = XEXP (disp, 0);
22250 if (GET_CODE (symbol) == PLUS
22251 && CONST_INT_P (XEXP (symbol, 1)))
22252 symbol = XEXP (symbol, 0);
22254 if (GET_CODE (symbol) != LABEL_REF
22255 && (GET_CODE (symbol) != SYMBOL_REF
22256 || SYMBOL_REF_TLS_MODEL (symbol) != 0)
22257 && (GET_CODE (symbol) != UNSPEC
22258 || (XINT (symbol, 1) != UNSPEC_GOTPCREL
22259 && XINT (symbol, 1) != UNSPEC_PCREL
		      && XINT (symbol, 1) != UNSPEC_GOTNTPOFF)))
	    len += 1;
	}
    }
  else
    {
      /* Find the length of the displacement constant.  */
      if (disp)
	{
	  if (base && satisfies_constraint_K (disp))
	    len = 1;
	  else
	    len = 4;
	}
      /* ebp always wants a displacement.  Similarly r13.  */
      else if (base && REG_P (base)
	       && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
	len = 1;
      /* An index requires the two-byte modrm form....  */
      if (index
	  /* ...like esp (or r12), which always wants an index.  */
	  || base == arg_pointer_rtx
	  || base == frame_pointer_rtx
	  || (base && REG_P (base)
	      && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
	len += 1;
    }
/* Compute default value for "length_immediate" attribute.  When SHORTFORM
   is set, expect that the insn has an 8-bit immediate alternative.  */

int
ix86_attr_length_immediate_default (rtx insn, int shortform)
{
  int len = 0;
  int i;

22310 extract_insn_cached (insn);
22311 for (i = recog_data.n_operands - 1; i >= 0; --i)
22312 if (CONSTANT_P (recog_data.operand[i]))
      {
	enum attr_mode mode = get_attr_mode (insn);

22317 if (shortform && CONST_INT_P (recog_data.operand[i]))
22319 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
22326 ival = trunc_int_for_mode (ival, HImode);
22329 ival = trunc_int_for_mode (ival, SImode);
22334 if (IN_RANGE (ival, -128, 127))
22351 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
	  case MODE_DI:
	    len = 4;
	    break;
	  default:
	    fatal_insn ("unknown insn mode", insn);
	  }
      }
  return len;
}
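/* Worked example of the SHORTFORM test: most ALU instructions have both a
   full-immediate and a sign-extended-imm8 encoding, e.g.

	81 c0 80 00 00 00	add $0x80, %eax		(4-byte immediate)
	83 c0 7f		add $0x7f, %eax		(1-byte immediate)

   so an immediate in [-128, 127] (after truncation to the operand mode)
   counts as one byte here, and anything wider as the full 2 or 4 bytes.  */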
22361 /* Compute default value for "length_address" attribute. */
int
ix86_attr_length_address_default (rtx insn)
{
  int i;

22367 if (get_attr_type (insn) == TYPE_LEA)
22369 rtx set = PATTERN (insn), addr;
22371 if (GET_CODE (set) == PARALLEL)
22372 set = XVECEXP (set, 0, 0);
22374 gcc_assert (GET_CODE (set) == SET);
22376 addr = SET_SRC (set);
22377 if (TARGET_64BIT && get_attr_mode (insn) == MODE_SI)
22379 if (GET_CODE (addr) == ZERO_EXTEND)
22380 addr = XEXP (addr, 0);
22381 if (GET_CODE (addr) == SUBREG)
22382 addr = SUBREG_REG (addr);
22385 return memory_address_length (addr);
22388 extract_insn_cached (insn);
22389 for (i = recog_data.n_operands - 1; i >= 0; --i)
22390 if (MEM_P (recog_data.operand[i]))
22392 constrain_operands_cached (reload_completed);
22393 if (which_alternative != -1)
22395 const char *constraints = recog_data.constraints[i];
22396 int alt = which_alternative;
	    while (*constraints == '=' || *constraints == '+')
	      constraints++;
	    while (alt-- > 0)
	      while (*constraints++ != ',')
		;
	    /* Skip ignored operands.  */
	    if (*constraints == 'X')
	      continue;
	  }
	return memory_address_length (XEXP (recog_data.operand[i], 0));
      }
  return 0;
}
22412 /* Compute default value for "length_vex" attribute. It includes
22413 2 or 3 byte VEX prefix and 1 opcode byte. */
int
ix86_attr_length_vex_default (rtx insn, int has_0f_opcode,
			      int has_vex_w)
{
  int i;

22421 /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
22422 byte VEX prefix. */
  if (!has_0f_opcode || has_vex_w)
    return 3 + 1;

  /* We can always use 2 byte VEX prefix in 32bit.  */
  if (!TARGET_64BIT)
    return 2 + 1;
22430 extract_insn_cached (insn);
22432 for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (REG_P (recog_data.operand[i]))
      {
	/* REX.W bit uses 3 byte VEX prefix.  */
	if (GET_MODE (recog_data.operand[i]) == DImode
	    && GENERAL_REG_P (recog_data.operand[i]))
	  return 3 + 1;
      }
    /* REX.X or REX.B bits use 3 byte VEX prefix.  */
    else if (MEM_P (recog_data.operand[i])
	     && x86_extended_reg_mentioned_p (recog_data.operand[i]))
      return 3 + 1;

  return 2 + 1;
}
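/* Encoding background for the 2-vs-3 byte choice above: the two-byte VEX
   prefix (C5 xx) hard-wires the 0F escape and has no room for the W bit or
   for the inverted X/B bits that select extended registers in an index or
   base, e.g.

	c5 f8 58 c1		vaddps %xmm1, %xmm0, %xmm0
	c4 c1 78 58 c0		vaddps %xmm8, %xmm0, %xmm0

   which is why any of those features forces the three-byte C4 form.  */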
22451 /* Return the maximum number of instructions a cpu can issue. */
static int
ix86_issue_rate (void)
{
  switch (ix86_tune)
    {
    case PROCESSOR_PENTIUM:
    case PROCESSOR_ATOM:
    case PROCESSOR_K6:
      return 2;

22463 case PROCESSOR_PENTIUMPRO:
22464 case PROCESSOR_PENTIUM4:
22465 case PROCESSOR_CORE2_32:
22466 case PROCESSOR_CORE2_64:
22467 case PROCESSOR_COREI7_32:
22468 case PROCESSOR_COREI7_64:
    case PROCESSOR_ATHLON:
    case PROCESSOR_K8:
22471 case PROCESSOR_AMDFAM10:
22472 case PROCESSOR_NOCONA:
22473 case PROCESSOR_GENERIC32:
22474 case PROCESSOR_GENERIC64:
22475 case PROCESSOR_BDVER1:
    case PROCESSOR_BTVER1:
      return 3;

    default:
      return 1;
    }
}
/* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
   by DEP_INSN and nothing else that DEP_INSN sets.  */

static int
ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
{
  rtx set, set2;

22492 /* Simplify the test for uninteresting insns. */
22493 if (insn_type != TYPE_SETCC
22494 && insn_type != TYPE_ICMOV
22495 && insn_type != TYPE_FCMOV
      && insn_type != TYPE_IBR)
    return 0;

  if ((set = single_set (dep_insn)) != 0)
    {
      set = SET_DEST (set);
      set2 = NULL_RTX;
    }
22504 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
22505 && XVECLEN (PATTERN (dep_insn), 0) == 2
22506 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
22507 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
22509 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
      set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
    }
  else
    return 0;

  if (!REG_P (set) || REGNO (set) != FLAGS_REG)
    return 0;
22518 /* This test is true if the dependent insn reads the flags but
22519 not any other potentially set register. */
  if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
    return 0;

  if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
    return 0;

  return 1;
}
/* Return true iff USE_INSN has a memory address with operands set by
   SET_INSN.  */

static bool
ix86_agi_dependent (rtx set_insn, rtx use_insn)
{
  int i;

  extract_insn_cached (use_insn);
22537 for (i = recog_data.n_operands - 1; i >= 0; --i)
22538 if (MEM_P (recog_data.operand[i]))
22540 rtx addr = XEXP (recog_data.operand[i], 0);
	return modified_in_p (addr, set_insn) != 0;
      }

  return false;
}
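/* Concrete AGI example on Pentium (set_insn writes a register that the
   following insn's address uses):

	mov %esp, %ebx		; set_insn writes %ebx
	mov (%ebx), %eax	; use_insn needs %ebx for address generation

   The address-generation interlock makes the second insn wait an extra
   cycle, which ix86_adjust_cost models by bumping the dependence cost.  */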
static int
ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
{
22549 enum attr_type insn_type, dep_insn_type;
22550 enum attr_memory memory;
  rtx set, set2;
  int dep_insn_code_number;
22554 /* Anti and output dependencies have zero cost on all CPUs. */
  if (REG_NOTE_KIND (link) != 0)
    return 0;
22558 dep_insn_code_number = recog_memoized (dep_insn);
22560 /* If we can't recognize the insns, we can't really do anything. */
  if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
    return cost;
22564 insn_type = get_attr_type (insn);
  dep_insn_type = get_attr_type (dep_insn);

  switch (ix86_tune)
    {
22569 case PROCESSOR_PENTIUM:
22570 /* Address Generation Interlock adds a cycle of latency. */
      if (insn_type == TYPE_LEA)
	{
	  rtx addr = PATTERN (insn);
22575 if (GET_CODE (addr) == PARALLEL)
22576 addr = XVECEXP (addr, 0, 0);
22578 gcc_assert (GET_CODE (addr) == SET);
22580 addr = SET_SRC (addr);
	  if (modified_in_p (addr, dep_insn))
	    cost += 1;
	}
      else if (ix86_agi_dependent (dep_insn, insn))
	cost += 1;
22587 /* ??? Compares pair with jump/setcc. */
      if (ix86_flags_dependent (insn, dep_insn, insn_type))
	cost = 0;
22591 /* Floating point stores require value to be ready one cycle earlier. */
22592 if (insn_type == TYPE_FMOV
22593 && get_attr_memory (insn) == MEMORY_STORE
	  && !ix86_agi_dependent (dep_insn, insn))
	cost += 1;
      break;
22598 case PROCESSOR_PENTIUMPRO:
22599 memory = get_attr_memory (insn);
22601 /* INT->FP conversion is expensive. */
22602 if (get_attr_fp_int_src (dep_insn))
22605 /* There is one cycle extra latency between an FP op and a store. */
22606 if (insn_type == TYPE_FMOV
22607 && (set = single_set (dep_insn)) != NULL_RTX
22608 && (set2 = single_set (insn)) != NULL_RTX
22609 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
	  && MEM_P (SET_DEST (set2)))
	cost += 1;
22613 /* Show ability of reorder buffer to hide latency of load by executing
22614 in parallel with previous instruction in case
22615 previous instruction is not needed to compute the address. */
22616 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
	  && !ix86_agi_dependent (dep_insn, insn))
	{
	  /* Claim moves to take one cycle, as the core can issue one load
	     at a time and the next load can start a cycle later.  */
	  if (dep_insn_type == TYPE_IMOV
	      || dep_insn_type == TYPE_FMOV)
	    cost = 1;
	  else if (cost > 1)
	    cost--;
	}
      break;

    case PROCESSOR_K6:
22630 memory = get_attr_memory (insn);
      /* The esp dependency is resolved before the instruction is really
	 finished.  */
22634 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
	  && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
	return 1;
22638 /* INT->FP conversion is expensive. */
22639 if (get_attr_fp_int_src (dep_insn))
22642 /* Show ability of reorder buffer to hide latency of load by executing
22643 in parallel with previous instruction in case
22644 previous instruction is not needed to compute the address. */
22645 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
	  && !ix86_agi_dependent (dep_insn, insn))
	{
	  /* Claim moves to take one cycle, as the core can issue one load
	     at a time and the next load can start a cycle later.  */
	  if (dep_insn_type == TYPE_IMOV
	      || dep_insn_type == TYPE_FMOV)
	    cost = 1;
	  else if (cost > 2)
	    cost -= 2;
	  else
	    cost = 1;
	}
      break;
22660 case PROCESSOR_ATHLON:
22662 case PROCESSOR_AMDFAM10:
22663 case PROCESSOR_BDVER1:
22664 case PROCESSOR_BTVER1:
22665 case PROCESSOR_ATOM:
22666 case PROCESSOR_GENERIC32:
22667 case PROCESSOR_GENERIC64:
22668 memory = get_attr_memory (insn);
22670 /* Show ability of reorder buffer to hide latency of load by executing
22671 in parallel with previous instruction in case
22672 previous instruction is not needed to compute the address. */
22673 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
22674 && !ix86_agi_dependent (dep_insn, insn))
	{
	  enum attr_unit unit = get_attr_unit (insn);
	  int loadcost = 3;

22679 /* Because of the difference between the length of integer and
22680 floating unit pipeline preparation stages, the memory operands
22681 for floating point are cheaper.
	     ???  For Athlon the difference is most probably 2.  */
	  if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
	    loadcost = 3;
	  else
	    loadcost = TARGET_ATHLON ? 2 : 0;

	  if (cost >= loadcost)
	    cost -= loadcost;
	  else
	    cost = 0;
	}
      break;

    default:
      break;
    }

  return cost;
}
22702 /* How many alternative schedules to try. This should be as wide as the
22703 scheduling freedom in the DFA, but no wider. Making this value too
   large results in extra work for the scheduler.  */
static int
ia32_multipass_dfa_lookahead (void)
{
  switch (ix86_tune)
    {
    case PROCESSOR_PENTIUM:
      return 2;

    case PROCESSOR_PENTIUMPRO:
    case PROCESSOR_K6:
      return 1;

22718 case PROCESSOR_CORE2_32:
22719 case PROCESSOR_CORE2_64:
22720 case PROCESSOR_COREI7_32:
22721 case PROCESSOR_COREI7_64:
22722 /* Generally, we want haifa-sched:max_issue() to look ahead as far
22723 as many instructions can be executed on a cycle, i.e.,
22724 issue_rate. I wonder why tuning for many CPUs does not do this. */
      return ix86_issue_rate ();

    default:
      return 0;
    }
}
22734 /* Model decoder of Core 2/i7.
22735 Below hooks for multipass scheduling (see haifa-sched.c:max_issue)
22736 track the instruction fetch block boundaries and make sure that long
22737 (9+ bytes) instructions are assigned to D0. */
22739 /* Maximum length of an insn that can be handled by
22740 a secondary decoder unit. '8' for Core 2/i7. */
22741 static int core2i7_secondary_decoder_max_insn_size;
22743 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
22744 '16' for Core 2/i7. */
22745 static int core2i7_ifetch_block_size;
22747 /* Maximum number of instructions decoder can handle per cycle.
22748 '6' for Core 2/i7. */
22749 static int core2i7_ifetch_block_max_insns;
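/* Worked example of the decoder constraints these parameters encode
   (with the values set in ix86_sched_init_global below): the front end
   fetches a 16-byte block per cycle and decodes at most 6 insns from it,
   and only the first (complex) decoder accepts insns longer than 8 bytes.
   So a 10-byte insn on the ready list is only issuable as the first insn
   of a cycle, and a third 7-byte insn after two 6-byte ones is rejected
   because 6 + 6 + 7 > 16.  */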
22751 typedef struct ix86_first_cycle_multipass_data_ *
22752 ix86_first_cycle_multipass_data_t;
22753 typedef const struct ix86_first_cycle_multipass_data_ *
22754 const_ix86_first_cycle_multipass_data_t;
/* A variable to store target state across calls to max_issue within
   one cycle.  */
22758 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
22759 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
22761 /* Initialize DATA. */
static void
core2i7_first_cycle_multipass_init (void *_data)
{
22765 ix86_first_cycle_multipass_data_t data
22766 = (ix86_first_cycle_multipass_data_t) _data;
22768 data->ifetch_block_len = 0;
22769 data->ifetch_block_n_insns = 0;
22770 data->ready_try_change = NULL;
  data->ready_try_change_size = 0;
}
22774 /* Advancing the cycle; reset ifetch block counts. */
static void
core2i7_dfa_post_advance_cycle (void)
{
22778 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
22780 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
22782 data->ifetch_block_len = 0;
  data->ifetch_block_n_insns = 0;
}
22786 static int min_insn_size (rtx);
22788 /* Filter out insns from ready_try that the core will not be able to issue
22789 on current cycle due to decoder. */
static void
core2i7_first_cycle_multipass_filter_ready_try
  (const_ix86_first_cycle_multipass_data_t data,
   char *ready_try, int n_ready, bool first_cycle_insn_p)
{
  rtx insn;
  int insn_size;

  while (n_ready--)
    {
      if (ready_try[n_ready])
	continue;
22803 insn = get_ready_element (n_ready);
22804 insn_size = min_insn_size (insn);
      if (/* If this insn is too long for a secondary decoder ... */
22807 (!first_cycle_insn_p
22808 && insn_size > core2i7_secondary_decoder_max_insn_size)
22809 /* ... or it would not fit into the ifetch block ... */
22810 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
22811 /* ... or the decoder is full already ... */
22812 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
22813 /* ... mask the insn out. */
	{
	  ready_try[n_ready] = 1;

	  if (data->ready_try_change)
	    SET_BIT (data->ready_try_change, n_ready);
	}
    }
}
22823 /* Prepare for a new round of multipass lookahead scheduling. */
static void
core2i7_first_cycle_multipass_begin (void *_data, char *ready_try, int n_ready,
				     bool first_cycle_insn_p)
{
22828 ix86_first_cycle_multipass_data_t data
22829 = (ix86_first_cycle_multipass_data_t) _data;
22830 const_ix86_first_cycle_multipass_data_t prev_data
22831 = ix86_first_cycle_multipass_data;
22833 /* Restore the state from the end of the previous round. */
22834 data->ifetch_block_len = prev_data->ifetch_block_len;
22835 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
22837 /* Filter instructions that cannot be issued on current cycle due to
22838 decoder restrictions. */
22839 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
						  first_cycle_insn_p);
}
22843 /* INSN is being issued in current solution. Account for its impact on
22844 the decoder model. */
static void
core2i7_first_cycle_multipass_issue (void *_data, char *ready_try, int n_ready,
				     rtx insn, const void *_prev_data)
{
22849 ix86_first_cycle_multipass_data_t data
22850 = (ix86_first_cycle_multipass_data_t) _data;
22851 const_ix86_first_cycle_multipass_data_t prev_data
22852 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
22854 int insn_size = min_insn_size (insn);
22856 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
22857 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
22858 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
22859 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
22861 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
22862 if (!data->ready_try_change)
    {
      data->ready_try_change = sbitmap_alloc (n_ready);
22865 data->ready_try_change_size = n_ready;
  else if (data->ready_try_change_size < n_ready)
    {
      data->ready_try_change = sbitmap_resize (data->ready_try_change,
					       n_ready, 0);
      data->ready_try_change_size = n_ready;
    }
  sbitmap_zero (data->ready_try_change);
22875 /* Filter out insns from ready_try that the core will not be able to issue
22876 on current cycle due to decoder. */
  core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
						  false);
}
22881 /* Revert the effect on ready_try. */
static void
core2i7_first_cycle_multipass_backtrack (const void *_data,
					 char *ready_try,
					 int n_ready ATTRIBUTE_UNUSED)
{
22887 const_ix86_first_cycle_multipass_data_t data
22888 = (const_ix86_first_cycle_multipass_data_t) _data;
22889 unsigned int i = 0;
22890 sbitmap_iterator sbi;
22892 gcc_assert (sbitmap_last_set_bit (data->ready_try_change) < n_ready);
  EXECUTE_IF_SET_IN_SBITMAP (data->ready_try_change, 0, i, sbi)
    {
      ready_try[i] = 0;
    }
}
22899 /* Save the result of multipass lookahead scheduling for the next round. */
static void
core2i7_first_cycle_multipass_end (const void *_data)
{
22903 const_ix86_first_cycle_multipass_data_t data
22904 = (const_ix86_first_cycle_multipass_data_t) _data;
22905 ix86_first_cycle_multipass_data_t next_data
    = ix86_first_cycle_multipass_data;

  if (data != NULL)
    {
22910 next_data->ifetch_block_len = data->ifetch_block_len;
      next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
    }
}
22915 /* Deallocate target data. */
static void
core2i7_first_cycle_multipass_fini (void *_data)
{
22919 ix86_first_cycle_multipass_data_t data
22920 = (ix86_first_cycle_multipass_data_t) _data;
22922 if (data->ready_try_change)
    {
      sbitmap_free (data->ready_try_change);
22925 data->ready_try_change = NULL;
      data->ready_try_change_size = 0;
    }
}
22930 /* Prepare for scheduling pass. */
static void
ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
22933 int verbose ATTRIBUTE_UNUSED,
			int max_uid ATTRIBUTE_UNUSED)
{
22936 /* Install scheduling hooks for current CPU. Some of these hooks are used
22937 in time-critical parts of the scheduler, so we only set them up when
     they are actually used.  */
  switch (ix86_tune)
    {
22941 case PROCESSOR_CORE2_32:
22942 case PROCESSOR_CORE2_64:
22943 case PROCESSOR_COREI7_32:
22944 case PROCESSOR_COREI7_64:
22945 targetm.sched.dfa_post_advance_cycle
22946 = core2i7_dfa_post_advance_cycle;
22947 targetm.sched.first_cycle_multipass_init
22948 = core2i7_first_cycle_multipass_init;
22949 targetm.sched.first_cycle_multipass_begin
22950 = core2i7_first_cycle_multipass_begin;
22951 targetm.sched.first_cycle_multipass_issue
22952 = core2i7_first_cycle_multipass_issue;
22953 targetm.sched.first_cycle_multipass_backtrack
22954 = core2i7_first_cycle_multipass_backtrack;
22955 targetm.sched.first_cycle_multipass_end
22956 = core2i7_first_cycle_multipass_end;
22957 targetm.sched.first_cycle_multipass_fini
22958 = core2i7_first_cycle_multipass_fini;
22960 /* Set decoder parameters. */
22961 core2i7_secondary_decoder_max_insn_size = 8;
22962 core2i7_ifetch_block_size = 16;
      core2i7_ifetch_block_max_insns = 6;
      break;

    default:
22967 targetm.sched.dfa_post_advance_cycle = NULL;
22968 targetm.sched.first_cycle_multipass_init = NULL;
22969 targetm.sched.first_cycle_multipass_begin = NULL;
22970 targetm.sched.first_cycle_multipass_issue = NULL;
22971 targetm.sched.first_cycle_multipass_backtrack = NULL;
22972 targetm.sched.first_cycle_multipass_end = NULL;
      targetm.sched.first_cycle_multipass_fini = NULL;
      break;
    }
}
/* Compute the alignment given to a constant that is being placed in memory.
   EXP is the constant and ALIGN is the alignment that the object would
   ordinarily have.
   The value of this function is used instead of that alignment to align
   the object.  */

int
ix86_constant_alignment (tree exp, int align)
{
  if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
      || TREE_CODE (exp) == INTEGER_CST)
    {
      if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
	return 64;
      else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
	return 128;
    }
22996 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
22997 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
    return BITS_PER_WORD;

  return align;
}
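/* Illustrative effect of the rules above: a DFmode constant such as 1.0
   placed in .rodata gets 64-bit alignment so movsd/fld never straddle an
   alignment boundary, and a long string constant is word-aligned, which
   lets block-copy code use full-word accesses on its start.  */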
23003 /* Compute the alignment for a static variable.
23004 TYPE is the data type, and ALIGN is the alignment that
23005 the object would ordinarily have. The value of this function is used
23006 instead of that alignment to align the object. */
int
ix86_data_alignment (tree type, int align)
{
  int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);

23013 if (AGGREGATE_TYPE_P (type)
23014 && TYPE_SIZE (type)
23015 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
23016 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
23017 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
      && align < max_align)
    return max_align;
  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to a 16-byte boundary.  */
  if (TARGET_64BIT)
    {
23025 if (AGGREGATE_TYPE_P (type)
23026 && TYPE_SIZE (type)
23027 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
23028 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
	      || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
	return 128;
    }
  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if ((TYPE_MODE (type) == XCmode
	   || TYPE_MODE (type) == TCmode) && align < 128)
	return 128;
    }
23049 else if ((TREE_CODE (type) == RECORD_TYPE
23050 || TREE_CODE (type) == UNION_TYPE
23051 || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }

  return align;
}
23071 /* Compute the alignment for a local variable or a stack slot. EXP is
23072 the data type or decl itself, MODE is the widest mode available and
23073 ALIGN is the alignment that the object would ordinarily have. The
   value of this macro is used instead of that alignment to align
   the object.  */

unsigned int
ix86_local_alignment (tree exp, enum machine_mode mode,
		      unsigned int align)
{
  tree type, decl;

  if (exp && DECL_P (exp))
    {
      type = TREE_TYPE (exp);
      decl = exp;
    }
  else
    {
      type = exp;
      decl = NULL;
    }
23094 /* Don't do dynamic stack realignment for long long objects with
23095 -mpreferred-stack-boundary=2. */
  if (!TARGET_64BIT
      && align == 64
      && ix86_preferred_stack_boundary < 64
23099 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
23100 && (!type || !TYPE_USER_ALIGN (type))
      && (!decl || !DECL_USER_ALIGN (decl)))
    align = 32;
  /* If TYPE is NULL, we are allocating a stack slot for caller-save
     register in MODE.  We will return the largest alignment of XF
     and DF mode.  */
  if (!type)
    {
      if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
	align = GET_MODE_ALIGNMENT (DFmode);
      return align;
    }
  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to a 16-byte boundary.  Exact wording is:

     An array uses the same alignment as its elements, except that a local
     or global array variable of length at least 16 bytes or a C99
     variable-length array variable always has alignment of at least 16
     bytes.

     This was added to allow use of aligned SSE instructions on arrays.
     The rule is meant for static storage (where the compiler cannot do the
     analysis by itself).  We follow it for automatic variables only when
     convenient.  We fully control everything in the function being
     compiled, and functions from other units cannot rely on the alignment.

     Exclude the va_list type.  It is the common case of a local array
     where we cannot benefit from the alignment.  */
23129 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
23132 if (AGGREGATE_TYPE_P (type)
23133 && (va_list_type_node == NULL_TREE
23134 || (TYPE_MAIN_VARIANT (type)
23135 != TYPE_MAIN_VARIANT (va_list_type_node)))
23136 && TYPE_SIZE (type)
23137 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
23138 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
	      || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
	return 128;
    }
  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if ((TYPE_MODE (type) == XCmode
	   || TYPE_MODE (type) == TCmode) && align < 128)
	return 128;
    }
23157 else if ((TREE_CODE (type) == RECORD_TYPE
23158 || TREE_CODE (type) == UNION_TYPE
23159 || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }

  return align;
}
23179 /* Compute the minimum required alignment for dynamic stack realignment
23180 purposes for a local variable, parameter or a stack slot. EXP is
23181 the data type or decl itself, MODE is its mode and ALIGN is the
23182 alignment that the object would ordinarily have. */
unsigned int
ix86_minimum_alignment (tree exp, enum machine_mode mode,
			unsigned int align)
{
  tree type, decl;

  if (exp && DECL_P (exp))
    {
      type = TREE_TYPE (exp);
      decl = exp;
    }
  else
    {
      type = exp;
      decl = NULL;
    }
  if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
    return align;
23204 /* Don't do dynamic stack realignment for long long objects with
23205 -mpreferred-stack-boundary=2. */
23206 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
23207 && (!type || !TYPE_USER_ALIGN (type))
      && (!decl || !DECL_USER_ALIGN (decl)))
    return 32;

  return align;
}
23214 /* Find a location for the static chain incoming to a nested function.
23215 This is a register, unless all free registers are used by arguments. */
static rtx
ix86_static_chain (const_tree fndecl, bool incoming_p)
{
  unsigned regno;

  if (!DECL_STATIC_CHAIN (fndecl))
    return NULL;
  if (TARGET_64BIT)
    {
      /* We always use R10 in 64-bit mode.  */
      regno = R10_REG;
    }
  else
    {
      tree fntype;
      unsigned int ccvt;

      /* By default in 32-bit mode we use ECX to pass the static chain.  */
      regno = CX_REG;

23238 fntype = TREE_TYPE (fndecl);
23239 ccvt = ix86_get_callcvt (fntype);
      if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) != 0)
	{
23242 /* Fastcall functions use ecx/edx for arguments, which leaves
23243 us with EAX for the static chain.
23244 Thiscall functions use ecx for arguments, which also
	     leaves us with EAX for the static chain.  */
	  regno = AX_REG;
	}
      else if (ix86_function_regparm (fntype, fndecl) == 3)
	{
23250 /* For regparm 3, we have no free call-clobbered registers in
23251 which to store the static chain. In order to implement this,
23252 we have the trampoline push the static chain to the stack.
23253 However, we can't push a value below the return address when
23254 we call the nested function directly, so we have to use an
23255 alternate entry point. For this we use ESI, and have the
23256 alternate entry point push ESI, so that things appear the
	     same once we're executing the nested function.  */
	  if (incoming_p)
	    {
23260 if (fndecl == current_function_decl)
23261 ix86_static_chain_on_stack = true;
23262 return gen_frame_mem (SImode,
				    plus_constant (arg_pointer_rtx, -8));
	    }
	  regno = SI_REG;
	}
    }

  return gen_rtx_REG (Pmode, regno);
}
23272 /* Emit RTL insns to initialize the variable parts of a trampoline.
23273 FNDECL is the decl of the target address; M_TRAMP is a MEM for
23274 the trampoline, and CHAIN_VALUE is an RTX for the static chain
23275 to be passed to the target function. */
static void
ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx mem, fnaddr;
  int opcode;
  int offset = 0;

  fnaddr = XEXP (DECL_RTL (fndecl), 0);

  if (!TARGET_64BIT)
    {
      rtx disp, chain;

23289 /* Depending on the static chain location, either load a register
23290 with a constant, or push the constant to the stack. All of the
23291 instructions are the same size. */
      chain = ix86_static_chain (fndecl, true);
      if (REG_P (chain))
	{
	  if (REGNO (chain) == CX_REG)
	    opcode = 0xb9;
	  else if (REGNO (chain) == AX_REG)
	    opcode = 0xb8;
	  else
	    gcc_unreachable ();
	}
      else
	opcode = 0x68;
23305 mem = adjust_address (m_tramp, QImode, 0);
23306 emit_move_insn (mem, gen_int_mode (opcode, QImode));
23308 mem = adjust_address (m_tramp, SImode, 1);
23309 emit_move_insn (mem, chain_value);
23311 /* Compute offset from the end of the jmp to the target function.
23312 In the case in which the trampoline stores the static chain on
23313 the stack, we need to skip the first insn which pushes the
23314 (call-saved) register static chain; this push is 1 byte. */
23315 disp = expand_binop (SImode, sub_optab, fnaddr,
23316 plus_constant (XEXP (m_tramp, 0),
23317 MEM_P (chain) ? 9 : 10),
23318 NULL_RTX, 1, OPTAB_DIRECT);
23320 mem = adjust_address (m_tramp, QImode, 5);
23321 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
23323 mem = adjust_address (m_tramp, SImode, 6);
      emit_move_insn (mem, disp);
    }
  else
    {
23330 /* Load the function address to r11. Try to load address using
23331 the shorter movl instead of movabs. We may want to support
	 movq for kernel mode, but kernel does not use trampolines at
	 the moment.  */
      if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
	{
	  fnaddr = copy_to_mode_reg (DImode, fnaddr);
23338 mem = adjust_address (m_tramp, HImode, offset);
23339 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
23341 mem = adjust_address (m_tramp, SImode, offset + 2);
	  emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
	  offset += 6;
	}
      else
	{
23347 mem = adjust_address (m_tramp, HImode, offset);
23348 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
23350 mem = adjust_address (m_tramp, DImode, offset + 2);
	  emit_move_insn (mem, fnaddr);
	  offset += 10;
	}
23355 /* Load static chain using movabs to r10. */
23356 mem = adjust_address (m_tramp, HImode, offset);
23357 emit_move_insn (mem, gen_int_mode (0xba49, HImode));
23359 mem = adjust_address (m_tramp, DImode, offset + 2);
      emit_move_insn (mem, chain_value);
      offset += 10;
23363 /* Jump to r11; the last (unused) byte is a nop, only there to
23364 pad the write out to a single 32-bit store. */
23365 mem = adjust_address (m_tramp, SImode, offset);
      emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
      offset += 4;
    }
23369 gcc_assert (offset <= TRAMPOLINE_SIZE);
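/* Byte-level sketch of the 64-bit trampoline assembled above (the
   little-endian constants 0xbb41/0xbb49/0xba49/0x90e3ff49 decode as):

	41 bb imm32		movl  $fnaddr, %r11d	(zext-immediate case)
     or	49 bb imm64		movabs $fnaddr, %r11
	49 ba imm64		movabs $chain, %r10
	49 ff e3		rex.W jmp *%r11
	90			nop (pads the final 32-bit store)

   The 32-bit trampoline is instead "movl/pushl $chain" (opcode 0xb9, 0xb8
   or 0x68) followed by "e9 rel32", a direct jmp to the nested function.  */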
23372 #ifdef ENABLE_EXECUTE_STACK
23373 #ifdef CHECK_EXECUTE_STACK_ENABLED
  if (CHECK_EXECUTE_STACK_ENABLED)
#endif
23376 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
		     LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
#endif
}
23381 /* The following file contains several enumerations and data structures
23382 built from the definitions in i386-builtin-types.def. */
23384 #include "i386-builtin-types.inc"
23386 /* Table for the ix86 builtin non-function types. */
23387 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
23389 /* Retrieve an element from the above table, building some of
23390 the types lazily. */
static tree
ix86_get_builtin_type (enum ix86_builtin_type tcode)
{
  unsigned int index;
  tree type, itype;
  int quals;

23398 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
  type = ix86_builtin_type_tab[(int) tcode];
  if (type != NULL)
    return type;
23404 gcc_assert (tcode > IX86_BT_LAST_PRIM);
23405 if (tcode <= IX86_BT_LAST_VECT)
23407 enum machine_mode mode;
23409 index = tcode - IX86_BT_LAST_PRIM - 1;
23410 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
23411 mode = ix86_builtin_type_vect_mode[index];
      type = build_vector_type_for_mode (itype, mode);
    }
  else
    {
23419 index = tcode - IX86_BT_LAST_VECT - 1;
23420 if (tcode <= IX86_BT_LAST_PTR)
23421 quals = TYPE_UNQUALIFIED;
23423 quals = TYPE_QUAL_CONST;
23425 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
23426 if (quals != TYPE_UNQUALIFIED)
23427 itype = build_qualified_type (itype, quals);
      type = build_pointer_type (itype);
    }

  ix86_builtin_type_tab[(int) tcode] = type;
  return type;
}
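/* Usage sketch: the first request for, say, IX86_BT_V4SF builds
   build_vector_type_for_mode (float_type_node, V4SFmode) and caches it in
   ix86_builtin_type_tab; a pointer code past IX86_BT_LAST_PTR additionally
   wraps the pointed-to type with TYPE_QUAL_CONST before taking its pointer
   type.  Every later lookup for the same code is then a plain table hit.  */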
23436 /* Table for the ix86 builtin function types. */
23437 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
23439 /* Retrieve an element from the above table, building some of
23440 the types lazily. */
static tree
ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
{
  tree type;

23447 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
  type = ix86_builtin_func_type_tab[(int) tcode];
  if (type != NULL)
    return type;
  if (tcode <= IX86_BT_LAST_FUNC)
    {
23455 unsigned start = ix86_builtin_func_start[(int) tcode];
23456 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
      tree rtype, atype, args = void_list_node;
      unsigned i;

23460 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
23461 for (i = after - 1; i > start; --i)
23463 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
23464 args = tree_cons (NULL, atype, args);
      type = build_function_type (rtype, args);
    }
  else
    {
23471 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
23472 enum ix86_builtin_func_type icode;
23474 icode = ix86_builtin_func_alias_base[index];
      type = ix86_get_builtin_func_type (icode);
    }

  ix86_builtin_func_type_tab[(int) tcode] = type;
  return type;
}
/* Codes for all the SSE/MMX builtins.  */
enum ix86_builtins
{
23486 IX86_BUILTIN_ADDPS,
23487 IX86_BUILTIN_ADDSS,
23488 IX86_BUILTIN_DIVPS,
23489 IX86_BUILTIN_DIVSS,
23490 IX86_BUILTIN_MULPS,
23491 IX86_BUILTIN_MULSS,
23492 IX86_BUILTIN_SUBPS,
23493 IX86_BUILTIN_SUBSS,
23495 IX86_BUILTIN_CMPEQPS,
23496 IX86_BUILTIN_CMPLTPS,
23497 IX86_BUILTIN_CMPLEPS,
23498 IX86_BUILTIN_CMPGTPS,
23499 IX86_BUILTIN_CMPGEPS,
23500 IX86_BUILTIN_CMPNEQPS,
23501 IX86_BUILTIN_CMPNLTPS,
23502 IX86_BUILTIN_CMPNLEPS,
23503 IX86_BUILTIN_CMPNGTPS,
23504 IX86_BUILTIN_CMPNGEPS,
23505 IX86_BUILTIN_CMPORDPS,
23506 IX86_BUILTIN_CMPUNORDPS,
23507 IX86_BUILTIN_CMPEQSS,
23508 IX86_BUILTIN_CMPLTSS,
23509 IX86_BUILTIN_CMPLESS,
23510 IX86_BUILTIN_CMPNEQSS,
23511 IX86_BUILTIN_CMPNLTSS,
23512 IX86_BUILTIN_CMPNLESS,
23513 IX86_BUILTIN_CMPNGTSS,
23514 IX86_BUILTIN_CMPNGESS,
23515 IX86_BUILTIN_CMPORDSS,
23516 IX86_BUILTIN_CMPUNORDSS,
23518 IX86_BUILTIN_COMIEQSS,
23519 IX86_BUILTIN_COMILTSS,
23520 IX86_BUILTIN_COMILESS,
23521 IX86_BUILTIN_COMIGTSS,
23522 IX86_BUILTIN_COMIGESS,
23523 IX86_BUILTIN_COMINEQSS,
23524 IX86_BUILTIN_UCOMIEQSS,
23525 IX86_BUILTIN_UCOMILTSS,
23526 IX86_BUILTIN_UCOMILESS,
23527 IX86_BUILTIN_UCOMIGTSS,
23528 IX86_BUILTIN_UCOMIGESS,
23529 IX86_BUILTIN_UCOMINEQSS,
23531 IX86_BUILTIN_CVTPI2PS,
23532 IX86_BUILTIN_CVTPS2PI,
23533 IX86_BUILTIN_CVTSI2SS,
23534 IX86_BUILTIN_CVTSI642SS,
23535 IX86_BUILTIN_CVTSS2SI,
23536 IX86_BUILTIN_CVTSS2SI64,
23537 IX86_BUILTIN_CVTTPS2PI,
23538 IX86_BUILTIN_CVTTSS2SI,
23539 IX86_BUILTIN_CVTTSS2SI64,
23541 IX86_BUILTIN_MAXPS,
23542 IX86_BUILTIN_MAXSS,
23543 IX86_BUILTIN_MINPS,
23544 IX86_BUILTIN_MINSS,
23546 IX86_BUILTIN_LOADUPS,
23547 IX86_BUILTIN_STOREUPS,
23548 IX86_BUILTIN_MOVSS,
23550 IX86_BUILTIN_MOVHLPS,
23551 IX86_BUILTIN_MOVLHPS,
23552 IX86_BUILTIN_LOADHPS,
23553 IX86_BUILTIN_LOADLPS,
23554 IX86_BUILTIN_STOREHPS,
23555 IX86_BUILTIN_STORELPS,
23557 IX86_BUILTIN_MASKMOVQ,
23558 IX86_BUILTIN_MOVMSKPS,
23559 IX86_BUILTIN_PMOVMSKB,
23561 IX86_BUILTIN_MOVNTPS,
23562 IX86_BUILTIN_MOVNTQ,
23564 IX86_BUILTIN_LOADDQU,
23565 IX86_BUILTIN_STOREDQU,
23567 IX86_BUILTIN_PACKSSWB,
23568 IX86_BUILTIN_PACKSSDW,
23569 IX86_BUILTIN_PACKUSWB,
23571 IX86_BUILTIN_PADDB,
23572 IX86_BUILTIN_PADDW,
23573 IX86_BUILTIN_PADDD,
23574 IX86_BUILTIN_PADDQ,
23575 IX86_BUILTIN_PADDSB,
23576 IX86_BUILTIN_PADDSW,
23577 IX86_BUILTIN_PADDUSB,
23578 IX86_BUILTIN_PADDUSW,
23579 IX86_BUILTIN_PSUBB,
23580 IX86_BUILTIN_PSUBW,
23581 IX86_BUILTIN_PSUBD,
23582 IX86_BUILTIN_PSUBQ,
23583 IX86_BUILTIN_PSUBSB,
23584 IX86_BUILTIN_PSUBSW,
23585 IX86_BUILTIN_PSUBUSB,
23586 IX86_BUILTIN_PSUBUSW,
23589 IX86_BUILTIN_PANDN,
23593 IX86_BUILTIN_PAVGB,
23594 IX86_BUILTIN_PAVGW,
23596 IX86_BUILTIN_PCMPEQB,
23597 IX86_BUILTIN_PCMPEQW,
23598 IX86_BUILTIN_PCMPEQD,
23599 IX86_BUILTIN_PCMPGTB,
23600 IX86_BUILTIN_PCMPGTW,
23601 IX86_BUILTIN_PCMPGTD,
23603 IX86_BUILTIN_PMADDWD,
23605 IX86_BUILTIN_PMAXSW,
23606 IX86_BUILTIN_PMAXUB,
23607 IX86_BUILTIN_PMINSW,
23608 IX86_BUILTIN_PMINUB,
23610 IX86_BUILTIN_PMULHUW,
23611 IX86_BUILTIN_PMULHW,
23612 IX86_BUILTIN_PMULLW,
23614 IX86_BUILTIN_PSADBW,
23615 IX86_BUILTIN_PSHUFW,
23617 IX86_BUILTIN_PSLLW,
23618 IX86_BUILTIN_PSLLD,
23619 IX86_BUILTIN_PSLLQ,
23620 IX86_BUILTIN_PSRAW,
23621 IX86_BUILTIN_PSRAD,
23622 IX86_BUILTIN_PSRLW,
23623 IX86_BUILTIN_PSRLD,
23624 IX86_BUILTIN_PSRLQ,
23625 IX86_BUILTIN_PSLLWI,
23626 IX86_BUILTIN_PSLLDI,
23627 IX86_BUILTIN_PSLLQI,
23628 IX86_BUILTIN_PSRAWI,
23629 IX86_BUILTIN_PSRADI,
23630 IX86_BUILTIN_PSRLWI,
23631 IX86_BUILTIN_PSRLDI,
23632 IX86_BUILTIN_PSRLQI,
23634 IX86_BUILTIN_PUNPCKHBW,
23635 IX86_BUILTIN_PUNPCKHWD,
23636 IX86_BUILTIN_PUNPCKHDQ,
23637 IX86_BUILTIN_PUNPCKLBW,
23638 IX86_BUILTIN_PUNPCKLWD,
23639 IX86_BUILTIN_PUNPCKLDQ,
23641 IX86_BUILTIN_SHUFPS,
23643 IX86_BUILTIN_RCPPS,
23644 IX86_BUILTIN_RCPSS,
23645 IX86_BUILTIN_RSQRTPS,
23646 IX86_BUILTIN_RSQRTPS_NR,
23647 IX86_BUILTIN_RSQRTSS,
23648 IX86_BUILTIN_RSQRTF,
23649 IX86_BUILTIN_SQRTPS,
23650 IX86_BUILTIN_SQRTPS_NR,
23651 IX86_BUILTIN_SQRTSS,
23653 IX86_BUILTIN_UNPCKHPS,
23654 IX86_BUILTIN_UNPCKLPS,
23656 IX86_BUILTIN_ANDPS,
23657 IX86_BUILTIN_ANDNPS,
23659 IX86_BUILTIN_XORPS,
23662 IX86_BUILTIN_LDMXCSR,
23663 IX86_BUILTIN_STMXCSR,
23664 IX86_BUILTIN_SFENCE,
23666 /* 3DNow! Original */
23667 IX86_BUILTIN_FEMMS,
23668 IX86_BUILTIN_PAVGUSB,
23669 IX86_BUILTIN_PF2ID,
23670 IX86_BUILTIN_PFACC,
23671 IX86_BUILTIN_PFADD,
23672 IX86_BUILTIN_PFCMPEQ,
23673 IX86_BUILTIN_PFCMPGE,
23674 IX86_BUILTIN_PFCMPGT,
23675 IX86_BUILTIN_PFMAX,
23676 IX86_BUILTIN_PFMIN,
23677 IX86_BUILTIN_PFMUL,
23678 IX86_BUILTIN_PFRCP,
23679 IX86_BUILTIN_PFRCPIT1,
23680 IX86_BUILTIN_PFRCPIT2,
23681 IX86_BUILTIN_PFRSQIT1,
23682 IX86_BUILTIN_PFRSQRT,
23683 IX86_BUILTIN_PFSUB,
23684 IX86_BUILTIN_PFSUBR,
23685 IX86_BUILTIN_PI2FD,
23686 IX86_BUILTIN_PMULHRW,
23688 /* 3DNow! Athlon Extensions */
23689 IX86_BUILTIN_PF2IW,
23690 IX86_BUILTIN_PFNACC,
23691 IX86_BUILTIN_PFPNACC,
23692 IX86_BUILTIN_PI2FW,
23693 IX86_BUILTIN_PSWAPDSI,
23694 IX86_BUILTIN_PSWAPDSF,
23697 IX86_BUILTIN_ADDPD,
23698 IX86_BUILTIN_ADDSD,
23699 IX86_BUILTIN_DIVPD,
23700 IX86_BUILTIN_DIVSD,
23701 IX86_BUILTIN_MULPD,
23702 IX86_BUILTIN_MULSD,
23703 IX86_BUILTIN_SUBPD,
23704 IX86_BUILTIN_SUBSD,
23706 IX86_BUILTIN_CMPEQPD,
23707 IX86_BUILTIN_CMPLTPD,
23708 IX86_BUILTIN_CMPLEPD,
23709 IX86_BUILTIN_CMPGTPD,
23710 IX86_BUILTIN_CMPGEPD,
23711 IX86_BUILTIN_CMPNEQPD,
23712 IX86_BUILTIN_CMPNLTPD,
23713 IX86_BUILTIN_CMPNLEPD,
23714 IX86_BUILTIN_CMPNGTPD,
23715 IX86_BUILTIN_CMPNGEPD,
23716 IX86_BUILTIN_CMPORDPD,
23717 IX86_BUILTIN_CMPUNORDPD,
23718 IX86_BUILTIN_CMPEQSD,
23719 IX86_BUILTIN_CMPLTSD,
23720 IX86_BUILTIN_CMPLESD,
23721 IX86_BUILTIN_CMPNEQSD,
23722 IX86_BUILTIN_CMPNLTSD,
23723 IX86_BUILTIN_CMPNLESD,
23724 IX86_BUILTIN_CMPORDSD,
23725 IX86_BUILTIN_CMPUNORDSD,
23727 IX86_BUILTIN_COMIEQSD,
23728 IX86_BUILTIN_COMILTSD,
23729 IX86_BUILTIN_COMILESD,
23730 IX86_BUILTIN_COMIGTSD,
23731 IX86_BUILTIN_COMIGESD,
23732 IX86_BUILTIN_COMINEQSD,
23733 IX86_BUILTIN_UCOMIEQSD,
23734 IX86_BUILTIN_UCOMILTSD,
23735 IX86_BUILTIN_UCOMILESD,
23736 IX86_BUILTIN_UCOMIGTSD,
23737 IX86_BUILTIN_UCOMIGESD,
23738 IX86_BUILTIN_UCOMINEQSD,
23740 IX86_BUILTIN_MAXPD,
23741 IX86_BUILTIN_MAXSD,
23742 IX86_BUILTIN_MINPD,
23743 IX86_BUILTIN_MINSD,
23745 IX86_BUILTIN_ANDPD,
23746 IX86_BUILTIN_ANDNPD,
23748 IX86_BUILTIN_XORPD,
23750 IX86_BUILTIN_SQRTPD,
23751 IX86_BUILTIN_SQRTSD,
23753 IX86_BUILTIN_UNPCKHPD,
23754 IX86_BUILTIN_UNPCKLPD,
23756 IX86_BUILTIN_SHUFPD,
23758 IX86_BUILTIN_LOADUPD,
23759 IX86_BUILTIN_STOREUPD,
23760 IX86_BUILTIN_MOVSD,
23762 IX86_BUILTIN_LOADHPD,
23763 IX86_BUILTIN_LOADLPD,
23765 IX86_BUILTIN_CVTDQ2PD,
23766 IX86_BUILTIN_CVTDQ2PS,
23768 IX86_BUILTIN_CVTPD2DQ,
23769 IX86_BUILTIN_CVTPD2PI,
23770 IX86_BUILTIN_CVTPD2PS,
23771 IX86_BUILTIN_CVTTPD2DQ,
23772 IX86_BUILTIN_CVTTPD2PI,
23774 IX86_BUILTIN_CVTPI2PD,
23775 IX86_BUILTIN_CVTSI2SD,
23776 IX86_BUILTIN_CVTSI642SD,
23778 IX86_BUILTIN_CVTSD2SI,
23779 IX86_BUILTIN_CVTSD2SI64,
23780 IX86_BUILTIN_CVTSD2SS,
23781 IX86_BUILTIN_CVTSS2SD,
23782 IX86_BUILTIN_CVTTSD2SI,
23783 IX86_BUILTIN_CVTTSD2SI64,
23785 IX86_BUILTIN_CVTPS2DQ,
23786 IX86_BUILTIN_CVTPS2PD,
23787 IX86_BUILTIN_CVTTPS2DQ,
23789 IX86_BUILTIN_MOVNTI,
23790 IX86_BUILTIN_MOVNTPD,
23791 IX86_BUILTIN_MOVNTDQ,
23793 IX86_BUILTIN_MOVQ128,
23796 IX86_BUILTIN_MASKMOVDQU,
23797 IX86_BUILTIN_MOVMSKPD,
23798 IX86_BUILTIN_PMOVMSKB128,
23800 IX86_BUILTIN_PACKSSWB128,
23801 IX86_BUILTIN_PACKSSDW128,
23802 IX86_BUILTIN_PACKUSWB128,
23804 IX86_BUILTIN_PADDB128,
23805 IX86_BUILTIN_PADDW128,
23806 IX86_BUILTIN_PADDD128,
23807 IX86_BUILTIN_PADDQ128,
23808 IX86_BUILTIN_PADDSB128,
23809 IX86_BUILTIN_PADDSW128,
23810 IX86_BUILTIN_PADDUSB128,
23811 IX86_BUILTIN_PADDUSW128,
23812 IX86_BUILTIN_PSUBB128,
23813 IX86_BUILTIN_PSUBW128,
23814 IX86_BUILTIN_PSUBD128,
23815 IX86_BUILTIN_PSUBQ128,
23816 IX86_BUILTIN_PSUBSB128,
23817 IX86_BUILTIN_PSUBSW128,
23818 IX86_BUILTIN_PSUBUSB128,
23819 IX86_BUILTIN_PSUBUSW128,
23821 IX86_BUILTIN_PAND128,
23822 IX86_BUILTIN_PANDN128,
23823 IX86_BUILTIN_POR128,
23824 IX86_BUILTIN_PXOR128,
23826 IX86_BUILTIN_PAVGB128,
23827 IX86_BUILTIN_PAVGW128,
23829 IX86_BUILTIN_PCMPEQB128,
23830 IX86_BUILTIN_PCMPEQW128,
23831 IX86_BUILTIN_PCMPEQD128,
23832 IX86_BUILTIN_PCMPGTB128,
23833 IX86_BUILTIN_PCMPGTW128,
23834 IX86_BUILTIN_PCMPGTD128,
23836 IX86_BUILTIN_PMADDWD128,
23838 IX86_BUILTIN_PMAXSW128,
23839 IX86_BUILTIN_PMAXUB128,
23840 IX86_BUILTIN_PMINSW128,
23841 IX86_BUILTIN_PMINUB128,
23843 IX86_BUILTIN_PMULUDQ,
23844 IX86_BUILTIN_PMULUDQ128,
23845 IX86_BUILTIN_PMULHUW128,
23846 IX86_BUILTIN_PMULHW128,
23847 IX86_BUILTIN_PMULLW128,
23849 IX86_BUILTIN_PSADBW128,
23850 IX86_BUILTIN_PSHUFHW,
23851 IX86_BUILTIN_PSHUFLW,
23852 IX86_BUILTIN_PSHUFD,
23854 IX86_BUILTIN_PSLLDQI128,
23855 IX86_BUILTIN_PSLLWI128,
23856 IX86_BUILTIN_PSLLDI128,
23857 IX86_BUILTIN_PSLLQI128,
23858 IX86_BUILTIN_PSRAWI128,
23859 IX86_BUILTIN_PSRADI128,
23860 IX86_BUILTIN_PSRLDQI128,
23861 IX86_BUILTIN_PSRLWI128,
23862 IX86_BUILTIN_PSRLDI128,
23863 IX86_BUILTIN_PSRLQI128,
23865 IX86_BUILTIN_PSLLDQ128,
23866 IX86_BUILTIN_PSLLW128,
23867 IX86_BUILTIN_PSLLD128,
23868 IX86_BUILTIN_PSLLQ128,
23869 IX86_BUILTIN_PSRAW128,
23870 IX86_BUILTIN_PSRAD128,
23871 IX86_BUILTIN_PSRLW128,
23872 IX86_BUILTIN_PSRLD128,
23873 IX86_BUILTIN_PSRLQ128,
23875 IX86_BUILTIN_PUNPCKHBW128,
23876 IX86_BUILTIN_PUNPCKHWD128,
23877 IX86_BUILTIN_PUNPCKHDQ128,
23878 IX86_BUILTIN_PUNPCKHQDQ128,
23879 IX86_BUILTIN_PUNPCKLBW128,
23880 IX86_BUILTIN_PUNPCKLWD128,
23881 IX86_BUILTIN_PUNPCKLDQ128,
23882 IX86_BUILTIN_PUNPCKLQDQ128,
23884 IX86_BUILTIN_CLFLUSH,
23885 IX86_BUILTIN_MFENCE,
23886 IX86_BUILTIN_LFENCE,
23888 IX86_BUILTIN_BSRSI,
23889 IX86_BUILTIN_BSRDI,
23890 IX86_BUILTIN_RDPMC,
23891 IX86_BUILTIN_RDTSC,
23892 IX86_BUILTIN_RDTSCP,
23893 IX86_BUILTIN_ROLQI,
23894 IX86_BUILTIN_ROLHI,
23895 IX86_BUILTIN_RORQI,
23896 IX86_BUILTIN_RORHI,
23899 IX86_BUILTIN_ADDSUBPS,
23900 IX86_BUILTIN_HADDPS,
23901 IX86_BUILTIN_HSUBPS,
23902 IX86_BUILTIN_MOVSHDUP,
23903 IX86_BUILTIN_MOVSLDUP,
23904 IX86_BUILTIN_ADDSUBPD,
23905 IX86_BUILTIN_HADDPD,
23906 IX86_BUILTIN_HSUBPD,
23907 IX86_BUILTIN_LDDQU,
23909 IX86_BUILTIN_MONITOR,
23910 IX86_BUILTIN_MWAIT,
23913 IX86_BUILTIN_PHADDW,
23914 IX86_BUILTIN_PHADDD,
23915 IX86_BUILTIN_PHADDSW,
23916 IX86_BUILTIN_PHSUBW,
23917 IX86_BUILTIN_PHSUBD,
23918 IX86_BUILTIN_PHSUBSW,
23919 IX86_BUILTIN_PMADDUBSW,
23920 IX86_BUILTIN_PMULHRSW,
23921 IX86_BUILTIN_PSHUFB,
23922 IX86_BUILTIN_PSIGNB,
23923 IX86_BUILTIN_PSIGNW,
23924 IX86_BUILTIN_PSIGND,
23925 IX86_BUILTIN_PALIGNR,
23926 IX86_BUILTIN_PABSB,
23927 IX86_BUILTIN_PABSW,
23928 IX86_BUILTIN_PABSD,
23930 IX86_BUILTIN_PHADDW128,
23931 IX86_BUILTIN_PHADDD128,
23932 IX86_BUILTIN_PHADDSW128,
23933 IX86_BUILTIN_PHSUBW128,
23934 IX86_BUILTIN_PHSUBD128,
23935 IX86_BUILTIN_PHSUBSW128,
23936 IX86_BUILTIN_PMADDUBSW128,
23937 IX86_BUILTIN_PMULHRSW128,
23938 IX86_BUILTIN_PSHUFB128,
23939 IX86_BUILTIN_PSIGNB128,
23940 IX86_BUILTIN_PSIGNW128,
23941 IX86_BUILTIN_PSIGND128,
23942 IX86_BUILTIN_PALIGNR128,
23943 IX86_BUILTIN_PABSB128,
23944 IX86_BUILTIN_PABSW128,
23945 IX86_BUILTIN_PABSD128,
23947 /* AMDFAM10 - SSE4A New Instructions. */
23948 IX86_BUILTIN_MOVNTSD,
23949 IX86_BUILTIN_MOVNTSS,
23950 IX86_BUILTIN_EXTRQI,
23951 IX86_BUILTIN_EXTRQ,
23952 IX86_BUILTIN_INSERTQI,
23953 IX86_BUILTIN_INSERTQ,
23956 IX86_BUILTIN_BLENDPD,
23957 IX86_BUILTIN_BLENDPS,
23958 IX86_BUILTIN_BLENDVPD,
23959 IX86_BUILTIN_BLENDVPS,
23960 IX86_BUILTIN_PBLENDVB128,
23961 IX86_BUILTIN_PBLENDW128,
23966 IX86_BUILTIN_INSERTPS128,
23968 IX86_BUILTIN_MOVNTDQA,
23969 IX86_BUILTIN_MPSADBW128,
23970 IX86_BUILTIN_PACKUSDW128,
23971 IX86_BUILTIN_PCMPEQQ,
23972 IX86_BUILTIN_PHMINPOSUW128,
23974 IX86_BUILTIN_PMAXSB128,
23975 IX86_BUILTIN_PMAXSD128,
23976 IX86_BUILTIN_PMAXUD128,
23977 IX86_BUILTIN_PMAXUW128,
23979 IX86_BUILTIN_PMINSB128,
23980 IX86_BUILTIN_PMINSD128,
23981 IX86_BUILTIN_PMINUD128,
23982 IX86_BUILTIN_PMINUW128,
23984 IX86_BUILTIN_PMOVSXBW128,
23985 IX86_BUILTIN_PMOVSXBD128,
23986 IX86_BUILTIN_PMOVSXBQ128,
23987 IX86_BUILTIN_PMOVSXWD128,
23988 IX86_BUILTIN_PMOVSXWQ128,
23989 IX86_BUILTIN_PMOVSXDQ128,
23991 IX86_BUILTIN_PMOVZXBW128,
23992 IX86_BUILTIN_PMOVZXBD128,
23993 IX86_BUILTIN_PMOVZXBQ128,
23994 IX86_BUILTIN_PMOVZXWD128,
23995 IX86_BUILTIN_PMOVZXWQ128,
23996 IX86_BUILTIN_PMOVZXDQ128,
23998 IX86_BUILTIN_PMULDQ128,
23999 IX86_BUILTIN_PMULLD128,
24001 IX86_BUILTIN_ROUNDPD,
24002 IX86_BUILTIN_ROUNDPS,
24003 IX86_BUILTIN_ROUNDSD,
24004 IX86_BUILTIN_ROUNDSS,
24006 IX86_BUILTIN_FLOORPD,
24007 IX86_BUILTIN_CEILPD,
24008 IX86_BUILTIN_TRUNCPD,
24009 IX86_BUILTIN_RINTPD,
24010 IX86_BUILTIN_FLOORPS,
24011 IX86_BUILTIN_CEILPS,
24012 IX86_BUILTIN_TRUNCPS,
24013 IX86_BUILTIN_RINTPS,
24015 IX86_BUILTIN_PTESTZ,
24016 IX86_BUILTIN_PTESTC,
24017 IX86_BUILTIN_PTESTNZC,
24019 IX86_BUILTIN_VEC_INIT_V2SI,
24020 IX86_BUILTIN_VEC_INIT_V4HI,
24021 IX86_BUILTIN_VEC_INIT_V8QI,
24022 IX86_BUILTIN_VEC_EXT_V2DF,
24023 IX86_BUILTIN_VEC_EXT_V2DI,
24024 IX86_BUILTIN_VEC_EXT_V4SF,
24025 IX86_BUILTIN_VEC_EXT_V4SI,
24026 IX86_BUILTIN_VEC_EXT_V8HI,
24027 IX86_BUILTIN_VEC_EXT_V2SI,
24028 IX86_BUILTIN_VEC_EXT_V4HI,
24029 IX86_BUILTIN_VEC_EXT_V16QI,
24030 IX86_BUILTIN_VEC_SET_V2DI,
24031 IX86_BUILTIN_VEC_SET_V4SF,
24032 IX86_BUILTIN_VEC_SET_V4SI,
24033 IX86_BUILTIN_VEC_SET_V8HI,
24034 IX86_BUILTIN_VEC_SET_V4HI,
24035 IX86_BUILTIN_VEC_SET_V16QI,
24037 IX86_BUILTIN_VEC_PACK_SFIX,
24040 IX86_BUILTIN_CRC32QI,
24041 IX86_BUILTIN_CRC32HI,
24042 IX86_BUILTIN_CRC32SI,
24043 IX86_BUILTIN_CRC32DI,
24045 IX86_BUILTIN_PCMPESTRI128,
24046 IX86_BUILTIN_PCMPESTRM128,
24047 IX86_BUILTIN_PCMPESTRA128,
24048 IX86_BUILTIN_PCMPESTRC128,
24049 IX86_BUILTIN_PCMPESTRO128,
24050 IX86_BUILTIN_PCMPESTRS128,
24051 IX86_BUILTIN_PCMPESTRZ128,
24052 IX86_BUILTIN_PCMPISTRI128,
24053 IX86_BUILTIN_PCMPISTRM128,
24054 IX86_BUILTIN_PCMPISTRA128,
24055 IX86_BUILTIN_PCMPISTRC128,
24056 IX86_BUILTIN_PCMPISTRO128,
24057 IX86_BUILTIN_PCMPISTRS128,
24058 IX86_BUILTIN_PCMPISTRZ128,
24060 IX86_BUILTIN_PCMPGTQ,
24062 /* AES instructions */
24063 IX86_BUILTIN_AESENC128,
24064 IX86_BUILTIN_AESENCLAST128,
24065 IX86_BUILTIN_AESDEC128,
24066 IX86_BUILTIN_AESDECLAST128,
24067 IX86_BUILTIN_AESIMC128,
24068 IX86_BUILTIN_AESKEYGENASSIST128,
24070 /* PCLMUL instruction */
24071 IX86_BUILTIN_PCLMULQDQ128,
24074 IX86_BUILTIN_ADDPD256,
24075 IX86_BUILTIN_ADDPS256,
24076 IX86_BUILTIN_ADDSUBPD256,
24077 IX86_BUILTIN_ADDSUBPS256,
24078 IX86_BUILTIN_ANDPD256,
24079 IX86_BUILTIN_ANDPS256,
24080 IX86_BUILTIN_ANDNPD256,
24081 IX86_BUILTIN_ANDNPS256,
24082 IX86_BUILTIN_BLENDPD256,
24083 IX86_BUILTIN_BLENDPS256,
24084 IX86_BUILTIN_BLENDVPD256,
24085 IX86_BUILTIN_BLENDVPS256,
24086 IX86_BUILTIN_DIVPD256,
24087 IX86_BUILTIN_DIVPS256,
24088 IX86_BUILTIN_DPPS256,
24089 IX86_BUILTIN_HADDPD256,
24090 IX86_BUILTIN_HADDPS256,
24091 IX86_BUILTIN_HSUBPD256,
24092 IX86_BUILTIN_HSUBPS256,
24093 IX86_BUILTIN_MAXPD256,
24094 IX86_BUILTIN_MAXPS256,
24095 IX86_BUILTIN_MINPD256,
24096 IX86_BUILTIN_MINPS256,
24097 IX86_BUILTIN_MULPD256,
24098 IX86_BUILTIN_MULPS256,
24099 IX86_BUILTIN_ORPD256,
24100 IX86_BUILTIN_ORPS256,
24101 IX86_BUILTIN_SHUFPD256,
24102 IX86_BUILTIN_SHUFPS256,
24103 IX86_BUILTIN_SUBPD256,
24104 IX86_BUILTIN_SUBPS256,
24105 IX86_BUILTIN_XORPD256,
24106 IX86_BUILTIN_XORPS256,
24107 IX86_BUILTIN_CMPSD,
24108 IX86_BUILTIN_CMPSS,
24109 IX86_BUILTIN_CMPPD,
24110 IX86_BUILTIN_CMPPS,
24111 IX86_BUILTIN_CMPPD256,
24112 IX86_BUILTIN_CMPPS256,
24113 IX86_BUILTIN_CVTDQ2PD256,
24114 IX86_BUILTIN_CVTDQ2PS256,
24115 IX86_BUILTIN_CVTPD2PS256,
24116 IX86_BUILTIN_CVTPS2DQ256,
24117 IX86_BUILTIN_CVTPS2PD256,
24118 IX86_BUILTIN_CVTTPD2DQ256,
24119 IX86_BUILTIN_CVTPD2DQ256,
24120 IX86_BUILTIN_CVTTPS2DQ256,
24121 IX86_BUILTIN_EXTRACTF128PD256,
24122 IX86_BUILTIN_EXTRACTF128PS256,
24123 IX86_BUILTIN_EXTRACTF128SI256,
24124 IX86_BUILTIN_VZEROALL,
24125 IX86_BUILTIN_VZEROUPPER,
24126 IX86_BUILTIN_VPERMILVARPD,
24127 IX86_BUILTIN_VPERMILVARPS,
24128 IX86_BUILTIN_VPERMILVARPD256,
24129 IX86_BUILTIN_VPERMILVARPS256,
24130 IX86_BUILTIN_VPERMILPD,
24131 IX86_BUILTIN_VPERMILPS,
24132 IX86_BUILTIN_VPERMILPD256,
24133 IX86_BUILTIN_VPERMILPS256,
24134 IX86_BUILTIN_VPERMIL2PD,
24135 IX86_BUILTIN_VPERMIL2PS,
24136 IX86_BUILTIN_VPERMIL2PD256,
24137 IX86_BUILTIN_VPERMIL2PS256,
24138 IX86_BUILTIN_VPERM2F128PD256,
24139 IX86_BUILTIN_VPERM2F128PS256,
24140 IX86_BUILTIN_VPERM2F128SI256,
24141 IX86_BUILTIN_VBROADCASTSS,
24142 IX86_BUILTIN_VBROADCASTSD256,
24143 IX86_BUILTIN_VBROADCASTSS256,
24144 IX86_BUILTIN_VBROADCASTPD256,
24145 IX86_BUILTIN_VBROADCASTPS256,
24146 IX86_BUILTIN_VINSERTF128PD256,
24147 IX86_BUILTIN_VINSERTF128PS256,
24148 IX86_BUILTIN_VINSERTF128SI256,
24149 IX86_BUILTIN_LOADUPD256,
24150 IX86_BUILTIN_LOADUPS256,
24151 IX86_BUILTIN_STOREUPD256,
24152 IX86_BUILTIN_STOREUPS256,
24153 IX86_BUILTIN_LDDQU256,
24154 IX86_BUILTIN_MOVNTDQ256,
24155 IX86_BUILTIN_MOVNTPD256,
24156 IX86_BUILTIN_MOVNTPS256,
24157 IX86_BUILTIN_LOADDQU256,
24158 IX86_BUILTIN_STOREDQU256,
24159 IX86_BUILTIN_MASKLOADPD,
24160 IX86_BUILTIN_MASKLOADPS,
24161 IX86_BUILTIN_MASKSTOREPD,
24162 IX86_BUILTIN_MASKSTOREPS,
24163 IX86_BUILTIN_MASKLOADPD256,
24164 IX86_BUILTIN_MASKLOADPS256,
24165 IX86_BUILTIN_MASKSTOREPD256,
24166 IX86_BUILTIN_MASKSTOREPS256,
24167 IX86_BUILTIN_MOVSHDUP256,
24168 IX86_BUILTIN_MOVSLDUP256,
24169 IX86_BUILTIN_MOVDDUP256,
24171 IX86_BUILTIN_SQRTPD256,
24172 IX86_BUILTIN_SQRTPS256,
24173 IX86_BUILTIN_SQRTPS_NR256,
24174 IX86_BUILTIN_RSQRTPS256,
24175 IX86_BUILTIN_RSQRTPS_NR256,
24177 IX86_BUILTIN_RCPPS256,
24179 IX86_BUILTIN_ROUNDPD256,
24180 IX86_BUILTIN_ROUNDPS256,
24182 IX86_BUILTIN_FLOORPD256,
24183 IX86_BUILTIN_CEILPD256,
24184 IX86_BUILTIN_TRUNCPD256,
24185 IX86_BUILTIN_RINTPD256,
24186 IX86_BUILTIN_FLOORPS256,
24187 IX86_BUILTIN_CEILPS256,
24188 IX86_BUILTIN_TRUNCPS256,
24189 IX86_BUILTIN_RINTPS256,
24191 IX86_BUILTIN_UNPCKHPD256,
24192 IX86_BUILTIN_UNPCKLPD256,
24193 IX86_BUILTIN_UNPCKHPS256,
24194 IX86_BUILTIN_UNPCKLPS256,
24196 IX86_BUILTIN_SI256_SI,
24197 IX86_BUILTIN_PS256_PS,
24198 IX86_BUILTIN_PD256_PD,
24199 IX86_BUILTIN_SI_SI256,
24200 IX86_BUILTIN_PS_PS256,
24201 IX86_BUILTIN_PD_PD256,
24203 IX86_BUILTIN_VTESTZPD,
24204 IX86_BUILTIN_VTESTCPD,
24205 IX86_BUILTIN_VTESTNZCPD,
24206 IX86_BUILTIN_VTESTZPS,
24207 IX86_BUILTIN_VTESTCPS,
24208 IX86_BUILTIN_VTESTNZCPS,
24209 IX86_BUILTIN_VTESTZPD256,
24210 IX86_BUILTIN_VTESTCPD256,
24211 IX86_BUILTIN_VTESTNZCPD256,
24212 IX86_BUILTIN_VTESTZPS256,
24213 IX86_BUILTIN_VTESTCPS256,
24214 IX86_BUILTIN_VTESTNZCPS256,
24215 IX86_BUILTIN_PTESTZ256,
24216 IX86_BUILTIN_PTESTC256,
24217 IX86_BUILTIN_PTESTNZC256,
24219 IX86_BUILTIN_MOVMSKPD256,
24220 IX86_BUILTIN_MOVMSKPS256,
24222 /* TFmode support builtins. */
24224 IX86_BUILTIN_HUGE_VALQ,
24225 IX86_BUILTIN_FABSQ,
24226 IX86_BUILTIN_COPYSIGNQ,

  /* Vectorizer support builtins. */
  IX86_BUILTIN_CPYSGNPS,
  IX86_BUILTIN_CPYSGNPD,
  IX86_BUILTIN_CPYSGNPS256,
  IX86_BUILTIN_CPYSGNPD256,

  IX86_BUILTIN_CVTUDQ2PS,

  IX86_BUILTIN_VEC_PERM_V2DF,
  IX86_BUILTIN_VEC_PERM_V4SF,
  IX86_BUILTIN_VEC_PERM_V2DI,
  IX86_BUILTIN_VEC_PERM_V4SI,
  IX86_BUILTIN_VEC_PERM_V8HI,
  IX86_BUILTIN_VEC_PERM_V16QI,
  IX86_BUILTIN_VEC_PERM_V2DI_U,
  IX86_BUILTIN_VEC_PERM_V4SI_U,
  IX86_BUILTIN_VEC_PERM_V8HI_U,
  IX86_BUILTIN_VEC_PERM_V16QI_U,
  IX86_BUILTIN_VEC_PERM_V4DF,
  IX86_BUILTIN_VEC_PERM_V8SF,

  /* FMA4 and XOP instructions. */
  IX86_BUILTIN_VFMADDSS,
  IX86_BUILTIN_VFMADDSD,
  IX86_BUILTIN_VFMADDPS,
  IX86_BUILTIN_VFMADDPD,
  IX86_BUILTIN_VFMADDPS256,
  IX86_BUILTIN_VFMADDPD256,
  IX86_BUILTIN_VFMADDSUBPS,
  IX86_BUILTIN_VFMADDSUBPD,
  IX86_BUILTIN_VFMADDSUBPS256,
  IX86_BUILTIN_VFMADDSUBPD256,

  IX86_BUILTIN_VPCMOV,
  IX86_BUILTIN_VPCMOV_V2DI,
  IX86_BUILTIN_VPCMOV_V4SI,
  IX86_BUILTIN_VPCMOV_V8HI,
  IX86_BUILTIN_VPCMOV_V16QI,
  IX86_BUILTIN_VPCMOV_V4SF,
  IX86_BUILTIN_VPCMOV_V2DF,
  IX86_BUILTIN_VPCMOV256,
  IX86_BUILTIN_VPCMOV_V4DI256,
  IX86_BUILTIN_VPCMOV_V8SI256,
  IX86_BUILTIN_VPCMOV_V16HI256,
  IX86_BUILTIN_VPCMOV_V32QI256,
  IX86_BUILTIN_VPCMOV_V8SF256,
  IX86_BUILTIN_VPCMOV_V4DF256,

  IX86_BUILTIN_VPPERM,

  IX86_BUILTIN_VPMACSSWW,
  IX86_BUILTIN_VPMACSWW,
  IX86_BUILTIN_VPMACSSWD,
  IX86_BUILTIN_VPMACSWD,
  IX86_BUILTIN_VPMACSSDD,
  IX86_BUILTIN_VPMACSDD,
  IX86_BUILTIN_VPMACSSDQL,
  IX86_BUILTIN_VPMACSSDQH,
  IX86_BUILTIN_VPMACSDQL,
  IX86_BUILTIN_VPMACSDQH,
  IX86_BUILTIN_VPMADCSSWD,
  IX86_BUILTIN_VPMADCSWD,

  IX86_BUILTIN_VPHADDBW,
  IX86_BUILTIN_VPHADDBD,
  IX86_BUILTIN_VPHADDBQ,
  IX86_BUILTIN_VPHADDWD,
  IX86_BUILTIN_VPHADDWQ,
  IX86_BUILTIN_VPHADDDQ,
  IX86_BUILTIN_VPHADDUBW,
  IX86_BUILTIN_VPHADDUBD,
  IX86_BUILTIN_VPHADDUBQ,
  IX86_BUILTIN_VPHADDUWD,
  IX86_BUILTIN_VPHADDUWQ,
  IX86_BUILTIN_VPHADDUDQ,
  IX86_BUILTIN_VPHSUBBW,
  IX86_BUILTIN_VPHSUBWD,
  IX86_BUILTIN_VPHSUBDQ,

  IX86_BUILTIN_VPROTB,
  IX86_BUILTIN_VPROTW,
  IX86_BUILTIN_VPROTD,
  IX86_BUILTIN_VPROTQ,
  IX86_BUILTIN_VPROTB_IMM,
  IX86_BUILTIN_VPROTW_IMM,
  IX86_BUILTIN_VPROTD_IMM,
  IX86_BUILTIN_VPROTQ_IMM,

  IX86_BUILTIN_VPSHLB,
  IX86_BUILTIN_VPSHLW,
  IX86_BUILTIN_VPSHLD,
  IX86_BUILTIN_VPSHLQ,
  IX86_BUILTIN_VPSHAB,
  IX86_BUILTIN_VPSHAW,
  IX86_BUILTIN_VPSHAD,
  IX86_BUILTIN_VPSHAQ,

  IX86_BUILTIN_VFRCZSS,
  IX86_BUILTIN_VFRCZSD,
  IX86_BUILTIN_VFRCZPS,
  IX86_BUILTIN_VFRCZPD,
  IX86_BUILTIN_VFRCZPS256,
  IX86_BUILTIN_VFRCZPD256,

  IX86_BUILTIN_VPCOMEQUB,
  IX86_BUILTIN_VPCOMNEUB,
  IX86_BUILTIN_VPCOMLTUB,
  IX86_BUILTIN_VPCOMLEUB,
  IX86_BUILTIN_VPCOMGTUB,
  IX86_BUILTIN_VPCOMGEUB,
  IX86_BUILTIN_VPCOMFALSEUB,
  IX86_BUILTIN_VPCOMTRUEUB,

  IX86_BUILTIN_VPCOMEQUW,
  IX86_BUILTIN_VPCOMNEUW,
  IX86_BUILTIN_VPCOMLTUW,
  IX86_BUILTIN_VPCOMLEUW,
  IX86_BUILTIN_VPCOMGTUW,
  IX86_BUILTIN_VPCOMGEUW,
  IX86_BUILTIN_VPCOMFALSEUW,
  IX86_BUILTIN_VPCOMTRUEUW,

  IX86_BUILTIN_VPCOMEQUD,
  IX86_BUILTIN_VPCOMNEUD,
  IX86_BUILTIN_VPCOMLTUD,
  IX86_BUILTIN_VPCOMLEUD,
  IX86_BUILTIN_VPCOMGTUD,
  IX86_BUILTIN_VPCOMGEUD,
  IX86_BUILTIN_VPCOMFALSEUD,
  IX86_BUILTIN_VPCOMTRUEUD,

  IX86_BUILTIN_VPCOMEQUQ,
  IX86_BUILTIN_VPCOMNEUQ,
  IX86_BUILTIN_VPCOMLTUQ,
  IX86_BUILTIN_VPCOMLEUQ,
  IX86_BUILTIN_VPCOMGTUQ,
  IX86_BUILTIN_VPCOMGEUQ,
  IX86_BUILTIN_VPCOMFALSEUQ,
  IX86_BUILTIN_VPCOMTRUEUQ,

  IX86_BUILTIN_VPCOMEQB,
  IX86_BUILTIN_VPCOMNEB,
  IX86_BUILTIN_VPCOMLTB,
  IX86_BUILTIN_VPCOMLEB,
  IX86_BUILTIN_VPCOMGTB,
  IX86_BUILTIN_VPCOMGEB,
  IX86_BUILTIN_VPCOMFALSEB,
  IX86_BUILTIN_VPCOMTRUEB,

  IX86_BUILTIN_VPCOMEQW,
  IX86_BUILTIN_VPCOMNEW,
  IX86_BUILTIN_VPCOMLTW,
  IX86_BUILTIN_VPCOMLEW,
  IX86_BUILTIN_VPCOMGTW,
  IX86_BUILTIN_VPCOMGEW,
  IX86_BUILTIN_VPCOMFALSEW,
  IX86_BUILTIN_VPCOMTRUEW,

  IX86_BUILTIN_VPCOMEQD,
  IX86_BUILTIN_VPCOMNED,
  IX86_BUILTIN_VPCOMLTD,
  IX86_BUILTIN_VPCOMLED,
  IX86_BUILTIN_VPCOMGTD,
  IX86_BUILTIN_VPCOMGED,
  IX86_BUILTIN_VPCOMFALSED,
  IX86_BUILTIN_VPCOMTRUED,

  IX86_BUILTIN_VPCOMEQQ,
  IX86_BUILTIN_VPCOMNEQ,
  IX86_BUILTIN_VPCOMLTQ,
  IX86_BUILTIN_VPCOMLEQ,
  IX86_BUILTIN_VPCOMGTQ,
  IX86_BUILTIN_VPCOMGEQ,
  IX86_BUILTIN_VPCOMFALSEQ,
  IX86_BUILTIN_VPCOMTRUEQ,

  /* LWP instructions. */
  IX86_BUILTIN_LLWPCB,
  IX86_BUILTIN_SLWPCB,
  IX86_BUILTIN_LWPVAL32,
  IX86_BUILTIN_LWPVAL64,
  IX86_BUILTIN_LWPINS32,
  IX86_BUILTIN_LWPINS64,

  /* BMI instructions. */
  IX86_BUILTIN_BEXTR32,
  IX86_BUILTIN_BEXTR64,

  /* TBM instructions. */
  IX86_BUILTIN_BEXTRI32,
  IX86_BUILTIN_BEXTRI64,

  /* FSGSBASE instructions. */
  IX86_BUILTIN_RDFSBASE32,
  IX86_BUILTIN_RDFSBASE64,
  IX86_BUILTIN_RDGSBASE32,
  IX86_BUILTIN_RDGSBASE64,
  IX86_BUILTIN_WRFSBASE32,
  IX86_BUILTIN_WRFSBASE64,
  IX86_BUILTIN_WRGSBASE32,
  IX86_BUILTIN_WRGSBASE64,

  /* RDRND instructions. */
  IX86_BUILTIN_RDRAND16_STEP,
  IX86_BUILTIN_RDRAND32_STEP,
  IX86_BUILTIN_RDRAND64_STEP,

  /* F16C instructions. */
  IX86_BUILTIN_CVTPH2PS,
  IX86_BUILTIN_CVTPH2PS256,
  IX86_BUILTIN_CVTPS2PH,
  IX86_BUILTIN_CVTPS2PH256,

  /* CFString built-in for darwin */
  IX86_BUILTIN_CFSTRING,

  IX86_BUILTIN_MAX
};

/* Table for the ix86 builtin decls. */
static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];

/* Table of all of the builtin functions that are possible with different ISAs
   but are waiting to be built until a function is declared to use that
   ISA. */
struct builtin_isa {
  const char *name;                  /* function name */
  enum ix86_builtin_func_type tcode; /* type to use in the declaration */
  int isa;                           /* isa_flags this builtin is defined for */
  bool const_p;                      /* true if the declaration is constant */
  bool set_and_not_built_p;
};

static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];

/* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
   of which isa_flags to use in the ix86_builtins_isa array. Stores the
   function decl in the ix86_builtins array. Returns the function decl, or
   NULL_TREE if the builtin was not added.

   If the front end has a special hook for builtin functions, delay adding
   builtin functions that aren't in the current ISA until the ISA is changed
   with function specific optimization. Doing so can save about 300K for the
   default compiler. When the builtin is expanded, check at that time whether
   it is valid.

   If the front end doesn't have a special hook, record all builtins, even if
   it isn't an instruction set in the current ISA in case the user uses
   function specific options for a different ISA, so that we don't get scope
   errors if a builtin is added in the middle of a function scope. */

static inline tree
def_builtin (int mask, const char *name, enum ix86_builtin_func_type tcode,
	     enum ix86_builtins code)
{
  tree decl = NULL_TREE;

  if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
    {
      ix86_builtins_isa[(int) code].isa = mask;

      mask &= ~OPTION_MASK_ISA_64BIT;
      if (mask == 0
	  || (mask & ix86_isa_flags) != 0
	  || (lang_hooks.builtin_function
	      == lang_hooks.builtin_function_ext_scope))
	{
	  tree type = ix86_get_builtin_func_type (tcode);
	  decl = add_builtin_function (name, type, code, BUILT_IN_MD,
				       NULL, NULL_TREE);
	  ix86_builtins[(int) code] = decl;
	  ix86_builtins_isa[(int) code].set_and_not_built_p = false;
	}
      else
	{
	  ix86_builtins[(int) code] = NULL_TREE;
	  ix86_builtins_isa[(int) code].tcode = tcode;
	  ix86_builtins_isa[(int) code].name = name;
	  ix86_builtins_isa[(int) code].const_p = false;
	  ix86_builtins_isa[(int) code].set_and_not_built_p = true;
	}
    }

  return decl;
}
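
/* Usage sketch, not taken from this file: a hypothetical SSE2-only builtin
   would be registered as

     def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_example",
		  INT_FTYPE_INT, IX86_BUILTIN_EXAMPLE);

   ("example" and IX86_BUILTIN_EXAMPLE are placeholders).  If SSE2 is not in
   ix86_isa_flags at this point and the front end lacks the ext_scope hook,
   no decl is created yet; the name and type are parked in ix86_builtins_isa
   until ix86_add_new_builtins materializes them later.  */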

/* Like def_builtin, but also marks the function decl "const". */

static inline tree
def_builtin_const (int mask, const char *name,
		   enum ix86_builtin_func_type tcode, enum ix86_builtins code)
{
  tree decl = def_builtin (mask, name, tcode, code);
  if (decl)
    TREE_READONLY (decl) = 1;
  else
    ix86_builtins_isa[(int) code].const_p = true;

  return decl;
}

/* Add any new builtin functions for a given ISA that may not have been
   declared. This saves a bit of space compared to adding all of the
   declarations to the tree, even if we didn't use them. */

static void
ix86_add_new_builtins (int isa)
{
  int i;

  for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
    {
      if ((ix86_builtins_isa[i].isa & isa) != 0
	  && ix86_builtins_isa[i].set_and_not_built_p)
	{
	  tree decl, type;

	  /* Don't define the builtin again. */
	  ix86_builtins_isa[i].set_and_not_built_p = false;

	  type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
	  decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
						 type, i, BUILT_IN_MD, NULL,
						 NULL_TREE);

	  ix86_builtins[i] = decl;
	  if (ix86_builtins_isa[i].const_p)
	    TREE_READONLY (decl) = 1;
	}
    }
}
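
/* Expected trigger for the above (an assumption based on its comment): when
   function-specific options enable extra ISAs, e.g.

     __attribute__((target ("avx"))) void f (void);

   the target-attribute handling should call
   ix86_add_new_builtins (ix86_isa_flags) so that builtins parked by
   def_builtin become visible before the function body is compiled.  */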

/* Bits for builtin_description.flag. */

/* Set when we don't support the comparison natively, and should
   swap_comparison in order to support it. */
#define BUILTIN_DESC_SWAP_OPERANDS	1

struct builtin_description
{
  const unsigned int mask;
  const enum insn_code icode;
  const char *const name;
  const enum ix86_builtins code;
  const enum rtx_code comparison;
  const int flag;
};
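
/* Registration sketch (an assumption about the init code elsewhere in this
   file, not a quote of it): each bdesc_* table below is walked once, roughly

     for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
       if (d->name)
	 def_builtin (d->mask, d->name, ftype_for (d), d->code);

   where ftype_for stands in for the per-table function-type selection.
   Adding a builtin is then normally just adding a table row.  */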

static const struct builtin_description bdesc_comi[] =
{
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
};
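
/* User-level sketch: the rows above back the scalar comi/ucomi compare
   builtins, e.g. with two __v4sf values a and b

     int gt = __builtin_ia32_comigt (a, b);

   compares the low elements and yields 0 or 1; the rtx code in each row
   (GT, UNLT, ...) is what the expander tests after the compare.  */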

static const struct builtin_description bdesc_pcmpestr[] =
{
  /* SSE4.2 */
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
};

static const struct builtin_description bdesc_pcmpistr[] =
{
  /* SSE4.2 */
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
};

/* Special builtins with variable number of arguments. */
static const struct builtin_description bdesc_special_args[] =
{
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtsc, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtscp, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },

  /* MMX */
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },

  /* 3DNow! */
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },

  /* SSE */
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },

  /* SSE or 3DNow!A */
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },

  /* SSE2 */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },

  /* SSE3 */
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },

  /* SSE4.1 */
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },

  /* SSE4A */
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },

  /* AVX */
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },

  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },

  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },

  /* FSGSBASE */
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
};
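
/* Signature sketch: the FTYPE code in each row above is carried in the
   flag field (hence the (int) casts) and names the prototype; e.g.
   VOID_FTYPE_PFLOAT_V4SF for __builtin_ia32_storeups corresponds to

     void __builtin_ia32_storeups (float *, __v4sf);

   so these "special" builtins are the ones touching memory or returning
   void, as opposed to the pure-value bdesc_args table that follows.  */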

/* Builtins with variable number of arguments. */
static const struct builtin_description bdesc_args[] =
{
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
  { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdpmc, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },

  /* MMX */
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },

  /* 3DNow! */
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },

  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  /* 3DNow!A */
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },

  /* SSE */
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, V4SF_FTYPE_V4SF_DI },

  { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },

  /* SSE MMX or 3Dnow!A */
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },

  /* SSE2 */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2df", IX86_BUILTIN_VEC_PERM_V2DF, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4sf", IX86_BUILTIN_VEC_PERM_V4SF, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di", IX86_BUILTIN_VEC_PERM_V2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si", IX86_BUILTIN_VEC_PERM_V4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi", IX86_BUILTIN_VEC_PERM_V8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi", IX86_BUILTIN_VEC_PERM_V16QI, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di_u", IX86_BUILTIN_VEC_PERM_V2DI_U, UNKNOWN, (int) V2UDI_FTYPE_V2UDI_V2UDI_V2UDI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si_u", IX86_BUILTIN_VEC_PERM_V4SI_U, UNKNOWN, (int) V4USI_FTYPE_V4USI_V4USI_V4USI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi_u", IX86_BUILTIN_VEC_PERM_V8HI_U, UNKNOWN, (int) V8UHI_FTYPE_V8UHI_V8UHI_V8UHI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi_u", IX86_BUILTIN_VEC_PERM_V16QI_U, UNKNOWN, (int) V16UQI_FTYPE_V16UQI_V16UQI_V16UQI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4df", IX86_BUILTIN_VEC_PERM_V4DF, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8sf", IX86_BUILTIN_VEC_PERM_V8SF, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtudq2ps, "__builtin_ia32_cvtudq2ps", IX86_BUILTIN_CVTUDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25054 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25055 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25056 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25058 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25059 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25060 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25061 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25062 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25063 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25064 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25065 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25067 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
25068 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
25069 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
25071 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25072 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
25074 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
25075 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
25077 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
25079 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
25080 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
25081 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
25082 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
25084 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
25085 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
25086 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
25087 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
25088 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
25089 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
25090 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
25092 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
25093 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
25094 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
25095 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
25096 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
25097 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
25098 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
25100 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
25101 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
25102 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
25103 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
25105 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
25106 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
25107 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
25109 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
25111 { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
25112 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },

  /* SSE2 MMX */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },

  /* SSE3 */
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  /* SSSE3 */
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },

  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },

  /* SSE4.1 */
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
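
  /* Note on the floor/ceil/trunc/rint rows above (a reading of the
     table, not a statement from the original comments): they reuse the
     roundpd/roundps patterns, stash the ROUND_* constant in the
     comparison-code field (hence the rtx_code casts), and the *_ROUND
     function types tell the expander to supply that constant as the
     rounding immediate.  On this reading, __builtin_ia32_floorpd (x)
     should behave like __builtin_ia32_roundpd (x, ROUND_FLOOR).  */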

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },

  /* SSE4.2 */
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },

  /* SSE4A */
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  /* AES */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  /* PCLMUL */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
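
  /* The AES and PCLMUL rows above carry a null builtin name and only
     the SSE2 mask; the user-visible builtins are presumably registered
     separately so that the AES/PCLMUL ISA checks can be applied at
     that point.  */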

  /* AVX */
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },

  { OPTION_MASK_ISA_ABM, CODE_FOR_clzhi2_abm, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },

  /* BMI */
  { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
  { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },

  /* TBM */
  { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },

  /* F16C */
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
};

/* FMA4 and XOP.  */
#define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
#define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
#define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
#define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
#define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
#define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
#define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
#define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
#define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
#define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
#define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
#define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
#define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
#define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
#define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
#define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
#define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
#define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
#define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
#define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
#define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
#define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
#define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
#define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
#define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
#define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
#define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
#define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
#define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
#define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
#define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
#define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
#define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
#define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
#define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
#define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
#define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
#define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
#define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
#define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
#define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
#define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
#define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
#define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
#define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
#define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
#define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
#define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
#define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
#define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
#define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
#define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
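
/* Naming scheme for the MULTI_ARG_* aliases above: the digit is the
   operand count, the mode suffix names the vector element mode
   (SF/DF/DI/SI/HI/QI), a trailing 2 selects the 256-bit vector forms,
   the _IMM/_CMP/_TF tails distinguish immediate, comparison-code and
   related variants, and a second mode suffix (e.g. QI_HI) marks a
   different input and output element width.  For example,
   MULTI_ARG_3_SF2 is V8SF_FTYPE_V8SF_V8SF_V8SF: three V8SF operands
   producing one V8SF result.  */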

static const struct builtin_description bdesc_multi_arg[] =
{
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
    "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
    UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
    "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
    UNKNOWN, (int)MULTI_ARG_3_DF },

  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
    "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
    UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
    "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
    UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
    "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
    UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
    "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
    UNKNOWN, (int)MULTI_ARG_3_DF2 },

  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
    "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
    UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
    "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
    UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
    "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
    UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
    "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
    UNKNOWN, (int)MULTI_ARG_3_DF2 },
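
  /* The vfmadd* rows above are tagged with both the FMA and FMA4
     masks; a combined mask here presumably makes the builtin available
     when either ISA is enabled, rather than requiring both.  */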

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
25557 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
25558 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
25559 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
25560 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
25561 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
25562 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
25563 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
25565 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
25566 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
25567 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
25568 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
25569 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
25570 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
25571 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
25573 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
25574 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
25575 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
25576 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
25577 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
25578 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
25579 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
25581 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
25582 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
25583 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
25584 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
25585 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
25586 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
25587 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
25589 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
25590 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
25591 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
25592 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
25593 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
25594 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
25595 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
25597 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
25598 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
25599 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
25600 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
25601 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
25602 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
25603 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
25605 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
25606 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
25607 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
25608 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
25609 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
25610 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
25611 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
25613 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
25614 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
25615 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
25616 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
25617 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
25618 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
25619 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
25621 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
25622 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
25623 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
25624 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
25625 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
25626 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
25627 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
25628 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
25630 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
25631 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
25632 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
25633 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
25634 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
25635 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
25636 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
25637 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
25639 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
25640 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
25641 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
25642 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
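/* To make the table concrete: the VPROTW entry above registers
   __builtin_ia32_vprotw, which the XOP intrinsic header wraps.  A
   minimal user-level sketch, assuming the _mm_rot_epi16 wrapper name
   from xopintrin.h and compilation with -mxop:

       #include <x86intrin.h>

       __m128i
       rotate_each_lane (__m128i v, __m128i amounts)
       {
         return _mm_rot_epi16 (v, amounts);
       }
*/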
25646 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
25647    in the current target ISA, to allow the user to compile particular modules
25648    with different target-specific options that differ from the command-line options.  */
25651 ix86_init_mmx_sse_builtins (void)
25653 const struct builtin_description * d;
25654 enum ix86_builtin_func_type ftype;
25657 /* Add all special builtins with a variable number of operands.  */
25658 for (i = 0, d = bdesc_special_args;
25659 i < ARRAY_SIZE (bdesc_special_args);
25665 ftype = (enum ix86_builtin_func_type) d->flag;
25666 def_builtin (d->mask, d->name, ftype, d->code);
25669 /* Add all builtins with a variable number of operands.  */
25670 for (i = 0, d = bdesc_args;
25671 i < ARRAY_SIZE (bdesc_args);
25677 ftype = (enum ix86_builtin_func_type) d->flag;
25678 def_builtin_const (d->mask, d->name, ftype, d->code);
25681 /* pcmpestr[im] insns. */
25682 for (i = 0, d = bdesc_pcmpestr;
25683 i < ARRAY_SIZE (bdesc_pcmpestr);
25686 if (d->code == IX86_BUILTIN_PCMPESTRM128)
25687 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
25689 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
25690 def_builtin_const (d->mask, d->name, ftype, d->code);
25693 /* pcmpistr[im] insns. */
25694 for (i = 0, d = bdesc_pcmpistr;
25695 i < ARRAY_SIZE (bdesc_pcmpistr);
25698 if (d->code == IX86_BUILTIN_PCMPISTRM128)
25699 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
25701 ftype = INT_FTYPE_V16QI_V16QI_INT;
25702 def_builtin_const (d->mask, d->name, ftype, d->code);
25705 /* comi/ucomi insns. */
25706 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
25708 if (d->mask == OPTION_MASK_ISA_SSE2)
25709 ftype = INT_FTYPE_V2DF_V2DF;
25711 ftype = INT_FTYPE_V4SF_V4SF;
25712 def_builtin_const (d->mask, d->name, ftype, d->code);
25716 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
25717 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
25718 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
25719 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
25721 /* SSE or 3DNow!A */
25722 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
25723 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
25724 IX86_BUILTIN_MASKMOVQ);
25727 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
25728 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
25730 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
25731 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
25732 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
25733 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
25736 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
25737 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
25738 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
25739 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
25742 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
25743 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
25744 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
25745 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
25746 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
25747 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
25748 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
25749 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
25750 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
25751 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
25752 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
25753 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
25756 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
25757 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
25760 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
25761 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
25762 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
25763 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
25764 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
25765 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
25766 IX86_BUILTIN_RDRAND64_STEP);
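/* A usage sketch for the rdrand step builtins just defined (requires
   -mrdrnd; consume() is a placeholder callee).  Each builtin stores a
   random value through its pointer argument and returns nonzero on
   success:

       unsigned int r;
       if (__builtin_ia32_rdrand32_step (&r))
         consume (r);
*/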
25768 /* MMX access to the vec_init patterns. */
25769 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
25770 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
25772 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
25773 V4HI_FTYPE_HI_HI_HI_HI,
25774 IX86_BUILTIN_VEC_INIT_V4HI);
25776 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
25777 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
25778 IX86_BUILTIN_VEC_INIT_V8QI);
25780 /* Access to the vec_extract patterns. */
25781 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
25782 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
25783 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
25784 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
25785 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
25786 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
25787 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
25788 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
25789 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
25790 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
25792 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
25793 "__builtin_ia32_vec_ext_v4hi",
25794 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
25796 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
25797 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
25799 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
25800 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
25802 /* Access to the vec_set patterns. */
25803 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
25804 "__builtin_ia32_vec_set_v2di",
25805 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
25807 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
25808 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
25810 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
25811 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
25813 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
25814 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
25816 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
25817 "__builtin_ia32_vec_set_v4hi",
25818 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
25820 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
25821 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
25823 /* Add FMA4/XOP multi-arg builtin instructions.  */
25824 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
25829 ftype = (enum ix86_builtin_func_type) d->flag;
25830 def_builtin_const (d->mask, d->name, ftype, d->code);
25834 /* Internal method for ix86_init_builtins. */
25837 ix86_init_builtins_va_builtins_abi (void)
25839 tree ms_va_ref, sysv_va_ref;
25840 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
25841 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
25842 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
25843 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
25847 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
25848 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
25849 ms_va_ref = build_reference_type (ms_va_list_type_node);
25851 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
25854 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
25855 fnvoid_va_start_ms =
25856 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
25857 fnvoid_va_end_sysv =
25858 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
25859 fnvoid_va_start_sysv =
25860 build_varargs_function_type_list (void_type_node, sysv_va_ref,
25862 fnvoid_va_copy_ms =
25863 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
25865 fnvoid_va_copy_sysv =
25866 build_function_type_list (void_type_node, sysv_va_ref,
25867 sysv_va_ref, NULL_TREE);
25869 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
25870 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
25871 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
25872 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
25873 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
25874 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
25875 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
25876 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
25877 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
25878 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
25879 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
25880 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
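/* A hedged sketch of how the ABI-specific varargs builtins registered
   above are meant to be used from C on 64-bit targets.
   __builtin_ms_va_list is the type registered for ms_va_list_type_node;
   fetching arguments with __builtin_va_arg is assumed to work on it the
   same way it does on va_list:

       __attribute__ ((ms_abi)) void
       msabi_vararg (int n, ...)
       {
         __builtin_ms_va_list ap;
         int i, v;
         __builtin_ms_va_start (ap, n);
         for (i = 0; i < n; i++)
           v = __builtin_va_arg (ap, int);
         __builtin_ms_va_end (ap);
       }
*/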
25884 ix86_init_builtin_types (void)
25886 tree float128_type_node, float80_type_node;
25888 /* The __float80 type. */
25889 float80_type_node = long_double_type_node;
25890 if (TYPE_MODE (float80_type_node) != XFmode)
25892 /* long double is not the 80-bit type here; build __float80 as a distinct REAL_TYPE.  */
25893 float80_type_node = make_node (REAL_TYPE);
25895 TYPE_PRECISION (float80_type_node) = 80;
25896 layout_type (float80_type_node);
25898 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
25900 /* The __float128 type. */
25901 float128_type_node = make_node (REAL_TYPE);
25902 TYPE_PRECISION (float128_type_node) = 128;
25903 layout_type (float128_type_node);
25904 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
25906 /* This macro is built by i386-builtin-types.awk. */
25907 DEFINE_BUILTIN_PRIMITIVE_TYPES;
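/* Once registered, both types are directly usable from C (a sketch;
   the w/q literal suffixes are GNU extensions and their acceptance
   may depend on the language dialect):

       __float80  e = 1.0w;   XFmode, 80-bit extended precision
       __float128 q = 1.0q;   TFmode, 128-bit
*/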
25911 ix86_init_builtins (void)
25915 ix86_init_builtin_types ();
25917 /* TFmode support builtins. */
25918 def_builtin_const (0, "__builtin_infq",
25919 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
25920 def_builtin_const (0, "__builtin_huge_valq",
25921 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
25923 /* We will expand them to a normal call if SSE2 isn't available, since
25924    they are used by libgcc.  */
25925 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
25926 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
25927 BUILT_IN_MD, "__fabstf2", NULL_TREE);
25928 TREE_READONLY (t) = 1;
25929 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
25931 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
25932 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
25933 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
25934 TREE_READONLY (t) = 1;
25935 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
25937 ix86_init_mmx_sse_builtins ();
25940 ix86_init_builtins_va_builtins_abi ();
25942 #ifdef SUBTARGET_INIT_BUILTINS
25943 SUBTARGET_INIT_BUILTINS;
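/* Usage sketch for the TFmode builtins defined above (illustrative
   values only; the q literal suffix is assumed to be available):

       __float128 a = __builtin_fabsq (-2.0q);            yields 2.0
       __float128 b = __builtin_copysignq (3.0q, -1.0q);  yields -3.0
       __float128 inf = __builtin_infq ();
*/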
25947 /* Return the ix86 builtin for CODE. */
25950 ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
25952 if (code >= IX86_BUILTIN_MAX)
25953 return error_mark_node;
25955 return ix86_builtins[code];
25958 /* Errors in the source file can cause expand_expr to return const0_rtx
25959 where we expect a vector. To avoid crashing, use one of the vector
25960 clear instructions. */
25962 safe_vector_operand (rtx x, enum machine_mode mode)
25964 if (x == const0_rtx)
25965 x = CONST0_RTX (mode);
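  /* E.g. in V4SImode this replaces the scalar (const_int 0) with
     (const_vector:V4SI [0 0 0 0]), which the vector operand
     predicates will accept.  */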
25969 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
25972 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
25975 tree arg0 = CALL_EXPR_ARG (exp, 0);
25976 tree arg1 = CALL_EXPR_ARG (exp, 1);
25977 rtx op0 = expand_normal (arg0);
25978 rtx op1 = expand_normal (arg1);
25979 enum machine_mode tmode = insn_data[icode].operand[0].mode;
25980 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
25981 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
25983 if (VECTOR_MODE_P (mode0))
25984 op0 = safe_vector_operand (op0, mode0);
25985 if (VECTOR_MODE_P (mode1))
25986 op1 = safe_vector_operand (op1, mode1);
25988 if (optimize || !target
25989 || GET_MODE (target) != tmode
25990 || !insn_data[icode].operand[0].predicate (target, tmode))
25991 target = gen_reg_rtx (tmode);
25993 if (GET_MODE (op1) == SImode && mode1 == TImode)
25995 rtx x = gen_reg_rtx (V4SImode);
25996 emit_insn (gen_sse2_loadd (x, op1));
25997 op1 = gen_lowpart (TImode, x);
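      /* The builtin declares its count operand as int, but the insn
         wants a TImode vector count: load the SImode value into the
         low element of a V4SI register and view that as TImode.  */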
26000 if (!insn_data[icode].operand[1].predicate (op0, mode0))
26001 op0 = copy_to_mode_reg (mode0, op0);
26002 if (!insn_data[icode].operand[2].predicate (op1, mode1))
26003 op1 = copy_to_mode_reg (mode1, op1);
26005 pat = GEN_FCN (icode) (target, op0, op1);
26014 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
26017 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
26018 enum ix86_builtin_func_type m_type,
26019 enum rtx_code sub_code)
26024 bool comparison_p = false;
26026 bool last_arg_constant = false;
26027 int num_memory = 0;
26030 enum machine_mode mode;
26033 enum machine_mode tmode = insn_data[icode].operand[0].mode;
26037 case MULTI_ARG_4_DF2_DI_I:
26038 case MULTI_ARG_4_DF2_DI_I1:
26039 case MULTI_ARG_4_SF2_SI_I:
26040 case MULTI_ARG_4_SF2_SI_I1:
26042 last_arg_constant = true;
26045 case MULTI_ARG_3_SF:
26046 case MULTI_ARG_3_DF:
26047 case MULTI_ARG_3_SF2:
26048 case MULTI_ARG_3_DF2:
26049 case MULTI_ARG_3_DI:
26050 case MULTI_ARG_3_SI:
26051 case MULTI_ARG_3_SI_DI:
26052 case MULTI_ARG_3_HI:
26053 case MULTI_ARG_3_HI_SI:
26054 case MULTI_ARG_3_QI:
26055 case MULTI_ARG_3_DI2:
26056 case MULTI_ARG_3_SI2:
26057 case MULTI_ARG_3_HI2:
26058 case MULTI_ARG_3_QI2:
26062 case MULTI_ARG_2_SF:
26063 case MULTI_ARG_2_DF:
26064 case MULTI_ARG_2_DI:
26065 case MULTI_ARG_2_SI:
26066 case MULTI_ARG_2_HI:
26067 case MULTI_ARG_2_QI:
26071 case MULTI_ARG_2_DI_IMM:
26072 case MULTI_ARG_2_SI_IMM:
26073 case MULTI_ARG_2_HI_IMM:
26074 case MULTI_ARG_2_QI_IMM:
26076 last_arg_constant = true;
26079 case MULTI_ARG_1_SF:
26080 case MULTI_ARG_1_DF:
26081 case MULTI_ARG_1_SF2:
26082 case MULTI_ARG_1_DF2:
26083 case MULTI_ARG_1_DI:
26084 case MULTI_ARG_1_SI:
26085 case MULTI_ARG_1_HI:
26086 case MULTI_ARG_1_QI:
26087 case MULTI_ARG_1_SI_DI:
26088 case MULTI_ARG_1_HI_DI:
26089 case MULTI_ARG_1_HI_SI:
26090 case MULTI_ARG_1_QI_DI:
26091 case MULTI_ARG_1_QI_SI:
26092 case MULTI_ARG_1_QI_HI:
26096 case MULTI_ARG_2_DI_CMP:
26097 case MULTI_ARG_2_SI_CMP:
26098 case MULTI_ARG_2_HI_CMP:
26099 case MULTI_ARG_2_QI_CMP:
26101 comparison_p = true;
26104 case MULTI_ARG_2_SF_TF:
26105 case MULTI_ARG_2_DF_TF:
26106 case MULTI_ARG_2_DI_TF:
26107 case MULTI_ARG_2_SI_TF:
26108 case MULTI_ARG_2_HI_TF:
26109 case MULTI_ARG_2_QI_TF:
26115 gcc_unreachable ();
26118 if (optimize || !target
26119 || GET_MODE (target) != tmode
26120 || !insn_data[icode].operand[0].predicate (target, tmode))
26121 target = gen_reg_rtx (tmode);
26123 gcc_assert (nargs <= 4);
26125 for (i = 0; i < nargs; i++)
26127 tree arg = CALL_EXPR_ARG (exp, i);
26128 rtx op = expand_normal (arg);
26129 int adjust = (comparison_p) ? 1 : 0;
26130 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
26132 if (last_arg_constant && i == nargs-1)
26134 if (!CONST_INT_P (op))
26136 error ("last argument must be an immediate");
26137 return gen_reg_rtx (tmode);
26142 if (VECTOR_MODE_P (mode))
26143 op = safe_vector_operand (op, mode);
26145 /* If we aren't optimizing, only allow one memory operand to be generated.  */
26147 if (memory_operand (op, mode))
26150 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
26153 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
26155 op = force_reg (mode, op);
26159 args[i].mode = mode;
26165 pat = GEN_FCN (icode) (target, args[0].op);
26170 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
26171 GEN_INT ((int)sub_code));
26172 else if (! comparison_p)
26173 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
26176 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
26180 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
26185 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
26189 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
26193 gcc_unreachable ();
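  /* For the comparison forms, sub_code is folded into the pattern as an
     explicit rtx, so a vpcomltb call ends up roughly (a sketch only,
     modulo the exact pattern shape) as:

         (set (reg:V16QI target)
              (lt:V16QI (reg:V16QI op0) (reg:V16QI op1)))
  */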
26203 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
26204 insns with vec_merge. */
26207 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
26211 tree arg0 = CALL_EXPR_ARG (exp, 0);
26212 rtx op1, op0 = expand_normal (arg0);
26213 enum machine_mode tmode = insn_data[icode].operand[0].mode;
26214 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
26216 if (optimize || !target
26217 || GET_MODE (target) != tmode
26218 || !insn_data[icode].operand[0].predicate (target, tmode))
26219 target = gen_reg_rtx (tmode);
26221 if (VECTOR_MODE_P (mode0))
26222 op0 = safe_vector_operand (op0, mode0);
26224 if ((optimize && !register_operand (op0, mode0))
26225 || !insn_data[icode].operand[1].predicate (op0, mode0))
26226 op0 = copy_to_mode_reg (mode0, op0);
26229 if (!insn_data[icode].operand[2].predicate (op1, mode0))
26230 op1 = copy_to_mode_reg (mode0, op1);
26232 pat = GEN_FCN (icode) (target, op0, op1);
26239 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
26242 ix86_expand_sse_compare (const struct builtin_description *d,
26243 tree exp, rtx target, bool swap)
26246 tree arg0 = CALL_EXPR_ARG (exp, 0);
26247 tree arg1 = CALL_EXPR_ARG (exp, 1);
26248 rtx op0 = expand_normal (arg0);
26249 rtx op1 = expand_normal (arg1);
26251 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
26252 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
26253 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
26254 enum rtx_code comparison = d->comparison;
26256 if (VECTOR_MODE_P (mode0))
26257 op0 = safe_vector_operand (op0, mode0);
26258 if (VECTOR_MODE_P (mode1))
26259 op1 = safe_vector_operand (op1, mode1);
26261 /* Swap operands if we have a comparison that isn't available in SSE.  */
26265 rtx tmp = gen_reg_rtx (mode1);
26266 emit_move_insn (tmp, op1);
26271 if (optimize || !target
26272 || GET_MODE (target) != tmode
26273 || !insn_data[d->icode].operand[0].predicate (target, tmode))
26274 target = gen_reg_rtx (tmode);
26276 if ((optimize && !register_operand (op0, mode0))
26277 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
26278 op0 = copy_to_mode_reg (mode0, op0);
26279 if ((optimize && !register_operand (op1, mode1))
26280 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
26281 op1 = copy_to_mode_reg (mode1, op1);
26283 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
26284 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
26291 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
26294 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
26298 tree arg0 = CALL_EXPR_ARG (exp, 0);
26299 tree arg1 = CALL_EXPR_ARG (exp, 1);
26300 rtx op0 = expand_normal (arg0);
26301 rtx op1 = expand_normal (arg1);
26302 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
26303 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
26304 enum rtx_code comparison = d->comparison;
26306 if (VECTOR_MODE_P (mode0))
26307 op0 = safe_vector_operand (op0, mode0);
26308 if (VECTOR_MODE_P (mode1))
26309 op1 = safe_vector_operand (op1, mode1);
26311 /* Swap operands if we have a comparison that isn't available in SSE.  */
26313 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
26320 target = gen_reg_rtx (SImode);
26321 emit_move_insn (target, const0_rtx);
26322 target = gen_rtx_SUBREG (QImode, target, 0);
26324 if ((optimize && !register_operand (op0, mode0))
26325 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
26326 op0 = copy_to_mode_reg (mode0, op0);
26327 if ((optimize && !register_operand (op1, mode1))
26328 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
26329 op1 = copy_to_mode_reg (mode1, op1);
26331 pat = GEN_FCN (d->icode) (op0, op1);
26335 emit_insn (gen_rtx_SET (VOIDmode,
26336 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
26337 gen_rtx_fmt_ee (comparison, QImode,
26341 return SUBREG_REG (target);
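/* The idiom above (also used by the ptest and pcmp[ei]str expanders
   below) builds a zero-extended boolean: clear a full SImode register,
   then write only its low QImode part from the flags comparison,
   roughly:

       (set (reg:SI t) (const_int 0))
       (set (strict_low_part (subreg:QI (reg:SI t) 0))
            (cmp:QI (reg FLAGS_REG) (const_int 0)))

   so reading t back in SImode needs no further zero extension.  */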
26344 /* Subroutine of ix86_expand_args_builtin to take care of round insns. */
26347 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
26351 tree arg0 = CALL_EXPR_ARG (exp, 0);
26352 rtx op1, op0 = expand_normal (arg0);
26353 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
26354 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
26356 if (optimize || target == 0
26357 || GET_MODE (target) != tmode
26358 || !insn_data[d->icode].operand[0].predicate (target, tmode))
26359 target = gen_reg_rtx (tmode);
26361 if (VECTOR_MODE_P (mode0))
26362 op0 = safe_vector_operand (op0, mode0);
26364 if ((optimize && !register_operand (op0, mode0))
26365 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
26366 op0 = copy_to_mode_reg (mode0, op0);
26368 op1 = GEN_INT (d->comparison);
26370 pat = GEN_FCN (d->icode) (target, op0, op1);
26377 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
26380 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
26384 tree arg0 = CALL_EXPR_ARG (exp, 0);
26385 tree arg1 = CALL_EXPR_ARG (exp, 1);
26386 rtx op0 = expand_normal (arg0);
26387 rtx op1 = expand_normal (arg1);
26388 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
26389 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
26390 enum rtx_code comparison = d->comparison;
26392 if (VECTOR_MODE_P (mode0))
26393 op0 = safe_vector_operand (op0, mode0);
26394 if (VECTOR_MODE_P (mode1))
26395 op1 = safe_vector_operand (op1, mode1);
26397 target = gen_reg_rtx (SImode);
26398 emit_move_insn (target, const0_rtx);
26399 target = gen_rtx_SUBREG (QImode, target, 0);
26401 if ((optimize && !register_operand (op0, mode0))
26402 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
26403 op0 = copy_to_mode_reg (mode0, op0);
26404 if ((optimize && !register_operand (op1, mode1))
26405 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
26406 op1 = copy_to_mode_reg (mode1, op1);
26408 pat = GEN_FCN (d->icode) (op0, op1);
26412 emit_insn (gen_rtx_SET (VOIDmode,
26413 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
26414 gen_rtx_fmt_ee (comparison, QImode,
26418 return SUBREG_REG (target);
26421 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
26424 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
26425 tree exp, rtx target)
26428 tree arg0 = CALL_EXPR_ARG (exp, 0);
26429 tree arg1 = CALL_EXPR_ARG (exp, 1);
26430 tree arg2 = CALL_EXPR_ARG (exp, 2);
26431 tree arg3 = CALL_EXPR_ARG (exp, 3);
26432 tree arg4 = CALL_EXPR_ARG (exp, 4);
26433 rtx scratch0, scratch1;
26434 rtx op0 = expand_normal (arg0);
26435 rtx op1 = expand_normal (arg1);
26436 rtx op2 = expand_normal (arg2);
26437 rtx op3 = expand_normal (arg3);
26438 rtx op4 = expand_normal (arg4);
26439 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
26441 tmode0 = insn_data[d->icode].operand[0].mode;
26442 tmode1 = insn_data[d->icode].operand[1].mode;
26443 modev2 = insn_data[d->icode].operand[2].mode;
26444 modei3 = insn_data[d->icode].operand[3].mode;
26445 modev4 = insn_data[d->icode].operand[4].mode;
26446 modei5 = insn_data[d->icode].operand[5].mode;
26447 modeimm = insn_data[d->icode].operand[6].mode;
26449 if (VECTOR_MODE_P (modev2))
26450 op0 = safe_vector_operand (op0, modev2);
26451 if (VECTOR_MODE_P (modev4))
26452 op2 = safe_vector_operand (op2, modev4);
26454 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
26455 op0 = copy_to_mode_reg (modev2, op0);
26456 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
26457 op1 = copy_to_mode_reg (modei3, op1);
26458 if ((optimize && !register_operand (op2, modev4))
26459 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
26460 op2 = copy_to_mode_reg (modev4, op2);
26461 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
26462 op3 = copy_to_mode_reg (modei5, op3);
26464 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
26466 error ("the fifth argument must be a 8-bit immediate");
26470 if (d->code == IX86_BUILTIN_PCMPESTRI128)
26472 if (optimize || !target
26473 || GET_MODE (target) != tmode0
26474 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
26475 target = gen_reg_rtx (tmode0);
26477 scratch1 = gen_reg_rtx (tmode1);
26479 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
26481 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
26483 if (optimize || !target
26484 || GET_MODE (target) != tmode1
26485 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
26486 target = gen_reg_rtx (tmode1);
26488 scratch0 = gen_reg_rtx (tmode0);
26490 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
26494 gcc_assert (d->flag);
26496 scratch0 = gen_reg_rtx (tmode0);
26497 scratch1 = gen_reg_rtx (tmode1);
26499 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
26509 target = gen_reg_rtx (SImode);
26510 emit_move_insn (target, const0_rtx);
26511 target = gen_rtx_SUBREG (QImode, target, 0);
26514 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
26515 gen_rtx_fmt_ee (EQ, QImode,
26516 gen_rtx_REG ((enum machine_mode) d->flag,
26519 return SUBREG_REG (target);
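/* User-level sketch for the pcmpestr expanders (assuming the
   _mm_cmpestri wrapper and _SIDD_* control macros from the SSE4.2
   intrinsic headers; compile with -msse4.2):

       int idx = _mm_cmpestri (a, len_a, b, len_b,
                               _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_EACH);
*/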
26526 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
26529 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
26530 tree exp, rtx target)
26533 tree arg0 = CALL_EXPR_ARG (exp, 0);
26534 tree arg1 = CALL_EXPR_ARG (exp, 1);
26535 tree arg2 = CALL_EXPR_ARG (exp, 2);
26536 rtx scratch0, scratch1;
26537 rtx op0 = expand_normal (arg0);
26538 rtx op1 = expand_normal (arg1);
26539 rtx op2 = expand_normal (arg2);
26540 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
26542 tmode0 = insn_data[d->icode].operand[0].mode;
26543 tmode1 = insn_data[d->icode].operand[1].mode;
26544 modev2 = insn_data[d->icode].operand[2].mode;
26545 modev3 = insn_data[d->icode].operand[3].mode;
26546 modeimm = insn_data[d->icode].operand[4].mode;
26548 if (VECTOR_MODE_P (modev2))
26549 op0 = safe_vector_operand (op0, modev2);
26550 if (VECTOR_MODE_P (modev3))
26551 op1 = safe_vector_operand (op1, modev3);
26553 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
26554 op0 = copy_to_mode_reg (modev2, op0);
26555 if ((optimize && !register_operand (op1, modev3))
26556 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
26557 op1 = copy_to_mode_reg (modev3, op1);
26559 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
26561 error ("the third argument must be a 8-bit immediate");
26565 if (d->code == IX86_BUILTIN_PCMPISTRI128)
26567 if (optimize || !target
26568 || GET_MODE (target) != tmode0
26569 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
26570 target = gen_reg_rtx (tmode0);
26572 scratch1 = gen_reg_rtx (tmode1);
26574 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
26576 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
26578 if (optimize || !target
26579 || GET_MODE (target) != tmode1
26580 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
26581 target = gen_reg_rtx (tmode1);
26583 scratch0 = gen_reg_rtx (tmode0);
26585 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
26589 gcc_assert (d->flag);
26591 scratch0 = gen_reg_rtx (tmode0);
26592 scratch1 = gen_reg_rtx (tmode1);
26594 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
26604 target = gen_reg_rtx (SImode);
26605 emit_move_insn (target, const0_rtx);
26606 target = gen_rtx_SUBREG (QImode, target, 0);
26609 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
26610 gen_rtx_fmt_ee (EQ, QImode,
26611 gen_rtx_REG ((enum machine_mode) d->flag,
26614 return SUBREG_REG (target);
26620 /* Subroutine of ix86_expand_builtin to take care of insns with
26621 a variable number of operands.  */
26624 ix86_expand_args_builtin (const struct builtin_description *d,
26625 tree exp, rtx target)
26627 rtx pat, real_target;
26628 unsigned int i, nargs;
26629 unsigned int nargs_constant = 0;
26630 int num_memory = 0;
26634 enum machine_mode mode;
26636 bool last_arg_count = false;
26637 enum insn_code icode = d->icode;
26638 const struct insn_data_d *insn_p = &insn_data[icode];
26639 enum machine_mode tmode = insn_p->operand[0].mode;
26640 enum machine_mode rmode = VOIDmode;
26642 enum rtx_code comparison = d->comparison;
26644 switch ((enum ix86_builtin_func_type) d->flag)
26646 case V2DF_FTYPE_V2DF_ROUND:
26647 case V4DF_FTYPE_V4DF_ROUND:
26648 case V4SF_FTYPE_V4SF_ROUND:
26649 case V8SF_FTYPE_V8SF_ROUND:
26650 return ix86_expand_sse_round (d, exp, target);
26651 case INT_FTYPE_V8SF_V8SF_PTEST:
26652 case INT_FTYPE_V4DI_V4DI_PTEST:
26653 case INT_FTYPE_V4DF_V4DF_PTEST:
26654 case INT_FTYPE_V4SF_V4SF_PTEST:
26655 case INT_FTYPE_V2DI_V2DI_PTEST:
26656 case INT_FTYPE_V2DF_V2DF_PTEST:
26657 return ix86_expand_sse_ptest (d, exp, target);
26658 case FLOAT128_FTYPE_FLOAT128:
26659 case FLOAT_FTYPE_FLOAT:
26660 case INT_FTYPE_INT:
26661 case UINT64_FTYPE_INT:
26662 case UINT16_FTYPE_UINT16:
26663 case INT64_FTYPE_INT64:
26664 case INT64_FTYPE_V4SF:
26665 case INT64_FTYPE_V2DF:
26666 case INT_FTYPE_V16QI:
26667 case INT_FTYPE_V8QI:
26668 case INT_FTYPE_V8SF:
26669 case INT_FTYPE_V4DF:
26670 case INT_FTYPE_V4SF:
26671 case INT_FTYPE_V2DF:
26672 case V16QI_FTYPE_V16QI:
26673 case V8SI_FTYPE_V8SF:
26674 case V8SI_FTYPE_V4SI:
26675 case V8HI_FTYPE_V8HI:
26676 case V8HI_FTYPE_V16QI:
26677 case V8QI_FTYPE_V8QI:
26678 case V8SF_FTYPE_V8SF:
26679 case V8SF_FTYPE_V8SI:
26680 case V8SF_FTYPE_V4SF:
26681 case V8SF_FTYPE_V8HI:
26682 case V4SI_FTYPE_V4SI:
26683 case V4SI_FTYPE_V16QI:
26684 case V4SI_FTYPE_V4SF:
26685 case V4SI_FTYPE_V8SI:
26686 case V4SI_FTYPE_V8HI:
26687 case V4SI_FTYPE_V4DF:
26688 case V4SI_FTYPE_V2DF:
26689 case V4HI_FTYPE_V4HI:
26690 case V4DF_FTYPE_V4DF:
26691 case V4DF_FTYPE_V4SI:
26692 case V4DF_FTYPE_V4SF:
26693 case V4DF_FTYPE_V2DF:
26694 case V4SF_FTYPE_V4SF:
26695 case V4SF_FTYPE_V4SI:
26696 case V4SF_FTYPE_V8SF:
26697 case V4SF_FTYPE_V4DF:
26698 case V4SF_FTYPE_V8HI:
26699 case V4SF_FTYPE_V2DF:
26700 case V2DI_FTYPE_V2DI:
26701 case V2DI_FTYPE_V16QI:
26702 case V2DI_FTYPE_V8HI:
26703 case V2DI_FTYPE_V4SI:
26704 case V2DF_FTYPE_V2DF:
26705 case V2DF_FTYPE_V4SI:
26706 case V2DF_FTYPE_V4DF:
26707 case V2DF_FTYPE_V4SF:
26708 case V2DF_FTYPE_V2SI:
26709 case V2SI_FTYPE_V2SI:
26710 case V2SI_FTYPE_V4SF:
26711 case V2SI_FTYPE_V2SF:
26712 case V2SI_FTYPE_V2DF:
26713 case V2SF_FTYPE_V2SF:
26714 case V2SF_FTYPE_V2SI:
26717 case V4SF_FTYPE_V4SF_VEC_MERGE:
26718 case V2DF_FTYPE_V2DF_VEC_MERGE:
26719 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
26720 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
26721 case V16QI_FTYPE_V16QI_V16QI:
26722 case V16QI_FTYPE_V8HI_V8HI:
26723 case V8QI_FTYPE_V8QI_V8QI:
26724 case V8QI_FTYPE_V4HI_V4HI:
26725 case V8HI_FTYPE_V8HI_V8HI:
26726 case V8HI_FTYPE_V16QI_V16QI:
26727 case V8HI_FTYPE_V4SI_V4SI:
26728 case V8SF_FTYPE_V8SF_V8SF:
26729 case V8SF_FTYPE_V8SF_V8SI:
26730 case V4SI_FTYPE_V4SI_V4SI:
26731 case V4SI_FTYPE_V8HI_V8HI:
26732 case V4SI_FTYPE_V4SF_V4SF:
26733 case V4SI_FTYPE_V2DF_V2DF:
26734 case V4HI_FTYPE_V4HI_V4HI:
26735 case V4HI_FTYPE_V8QI_V8QI:
26736 case V4HI_FTYPE_V2SI_V2SI:
26737 case V4DF_FTYPE_V4DF_V4DF:
26738 case V4DF_FTYPE_V4DF_V4DI:
26739 case V4SF_FTYPE_V4SF_V4SF:
26740 case V4SF_FTYPE_V4SF_V4SI:
26741 case V4SF_FTYPE_V4SF_V2SI:
26742 case V4SF_FTYPE_V4SF_V2DF:
26743 case V4SF_FTYPE_V4SF_DI:
26744 case V4SF_FTYPE_V4SF_SI:
26745 case V2DI_FTYPE_V2DI_V2DI:
26746 case V2DI_FTYPE_V16QI_V16QI:
26747 case V2DI_FTYPE_V4SI_V4SI:
26748 case V2DI_FTYPE_V2DI_V16QI:
26749 case V2DI_FTYPE_V2DF_V2DF:
26750 case V2SI_FTYPE_V2SI_V2SI:
26751 case V2SI_FTYPE_V4HI_V4HI:
26752 case V2SI_FTYPE_V2SF_V2SF:
26753 case V2DF_FTYPE_V2DF_V2DF:
26754 case V2DF_FTYPE_V2DF_V4SF:
26755 case V2DF_FTYPE_V2DF_V2DI:
26756 case V2DF_FTYPE_V2DF_DI:
26757 case V2DF_FTYPE_V2DF_SI:
26758 case V2SF_FTYPE_V2SF_V2SF:
26759 case V1DI_FTYPE_V1DI_V1DI:
26760 case V1DI_FTYPE_V8QI_V8QI:
26761 case V1DI_FTYPE_V2SI_V2SI:
26762 if (comparison == UNKNOWN)
26763 return ix86_expand_binop_builtin (icode, exp, target);
26766 case V4SF_FTYPE_V4SF_V4SF_SWAP:
26767 case V2DF_FTYPE_V2DF_V2DF_SWAP:
26768 gcc_assert (comparison != UNKNOWN);
26772 case V8HI_FTYPE_V8HI_V8HI_COUNT:
26773 case V8HI_FTYPE_V8HI_SI_COUNT:
26774 case V4SI_FTYPE_V4SI_V4SI_COUNT:
26775 case V4SI_FTYPE_V4SI_SI_COUNT:
26776 case V4HI_FTYPE_V4HI_V4HI_COUNT:
26777 case V4HI_FTYPE_V4HI_SI_COUNT:
26778 case V2DI_FTYPE_V2DI_V2DI_COUNT:
26779 case V2DI_FTYPE_V2DI_SI_COUNT:
26780 case V2SI_FTYPE_V2SI_V2SI_COUNT:
26781 case V2SI_FTYPE_V2SI_SI_COUNT:
26782 case V1DI_FTYPE_V1DI_V1DI_COUNT:
26783 case V1DI_FTYPE_V1DI_SI_COUNT:
26785 last_arg_count = true;
26787 case UINT64_FTYPE_UINT64_UINT64:
26788 case UINT_FTYPE_UINT_UINT:
26789 case UINT_FTYPE_UINT_USHORT:
26790 case UINT_FTYPE_UINT_UCHAR:
26791 case UINT16_FTYPE_UINT16_INT:
26792 case UINT8_FTYPE_UINT8_INT:
26795 case V2DI_FTYPE_V2DI_INT_CONVERT:
26798 nargs_constant = 1;
26800 case V8HI_FTYPE_V8HI_INT:
26801 case V8HI_FTYPE_V8SF_INT:
26802 case V8HI_FTYPE_V4SF_INT:
26803 case V8SF_FTYPE_V8SF_INT:
26804 case V4SI_FTYPE_V4SI_INT:
26805 case V4SI_FTYPE_V8SI_INT:
26806 case V4HI_FTYPE_V4HI_INT:
26807 case V4DF_FTYPE_V4DF_INT:
26808 case V4SF_FTYPE_V4SF_INT:
26809 case V4SF_FTYPE_V8SF_INT:
26810 case V2DI_FTYPE_V2DI_INT:
26811 case V2DF_FTYPE_V2DF_INT:
26812 case V2DF_FTYPE_V4DF_INT:
26814 nargs_constant = 1;
26816 case V16QI_FTYPE_V16QI_V16QI_V16QI:
26817 case V8SF_FTYPE_V8SF_V8SF_V8SF:
26818 case V4DF_FTYPE_V4DF_V4DF_V4DF:
26819 case V4SF_FTYPE_V4SF_V4SF_V4SF:
26820 case V2DF_FTYPE_V2DF_V2DF_V2DF:
26823 case V16QI_FTYPE_V16QI_V16QI_INT:
26824 case V8HI_FTYPE_V8HI_V8HI_INT:
26825 case V8SI_FTYPE_V8SI_V8SI_INT:
26826 case V8SI_FTYPE_V8SI_V4SI_INT:
26827 case V8SF_FTYPE_V8SF_V8SF_INT:
26828 case V8SF_FTYPE_V8SF_V4SF_INT:
26829 case V4SI_FTYPE_V4SI_V4SI_INT:
26830 case V4DF_FTYPE_V4DF_V4DF_INT:
26831 case V4DF_FTYPE_V4DF_V2DF_INT:
26832 case V4SF_FTYPE_V4SF_V4SF_INT:
26833 case V2DI_FTYPE_V2DI_V2DI_INT:
26834 case V2DF_FTYPE_V2DF_V2DF_INT:
26836 nargs_constant = 1;
26838 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
26841 nargs_constant = 1;
26843 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
26846 nargs_constant = 1;
26848 case V2DI_FTYPE_V2DI_UINT_UINT:
26850 nargs_constant = 2;
26852 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
26853 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
26854 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
26855 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
26857 nargs_constant = 1;
26859 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
26861 nargs_constant = 2;
26864 gcc_unreachable ();
26867 gcc_assert (nargs <= ARRAY_SIZE (args));
26869 if (comparison != UNKNOWN)
26871 gcc_assert (nargs == 2);
26872 return ix86_expand_sse_compare (d, exp, target, swap);
26875 if (rmode == VOIDmode || rmode == tmode)
26879 || GET_MODE (target) != tmode
26880 || !insn_p->operand[0].predicate (target, tmode))
26881 target = gen_reg_rtx (tmode);
26882 real_target = target;
26886 target = gen_reg_rtx (rmode);
26887 real_target = simplify_gen_subreg (tmode, target, rmode, 0);
26890 for (i = 0; i < nargs; i++)
26892 tree arg = CALL_EXPR_ARG (exp, i);
26893 rtx op = expand_normal (arg);
26894 enum machine_mode mode = insn_p->operand[i + 1].mode;
26895 bool match = insn_p->operand[i + 1].predicate (op, mode);
26897 if (last_arg_count && (i + 1) == nargs)
26899 /* SIMD shift insns take either an 8-bit immediate or a
26900    register as the count.  But builtin functions take int as
26901    the count.  If the count doesn't match, put it in a register.  */
26904 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
26905 if (!insn_p->operand[i + 1].predicate (op, mode))
26906 op = copy_to_reg (op);
26909 else if ((nargs - i) <= nargs_constant)
26914 case CODE_FOR_sse4_1_roundpd:
26915 case CODE_FOR_sse4_1_roundps:
26916 case CODE_FOR_sse4_1_roundsd:
26917 case CODE_FOR_sse4_1_roundss:
26918 case CODE_FOR_sse4_1_blendps:
26919 case CODE_FOR_avx_blendpd256:
26920 case CODE_FOR_avx_vpermilv4df:
26921 case CODE_FOR_avx_roundpd256:
26922 case CODE_FOR_avx_roundps256:
26923 error ("the last argument must be a 4-bit immediate");
26926 case CODE_FOR_sse4_1_blendpd:
26927 case CODE_FOR_avx_vpermilv2df:
26928 case CODE_FOR_xop_vpermil2v2df3:
26929 case CODE_FOR_xop_vpermil2v4sf3:
26930 case CODE_FOR_xop_vpermil2v4df3:
26931 case CODE_FOR_xop_vpermil2v8sf3:
26932 error ("the last argument must be a 2-bit immediate");
26935 case CODE_FOR_avx_vextractf128v4df:
26936 case CODE_FOR_avx_vextractf128v8sf:
26937 case CODE_FOR_avx_vextractf128v8si:
26938 case CODE_FOR_avx_vinsertf128v4df:
26939 case CODE_FOR_avx_vinsertf128v8sf:
26940 case CODE_FOR_avx_vinsertf128v8si:
26941 error ("the last argument must be a 1-bit immediate");
26944 case CODE_FOR_avx_vmcmpv2df3:
26945 case CODE_FOR_avx_vmcmpv4sf3:
26946 case CODE_FOR_avx_cmpv2df3:
26947 case CODE_FOR_avx_cmpv4sf3:
26948 case CODE_FOR_avx_cmpv4df3:
26949 case CODE_FOR_avx_cmpv8sf3:
26950 error ("the last argument must be a 5-bit immediate");
26954 switch (nargs_constant)
26957 if ((nargs - i) == nargs_constant)
26959 error ("the next to last argument must be an 8-bit immediate");
26963 error ("the last argument must be an 8-bit immediate");
26966 gcc_unreachable ();
26973 if (VECTOR_MODE_P (mode))
26974 op = safe_vector_operand (op, mode);
26976 /* If we aren't optimizing, only allow one memory operand to be generated.  */
26978 if (memory_operand (op, mode))
26981 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
26983 if (optimize || !match || num_memory > 1)
26984 op = copy_to_mode_reg (mode, op);
26988 op = copy_to_reg (op);
26989 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
26994 args[i].mode = mode;
27000 pat = GEN_FCN (icode) (real_target, args[0].op);
27003 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
27006 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
27010 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
27011 args[2].op, args[3].op);
27014 gcc_unreachable ();
27024 /* Subroutine of ix86_expand_builtin to take care of special insns
27025 with a variable number of operands.  */
27028 ix86_expand_special_args_builtin (const struct builtin_description *d,
27029 tree exp, rtx target)
27033 unsigned int i, nargs, arg_adjust, memory;
27037 enum machine_mode mode;
27039 enum insn_code icode = d->icode;
27040 bool last_arg_constant = false;
27041 const struct insn_data_d *insn_p = &insn_data[icode];
27042 enum machine_mode tmode = insn_p->operand[0].mode;
27043 enum { load, store } klass;
27045 switch ((enum ix86_builtin_func_type) d->flag)
27047 case VOID_FTYPE_VOID:
27048 if (icode == CODE_FOR_avx_vzeroupper)
27049 target = GEN_INT (vzeroupper_intrinsic);
27050 emit_insn (GEN_FCN (icode) (target));
27052 case VOID_FTYPE_UINT64:
27053 case VOID_FTYPE_UNSIGNED:
27059 case UINT64_FTYPE_VOID:
27060 case UNSIGNED_FTYPE_VOID:
27065 case UINT64_FTYPE_PUNSIGNED:
27066 case V2DI_FTYPE_PV2DI:
27067 case V32QI_FTYPE_PCCHAR:
27068 case V16QI_FTYPE_PCCHAR:
27069 case V8SF_FTYPE_PCV4SF:
27070 case V8SF_FTYPE_PCFLOAT:
27071 case V4SF_FTYPE_PCFLOAT:
27072 case V4DF_FTYPE_PCV2DF:
27073 case V4DF_FTYPE_PCDOUBLE:
27074 case V2DF_FTYPE_PCDOUBLE:
27075 case VOID_FTYPE_PVOID:
27080 case VOID_FTYPE_PV2SF_V4SF:
27081 case VOID_FTYPE_PV4DI_V4DI:
27082 case VOID_FTYPE_PV2DI_V2DI:
27083 case VOID_FTYPE_PCHAR_V32QI:
27084 case VOID_FTYPE_PCHAR_V16QI:
27085 case VOID_FTYPE_PFLOAT_V8SF:
27086 case VOID_FTYPE_PFLOAT_V4SF:
27087 case VOID_FTYPE_PDOUBLE_V4DF:
27088 case VOID_FTYPE_PDOUBLE_V2DF:
27089 case VOID_FTYPE_PULONGLONG_ULONGLONG:
27090 case VOID_FTYPE_PINT_INT:
27093 /* Reserve memory operand for target. */
27094 memory = ARRAY_SIZE (args);
27096 case V4SF_FTYPE_V4SF_PCV2SF:
27097 case V2DF_FTYPE_V2DF_PCDOUBLE:
27102 case V8SF_FTYPE_PCV8SF_V8SI:
27103 case V4DF_FTYPE_PCV4DF_V4DI:
27104 case V4SF_FTYPE_PCV4SF_V4SI:
27105 case V2DF_FTYPE_PCV2DF_V2DI:
27110 case VOID_FTYPE_PV8SF_V8SI_V8SF:
27111 case VOID_FTYPE_PV4DF_V4DI_V4DF:
27112 case VOID_FTYPE_PV4SF_V4SI_V4SF:
27113 case VOID_FTYPE_PV2DF_V2DI_V2DF:
27116 /* Reserve memory operand for target. */
27117 memory = ARRAY_SIZE (args);
27119 case VOID_FTYPE_UINT_UINT_UINT:
27120 case VOID_FTYPE_UINT64_UINT_UINT:
27121 case UCHAR_FTYPE_UINT_UINT_UINT:
27122 case UCHAR_FTYPE_UINT64_UINT_UINT:
27125 memory = ARRAY_SIZE (args);
27126 last_arg_constant = true;
27129 gcc_unreachable ();
27132 gcc_assert (nargs <= ARRAY_SIZE (args));
27134 if (klass == store)
27136 arg = CALL_EXPR_ARG (exp, 0);
27137 op = expand_normal (arg);
27138 gcc_assert (target == 0);
27140 target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
27142 target = force_reg (tmode, op);
27150 || GET_MODE (target) != tmode
27151 || !insn_p->operand[0].predicate (target, tmode))
27152 target = gen_reg_rtx (tmode);
27155 for (i = 0; i < nargs; i++)
27157 enum machine_mode mode = insn_p->operand[i + 1].mode;
27160 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
27161 op = expand_normal (arg);
27162 match = insn_p->operand[i + 1].predicate (op, mode);
27164 if (last_arg_constant && (i + 1) == nargs)
27168 if (icode == CODE_FOR_lwp_lwpvalsi3
27169 || icode == CODE_FOR_lwp_lwpinssi3
27170 || icode == CODE_FOR_lwp_lwpvaldi3
27171 || icode == CODE_FOR_lwp_lwpinsdi3)
27172 error ("the last argument must be a 32-bit immediate");
27174 error ("the last argument must be an 8-bit immediate");
27182 /* This must be the memory operand. */
27183 op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
27184 gcc_assert (GET_MODE (op) == mode
27185 || GET_MODE (op) == VOIDmode);
27189 /* This must be a register.  */
27190 if (VECTOR_MODE_P (mode))
27191 op = safe_vector_operand (op, mode);
27193 gcc_assert (GET_MODE (op) == mode
27194 || GET_MODE (op) == VOIDmode);
27195 op = copy_to_mode_reg (mode, op);
27200 args[i].mode = mode;
27206 pat = GEN_FCN (icode) (target);
27209 pat = GEN_FCN (icode) (target, args[0].op);
27212 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
27215 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
27218 gcc_unreachable ();
27224 return klass == store ? 0 : target;
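/* A load/store usage sketch for the special builtins expanded above
   (the builtin names and the __v2df vector type are the ones
   emmintrin.h uses for its unaligned-move wrappers; illustrative
   only).  The load builtin takes klass == load, the store builtin
   klass == store:

       typedef double __v2df __attribute__ ((__vector_size__ (16)));

       void
       copy_unaligned (double *q, const double *p)
       {
         __v2df v = __builtin_ia32_loadupd (p);
         __builtin_ia32_storeupd (q, v);
       }
*/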
27227 /* Return the integer constant in ARG. Constrain it to be in the range
27228 of the subparts of VEC_TYPE; issue an error if not. */
27231 get_element_number (tree vec_type, tree arg)
27233 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
27235 if (!host_integerp (arg, 1)
27236 || (elt = tree_low_cst (arg, 1), elt > max))
27238 error ("selector must be an integer constant in the range 0..%wi", max);
27245 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
27246 ix86_expand_vector_init. We DO have language-level syntax for this, in
27247 the form of (type){ init-list }. Except that since we can't place emms
27248 instructions from inside the compiler, we can't allow the use of MMX
27249 registers unless the user explicitly asks for it. So we do *not* define
27250 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
27251 we have builtins invoked by mmintrin.h that give us license to emit
27252 these sorts of instructions. */
27255 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
27257 enum machine_mode tmode = TYPE_MODE (type);
27258 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
27259 int i, n_elt = GET_MODE_NUNITS (tmode);
27260 rtvec v = rtvec_alloc (n_elt);
27262 gcc_assert (VECTOR_MODE_P (tmode));
27263 gcc_assert (call_expr_nargs (exp) == n_elt);
27265 for (i = 0; i < n_elt; ++i)
27267 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
27268 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
27271 if (!target || !register_operand (target, tmode))
27272 target = gen_reg_rtx (tmode);
27274 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
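  /* E.g. mmintrin.h's _mm_set_pi32 is a thin wrapper around
     __builtin_ia32_vec_init_v2si and reaches this expander (assuming
     the mmintrin.h wrapper name).  */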
27278 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
27279 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
27280 had a language-level syntax for referencing vector elements. */
27283 ix86_expand_vec_ext_builtin (tree exp, rtx target)
27285 enum machine_mode tmode, mode0;
27290 arg0 = CALL_EXPR_ARG (exp, 0);
27291 arg1 = CALL_EXPR_ARG (exp, 1);
27293 op0 = expand_normal (arg0);
27294 elt = get_element_number (TREE_TYPE (arg0), arg1);
27296 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
27297 mode0 = TYPE_MODE (TREE_TYPE (arg0));
27298 gcc_assert (VECTOR_MODE_P (mode0));
27300 op0 = force_reg (mode0, op0);
27302 if (optimize || !target || !register_operand (target, tmode))
27303 target = gen_reg_rtx (tmode);
27305 ix86_expand_vector_extract (true, target, op0, elt);
27310 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
27311 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
27312 a language-level syntax for referencing vector elements. */
27315 ix86_expand_vec_set_builtin (tree exp)
27317 enum machine_mode tmode, mode1;
27318 tree arg0, arg1, arg2;
27320 rtx op0, op1, target;
27322 arg0 = CALL_EXPR_ARG (exp, 0);
27323 arg1 = CALL_EXPR_ARG (exp, 1);
27324 arg2 = CALL_EXPR_ARG (exp, 2);
27326 tmode = TYPE_MODE (TREE_TYPE (arg0));
27327 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
27328 gcc_assert (VECTOR_MODE_P (tmode));
27330 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
27331 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
27332 elt = get_element_number (TREE_TYPE (arg0), arg2);
27334 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
27335 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
27337 op0 = force_reg (tmode, op0);
27338 op1 = force_reg (mode1, op1);
27340 /* OP0 is the source of these builtin functions and shouldn't be
27341 modified.  Create a copy, use it, and return it as the target.  */
27342 target = gen_reg_rtx (tmode);
27343 emit_move_insn (target, op0);
27344 ix86_expand_vector_set (true, target, op1, elt);
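/* Illustration (a sketch, not code from this file): the copy gives the
   vec_set builtins value semantics, so the source vector is unchanged:

       __v8hi v = { 0, 1, 2, 3, 4, 5, 6, 7 };
       __v8hi w = __builtin_ia32_vec_set_v8hi (v, 99, 2);
       (w[2] == 99, while v keeps its original contents)
   */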
27349 /* Expand an expression EXP that calls a built-in function,
27350 with result going to TARGET if that's convenient
27351 (and in mode MODE if that's convenient).
27352 SUBTARGET may be used as the target for computing one of EXP's operands.
27353 IGNORE is nonzero if the value is to be ignored. */
27356 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
27357 enum machine_mode mode ATTRIBUTE_UNUSED,
27358 int ignore ATTRIBUTE_UNUSED)
27360 const struct builtin_description *d;
27362 enum insn_code icode;
27363 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
27364 tree arg0, arg1, arg2;
27365 rtx op0, op1, op2, pat;
27366 enum machine_mode mode0, mode1, mode2;
27367 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
27369 /* Determine whether the builtin function is available under the current ISA.
27370 Originally the builtin was not created if it wasn't applicable to the
27371 current ISA based on the command line switches. With function specific
27372 options, we need to check in the context of the function making the call
27373 whether it is supported. */
27374 if (ix86_builtins_isa[fcode].isa
27375 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
27377 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
27378 NULL, NULL, false);
27381 error ("%qE needs unknown isa option", fndecl);
27384 gcc_assert (opts != NULL);
27385 error ("%qE needs isa option %s", fndecl, opts);
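/* Illustration of the diagnostic (a sketch, not code from this file):
   calling an AVX builtin such as

       __m256 f (__m256 x) { return __builtin_ia32_sqrtps256 (x); }

   in a function compiled without AVX enabled (no -mavx and no
   suitable target attribute) is rejected with "needs isa option"
   rather than silently expanding to an unavailable pattern.  */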
27393 case IX86_BUILTIN_MASKMOVQ:
27394 case IX86_BUILTIN_MASKMOVDQU:
27395 icode = (fcode == IX86_BUILTIN_MASKMOVQ
27396 ? CODE_FOR_mmx_maskmovq
27397 : CODE_FOR_sse2_maskmovdqu);
27398 /* Note the arg order is different from the operand order. */
27399 arg1 = CALL_EXPR_ARG (exp, 0);
27400 arg2 = CALL_EXPR_ARG (exp, 1);
27401 arg0 = CALL_EXPR_ARG (exp, 2);
27402 op0 = expand_normal (arg0);
27403 op1 = expand_normal (arg1);
27404 op2 = expand_normal (arg2);
27405 mode0 = insn_data[icode].operand[0].mode;
27406 mode1 = insn_data[icode].operand[1].mode;
27407 mode2 = insn_data[icode].operand[2].mode;
27409 op0 = force_reg (Pmode, op0);
27410 op0 = gen_rtx_MEM (mode1, op0);
27412 if (!insn_data[icode].operand[0].predicate (op0, mode0))
27413 op0 = copy_to_mode_reg (mode0, op0);
27414 if (!insn_data[icode].operand[1].predicate (op1, mode1))
27415 op1 = copy_to_mode_reg (mode1, op1);
27416 if (!insn_data[icode].operand[2].predicate (op2, mode2))
27417 op2 = copy_to_mode_reg (mode2, op2);
27418 pat = GEN_FCN (icode) (op0, op1, op2);
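/* Concretely (a sketch, not code from this file): for
   _mm_maskmove_si64 (d, n, p) the call's arguments arrive as
   (data, mask, address) while the insn's operands are
   (mem, data, mask), hence ARG0/ARG1/ARG2 are permuted above.  */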
27424 case IX86_BUILTIN_LDMXCSR:
27425 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
27426 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
27427 emit_move_insn (target, op0);
27428 emit_insn (gen_sse_ldmxcsr (target));
27431 case IX86_BUILTIN_STMXCSR:
27432 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
27433 emit_insn (gen_sse_stmxcsr (target));
27434 return copy_to_mode_reg (SImode, target);
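/* Usage sketch (user level, not code from this file): these two
   builtins back the _mm_setcsr and _mm_getcsr intrinsics, which
   round-trip MXCSR through the stack slot allocated above:

       unsigned saved = __builtin_ia32_stmxcsr ();
       __builtin_ia32_ldmxcsr (saved | 0x8040);
       (0x8040 sets the FTZ and DAZ bits; the value is illustrative)
   */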
27436 case IX86_BUILTIN_CLFLUSH:
27437 arg0 = CALL_EXPR_ARG (exp, 0);
27438 op0 = expand_normal (arg0);
27439 icode = CODE_FOR_sse2_clflush;
27440 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
27441 op0 = copy_to_mode_reg (Pmode, op0);
27443 emit_insn (gen_sse2_clflush (op0));
27446 case IX86_BUILTIN_MONITOR:
27447 arg0 = CALL_EXPR_ARG (exp, 0);
27448 arg1 = CALL_EXPR_ARG (exp, 1);
27449 arg2 = CALL_EXPR_ARG (exp, 2);
27450 op0 = expand_normal (arg0);
27451 op1 = expand_normal (arg1);
27452 op2 = expand_normal (arg2);
27454 op0 = copy_to_mode_reg (Pmode, op0);
27456 op1 = copy_to_mode_reg (SImode, op1);
27458 op2 = copy_to_mode_reg (SImode, op2);
27459 emit_insn (ix86_gen_monitor (op0, op1, op2));
27462 case IX86_BUILTIN_MWAIT:
27463 arg0 = CALL_EXPR_ARG (exp, 0);
27464 arg1 = CALL_EXPR_ARG (exp, 1);
27465 op0 = expand_normal (arg0);
27466 op1 = expand_normal (arg1);
27468 op0 = copy_to_mode_reg (SImode, op0);
27470 op1 = copy_to_mode_reg (SImode, op1);
27471 emit_insn (gen_sse3_mwait (op0, op1));
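/* Usage sketch (user level, not code from this file):

       __builtin_ia32_monitor (addr, 0, 0);   arm the monitor on ADDR
       __builtin_ia32_mwait (0, 0);           wait for a write to it

   The address and the two hint arguments are forced into registers
   by the copy_to_mode_reg calls above.  */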
27474 case IX86_BUILTIN_VEC_INIT_V2SI:
27475 case IX86_BUILTIN_VEC_INIT_V4HI:
27476 case IX86_BUILTIN_VEC_INIT_V8QI:
27477 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
27479 case IX86_BUILTIN_VEC_EXT_V2DF:
27480 case IX86_BUILTIN_VEC_EXT_V2DI:
27481 case IX86_BUILTIN_VEC_EXT_V4SF:
27482 case IX86_BUILTIN_VEC_EXT_V4SI:
27483 case IX86_BUILTIN_VEC_EXT_V8HI:
27484 case IX86_BUILTIN_VEC_EXT_V2SI:
27485 case IX86_BUILTIN_VEC_EXT_V4HI:
27486 case IX86_BUILTIN_VEC_EXT_V16QI:
27487 return ix86_expand_vec_ext_builtin (exp, target);
27489 case IX86_BUILTIN_VEC_SET_V2DI:
27490 case IX86_BUILTIN_VEC_SET_V4SF:
27491 case IX86_BUILTIN_VEC_SET_V4SI:
27492 case IX86_BUILTIN_VEC_SET_V8HI:
27493 case IX86_BUILTIN_VEC_SET_V4HI:
27494 case IX86_BUILTIN_VEC_SET_V16QI:
27495 return ix86_expand_vec_set_builtin (exp);
27497 case IX86_BUILTIN_VEC_PERM_V2DF:
27498 case IX86_BUILTIN_VEC_PERM_V4SF:
27499 case IX86_BUILTIN_VEC_PERM_V2DI:
27500 case IX86_BUILTIN_VEC_PERM_V4SI:
27501 case IX86_BUILTIN_VEC_PERM_V8HI:
27502 case IX86_BUILTIN_VEC_PERM_V16QI:
27503 case IX86_BUILTIN_VEC_PERM_V2DI_U:
27504 case IX86_BUILTIN_VEC_PERM_V4SI_U:
27505 case IX86_BUILTIN_VEC_PERM_V8HI_U:
27506 case IX86_BUILTIN_VEC_PERM_V16QI_U:
27507 case IX86_BUILTIN_VEC_PERM_V4DF:
27508 case IX86_BUILTIN_VEC_PERM_V8SF:
27509 return ix86_expand_vec_perm_builtin (exp);
27511 case IX86_BUILTIN_INFQ:
27512 case IX86_BUILTIN_HUGE_VALQ:
27514 REAL_VALUE_TYPE inf;
27518 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
27520 tmp = validize_mem (force_const_mem (mode, tmp));
27523 target = gen_reg_rtx (mode);
27525 emit_move_insn (target, tmp);
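/* Usage sketch (not code from this file): __float128 x = __builtin_infq ();
   materializes the TFmode infinity by loading it from the constant
   pool, since no instruction builds the value directly.  */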
27529 case IX86_BUILTIN_LLWPCB:
27530 arg0 = CALL_EXPR_ARG (exp, 0);
27531 op0 = expand_normal (arg0);
27532 icode = CODE_FOR_lwp_llwpcb;
27533 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
27534 op0 = copy_to_mode_reg (Pmode, op0);
27535 emit_insn (gen_lwp_llwpcb (op0));
27538 case IX86_BUILTIN_SLWPCB:
27539 icode = CODE_FOR_lwp_slwpcb;
27541 || !insn_data[icode].operand[0].predicate (target, Pmode))
27542 target = gen_reg_rtx (Pmode);
27543 emit_insn (gen_lwp_slwpcb (target));
27546 case IX86_BUILTIN_BEXTRI32:
27547 case IX86_BUILTIN_BEXTRI64:
27548 arg0 = CALL_EXPR_ARG (exp, 0);
27549 arg1 = CALL_EXPR_ARG (exp, 1);
27550 op0 = expand_normal (arg0);
27551 op1 = expand_normal (arg1);
27552 icode = (fcode == IX86_BUILTIN_BEXTRI32
27553 ? CODE_FOR_tbm_bextri_si
27554 : CODE_FOR_tbm_bextri_di);
27555 if (!CONST_INT_P (op1))
27557 error ("last argument must be an immediate");
27562 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
27563 unsigned char lsb_index = INTVAL (op1) & 0xFF;
27564 op1 = GEN_INT (length);
27565 op2 = GEN_INT (lsb_index);
27566 pat = GEN_FCN (icode) (target, op0, op1, op2);
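/* Worked example (a sketch, not code from this file): the immediate
   packs the bit-field descriptor as (length << 8) | lsb_index, so

       unsigned r = __builtin_ia32_bextri_u32 (0xABCD1234, 0x0804);

   extracts 8 bits starting at bit 4: r == (0xABCD1234 >> 4) & 0xFF
   == 0x23.  */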
27572 case IX86_BUILTIN_RDRAND16_STEP:
27573 icode = CODE_FOR_rdrandhi_1;
27577 case IX86_BUILTIN_RDRAND32_STEP:
27578 icode = CODE_FOR_rdrandsi_1;
27582 case IX86_BUILTIN_RDRAND64_STEP:
27583 icode = CODE_FOR_rdranddi_1;
27587 op0 = gen_reg_rtx (mode0);
27588 emit_insn (GEN_FCN (icode) (op0));
27590 op1 = gen_reg_rtx (SImode);
27591 emit_move_insn (op1, CONST1_RTX (SImode));
27593 /* Emit SImode conditional move. */
27594 if (mode0 == HImode)
27596 op2 = gen_reg_rtx (SImode);
27597 emit_insn (gen_zero_extendhisi2 (op2, op0));
27599 else if (mode0 == SImode)
27602 op2 = gen_rtx_SUBREG (SImode, op0, 0);
27604 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
27606 emit_insn (gen_rtx_SET (VOIDmode, op1,
27607 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
27608 emit_move_insn (target, op1);
27610 arg0 = CALL_EXPR_ARG (exp, 0);
27611 op1 = expand_normal (arg0);
27612 if (!address_operand (op1, VOIDmode))
27613 op1 = copy_addr_to_reg (op1);
27614 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
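/* Usage sketch (user level, not code from this file): the expansion
   above open-codes the *_step intrinsics, e.g.

       unsigned int r;
       if (__builtin_ia32_rdrand32_step (&r))
         consume (r);   (hypothetical consumer; nonzero means R is valid)

   The GEU test on CCCmode turns the carry flag set by rdrand into
   the 0/1 return value.  */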
27621 for (i = 0, d = bdesc_special_args;
27622 i < ARRAY_SIZE (bdesc_special_args);
27624 if (d->code == fcode)
27625 return ix86_expand_special_args_builtin (d, exp, target);
27627 for (i = 0, d = bdesc_args;
27628 i < ARRAY_SIZE (bdesc_args);
27630 if (d->code == fcode)
27633 case IX86_BUILTIN_FABSQ:
27634 case IX86_BUILTIN_COPYSIGNQ:
27636 /* Emit a normal call if SSE2 isn't available. */
27637 return expand_call (exp, target, ignore);
27639 return ix86_expand_args_builtin (d, exp, target);
27642 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
27643 if (d->code == fcode)
27644 return ix86_expand_sse_comi (d, exp, target);
27646 for (i = 0, d = bdesc_pcmpestr;
27647 i < ARRAY_SIZE (bdesc_pcmpestr);
27649 if (d->code == fcode)
27650 return ix86_expand_sse_pcmpestr (d, exp, target);
27652 for (i = 0, d = bdesc_pcmpistr;
27653 i < ARRAY_SIZE (bdesc_pcmpistr);
27655 if (d->code == fcode)
27656 return ix86_expand_sse_pcmpistr (d, exp, target);
27658 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
27659 if (d->code == fcode)
27660 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
27661 (enum ix86_builtin_func_type)
27662 d->flag, d->comparison);
27664 gcc_unreachable ();
27667 /* Returns a function decl for a vectorized version of the builtin function
27668 with builtin function code FN and the result vector type TYPE, or NULL_TREE
27669 if it is not available. */
27672 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
27675 enum machine_mode in_mode, out_mode;
27677 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
27679 if (TREE_CODE (type_out) != VECTOR_TYPE
27680 || TREE_CODE (type_in) != VECTOR_TYPE
27681 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
27684 out_mode = TYPE_MODE (TREE_TYPE (type_out));
27685 out_n = TYPE_VECTOR_SUBPARTS (type_out);
27686 in_mode = TYPE_MODE (TREE_TYPE (type_in));
27687 in_n = TYPE_VECTOR_SUBPARTS (type_in);
27691 case BUILT_IN_SQRT:
27692 if (out_mode == DFmode && in_mode == DFmode)
27694 if (out_n == 2 && in_n == 2)
27695 return ix86_builtins[IX86_BUILTIN_SQRTPD];
27696 else if (out_n == 4 && in_n == 4)
27697 return ix86_builtins[IX86_BUILTIN_SQRTPD256];
27701 case BUILT_IN_SQRTF:
27702 if (out_mode == SFmode && in_mode == SFmode)
27704 if (out_n == 4 && in_n == 4)
27705 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
27706 else if (out_n == 8 && in_n == 8)
27707 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR256];
27711 case BUILT_IN_LRINT:
27712 if (out_mode == SImode && out_n == 4
27713 && in_mode == DFmode && in_n == 2)
27714 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
27717 case BUILT_IN_LRINTF:
27718 if (out_mode == SImode && in_mode == SFmode)
27720 if (out_n == 4 && in_n == 4)
27721 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
27722 else if (out_n == 8 && in_n == 8)
27723 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ256];
27727 case BUILT_IN_COPYSIGN:
27728 if (out_mode == DFmode && in_mode == DFmode)
27730 if (out_n == 2 && in_n == 2)
27731 return ix86_builtins[IX86_BUILTIN_CPYSGNPD];
27732 else if (out_n == 4 && in_n == 4)
27733 return ix86_builtins[IX86_BUILTIN_CPYSGNPD256];
27737 case BUILT_IN_COPYSIGNF:
27738 if (out_mode == SFmode && in_mode == SFmode)
27740 if (out_n == 4 && in_n == 4)
27741 return ix86_builtins[IX86_BUILTIN_CPYSGNPS];
27742 else if (out_n == 8 && in_n == 8)
27743 return ix86_builtins[IX86_BUILTIN_CPYSGNPS256];
27747 case BUILT_IN_FLOOR:
27748 /* The round insn does not trap on denormals. */
27749 if (flag_trapping_math || !TARGET_ROUND)
27752 if (out_mode == DFmode && in_mode == DFmode)
27754 if (out_n == 2 && in_n == 2)
27755 return ix86_builtins[IX86_BUILTIN_FLOORPD];
27756 else if (out_n == 4 && in_n == 4)
27757 return ix86_builtins[IX86_BUILTIN_FLOORPD256];
27761 case BUILT_IN_FLOORF:
27762 /* The round insn does not trap on denormals. */
27763 if (flag_trapping_math || !TARGET_ROUND)
27766 if (out_mode == SFmode && in_mode == SFmode)
27768 if (out_n == 4 && in_n == 4)
27769 return ix86_builtins[IX86_BUILTIN_FLOORPS];
27770 else if (out_n == 8 && in_n == 8)
27771 return ix86_builtins[IX86_BUILTIN_FLOORPS256];
27775 case BUILT_IN_CEIL:
27776 /* The round insn does not trap on denormals. */
27777 if (flag_trapping_math || !TARGET_ROUND)
27780 if (out_mode == DFmode && in_mode == DFmode)
27782 if (out_n == 2 && in_n == 2)
27783 return ix86_builtins[IX86_BUILTIN_CEILPD];
27784 else if (out_n == 4 && in_n == 4)
27785 return ix86_builtins[IX86_BUILTIN_CEILPD256];
27789 case BUILT_IN_CEILF:
27790 /* The round insn does not trap on denormals. */
27791 if (flag_trapping_math || !TARGET_ROUND)
27794 if (out_mode == SFmode && in_mode == SFmode)
27796 if (out_n == 4 && in_n == 4)
27797 return ix86_builtins[IX86_BUILTIN_CEILPS];
27798 else if (out_n == 8 && in_n == 8)
27799 return ix86_builtins[IX86_BUILTIN_CEILPS256];
27803 case BUILT_IN_TRUNC:
27804 /* The round insn does not trap on denormals. */
27805 if (flag_trapping_math || !TARGET_ROUND)
27808 if (out_mode == DFmode && in_mode == DFmode)
27810 if (out_n == 2 && in_n == 2)
27811 return ix86_builtins[IX86_BUILTIN_TRUNCPD];
27812 else if (out_n == 4 && in_n == 4)
27813 return ix86_builtins[IX86_BUILTIN_TRUNCPD256];
27817 case BUILT_IN_TRUNCF:
27818 /* The round insn does not trap on denormals. */
27819 if (flag_trapping_math || !TARGET_ROUND)
27822 if (out_mode == SFmode && in_mode == SFmode)
27824 if (out_n == 4 && in_n == 4)
27825 return ix86_builtins[IX86_BUILTIN_TRUNCPS];
27826 else if (out_n == 8 && in_n == 8)
27827 return ix86_builtins[IX86_BUILTIN_TRUNCPS256];
27831 case BUILT_IN_RINT:
27832 /* The round insn does not trap on denormals. */
27833 if (flag_trapping_math || !TARGET_ROUND)
27836 if (out_mode == DFmode && in_mode == DFmode)
27838 if (out_n == 2 && in_n == 2)
27839 return ix86_builtins[IX86_BUILTIN_RINTPD];
27840 else if (out_n == 4 && in_n == 4)
27841 return ix86_builtins[IX86_BUILTIN_RINTPD256];
27845 case BUILT_IN_RINTF:
27846 /* The round insn does not trap on denormals. */
27847 if (flag_trapping_math || !TARGET_ROUND)
27850 if (out_mode == SFmode && in_mode == SFmode)
27852 if (out_n == 4 && in_n == 4)
27853 return ix86_builtins[IX86_BUILTIN_RINTPS];
27854 else if (out_n == 8 && in_n == 8)
27855 return ix86_builtins[IX86_BUILTIN_RINTPS256];
27859 case BUILT_IN_FMA:
27860 if (out_mode == DFmode && in_mode == DFmode)
27862 if (out_n == 2 && in_n == 2)
27863 return ix86_builtins[IX86_BUILTIN_VFMADDPD];
27864 if (out_n == 4 && in_n == 4)
27865 return ix86_builtins[IX86_BUILTIN_VFMADDPD256];
27869 case BUILT_IN_FMAF:
27870 if (out_mode == SFmode && in_mode == SFmode)
27872 if (out_n == 4 && in_n == 4)
27873 return ix86_builtins[IX86_BUILTIN_VFMADDPS];
27874 if (out_n == 8 && in_n == 8)
27875 return ix86_builtins[IX86_BUILTIN_VFMADDPS256];
27883 /* Dispatch to a handler for a vectorization library. */
27884 if (ix86_veclib_handler)
27885 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
27891 /* Handler for an SVML-style interface to
27892 a library with vectorized intrinsics. */
27895 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
27898 tree fntype, new_fndecl, args;
27901 enum machine_mode el_mode, in_mode;
27904 /* The SVML library is suitable for unsafe math only.  */
27905 if (!flag_unsafe_math_optimizations)
27908 el_mode = TYPE_MODE (TREE_TYPE (type_out));
27909 n = TYPE_VECTOR_SUBPARTS (type_out);
27910 in_mode = TYPE_MODE (TREE_TYPE (type_in));
27911 in_n = TYPE_VECTOR_SUBPARTS (type_in);
27912 if (el_mode != in_mode
27920 case BUILT_IN_LOG10:
27922 case BUILT_IN_TANH:
27924 case BUILT_IN_ATAN:
27925 case BUILT_IN_ATAN2:
27926 case BUILT_IN_ATANH:
27927 case BUILT_IN_CBRT:
27928 case BUILT_IN_SINH:
27930 case BUILT_IN_ASINH:
27931 case BUILT_IN_ASIN:
27932 case BUILT_IN_COSH:
27934 case BUILT_IN_ACOSH:
27935 case BUILT_IN_ACOS:
27936 if (el_mode != DFmode || n != 2)
27940 case BUILT_IN_EXPF:
27941 case BUILT_IN_LOGF:
27942 case BUILT_IN_LOG10F:
27943 case BUILT_IN_POWF:
27944 case BUILT_IN_TANHF:
27945 case BUILT_IN_TANF:
27946 case BUILT_IN_ATANF:
27947 case BUILT_IN_ATAN2F:
27948 case BUILT_IN_ATANHF:
27949 case BUILT_IN_CBRTF:
27950 case BUILT_IN_SINHF:
27951 case BUILT_IN_SINF:
27952 case BUILT_IN_ASINHF:
27953 case BUILT_IN_ASINF:
27954 case BUILT_IN_COSHF:
27955 case BUILT_IN_COSF:
27956 case BUILT_IN_ACOSHF:
27957 case BUILT_IN_ACOSF:
27958 if (el_mode != SFmode || n != 4)
27966 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
27968 if (fn == BUILT_IN_LOGF)
27969 strcpy (name, "vmlsLn4");
27970 else if (fn == BUILT_IN_LOG)
27971 strcpy (name, "vmldLn2");
27974 sprintf (name, "vmls%s", bname+10);
27975 name[strlen (name)-1] = '4';
27978 sprintf (name, "vmld%s2", bname+10);
27980 /* Convert to uppercase. */
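/* Worked example of the mangling (an illustration, not from this
   file): for BUILT_IN_SINF, BNAME is "__builtin_sinf" and BNAME+10 is
   "sinf", so "vmls%s" yields "vmlssinf"; the trailing character
   becomes '4' and the uppercasing produces "vmlsSin4".  The DFmode
   variants use the "vmld" prefix and a trailing '2'.  */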
27984 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
27985 args = TREE_CHAIN (args))
27989 fntype = build_function_type_list (type_out, type_in, NULL);
27991 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
27993 /* Build a function declaration for the vectorized function. */
27994 new_fndecl = build_decl (BUILTINS_LOCATION,
27995 FUNCTION_DECL, get_identifier (name), fntype);
27996 TREE_PUBLIC (new_fndecl) = 1;
27997 DECL_EXTERNAL (new_fndecl) = 1;
27998 DECL_IS_NOVOPS (new_fndecl) = 1;
27999 TREE_READONLY (new_fndecl) = 1;
28004 /* Handler for an ACML-style interface to
28005 a library with vectorized intrinsics. */
28008 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
28010 char name[20] = "__vr.._";
28011 tree fntype, new_fndecl, args;
28014 enum machine_mode el_mode, in_mode;
28017 /* ACML is 64-bit only, and suitable for unsafe math only, as
28018 it does not correctly support parts of IEEE with the required
28019 precision, such as denormals.  */
28021 || !flag_unsafe_math_optimizations)
28024 el_mode = TYPE_MODE (TREE_TYPE (type_out));
28025 n = TYPE_VECTOR_SUBPARTS (type_out);
28026 in_mode = TYPE_MODE (TREE_TYPE (type_in));
28027 in_n = TYPE_VECTOR_SUBPARTS (type_in);
28028 if (el_mode != in_mode
28038 case BUILT_IN_LOG2:
28039 case BUILT_IN_LOG10:
28042 if (el_mode != DFmode
28047 case BUILT_IN_SINF:
28048 case BUILT_IN_COSF:
28049 case BUILT_IN_EXPF:
28050 case BUILT_IN_POWF:
28051 case BUILT_IN_LOGF:
28052 case BUILT_IN_LOG2F:
28053 case BUILT_IN_LOG10F:
28056 if (el_mode != SFmode
28065 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
28066 sprintf (name + 7, "%s", bname+10);
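/* Worked example (an illustration, not from this file): NAME starts
   as "__vr.._", the two dots are filled in with the element count and
   width for the mode, and the builtin's own name is appended, so
   BUILT_IN_SINF becomes "__vrs4_sinf" and BUILT_IN_SIN becomes
   "__vrd2_sin".  */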
28069 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
28070 args = TREE_CHAIN (args))
28074 fntype = build_function_type_list (type_out, type_in, NULL);
28076 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
28078 /* Build a function declaration for the vectorized function. */
28079 new_fndecl = build_decl (BUILTINS_LOCATION,
28080 FUNCTION_DECL, get_identifier (name), fntype);
28081 TREE_PUBLIC (new_fndecl) = 1;
28082 DECL_EXTERNAL (new_fndecl) = 1;
28083 DECL_IS_NOVOPS (new_fndecl) = 1;
28084 TREE_READONLY (new_fndecl) = 1;
28090 /* Returns a decl of a function that implements conversion of an integer vector
28091 into a floating-point vector, or vice versa.  DEST_TYPE and SRC_TYPE
28092 are the types involved when converting according to CODE.
28093 Return NULL_TREE if it is not available. */
28096 ix86_vectorize_builtin_conversion (unsigned int code,
28097 tree dest_type, tree src_type)
28105 switch (TYPE_MODE (src_type))
28108 switch (TYPE_MODE (dest_type))
28111 return (TYPE_UNSIGNED (src_type)
28112 ? ix86_builtins[IX86_BUILTIN_CVTUDQ2PS]
28113 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS]);
28115 return (TYPE_UNSIGNED (src_type)
28117 : ix86_builtins[IX86_BUILTIN_CVTDQ2PD256]);
28123 switch (TYPE_MODE (dest_type))
28126 return (TYPE_UNSIGNED (src_type)
28128 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS256]);
28137 case FIX_TRUNC_EXPR:
28138 switch (TYPE_MODE (dest_type))
28141 switch (TYPE_MODE (src_type))
28144 return (TYPE_UNSIGNED (dest_type)
28146 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ]);
28148 return (TYPE_UNSIGNED (dest_type)
28150 : ix86_builtins[IX86_BUILTIN_CVTTPD2DQ256]);
28157 switch (TYPE_MODE (src_type))
28160 return (TYPE_UNSIGNED (dest_type)
28162 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ256]);
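/* Example of the mapping (drawn from the cases above): a FLOAT_EXPR
   converting V4SImode to V4SFmode selects CVTDQ2PS (or CVTUDQ2PS when
   the source is unsigned), while a FIX_TRUNC_EXPR in the opposite
   direction selects CVTTPS2DQ.  */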
28179 /* Returns a code for a target-specific builtin that implements
28180 the reciprocal of the function, or NULL_TREE if not available.  */
28183 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
28184 bool sqrt ATTRIBUTE_UNUSED)
28186 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
28187 && flag_finite_math_only && !flag_trapping_math
28188 && flag_unsafe_math_optimizations))
28192 /* Machine dependent builtins. */
28195 /* Vectorized version of sqrt to rsqrt conversion. */
28196 case IX86_BUILTIN_SQRTPS_NR:
28197 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
28199 case IX86_BUILTIN_SQRTPS_NR256:
28200 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR256];
28206 /* Normal builtins. */
28209 /* Sqrt to rsqrt conversion. */
28210 case BUILT_IN_SQRTF:
28211 return ix86_builtins[IX86_BUILTIN_RSQRTF];
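/* Usage sketch (an illustration, not from this file): under
   -ffast-math (finite math, no trapping, unsafe optimizations) a
   scalar expression such as

       float f (float x) { return 1.0f / __builtin_sqrtf (x); }

   may be rewritten in terms of IX86_BUILTIN_RSQRTF, trading the exact
   divide-and-sqrt for the fast reciprocal approximation.  */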
28218 /* Helper for avx_vpermilps256_operand et al. This is also used by
28219 the expansion functions to turn the parallel back into a mask.
28220 The return value is 0 for no match and the imm8+1 for a match. */
28223 avx_vpermilp_parallel (rtx par, enum machine_mode mode)
28225 unsigned i, nelt = GET_MODE_NUNITS (mode);
28227 unsigned char ipar[8];
28229 if (XVECLEN (par, 0) != (int) nelt)
28232 /* Validate that all of the elements are constants, and not totally
28233 out of range. Copy the data into an integral array to make the
28234 subsequent checks easier. */
28235 for (i = 0; i < nelt; ++i)
28237 rtx er = XVECEXP (par, 0, i);
28238 unsigned HOST_WIDE_INT ei;
28240 if (!CONST_INT_P (er))
28251 /* In the 256-bit DFmode case, we can only move elements within a 128-bit lane.  */
28253 for (i = 0; i < 2; ++i)
28257 mask |= ipar[i] << i;
28259 for (i = 2; i < 4; ++i)
28263 mask |= (ipar[i] - 2) << i;
28268 /* In the 256-bit SFmode case, we have full freedom of movement
28269 within the low 128-bit lane, but the high 128-bit lane must
28270 mirror the exact same pattern. */
28271 for (i = 0; i < 4; ++i)
28272 if (ipar[i] + 4 != ipar[i + 4])
28279 /* In the 128-bit case, we have full freedom in the placement of
28280 the elements from the source operand. */
28281 for (i = 0; i < nelt; ++i)
28282 mask |= ipar[i] << (i * (nelt / 2));
28286 gcc_unreachable ();
28289 /* Make sure success has a non-zero value by adding one. */
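/* Worked example (an illustration, not from this file): for V4SFmode
   the parallel [1 0 3 2] packs as 1<<0 | 0<<2 | 3<<4 | 2<<6 = 0xb1,
   so the function returns 0xb2 and the caller subtracts one to
   recover the vpermilps immediate.  */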
28293 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
28294 the expansion functions to turn the parallel back into a mask.
28295 The return value is 0 for no match and the imm8+1 for a match. */
28298 avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
28300 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
28302 unsigned char ipar[8];
28304 if (XVECLEN (par, 0) != (int) nelt)
28307 /* Validate that all of the elements are constants, and not totally
28308 out of range. Copy the data into an integral array to make the
28309 subsequent checks easier. */
28310 for (i = 0; i < nelt; ++i)
28312 rtx er = XVECEXP (par, 0, i);
28313 unsigned HOST_WIDE_INT ei;
28315 if (!CONST_INT_P (er))
28318 if (ei >= 2 * nelt)
28323 /* Validate that each half of the permute selects consecutive elements.  */
28324 for (i = 0; i < nelt2 - 1; ++i)
28325 if (ipar[i] + 1 != ipar[i + 1])
28327 for (i = nelt2; i < nelt - 1; ++i)
28328 if (ipar[i] + 1 != ipar[i + 1])
28331 /* Reconstruct the mask. */
28332 for (i = 0; i < 2; ++i)
28334 unsigned e = ipar[i * nelt2];
28338 mask |= e << (i * 4);
28341 /* Make sure success has a non-zero value by adding one. */
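/* Worked example (an illustration, not from this file, assuming the
   usual divide-by-NELT2 selector encoding): for V8SFmode the parallel
   [4 5 6 7 8 9 10 11] has half-starts 4 and 8, i.e. selectors 1 and
   2, so mask = 1 | 2<<4 = 0x21 and the function returns 0x22.  */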
28346 /* Store OPERAND to memory after reload is completed.  This means
28347 that we can't easily use assign_stack_local. */
28349 ix86_force_to_memory (enum machine_mode mode, rtx operand)
28353 gcc_assert (reload_completed);
28354 if (ix86_using_red_zone ())
28356 result = gen_rtx_MEM (mode,
28357 gen_rtx_PLUS (Pmode,
28359 GEN_INT (-RED_ZONE_SIZE)));
28360 emit_move_insn (result, operand);
28362 else if (TARGET_64BIT)
28368 operand = gen_lowpart (DImode, operand);
28372 gen_rtx_SET (VOIDmode,
28373 gen_rtx_MEM (DImode,
28374 gen_rtx_PRE_DEC (DImode,
28375 stack_pointer_rtx)),
28379 gcc_unreachable ();
28381 result = gen_rtx_MEM (mode, stack_pointer_rtx);
28390 split_double_mode (mode, &operand, 1, operands, operands + 1);
28392 gen_rtx_SET (VOIDmode,
28393 gen_rtx_MEM (SImode,
28394 gen_rtx_PRE_DEC (Pmode,
28395 stack_pointer_rtx)),
28398 gen_rtx_SET (VOIDmode,
28399 gen_rtx_MEM (SImode,
28400 gen_rtx_PRE_DEC (Pmode,
28401 stack_pointer_rtx)),
28406 /* Store HImodes as SImodes. */
28407 operand = gen_lowpart (SImode, operand);
28411 gen_rtx_SET (VOIDmode,
28412 gen_rtx_MEM (GET_MODE (operand),
28413 gen_rtx_PRE_DEC (SImode,
28414 stack_pointer_rtx)),
28418 gcc_unreachable ();
28420 result = gen_rtx_MEM (mode, stack_pointer_rtx);
28425 /* Free the operand from memory.  */
28427 ix86_free_from_memory (enum machine_mode mode)
28429 if (!ix86_using_red_zone ())
28433 if (mode == DImode || TARGET_64BIT)
28437 /* Use LEA to deallocate stack space. In peephole2 it will be converted
28438 to a pop or add instruction if registers are available.  */
28439 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
28440 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
28445 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
28447 Put float CONST_DOUBLE in the constant pool instead of fp regs.
28448 QImode must go into class Q_REGS.
28449 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
28450 movdf to do mem-to-mem moves through integer regs. */
28453 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
28455 enum machine_mode mode = GET_MODE (x);
28457 /* We're only allowed to return a subclass of CLASS. Many of the
28458 following checks fail for NO_REGS, so eliminate that early. */
28459 if (regclass == NO_REGS)
28462 /* All classes can load zeros. */
28463 if (x == CONST0_RTX (mode))
28466 /* Force constants into memory if we are loading a (nonzero) constant into
28467 an MMX or SSE register. This is because there are no MMX/SSE instructions
28468 to load from a constant. */
28470 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
28473 /* Prefer SSE regs only, if we can use them for math. */
28474 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
28475 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
28477 /* Floating-point constants need more complex checks. */
28478 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
28480 /* General regs can load everything. */
28481 if (reg_class_subset_p (regclass, GENERAL_REGS))
28484 /* Floats can load 0 and 1 plus some others. Note that we eliminated
28485 zero above. We only want to wind up preferring 80387 registers if
28486 we plan on doing computation with them. */
28488 && standard_80387_constant_p (x))
28490 /* Limit class to non-sse. */
28491 if (regclass == FLOAT_SSE_REGS)
28493 if (regclass == FP_TOP_SSE_REGS)
28495 if (regclass == FP_SECOND_SSE_REGS)
28496 return FP_SECOND_REG;
28497 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
28504 /* Generally when we see PLUS here, it's the function invariant
28505 (plus soft-fp const_int), which can only be computed into general regs.  */
28507 if (GET_CODE (x) == PLUS)
28508 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
28510 /* QImode constants are easy to load, but non-constant QImode data
28511 must go into Q_REGS. */
28512 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
28514 if (reg_class_subset_p (regclass, Q_REGS))
28516 if (reg_class_subset_p (Q_REGS, regclass))
28524 /* Discourage putting floating-point values in SSE registers unless
28525 SSE math is being used, and likewise for the 387 registers. */
28527 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
28529 enum machine_mode mode = GET_MODE (x);
28531 /* Restrict the output reload class to the register bank that we are doing
28532 math on. If we would like not to return a subset of CLASS, reject this
28533 alternative: if reload cannot do this, it will still use its choice. */
28534 mode = GET_MODE (x);
28535 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
28536 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
28538 if (X87_FLOAT_MODE_P (mode))
28540 if (regclass == FP_TOP_SSE_REGS)
28542 else if (regclass == FP_SECOND_SSE_REGS)
28543 return FP_SECOND_REG;
28545 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
28552 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
28553 enum machine_mode mode,
28554 secondary_reload_info *sri ATTRIBUTE_UNUSED)
28556 /* QImode spills from non-QI registers require
28557 an intermediate register on 32-bit targets.  */
28559 && !in_p && mode == QImode
28560 && (rclass == GENERAL_REGS
28561 || rclass == LEGACY_REGS
28562 || rclass == INDEX_REGS))
28571 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
28572 regno = true_regnum (x);
28574 /* Return Q_REGS if the operand is in memory. */
28579 /* This condition handles the corner case where an expression involving
28580 pointers gets vectorized. We're trying to use the address of a
28581 stack slot as a vector initializer.
28583 (set (reg:V2DI 74 [ vect_cst_.2 ])
28584 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
28586 Eventually frame gets turned into sp+offset like this:
28588 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
28589 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
28590 (const_int 392 [0x188]))))
28592 That later gets turned into:
28594 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
28595 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
28596 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
28598 We'll have the following reload recorded:
28600 Reload 0: reload_in (DI) =
28601 (plus:DI (reg/f:DI 7 sp)
28602 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
28603 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
28604 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
28605 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
28606 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
28607 reload_reg_rtx: (reg:V2DI 22 xmm1)
28609 This isn't going to work, since SSE instructions can't handle scalar
28610 additions.  Returning GENERAL_REGS forces the addition into an integer
28611 register, and reload can handle subsequent reloads without problems.  */
28613 if (in_p && GET_CODE (x) == PLUS
28614 && SSE_CLASS_P (rclass)
28615 && SCALAR_INT_MODE_P (mode))
28616 return GENERAL_REGS;
28621 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
28624 ix86_class_likely_spilled_p (reg_class_t rclass)
28635 case SSE_FIRST_REG:
28637 case FP_SECOND_REG:
28647 /* If we are copying between general and FP registers, we need a memory
28648 location. The same is true for SSE and MMX registers.
28650 To optimize register_move_cost performance, allow inline variant.
28652 The macro can't work reliably when one of the CLASSES is a class containing
28653 registers from multiple units (SSE, MMX, integer).  We avoid this by never
28654 combining those units in a single alternative in the machine description.
28655 Ensure that this constraint holds to avoid unexpected surprises.
28657 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
28658 enforce these sanity checks. */
28661 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
28662 enum machine_mode mode, int strict)
28664 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
28665 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
28666 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
28667 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
28668 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
28669 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
28671 gcc_assert (!strict);
28675 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
28678 /* ??? This is a lie. We do have moves between mmx/general, and for
28679 mmx/sse2. But by saying we need secondary memory we discourage the
28680 register allocator from using the mmx registers unless needed. */
28681 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
28684 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
28686 /* SSE1 doesn't have any direct moves from other classes. */
28690 /* If the target says that inter-unit moves are more expensive
28691 than moving through memory, then don't generate them. */
28692 if (!TARGET_INTER_UNIT_MOVES)
28695 /* Between SSE and general, we have moves no larger than word size. */
28696 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
28704 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
28705 enum machine_mode mode, int strict)
28707 return inline_secondary_memory_needed (class1, class2, mode, strict);
28710 /* Return true if the registers in CLASS cannot represent the change from
28711 modes FROM to TO. */
28714 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
28715 enum reg_class regclass)
28720 /* x87 registers can't do subreg at all, as all values are reformatted
28721 to extended precision. */
28722 if (MAYBE_FLOAT_CLASS_P (regclass))
28725 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
28727 /* Vector registers do not support QI or HImode loads. If we don't
28728 disallow a change to these modes, reload will assume it's ok to
28729 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
28730 the vec_dupv4hi pattern. */
28731 if (GET_MODE_SIZE (from) < 4)
28734 /* Vector registers do not support subreg with nonzero offsets, which
28735 are otherwise valid for integer registers. Since we can't see
28736 whether we have a nonzero offset from here, prohibit all
28737 nonparadoxical subregs changing size. */
28738 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
28745 /* Return the cost of moving data of mode M between a
28746 register and memory. A value of 2 is the default; this cost is
28747 relative to those in `REGISTER_MOVE_COST'.
28749 This function is used extensively by register_move_cost, which is used to
28750 build tables at startup.  Make it inline in this case.
28751 When IN is 2, return the maximum of the in and out move costs.
28753 If moving between registers and memory is more expensive than
28754 between two registers, you should define this macro to express the relative cost.
28757 Also model the increased moving costs of QImode registers in non Q_REGS classes.  */
28761 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
28765 if (FLOAT_CLASS_P (regclass))
28783 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
28784 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
28786 if (SSE_CLASS_P (regclass))
28789 switch (GET_MODE_SIZE (mode))
28804 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
28805 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
28807 if (MMX_CLASS_P (regclass))
28810 switch (GET_MODE_SIZE (mode))
28822 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
28823 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
28825 switch (GET_MODE_SIZE (mode))
28828 if (Q_CLASS_P (regclass) || TARGET_64BIT)
28831 return ix86_cost->int_store[0];
28832 if (TARGET_PARTIAL_REG_DEPENDENCY
28833 && optimize_function_for_speed_p (cfun))
28834 cost = ix86_cost->movzbl_load;
28836 cost = ix86_cost->int_load[0];
28838 return MAX (cost, ix86_cost->int_store[0]);
28844 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
28846 return ix86_cost->movzbl_load;
28848 return ix86_cost->int_store[0] + 4;
28853 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
28854 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
28856 /* Compute the number of 32-bit moves needed.  TFmode is moved as XFmode.  */
28857 if (mode == TFmode)
28860 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
28862 cost = ix86_cost->int_load[2];
28864 cost = ix86_cost->int_store[2];
28865 return (cost * (((int) GET_MODE_SIZE (mode)
28866 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
28871 ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
28874 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
28878 /* Return the cost of moving data from a register in class CLASS1 to
28879 one in class CLASS2.
28881 It is not required that the cost always equal 2 when FROM is the same as TO;
28882 on some machines it is expensive to move between registers if they are not
28883 general registers. */
28886 ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
28887 reg_class_t class2_i)
28889 enum reg_class class1 = (enum reg_class) class1_i;
28890 enum reg_class class2 = (enum reg_class) class2_i;
28892 /* In case we require secondary memory, compute the cost of the store followed
28893 by the load.  In order to avoid bad register allocation choices, we need
28894 this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */
28896 if (inline_secondary_memory_needed (class1, class2, mode, 0))
28900 cost += inline_memory_move_cost (mode, class1, 2);
28901 cost += inline_memory_move_cost (mode, class2, 2);
28903 /* In case of copying from a general purpose register we may emit multiple
28904 stores followed by a single load, causing a memory size mismatch stall.
28905 Count this as an arbitrarily high cost of 20.  */
28906 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
28909 /* In the case of FP/MMX moves, the registers actually overlap, and we
28910 have to switch modes in order to treat them differently. */
28911 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
28912 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
28918 /* Moves between SSE/MMX and integer unit are expensive. */
28919 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
28920 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
28922 /* ??? By keeping the returned value relatively high, we limit the number
28923 of moves between integer and MMX/SSE registers for all targets.
28924 Additionally, a high value prevents problems with x86_modes_tieable_p(),
28925 where integer modes in MMX/SSE registers are not tieable
28926 because of missing QImode and HImode moves to, from or between
28927 MMX/SSE registers.  */
28928 return MAX (8, ix86_cost->mmxsse_to_integer);
28930 if (MAYBE_FLOAT_CLASS_P (class1))
28931 return ix86_cost->fp_move;
28932 if (MAYBE_SSE_CLASS_P (class1))
28933 return ix86_cost->sse_move;
28934 if (MAYBE_MMX_CLASS_P (class1))
28935 return ix86_cost->mmx_move;
28939 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
28942 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
28944 /* Only the flags registers can hold CCmode values, and they can hold only CCmode values.  */
28945 if (CC_REGNO_P (regno))
28946 return GET_MODE_CLASS (mode) == MODE_CC;
28947 if (GET_MODE_CLASS (mode) == MODE_CC
28948 || GET_MODE_CLASS (mode) == MODE_RANDOM
28949 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
28951 if (FP_REGNO_P (regno))
28952 return VALID_FP_MODE_P (mode);
28953 if (SSE_REGNO_P (regno))
28955 /* We implement the move patterns for all vector modes into and
28956 out of SSE registers, even when no operation instructions
28957 are available.  OImode move is available only when AVX is enabled.  */
28959 return ((TARGET_AVX && mode == OImode)
28960 || VALID_AVX256_REG_MODE (mode)
28961 || VALID_SSE_REG_MODE (mode)
28962 || VALID_SSE2_REG_MODE (mode)
28963 || VALID_MMX_REG_MODE (mode)
28964 || VALID_MMX_REG_MODE_3DNOW (mode));
28966 if (MMX_REGNO_P (regno))
28968 /* We implement the move patterns for 3DNOW modes even in MMX mode,
28969 so if the register is available at all, then we can move data of
28970 the given mode into or out of it. */
28971 return (VALID_MMX_REG_MODE (mode)
28972 || VALID_MMX_REG_MODE_3DNOW (mode));
28975 if (mode == QImode)
28977 /* Take care with QImode values - they can be in non-QI regs,
28978 but then they do cause partial register stalls. */
28979 if (regno <= BX_REG || TARGET_64BIT)
28981 if (!TARGET_PARTIAL_REG_STALL)
28983 return reload_in_progress || reload_completed;
28985 /* We handle both integer and floats in the general purpose registers. */
28986 else if (VALID_INT_MODE_P (mode))
28988 else if (VALID_FP_MODE_P (mode))
28990 else if (VALID_DFP_MODE_P (mode))
28992 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
28993 on to use that value in smaller contexts, this can easily force a
28994 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
28995 supporting DImode, allow it. */
28996 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
29002 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
29003 tieable integer mode. */
29006 ix86_tieable_integer_mode_p (enum machine_mode mode)
29015 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
29018 return TARGET_64BIT;
29025 /* Return true if MODE1 is accessible in a register that can hold MODE2
29026 without copying. That is, all register classes that can hold MODE2
29027 can also hold MODE1. */
29030 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
29032 if (mode1 == mode2)
29035 if (ix86_tieable_integer_mode_p (mode1)
29036 && ix86_tieable_integer_mode_p (mode2))
29039 /* MODE2 being XFmode implies fp stack or general regs, which means we
29040 can tie any smaller floating point modes to it. Note that we do not
29041 tie this with TFmode. */
29042 if (mode2 == XFmode)
29043 return mode1 == SFmode || mode1 == DFmode;
29045 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
29046 that we can tie it with SFmode. */
29047 if (mode2 == DFmode)
29048 return mode1 == SFmode;
29050 /* If MODE2 is only appropriate for an SSE register, then tie with
29051 any other mode acceptable to SSE registers. */
29052 if (GET_MODE_SIZE (mode2) == 16
29053 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
29054 return (GET_MODE_SIZE (mode1) == 16
29055 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
29057 /* If MODE2 is appropriate for an MMX register, then tie
29058 with any other mode acceptable to MMX registers. */
29059 if (GET_MODE_SIZE (mode2) == 8
29060 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
29061 return (GET_MODE_SIZE (mode1) == 8
29062 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
29067 /* Compute a (partial) cost for rtx X. Return true if the complete
29068 cost has been computed, and false if subexpressions should be
29069 scanned. In either case, *TOTAL contains the cost result. */
29072 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed)
29074 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
29075 enum machine_mode mode = GET_MODE (x);
29076 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
29084 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
29086 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
29088 else if (flag_pic && SYMBOLIC_CONST (x)
29090 || (GET_CODE (x) != LABEL_REF
29091 && (GET_CODE (x) != SYMBOL_REF
29092 || !SYMBOL_REF_LOCAL_P (x)))))
29099 if (mode == VOIDmode)
29102 switch (standard_80387_constant_p (x))
29107 default: /* Other constants */
29112 /* Start with (MEM (SYMBOL_REF)), since that's where
29113 it'll probably end up. Add a penalty for size. */
29114 *total = (COSTS_N_INSNS (1)
29115 + (flag_pic != 0 && !TARGET_64BIT)
29116 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
29122 /* The zero extension is often completely free on x86_64, so make
29123 it as cheap as possible. */
29124 if (TARGET_64BIT && mode == DImode
29125 && GET_MODE (XEXP (x, 0)) == SImode)
29127 else if (TARGET_ZERO_EXTEND_WITH_AND)
29128 *total = cost->add;
29130 *total = cost->movzx;
29134 *total = cost->movsx;
29138 if (CONST_INT_P (XEXP (x, 1))
29139 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
29141 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
29144 *total = cost->add;
29147 if ((value == 2 || value == 3)
29148 && cost->lea <= cost->shift_const)
29150 *total = cost->lea;
29160 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
29162 if (CONST_INT_P (XEXP (x, 1)))
29164 if (INTVAL (XEXP (x, 1)) > 32)
29165 *total = cost->shift_const + COSTS_N_INSNS (2);
29167 *total = cost->shift_const * 2;
29171 if (GET_CODE (XEXP (x, 1)) == AND)
29172 *total = cost->shift_var * 2;
29174 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
29179 if (CONST_INT_P (XEXP (x, 1)))
29180 *total = cost->shift_const;
29182 *total = cost->shift_var;
29190 gcc_assert (FLOAT_MODE_P (mode));
29191 gcc_assert (TARGET_FMA || TARGET_FMA4);
29193 /* ??? SSE scalar/vector cost should be used here. */
29194 /* ??? Bald assumption that fma has the same cost as fmul. */
29195 *total = cost->fmul;
29196 *total += rtx_cost (XEXP (x, 1), FMA, speed);
29198 /* A negate in op0 or op2 is free: FMS, FNMA, FNMS.  */
29200 if (GET_CODE (sub) == NEG)
29202 *total += rtx_cost (sub, FMA, speed);
29205 if (GET_CODE (sub) == NEG)
29207 *total += rtx_cost (sub, FMA, speed);
29212 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
29214 /* ??? SSE scalar cost should be used here. */
29215 *total = cost->fmul;
29218 else if (X87_FLOAT_MODE_P (mode))
29220 *total = cost->fmul;
29223 else if (FLOAT_MODE_P (mode))
29225 /* ??? SSE vector cost should be used here. */
29226 *total = cost->fmul;
29231 rtx op0 = XEXP (x, 0);
29232 rtx op1 = XEXP (x, 1);
29234 if (CONST_INT_P (XEXP (x, 1)))
29236 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
29237 for (nbits = 0; value != 0; value &= value - 1)
29241 /* This is arbitrary. */
29244 /* Compute costs correctly for widening multiplication. */
29245 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
29246 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
29247 == GET_MODE_SIZE (mode))
29249 int is_mulwiden = 0;
29250 enum machine_mode inner_mode = GET_MODE (op0);
29252 if (GET_CODE (op0) == GET_CODE (op1))
29253 is_mulwiden = 1, op1 = XEXP (op1, 0);
29254 else if (CONST_INT_P (op1))
29256 if (GET_CODE (op0) == SIGN_EXTEND)
29257 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
29260 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
29264 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
29267 *total = (cost->mult_init[MODE_INDEX (mode)]
29268 + nbits * cost->mult_bit
29269 + rtx_cost (op0, outer_code, speed) + rtx_cost (op1, outer_code, speed));
29278 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
29279 /* ??? SSE cost should be used here. */
29280 *total = cost->fdiv;
29281 else if (X87_FLOAT_MODE_P (mode))
29282 *total = cost->fdiv;
29283 else if (FLOAT_MODE_P (mode))
29284 /* ??? SSE vector cost should be used here. */
29285 *total = cost->fdiv;
29287 *total = cost->divide[MODE_INDEX (mode)];
29291 if (GET_MODE_CLASS (mode) == MODE_INT
29292 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
29294 if (GET_CODE (XEXP (x, 0)) == PLUS
29295 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
29296 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
29297 && CONSTANT_P (XEXP (x, 1)))
29299 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
29300 if (val == 2 || val == 4 || val == 8)
29302 *total = cost->lea;
29303 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
29304 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
29305 outer_code, speed);
29306 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
29310 else if (GET_CODE (XEXP (x, 0)) == MULT
29311 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
29313 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
29314 if (val == 2 || val == 4 || val == 8)
29316 *total = cost->lea;
29317 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
29318 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
29322 else if (GET_CODE (XEXP (x, 0)) == PLUS)
29324 *total = cost->lea;
29325 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
29326 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
29327 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
29334 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
29336 /* ??? SSE cost should be used here. */
29337 *total = cost->fadd;
29340 else if (X87_FLOAT_MODE_P (mode))
29342 *total = cost->fadd;
29345 else if (FLOAT_MODE_P (mode))
29347 /* ??? SSE vector cost should be used here. */
29348 *total = cost->fadd;
29356 if (!TARGET_64BIT && mode == DImode)
29358 *total = (cost->add * 2
29359 + (rtx_cost (XEXP (x, 0), outer_code, speed)
29360 << (GET_MODE (XEXP (x, 0)) != DImode))
29361 + (rtx_cost (XEXP (x, 1), outer_code, speed)
29362 << (GET_MODE (XEXP (x, 1)) != DImode)));
29368 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
29370 /* ??? SSE cost should be used here. */
29371 *total = cost->fchs;
29374 else if (X87_FLOAT_MODE_P (mode))
29376 *total = cost->fchs;
29379 else if (FLOAT_MODE_P (mode))
29381 /* ??? SSE vector cost should be used here. */
29382 *total = cost->fchs;
29388 if (!TARGET_64BIT && mode == DImode)
29389 *total = cost->add * 2;
29391 *total = cost->add;
29395 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
29396 && XEXP (XEXP (x, 0), 1) == const1_rtx
29397 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
29398 && XEXP (x, 1) == const0_rtx)
29400 /* This kind of construct is implemented using test[bwl].
29401 Treat it as if we had an AND. */
29402 *total = (cost->add
29403 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed)
29404 + rtx_cost (const1_rtx, outer_code, speed));
29410 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
29415 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
29416 /* ??? SSE cost should be used here. */
29417 *total = cost->fabs;
29418 else if (X87_FLOAT_MODE_P (mode))
29419 *total = cost->fabs;
29420 else if (FLOAT_MODE_P (mode))
29421 /* ??? SSE vector cost should be used here. */
29422 *total = cost->fabs;
29426 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
29427 /* ??? SSE cost should be used here. */
29428 *total = cost->fsqrt;
29429 else if (X87_FLOAT_MODE_P (mode))
29430 *total = cost->fsqrt;
29431 else if (FLOAT_MODE_P (mode))
29432 /* ??? SSE vector cost should be used here. */
29433 *total = cost->fsqrt;
29437 if (XINT (x, 1) == UNSPEC_TP)
29444 case VEC_DUPLICATE:
29445 /* ??? Assume all of these vector manipulation patterns are
29446 recognizable, in which case they all pretty much have the same cost.  */
29448 *total = COSTS_N_INSNS (1);
29458 static int current_machopic_label_num;
29460 /* Given a symbol name and its associated stub, write out the
29461 definition of the stub. */
29464 machopic_output_stub (FILE *file, const char *symb, const char *stub)
29466 unsigned int length;
29467 char *binder_name, *symbol_name, lazy_ptr_name[32];
29468 int label = ++current_machopic_label_num;
29470 /* For 64-bit we shouldn't get here. */
29471 gcc_assert (!TARGET_64BIT);
29473 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
29474 symb = targetm.strip_name_encoding (symb);
29476 length = strlen (stub);
29477 binder_name = XALLOCAVEC (char, length + 32);
29478 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
29480 length = strlen (symb);
29481 symbol_name = XALLOCAVEC (char, length + 32);
29482 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
29484 sprintf (lazy_ptr_name, "L%d$lz", label);
29486 if (MACHOPIC_ATT_STUB)
29487 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
29488 else if (MACHOPIC_PURE)
29490 if (TARGET_DEEP_BRANCH_PREDICTION)
29491 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
29493 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
29496 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
29498 fprintf (file, "%s:\n", stub);
29499 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
29501 if (MACHOPIC_ATT_STUB)
29503 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
29505 else if (MACHOPIC_PURE)
29508 if (TARGET_DEEP_BRANCH_PREDICTION)
29510 /* 25-byte PIC stub using "CALL get_pc_thunk". */
29511 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
29512 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
29513 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n", label, lazy_ptr_name, label);
29517 /* 26-byte PIC stub using inline picbase: "CALL L42 ! L42: pop %eax". */
29518 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%ecx\n", label, label);
29519 fprintf (file, "\tmovl %s-LPC$%d(%%ecx),%%ecx\n", lazy_ptr_name, label);
29521 fprintf (file, "\tjmp\t*%%ecx\n");
29524 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
29526 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
29527 it needs no stub-binding-helper. */
29528 if (MACHOPIC_ATT_STUB)
29531 fprintf (file, "%s:\n", binder_name);
29535 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
29536 fprintf (file, "\tpushl\t%%ecx\n");
29539 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
29541 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
29543 /* N.B. Keep the correspondence of these
29544 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
29545 old-pic/new-pic/non-pic stubs; altering this will break
29546 compatibility with existing dylibs. */
29550 if (TARGET_DEEP_BRANCH_PREDICTION)
29551 /* 25-byte PIC stub using "CALL get_pc_thunk". */
29552 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
29554 /* 26-byte PIC stub using inline picbase: "CALL L42 ! L42: pop %ebx". */
29555 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
29558 /* 16-byte -mdynamic-no-pic stub. */
29559 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
29561 fprintf (file, "%s:\n", lazy_ptr_name);
29562 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
29563 fprintf (file, ASM_LONG "%s\n", binder_name);
29565 #endif /* TARGET_MACHO */
29567 /* Order the registers for the register allocator.  */
29570 x86_order_regs_for_local_alloc (void)
29575 /* First allocate the local general purpose registers. */
29576 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
29577 if (GENERAL_REGNO_P (i) && call_used_regs[i])
29578 reg_alloc_order [pos++] = i;
29580 /* Global general purpose registers. */
29581 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
29582 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
29583 reg_alloc_order [pos++] = i;
29585 /* x87 registers come first in case we are doing FP math using them.  */
29587 if (!TARGET_SSE_MATH)
29588 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
29589 reg_alloc_order [pos++] = i;
29591 /* SSE registers. */
29592 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
29593 reg_alloc_order [pos++] = i;
29594 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
29595 reg_alloc_order [pos++] = i;
29597 /* x87 registers. */
29598 if (TARGET_SSE_MATH)
29599 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
29600 reg_alloc_order [pos++] = i;
29602 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
29603 reg_alloc_order [pos++] = i;
29605 /* Initialize the rest of the array, as we do not allocate some registers at all.  */
29607 while (pos < FIRST_PSEUDO_REGISTER)
29608 reg_alloc_order [pos++] = 0;
29611 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
29612 in struct attribute_spec.handler.  */
29614 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
29616 int flags ATTRIBUTE_UNUSED,
29617 bool *no_add_attrs)
29619 if (TREE_CODE (*node) != FUNCTION_TYPE
29620 && TREE_CODE (*node) != METHOD_TYPE
29621 && TREE_CODE (*node) != FIELD_DECL
29622 && TREE_CODE (*node) != TYPE_DECL)
29624 warning (OPT_Wattributes, "%qE attribute only applies to functions",
29626 *no_add_attrs = true;
29631 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
29633 *no_add_attrs = true;
29636 if (is_attribute_p ("callee_pop_aggregate_return", name))
29640 cst = TREE_VALUE (args);
29641 if (TREE_CODE (cst) != INTEGER_CST)
29643 warning (OPT_Wattributes,
29644 "%qE attribute requires an integer constant argument",
29646 *no_add_attrs = true;
29648 else if (compare_tree_int (cst, 0) != 0
29649 && compare_tree_int (cst, 1) != 0)
29651 warning (OPT_Wattributes,
29652 "argument to %qE attribute is neither zero, nor one",
29654 *no_add_attrs = true;
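/* Illustrative sketch (not part of the port): how the attribute validated
   above is spelled in user code.  Per the checks above, the argument must
   be the integer constant 0 or 1 and the target must be 32-bit.  Guarded
   out, since the attribute exists only in the compiled compiler.  */
#if 0
struct big { int v[8]; };

/* 1: the callee pops the hidden pointer to the returned aggregate.  */
struct big __attribute__ ((callee_pop_aggregate_return (1)))
make_big (void);
#endif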
29663 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
29664 struct attribute_spec.handler. */
29666 ix86_handle_abi_attribute (tree *node, tree name,
29667 tree args ATTRIBUTE_UNUSED,
29668 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
29670 if (TREE_CODE (*node) != FUNCTION_TYPE
29671 && TREE_CODE (*node) != METHOD_TYPE
29672 && TREE_CODE (*node) != FIELD_DECL
29673 && TREE_CODE (*node) != TYPE_DECL)
29675 warning (OPT_Wattributes, "%qE attribute only applies to functions",
29677 *no_add_attrs = true;
29682 warning (OPT_Wattributes, "%qE attribute only available for 64-bit",
29684 *no_add_attrs = true;
29688 /* Can combine regparm with all attributes but fastcall. */
29689 if (is_attribute_p ("ms_abi", name))
29691 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
29693 error ("ms_abi and sysv_abi attributes are not compatible");
29698 else if (is_attribute_p ("sysv_abi", name))
29700 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
29702 error ("ms_abi and sysv_abi attributes are not compatible");
29711 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
29712 struct attribute_spec.handler. */
29714 ix86_handle_struct_attribute (tree *node, tree name,
29715 tree args ATTRIBUTE_UNUSED,
29716 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
29719 if (DECL_P (*node))
29721 if (TREE_CODE (*node) == TYPE_DECL)
29722 type = &TREE_TYPE (*node);
29727 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
29728 || TREE_CODE (*type) == UNION_TYPE)))
29730 warning (OPT_Wattributes, "%qE attribute ignored",
29732 *no_add_attrs = true;
29735 else if ((is_attribute_p ("ms_struct", name)
29736 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
29737 || ((is_attribute_p ("gcc_struct", name)
29738 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
29740 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
29742 *no_add_attrs = true;
29749 ix86_handle_fndecl_attribute (tree *node, tree name,
29750 tree args ATTRIBUTE_UNUSED,
29751 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
29753 if (TREE_CODE (*node) != FUNCTION_DECL)
29755 warning (OPT_Wattributes, "%qE attribute only applies to functions",
29757 *no_add_attrs = true;
29763 ix86_ms_bitfield_layout_p (const_tree record_type)
29765 return ((TARGET_MS_BITFIELD_LAYOUT
29766 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
29767 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
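/* Illustrative sketch (not part of the port): under the MS bitfield rules
   selected by the predicate above, bitfields with different underlying
   types do not share an allocation unit, so the struct below is typically
   8 bytes with ms_struct but 4 bytes under the GCC rules.  */
#if 0
struct __attribute__ ((ms_struct)) s
{
  char a : 4;
  int b : 4;	/* MS: opens a fresh int-sized unit.  */
};
#endif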
29770 /* Returns an expression indicating where the this parameter is
29771 located on entry to the FUNCTION. */
29774 x86_this_parameter (tree function)
29776 tree type = TREE_TYPE (function);
29777 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
29782 const int *parm_regs;
29784 if (ix86_function_type_abi (type) == MS_ABI)
29785 parm_regs = x86_64_ms_abi_int_parameter_registers;
29787 parm_regs = x86_64_int_parameter_registers;
29788 return gen_rtx_REG (DImode, parm_regs[aggr]);
29791 nregs = ix86_function_regparm (type, function);
29793 if (nregs > 0 && !stdarg_p (type))
29796 unsigned int ccvt = ix86_get_callcvt (type);
29798 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
29799 regno = aggr ? DX_REG : CX_REG;
29800 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
29804 return gen_rtx_MEM (SImode,
29805 plus_constant (stack_pointer_rtx, 4));
29814 return gen_rtx_MEM (SImode,
29815 plus_constant (stack_pointer_rtx, 4));
29818 return gen_rtx_REG (SImode, regno);
29821 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
29824 /* Determine whether x86_output_mi_thunk can succeed. */
29827 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
29828 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
29829 HOST_WIDE_INT vcall_offset, const_tree function)
29831 /* 64-bit can handle anything. */
29835 /* For 32-bit, everything's fine if we have one free register. */
29836 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
29839 /* Need a free register for vcall_offset. */
29843 /* Need a free register for GOT references. */
29844 if (flag_pic && !targetm.binds_local_p (function))
29847 /* Otherwise ok. */
29851 /* Output the assembler code for a thunk function. THUNK_DECL is the
29852 declaration for the thunk function itself, FUNCTION is the decl for
29853 the target function. DELTA is an immediate constant offset to be
29854 added to THIS. If VCALL_OFFSET is nonzero, the word at
29855 *(*this + vcall_offset) should be added to THIS. */
29858 x86_output_mi_thunk (FILE *file,
29859 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
29860 HOST_WIDE_INT vcall_offset, tree function)
29863 rtx this_param = x86_this_parameter (function);
29866 /* Make sure unwind info is emitted for the thunk if needed. */
29867 final_start_function (emit_barrier (), file, 1);
29869 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
29870 pull it in now and let DELTA benefit. */
29871 if (REG_P (this_param))
29872 this_reg = this_param;
29873 else if (vcall_offset)
29875 /* Put the this parameter into %eax. */
29876 xops[0] = this_param;
29877 xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
29878 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
29881 this_reg = NULL_RTX;
29883 /* Adjust the this parameter by a fixed constant. */
29886 xops[0] = GEN_INT (delta);
29887 xops[1] = this_reg ? this_reg : this_param;
29890 if (!x86_64_general_operand (xops[0], DImode))
29892 tmp = gen_rtx_REG (DImode, R10_REG);
29894 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
29896 xops[1] = this_param;
29898 if (x86_maybe_negate_const_int (&xops[0], DImode))
29899 output_asm_insn ("sub{q}\t{%0, %1|%1, %0}", xops);
29901 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
29903 else if (x86_maybe_negate_const_int (&xops[0], SImode))
29904 output_asm_insn ("sub{l}\t{%0, %1|%1, %0}", xops);
29906 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
29909 /* Adjust the this parameter by a value stored in the vtable. */
29913 tmp = gen_rtx_REG (DImode, R10_REG);
29916 int tmp_regno = CX_REG;
29917 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
29918 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) != 0)
29919 tmp_regno = AX_REG;
29920 tmp = gen_rtx_REG (SImode, tmp_regno);
29923 xops[0] = gen_rtx_MEM (Pmode, this_reg);
29925 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
29927 /* Adjust the this parameter. */
29928 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
29929 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
29931 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
29932 xops[0] = GEN_INT (vcall_offset);
29934 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
29935 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
29937 xops[1] = this_reg;
29938 output_asm_insn ("add%z1\t{%0, %1|%1, %0}", xops);
29941 /* If necessary, drop THIS back to its stack slot. */
29942 if (this_reg && this_reg != this_param)
29944 xops[0] = this_reg;
29945 xops[1] = this_param;
29946 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
29949 xops[0] = XEXP (DECL_RTL (function), 0);
29952 if (!flag_pic || targetm.binds_local_p (function)
29953 || DEFAULT_ABI == MS_ABI)
29954 output_asm_insn ("jmp\t%P0", xops);
29955 /* All thunks should be in the same object as their target,
29956 and thus binds_local_p should be true. */
29957 else if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
29958 gcc_unreachable ();
29961 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
29962 tmp = gen_rtx_CONST (Pmode, tmp);
29963 tmp = gen_rtx_MEM (QImode, tmp);
29965 output_asm_insn ("jmp\t%A0", xops);
29970 if (!flag_pic || targetm.binds_local_p (function))
29971 output_asm_insn ("jmp\t%P0", xops);
29976 rtx sym_ref = XEXP (DECL_RTL (function), 0);
29977 if (TARGET_MACHO_BRANCH_ISLANDS)
29978 sym_ref = (gen_rtx_SYMBOL_REF
29980 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
29981 tmp = gen_rtx_MEM (QImode, sym_ref);
29983 output_asm_insn ("jmp\t%0", xops);
29986 #endif /* TARGET_MACHO */
29988 tmp = gen_rtx_REG (SImode, CX_REG);
29989 output_set_got (tmp, NULL_RTX);
29992 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
29993 output_asm_insn ("jmp\t{*}%1", xops);
29996 final_end_function ();
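/* Illustrative sketch (not part of the port): the pointer adjustment the
   emitted thunk performs before tail-calling FUNCTION, written as plain C
   (an LP64 vtable layout is assumed for the illustration).  */
#if 0
static void *
thunk_adjust_this (void *this_, long delta, long vcall_offset)
{
  char *p = (char *) this_ + delta;	/* the add/sub sequence above */
  if (vcall_offset)
    /* Add the word at *(*this + vcall_offset), loaded via TMP above.  */
    p += *(long *) (*(char **) p + vcall_offset);
  return p;				/* then jmp to FUNCTION */
}
#endif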
30000 x86_file_start (void)
30002 default_file_start ();
30004 darwin_file_start ();
30006 if (X86_FILE_START_VERSION_DIRECTIVE)
30007 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
30008 if (X86_FILE_START_FLTUSED)
30009 fputs ("\t.global\t__fltused\n", asm_out_file);
30010 if (ix86_asm_dialect == ASM_INTEL)
30011 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
30015 x86_field_alignment (tree field, int computed)
30017 enum machine_mode mode;
30018 tree type = TREE_TYPE (field);
30020 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
30022 mode = TYPE_MODE (strip_array_types (type));
30023 if (mode == DFmode || mode == DCmode
30024 || GET_MODE_CLASS (mode) == MODE_INT
30025 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
30026 return MIN (32, computed);
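/* Illustrative sketch (not part of the port): the effect of the
   MIN (32, computed) cap above.  Without -malign-double, a double member
   on 32-bit x86 is aligned to 4 bytes rather than 8, so here
   offsetof (struct layout_example, d) is 4.  */
#if 0
struct layout_example { char c; double d; };
#endif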
30030 /* Output assembler code to FILE to increment profiler label # LABELNO
30031 for profiling a function entry. */
30033 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
30035 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
30040 #ifndef NO_PROFILE_COUNTERS
30041 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
30044 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
30045 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
30047 fprintf (file, "\tcall\t%s\n", mcount_name);
30051 #ifndef NO_PROFILE_COUNTERS
30052 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
30055 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
30059 #ifndef NO_PROFILE_COUNTERS
30060 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
30063 fprintf (file, "\tcall\t%s\n", mcount_name);
30067 /* We don't have exact information about the insn sizes, but we may assume
30068 quite safely that we are informed about all 1 byte insns and memory
30069 address sizes. This is enough to eliminate unnecessary padding in 95% of cases. */
30073 min_insn_size (rtx insn)
30077 if (!INSN_P (insn) || !active_insn_p (insn))
30080 /* Discard alignments we've emitted, and jump instructions. */
30081 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
30082 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
30084 if (JUMP_TABLE_DATA_P (insn))
30087 /* Important case - calls are always 5 bytes.
30088 It is common to have many calls in a row. */
30090 && symbolic_reference_mentioned_p (PATTERN (insn))
30091 && !SIBLING_CALL_P (insn))
30093 len = get_attr_length (insn);
30097 /* For normal instructions we rely on get_attr_length being exact,
30098 with a few exceptions. */
30099 if (!JUMP_P (insn))
30101 enum attr_type type = get_attr_type (insn);
30106 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
30107 || asm_noperands (PATTERN (insn)) >= 0)
30114 /* Otherwise trust get_attr_length. */
30118 l = get_attr_length_address (insn);
30119 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
30128 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
30130 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in a 16-byte window. */
30134 ix86_avoid_jump_mispredicts (void)
30136 rtx insn, start = get_insns ();
30137 int nbytes = 0, njumps = 0;
30140 /* Look for all minimal intervals of instructions containing 4 jumps.
30141 The intervals are bounded by START and INSN. NBYTES is the total
30142 size of instructions in the interval including INSN and not including
30143 START. When NBYTES is smaller than 16 bytes, it is possible
30144 that the end of START and INSN ends up in the same 16-byte page.
30146 The smallest offset in the page at which INSN can start is the case
30147 where START ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
30148 We add a p2align to the 16-byte window with max skip 15 - NBYTES + sizeof (INSN). */
30150 for (insn = start; insn; insn = NEXT_INSN (insn))
30154 if (LABEL_P (insn))
30156 int align = label_to_alignment (insn);
30157 int max_skip = label_to_max_skip (insn);
30161 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
30162 already in the current 16 byte page, because otherwise
30163 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
30164 bytes to reach 16 byte boundary. */
30166 || (align <= 3 && max_skip != (1 << align) - 1))
30169 fprintf (dump_file, "Label %i with max_skip %i\n",
30170 INSN_UID (insn), max_skip);
30173 while (nbytes + max_skip >= 16)
30175 start = NEXT_INSN (start);
30176 if ((JUMP_P (start)
30177 && GET_CODE (PATTERN (start)) != ADDR_VEC
30178 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
30180 njumps--, isjump = 1;
30183 nbytes -= min_insn_size (start);
30189 min_size = min_insn_size (insn);
30190 nbytes += min_size;
30192 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
30193 INSN_UID (insn), min_size);
30195 && GET_CODE (PATTERN (insn)) != ADDR_VEC
30196 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
30204 start = NEXT_INSN (start);
30205 if ((JUMP_P (start)
30206 && GET_CODE (PATTERN (start)) != ADDR_VEC
30207 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
30209 njumps--, isjump = 1;
30212 nbytes -= min_insn_size (start);
30214 gcc_assert (njumps >= 0);
30216 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
30217 INSN_UID (start), INSN_UID (insn), nbytes);
30219 if (njumps == 3 && isjump && nbytes < 16)
30221 int padsize = 15 - nbytes + min_insn_size (insn);
30224 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
30225 INSN_UID (insn), padsize);
30226 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
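/* Illustrative sketch (not part of the port): the shape of the sliding
   window maintained above, over precomputed (size, is_jump) arrays.  */
#if 0
static void
four_jump_window_sketch (const int *size, const int *is_jump, int n)
{
  int start = 0, nbytes = 0, njumps = 0, i;

  for (i = 0; i < n; i++)
    {
      int evicted_jump = 0;

      nbytes += size[i];
      njumps += is_jump[i];
      /* Shrink to the minimal interval still holding four jumps; the
	 last insn evicted is necessarily a jump.  */
      while (njumps > 3)
	{
	  evicted_jump = is_jump[start];
	  njumps -= is_jump[start];
	  nbytes -= size[start];
	  start++;
	}
      /* Four jumps would share one 16-byte window: pad before insn I
	 by 15 - nbytes + size[i] bytes, as gen_pad does above.  */
      if (njumps == 3 && evicted_jump && nbytes < 16)
	;
    }
}
#endif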
30232 /* The AMD Athlon works faster
30233 when RET is not the destination of a conditional jump or directly preceded
30234 by another jump instruction. We avoid the penalty by inserting a NOP just
30235 before the RET instructions in such cases. */
30237 ix86_pad_returns (void)
30242 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
30244 basic_block bb = e->src;
30245 rtx ret = BB_END (bb);
30247 bool replace = false;
30249 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
30250 || optimize_bb_for_size_p (bb))
30252 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
30253 if (active_insn_p (prev) || LABEL_P (prev))
30255 if (prev && LABEL_P (prev))
30260 FOR_EACH_EDGE (e, ei, bb->preds)
30261 if (EDGE_FREQUENCY (e) && e->src->index >= 0
30262 && !(e->flags & EDGE_FALLTHRU))
30267 prev = prev_active_insn (ret);
30269 && ((JUMP_P (prev) && any_condjump_p (prev))
30272 /* Empty functions get branch mispredict even when
30273 the jump destination is not visible to us. */
30274 if (!prev && !optimize_function_for_size_p (cfun))
30279 emit_jump_insn_before (gen_return_internal_long (), ret);
30285 /* Count the minimum number of instructions in BB. Return 4 if the
30286 number of instructions >= 4. */
30289 ix86_count_insn_bb (basic_block bb)
30292 int insn_count = 0;
30294 /* Count number of instructions in this block. Return 4 if the number
30295 of instructions >= 4. */
30296 FOR_BB_INSNS (bb, insn)
30298 /* This only happens in exit blocks. */
30300 && GET_CODE (PATTERN (insn)) == RETURN)
30303 if (NONDEBUG_INSN_P (insn)
30304 && GET_CODE (PATTERN (insn)) != USE
30305 && GET_CODE (PATTERN (insn)) != CLOBBER)
30308 if (insn_count >= 4)
30317 /* Count the minimum number of instructions in code path in BB.
30318 Return 4 if the number of instructions >= 4. */
30321 ix86_count_insn (basic_block bb)
30325 int min_prev_count;
30327 /* Only bother counting instructions along paths with no
30328 more than 2 basic blocks between entry and exit. Given
30329 that BB has an edge to exit, determine if a predecessor
30330 of BB has an edge from entry. If so, compute the number
30331 of instructions in the predecessor block. If there
30332 happen to be multiple such blocks, compute the minimum. */
30333 min_prev_count = 4;
30334 FOR_EACH_EDGE (e, ei, bb->preds)
30337 edge_iterator prev_ei;
30339 if (e->src == ENTRY_BLOCK_PTR)
30341 min_prev_count = 0;
30344 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
30346 if (prev_e->src == ENTRY_BLOCK_PTR)
30348 int count = ix86_count_insn_bb (e->src);
30349 if (count < min_prev_count)
30350 min_prev_count = count;
30356 if (min_prev_count < 4)
30357 min_prev_count += ix86_count_insn_bb (bb);
30359 return min_prev_count;
30362 /* Pad short function to 4 instructions. */
30365 ix86_pad_short_function (void)
30370 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
30372 rtx ret = BB_END (e->src);
30373 if (JUMP_P (ret) && GET_CODE (PATTERN (ret)) == RETURN)
30375 int insn_count = ix86_count_insn (e->src);
30377 /* Pad short function. */
30378 if (insn_count < 4)
30382 /* Find epilogue. */
30385 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
30386 insn = PREV_INSN (insn);
30391 /* Two NOPs count as one instruction. */
30392 insn_count = 2 * (4 - insn_count);
30393 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
30399 /* Implement machine specific optimizations. We implement padding of returns
30400 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
30404 /* We are freeing block_for_insn in the toplev to keep compatibility
30405 with old MDEP_REORGS that are not CFG based. Recompute it now. */
30406 compute_bb_for_insn ();
30408 /* Run the vzeroupper optimization if needed. */
30409 if (TARGET_VZEROUPPER)
30410 move_or_delete_vzeroupper ();
30412 if (optimize && optimize_function_for_speed_p (cfun))
30414 if (TARGET_PAD_SHORT_FUNCTION)
30415 ix86_pad_short_function ();
30416 else if (TARGET_PAD_RETURNS)
30417 ix86_pad_returns ();
30418 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
30419 if (TARGET_FOUR_JUMP_LIMIT)
30420 ix86_avoid_jump_mispredicts ();
30425 /* Return nonzero when a QImode register that must be represented via a REX prefix is used. */
30428 x86_extended_QIreg_mentioned_p (rtx insn)
30431 extract_insn_cached (insn);
30432 for (i = 0; i < recog_data.n_operands; i++)
30433 if (REG_P (recog_data.operand[i])
30434 && REGNO (recog_data.operand[i]) > BX_REG)
30439 /* Return nonzero when P points to register encoded via REX prefix.
30440 Called via for_each_rtx. */
30442 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
30444 unsigned int regno;
30447 regno = REGNO (*p);
30448 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
30451 /* Return true when INSN mentions a register that must be encoded using a REX prefix. */
30454 x86_extended_reg_mentioned_p (rtx insn)
30456 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
30457 extended_reg_mentioned_1, NULL);
30460 /* If profitable, negate (without causing overflow) integer constant
30461 of mode MODE at location LOC. Return true in this case. */
30463 x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
30467 if (!CONST_INT_P (*loc))
30473 /* DImode x86_64 constants must fit in 32 bits. */
30474 gcc_assert (x86_64_immediate_operand (*loc, mode));
30485 gcc_unreachable ();
30488 /* Avoid overflows. */
30489 if (mode_signbit_p (mode, *loc))
30492 val = INTVAL (*loc);
30494 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
30495 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
30496 if ((val < 0 && val != -128)
30499 *loc = GEN_INT (-val);
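/* Illustrative sketch (not part of the port) of the rule above: negating
   lets ADD become SUB with a prettier operand, and 128/-128 are special
   because -128 fits in a sign-extended imm8 while +128 does not.  */
#if 0
static int
maybe_negate_sketch (long *val)
{
  if ((*val < 0 && *val != -128) || *val == 128)
    {
      *val = -*val;
      return 1;		/* caller swaps ADD and SUB */
    }
  return 0;
}
#endif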
30506 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
30507 optabs would emit if we didn't have TFmode patterns. */
30510 x86_emit_floatuns (rtx operands[2])
30512 rtx neglab, donelab, i0, i1, f0, in, out;
30513 enum machine_mode mode, inmode;
30515 inmode = GET_MODE (operands[1]);
30516 gcc_assert (inmode == SImode || inmode == DImode);
30519 in = force_reg (inmode, operands[1]);
30520 mode = GET_MODE (out);
30521 neglab = gen_label_rtx ();
30522 donelab = gen_label_rtx ();
30523 f0 = gen_reg_rtx (mode);
30525 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
30527 expand_float (out, in, 0);
30529 emit_jump_insn (gen_jump (donelab));
30532 emit_label (neglab);
30534 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
30536 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
30538 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
30540 expand_float (f0, i0, 0);
30542 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
30544 emit_label (donelab);
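/* Illustrative sketch (not part of the port): the halve-convert-double
   trick above in plain C for DImode input.  OR-ing the low bit back in
   keeps the final doubling correctly rounded.  */
#if 0
static double
floatuns_sketch (unsigned long long u)
{
  unsigned long long half;
  double d;

  if ((long long) u >= 0)
    return (double) (long long) u;	/* sign bit clear: direct convert */
  half = (u >> 1) | (u & 1);		/* i0 = (in >> 1) | (in & 1) */
  d = (double) (long long) half;
  return d + d;				/* out = f0 + f0 */
}
#endif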
30547 /* AVX does not support 32-byte integer vector operations,
30548 thus the longest vector we are faced with is V16QImode. */
30549 #define MAX_VECT_LEN 16
30551 struct expand_vec_perm_d
30553 rtx target, op0, op1;
30554 unsigned char perm[MAX_VECT_LEN];
30555 enum machine_mode vmode;
30556 unsigned char nelt;
30560 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
30561 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
30563 /* Get a vector mode of the same size as the original but with elements
30564 twice as wide. This is only guaranteed to apply to integral vectors. */
30566 static inline enum machine_mode
30567 get_mode_wider_vector (enum machine_mode o)
30569 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
30570 enum machine_mode n = GET_MODE_WIDER_MODE (o);
30571 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
30572 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
30576 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
30577 with all elements equal to VAR. Return true if successful. */
30580 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
30581 rtx target, rtx val)
30604 /* First attempt to recognize VAL as-is. */
30605 dup = gen_rtx_VEC_DUPLICATE (mode, val);
30606 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
30607 if (recog_memoized (insn) < 0)
30610 /* If that fails, force VAL into a register. */
30613 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
30614 seq = get_insns ();
30617 emit_insn_before (seq, insn);
30619 ok = recog_memoized (insn) >= 0;
30628 if (TARGET_SSE || TARGET_3DNOW_A)
30632 val = gen_lowpart (SImode, val);
30633 x = gen_rtx_TRUNCATE (HImode, val);
30634 x = gen_rtx_VEC_DUPLICATE (mode, x);
30635 emit_insn (gen_rtx_SET (VOIDmode, target, x));
30648 struct expand_vec_perm_d dperm;
30652 memset (&dperm, 0, sizeof (dperm));
30653 dperm.target = target;
30654 dperm.vmode = mode;
30655 dperm.nelt = GET_MODE_NUNITS (mode);
30656 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
30658 /* Extend to SImode using a paradoxical SUBREG. */
30659 tmp1 = gen_reg_rtx (SImode);
30660 emit_move_insn (tmp1, gen_lowpart (SImode, val));
30662 /* Insert the SImode value as low element of a V4SImode vector. */
30663 tmp2 = gen_lowpart (V4SImode, dperm.op0);
30664 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
30666 ok = (expand_vec_perm_1 (&dperm)
30667 || expand_vec_perm_broadcast_1 (&dperm));
30679 /* Replicate the value once into the next wider mode and recurse. */
30681 enum machine_mode smode, wsmode, wvmode;
30684 smode = GET_MODE_INNER (mode);
30685 wvmode = get_mode_wider_vector (mode);
30686 wsmode = GET_MODE_INNER (wvmode);
30688 val = convert_modes (wsmode, smode, val, true);
30689 x = expand_simple_binop (wsmode, ASHIFT, val,
30690 GEN_INT (GET_MODE_BITSIZE (smode)),
30691 NULL_RTX, 1, OPTAB_LIB_WIDEN);
30692 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
30694 x = gen_lowpart (wvmode, target);
30695 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
30703 enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
30704 rtx x = gen_reg_rtx (hvmode);
30706 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
30709 x = gen_rtx_VEC_CONCAT (mode, x, x);
30710 emit_insn (gen_rtx_SET (VOIDmode, target, x));
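/* Illustrative sketch (not part of the port): the scalar side of the
   widen-and-recurse broadcast above.  A byte is replicated into 16 bits,
   then 32, after which one SImode broadcast fills the whole vector.  */
#if 0
static unsigned int
splat_byte_sketch (unsigned char b)
{
  unsigned short h = (unsigned short) ((b << 8) | b);	/* QI -> HI */
  return ((unsigned int) h << 16) | h;			/* HI -> SI */
}
#endif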
30719 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
30720 whose ONE_VAR element is VAR, and other elements are zero. Return true if successful. */
30724 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
30725 rtx target, rtx var, int one_var)
30727 enum machine_mode vsimode;
30730 bool use_vector_set = false;
30735 /* For SSE4.1, we normally use vector set. But if the second
30736 element is zero and inter-unit moves are OK, we use movq instead. */
30738 use_vector_set = (TARGET_64BIT
30740 && !(TARGET_INTER_UNIT_MOVES
30746 use_vector_set = TARGET_SSE4_1;
30749 use_vector_set = TARGET_SSE2;
30752 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
30759 use_vector_set = TARGET_AVX;
30762 /* Use ix86_expand_vector_set in 64bit mode only. */
30763 use_vector_set = TARGET_AVX && TARGET_64BIT;
30769 if (use_vector_set)
30771 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
30772 var = force_reg (GET_MODE_INNER (mode), var);
30773 ix86_expand_vector_set (mmx_ok, target, var, one_var);
30789 var = force_reg (GET_MODE_INNER (mode), var);
30790 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
30791 emit_insn (gen_rtx_SET (VOIDmode, target, x));
30796 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
30797 new_target = gen_reg_rtx (mode);
30799 new_target = target;
30800 var = force_reg (GET_MODE_INNER (mode), var);
30801 x = gen_rtx_VEC_DUPLICATE (mode, var);
30802 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
30803 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
30806 /* We need to shuffle the value to the correct position, so
30807 create a new pseudo to store the intermediate result. */
30809 /* With SSE2, we can use the integer shuffle insns. */
30810 if (mode != V4SFmode && TARGET_SSE2)
30812 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
30814 GEN_INT (one_var == 1 ? 0 : 1),
30815 GEN_INT (one_var == 2 ? 0 : 1),
30816 GEN_INT (one_var == 3 ? 0 : 1)));
30817 if (target != new_target)
30818 emit_move_insn (target, new_target);
30822 /* Otherwise convert the intermediate result to V4SFmode and
30823 use the SSE1 shuffle instructions. */
30824 if (mode != V4SFmode)
30826 tmp = gen_reg_rtx (V4SFmode);
30827 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
30832 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
30834 GEN_INT (one_var == 1 ? 0 : 1),
30835 GEN_INT (one_var == 2 ? 0+4 : 1+4),
30836 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
30838 if (mode != V4SFmode)
30839 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
30840 else if (tmp != target)
30841 emit_move_insn (target, tmp);
30843 else if (target != new_target)
30844 emit_move_insn (target, new_target);
30849 vsimode = V4SImode;
30855 vsimode = V2SImode;
30861 /* Zero extend the variable element to SImode and recurse. */
30862 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
30864 x = gen_reg_rtx (vsimode);
30865 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
30867 gcc_unreachable ();
30869 emit_move_insn (target, gen_lowpart (mode, x));
30877 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
30878 consisting of the values in VALS. It is known that all elements
30879 except ONE_VAR are constants. Return true if successful. */
30882 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
30883 rtx target, rtx vals, int one_var)
30885 rtx var = XVECEXP (vals, 0, one_var);
30886 enum machine_mode wmode;
30889 const_vec = copy_rtx (vals);
30890 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
30891 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
30899 /* For the two element vectors, it's just as easy to use
30900 the general case. */
30904 /* Use ix86_expand_vector_set in 64bit mode only. */
30927 /* There's no way to set one QImode entry easily. Combine
30928 the variable value with its adjacent constant value, and
30929 promote to an HImode set. */
30930 x = XVECEXP (vals, 0, one_var ^ 1);
30933 var = convert_modes (HImode, QImode, var, true);
30934 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
30935 NULL_RTX, 1, OPTAB_LIB_WIDEN);
30936 x = GEN_INT (INTVAL (x) & 0xff);
30940 var = convert_modes (HImode, QImode, var, true);
30941 x = gen_int_mode (INTVAL (x) << 8, HImode);
30943 if (x != const0_rtx)
30944 var = expand_simple_binop (HImode, IOR, var, x, var,
30945 1, OPTAB_LIB_WIDEN);
30947 x = gen_reg_rtx (wmode);
30948 emit_move_insn (x, gen_lowpart (wmode, const_vec));
30949 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
30951 emit_move_insn (target, gen_lowpart (mode, x));
30958 emit_move_insn (target, const_vec);
30959 ix86_expand_vector_set (mmx_ok, target, var, one_var);
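/* Illustrative sketch (not part of the port): fusing the variable QImode
   element with its constant neighbour into one HImode element, as the
   code above does before a single vec_set.  */
#if 0
static unsigned short
fuse_qi_pair_sketch (unsigned char var, unsigned char neighbour,
		     int var_is_high /* one_var & 1 */)
{
  if (var_is_high)
    return (unsigned short) ((var << 8) | neighbour);
  return (unsigned short) ((neighbour << 8) | var);
}
#endif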
30963 /* A subroutine of ix86_expand_vector_init_general. Use vector
30964 concatenate to handle the most general case: all values variable,
30965 and none identical. */
30968 ix86_expand_vector_init_concat (enum machine_mode mode,
30969 rtx target, rtx *ops, int n)
30971 enum machine_mode cmode, hmode = VOIDmode;
30972 rtx first[8], second[4];
31012 gcc_unreachable ();
31015 if (!register_operand (ops[1], cmode))
31016 ops[1] = force_reg (cmode, ops[1]);
31017 if (!register_operand (ops[0], cmode))
31018 ops[0] = force_reg (cmode, ops[0]);
31019 emit_insn (gen_rtx_SET (VOIDmode, target,
31020 gen_rtx_VEC_CONCAT (mode, ops[0],
31040 gcc_unreachable ();
31056 gcc_unreachable ();
31061 /* FIXME: We process inputs backward to help RA. PR 36222. */
31064 for (; i > 0; i -= 2, j--)
31066 first[j] = gen_reg_rtx (cmode);
31067 v = gen_rtvec (2, ops[i - 1], ops[i]);
31068 ix86_expand_vector_init (false, first[j],
31069 gen_rtx_PARALLEL (cmode, v));
31075 gcc_assert (hmode != VOIDmode);
31076 for (i = j = 0; i < n; i += 2, j++)
31078 second[j] = gen_reg_rtx (hmode);
31079 ix86_expand_vector_init_concat (hmode, second [j],
31083 ix86_expand_vector_init_concat (mode, target, second, n);
31086 ix86_expand_vector_init_concat (mode, target, first, n);
31090 gcc_unreachable ();
31094 /* A subroutine of ix86_expand_vector_init_general. Use vector
31095 interleave to handle the most general case: all values variable,
31096 and none identical. */
31099 ix86_expand_vector_init_interleave (enum machine_mode mode,
31100 rtx target, rtx *ops, int n)
31102 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
31105 rtx (*gen_load_even) (rtx, rtx, rtx);
31106 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
31107 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
31112 gen_load_even = gen_vec_setv8hi;
31113 gen_interleave_first_low = gen_vec_interleave_lowv4si;
31114 gen_interleave_second_low = gen_vec_interleave_lowv2di;
31115 inner_mode = HImode;
31116 first_imode = V4SImode;
31117 second_imode = V2DImode;
31118 third_imode = VOIDmode;
31121 gen_load_even = gen_vec_setv16qi;
31122 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
31123 gen_interleave_second_low = gen_vec_interleave_lowv4si;
31124 inner_mode = QImode;
31125 first_imode = V8HImode;
31126 second_imode = V4SImode;
31127 third_imode = V2DImode;
31130 gcc_unreachable ();
31133 for (i = 0; i < n; i++)
31135 /* Extend the odd element to SImode using a paradoxical SUBREG. */
31136 op0 = gen_reg_rtx (SImode);
31137 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
31139 /* Insert the SImode value as low element of V4SImode vector. */
31140 op1 = gen_reg_rtx (V4SImode);
31141 op0 = gen_rtx_VEC_MERGE (V4SImode,
31142 gen_rtx_VEC_DUPLICATE (V4SImode,
31144 CONST0_RTX (V4SImode),
31146 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
31148 /* Cast the V4SImode vector back to a vector in the original mode. */
31149 op0 = gen_reg_rtx (mode);
31150 emit_move_insn (op0, gen_lowpart (mode, op1));
31152 /* Load even elements into the second position. */
31153 emit_insn (gen_load_even (op0,
31154 force_reg (inner_mode,
31158 /* Cast vector to FIRST_IMODE vector. */
31159 ops[i] = gen_reg_rtx (first_imode);
31160 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
31163 /* Interleave low FIRST_IMODE vectors. */
31164 for (i = j = 0; i < n; i += 2, j++)
31166 op0 = gen_reg_rtx (first_imode);
31167 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
31169 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
31170 ops[j] = gen_reg_rtx (second_imode);
31171 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
31174 /* Interleave low SECOND_IMODE vectors. */
31175 switch (second_imode)
31178 for (i = j = 0; i < n / 2; i += 2, j++)
31180 op0 = gen_reg_rtx (second_imode);
31181 emit_insn (gen_interleave_second_low (op0, ops[i],
31184 /* Cast the SECOND_IMODE vector to the THIRD_IMODE vector. */
31186 ops[j] = gen_reg_rtx (third_imode);
31187 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
31189 second_imode = V2DImode;
31190 gen_interleave_second_low = gen_vec_interleave_lowv2di;
31194 op0 = gen_reg_rtx (second_imode);
31195 emit_insn (gen_interleave_second_low (op0, ops[0],
31198 /* Cast the SECOND_IMODE vector back to a vector in the original mode. */
31200 emit_insn (gen_rtx_SET (VOIDmode, target,
31201 gen_lowpart (mode, op0)));
31205 gcc_unreachable ();
31209 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
31210 all values variable, and none identical. */
31213 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
31214 rtx target, rtx vals)
31216 rtx ops[32], op0, op1;
31217 enum machine_mode half_mode = VOIDmode;
31224 if (!mmx_ok && !TARGET_SSE)
31236 n = GET_MODE_NUNITS (mode);
31237 for (i = 0; i < n; i++)
31238 ops[i] = XVECEXP (vals, 0, i);
31239 ix86_expand_vector_init_concat (mode, target, ops, n);
31243 half_mode = V16QImode;
31247 half_mode = V8HImode;
31251 n = GET_MODE_NUNITS (mode);
31252 for (i = 0; i < n; i++)
31253 ops[i] = XVECEXP (vals, 0, i);
31254 op0 = gen_reg_rtx (half_mode);
31255 op1 = gen_reg_rtx (half_mode);
31256 ix86_expand_vector_init_interleave (half_mode, op0, ops,
31258 ix86_expand_vector_init_interleave (half_mode, op1,
31259 &ops [n >> 1], n >> 2);
31260 emit_insn (gen_rtx_SET (VOIDmode, target,
31261 gen_rtx_VEC_CONCAT (mode, op0, op1)));
31265 if (!TARGET_SSE4_1)
31273 /* Don't use ix86_expand_vector_init_interleave if we can't
31274 move from GPR to SSE register directly. */
31275 if (!TARGET_INTER_UNIT_MOVES)
31278 n = GET_MODE_NUNITS (mode);
31279 for (i = 0; i < n; i++)
31280 ops[i] = XVECEXP (vals, 0, i);
31281 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
31289 gcc_unreachable ();
31293 int i, j, n_elts, n_words, n_elt_per_word;
31294 enum machine_mode inner_mode;
31295 rtx words[4], shift;
31297 inner_mode = GET_MODE_INNER (mode);
31298 n_elts = GET_MODE_NUNITS (mode);
31299 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
31300 n_elt_per_word = n_elts / n_words;
31301 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
31303 for (i = 0; i < n_words; ++i)
31305 rtx word = NULL_RTX;
31307 for (j = 0; j < n_elt_per_word; ++j)
31309 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
31310 elt = convert_modes (word_mode, inner_mode, elt, true);
31316 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
31317 word, 1, OPTAB_LIB_WIDEN);
31318 word = expand_simple_binop (word_mode, IOR, word, elt,
31319 word, 1, OPTAB_LIB_WIDEN);
31327 emit_move_insn (target, gen_lowpart (mode, words[0]));
31328 else if (n_words == 2)
31330 rtx tmp = gen_reg_rtx (mode);
31331 emit_clobber (tmp);
31332 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
31333 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
31334 emit_move_insn (target, tmp);
31336 else if (n_words == 4)
31338 rtx tmp = gen_reg_rtx (V4SImode);
31339 gcc_assert (word_mode == SImode);
31340 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
31341 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
31342 emit_move_insn (target, gen_lowpart (mode, tmp));
31345 gcc_unreachable ();
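/* Illustrative sketch (not part of the port): the shift-and-IOR packing
   used by the word fallback above, for byte-sized elements on a
   little-endian target.  */
#if 0
static unsigned int
pack_word_sketch (const unsigned char *elt, int n_elt_per_word)
{
  unsigned int word = elt[n_elt_per_word - 1];
  int j;

  /* The highest-numbered element enters first and is shifted up as the
     lower ones come in, leaving elt[0] in the low byte.  */
  for (j = n_elt_per_word - 2; j >= 0; j--)
    word = (word << 8) | elt[j];
  return word;
}
#endif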
31349 /* Initialize vector TARGET via VALS. Suppress the use of MMX
31350 instructions unless MMX_OK is true. */
31353 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
31355 enum machine_mode mode = GET_MODE (target);
31356 enum machine_mode inner_mode = GET_MODE_INNER (mode);
31357 int n_elts = GET_MODE_NUNITS (mode);
31358 int n_var = 0, one_var = -1;
31359 bool all_same = true, all_const_zero = true;
31363 for (i = 0; i < n_elts; ++i)
31365 x = XVECEXP (vals, 0, i);
31366 if (!(CONST_INT_P (x)
31367 || GET_CODE (x) == CONST_DOUBLE
31368 || GET_CODE (x) == CONST_FIXED))
31369 n_var++, one_var = i;
31370 else if (x != CONST0_RTX (inner_mode))
31371 all_const_zero = false;
31372 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
31376 /* Constants are best loaded from the constant pool. */
31379 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
31383 /* If all values are identical, broadcast the value. */
31385 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
31386 XVECEXP (vals, 0, 0)))
31389 /* Values where only one field is non-constant are best loaded from
31390 the pool and overwritten via move later. */
31394 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
31395 XVECEXP (vals, 0, one_var),
31399 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
31403 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
31407 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
31409 enum machine_mode mode = GET_MODE (target);
31410 enum machine_mode inner_mode = GET_MODE_INNER (mode);
31411 enum machine_mode half_mode;
31412 bool use_vec_merge = false;
31414 static rtx (*gen_extract[6][2]) (rtx, rtx)
31416 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
31417 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
31418 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
31419 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
31420 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
31421 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
31423 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
31425 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
31426 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
31427 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
31428 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
31429 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
31430 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
31440 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
31441 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
31443 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
31445 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
31446 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
31452 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
31456 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
31457 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
31459 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
31461 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
31462 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
31469 /* For the two element vectors, we implement a VEC_CONCAT with
31470 the extraction of the other element. */
31472 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
31473 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
31476 op0 = val, op1 = tmp;
31478 op0 = tmp, op1 = val;
31480 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
31481 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
31486 use_vec_merge = TARGET_SSE4_1;
31493 use_vec_merge = true;
31497 /* tmp = target = A B C D */
31498 tmp = copy_to_reg (target);
31499 /* target = A A B B */
31500 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
31501 /* target = X A B B */
31502 ix86_expand_vector_set (false, target, val, 0);
31503 /* target = A X C D */
31504 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
31505 const1_rtx, const0_rtx,
31506 GEN_INT (2+4), GEN_INT (3+4)));
31510 /* tmp = target = A B C D */
31511 tmp = copy_to_reg (target);
31512 /* tmp = X B C D */
31513 ix86_expand_vector_set (false, tmp, val, 0);
31514 /* target = A B X D */
31515 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
31516 const0_rtx, const1_rtx,
31517 GEN_INT (0+4), GEN_INT (3+4)));
31521 /* tmp = target = A B C D */
31522 tmp = copy_to_reg (target);
31523 /* tmp = X B C D */
31524 ix86_expand_vector_set (false, tmp, val, 0);
31525 /* target = A B X D */
31526 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
31527 const0_rtx, const1_rtx,
31528 GEN_INT (2+4), GEN_INT (0+4)));
31532 gcc_unreachable ();
31537 use_vec_merge = TARGET_SSE4_1;
31541 /* Element 0 handled by vec_merge below. */
31544 use_vec_merge = true;
31550 /* With SSE2, use integer shuffles to swap element 0 and ELT,
31551 store into element 0, then shuffle them back. */
31555 order[0] = GEN_INT (elt);
31556 order[1] = const1_rtx;
31557 order[2] = const2_rtx;
31558 order[3] = GEN_INT (3);
31559 order[elt] = const0_rtx;
31561 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
31562 order[1], order[2], order[3]));
31564 ix86_expand_vector_set (false, target, val, 0);
31566 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
31567 order[1], order[2], order[3]));
31571 /* For SSE1, we have to reuse the V4SF code. */
31572 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
31573 gen_lowpart (SFmode, val), elt);
31578 use_vec_merge = TARGET_SSE2;
31581 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
31585 use_vec_merge = TARGET_SSE4_1;
31592 half_mode = V16QImode;
31598 half_mode = V8HImode;
31604 half_mode = V4SImode;
31610 half_mode = V2DImode;
31616 half_mode = V4SFmode;
31622 half_mode = V2DFmode;
31628 /* Compute offset. */
31632 gcc_assert (i <= 1);
31634 /* Extract the half. */
31635 tmp = gen_reg_rtx (half_mode);
31636 emit_insn (gen_extract[j][i] (tmp, target));
31638 /* Put val in tmp at elt. */
31639 ix86_expand_vector_set (false, tmp, val, elt);
31642 emit_insn (gen_insert[j][i] (target, target, tmp));
31651 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
31652 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
31653 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
31657 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
31659 emit_move_insn (mem, target);
31661 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
31662 emit_move_insn (tmp, val);
31664 emit_move_insn (target, mem);
31669 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
31671 enum machine_mode mode = GET_MODE (vec);
31672 enum machine_mode inner_mode = GET_MODE_INNER (mode);
31673 bool use_vec_extr = false;
31686 use_vec_extr = true;
31690 use_vec_extr = TARGET_SSE4_1;
31702 tmp = gen_reg_rtx (mode);
31703 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
31704 GEN_INT (elt), GEN_INT (elt),
31705 GEN_INT (elt+4), GEN_INT (elt+4)));
31709 tmp = gen_reg_rtx (mode);
31710 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
31714 gcc_unreachable ();
31717 use_vec_extr = true;
31722 use_vec_extr = TARGET_SSE4_1;
31736 tmp = gen_reg_rtx (mode);
31737 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
31738 GEN_INT (elt), GEN_INT (elt),
31739 GEN_INT (elt), GEN_INT (elt)));
31743 tmp = gen_reg_rtx (mode);
31744 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
31748 gcc_unreachable ();
31751 use_vec_extr = true;
31756 /* For SSE1, we have to reuse the V4SF code. */
31757 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
31758 gen_lowpart (V4SFmode, vec), elt);
31764 use_vec_extr = TARGET_SSE2;
31767 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
31771 use_vec_extr = TARGET_SSE4_1;
31775 /* ??? Could extract the appropriate HImode element and shift. */
31782 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
31783 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
31785 /* Let the rtl optimizers know about the zero extension performed. */
31786 if (inner_mode == QImode || inner_mode == HImode)
31788 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
31789 target = gen_lowpart (SImode, target);
31792 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
31796 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
31798 emit_move_insn (mem, vec);
31800 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
31801 emit_move_insn (target, tmp);
31805 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
31806 pattern to reduce; DEST is the destination; IN is the input vector. */
31809 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
31811 rtx tmp1, tmp2, tmp3;
31813 tmp1 = gen_reg_rtx (V4SFmode);
31814 tmp2 = gen_reg_rtx (V4SFmode);
31815 tmp3 = gen_reg_rtx (V4SFmode);
31817 emit_insn (gen_sse_movhlps (tmp1, in, in));
31818 emit_insn (fn (tmp2, tmp1, in));
31820 emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
31821 const1_rtx, const1_rtx,
31822 GEN_INT (1+4), GEN_INT (1+4)));
31823 emit_insn (fn (dest, tmp2, tmp3));
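/* Illustrative sketch (not part of the port): what the movhlps/shufps
   sequence above computes for a commutative reduction FN such as min or
   max.  */
#if 0
static float
reduc_v4sf_sketch (float (*fn) (float, float), const float v[4])
{
  float a = fn (v[0], v[2]);	/* movhlps brings 2,3 down; FN combines */
  float b = fn (v[1], v[3]);
  return fn (a, b);		/* shufps broadcasts lane 1; final FN */
}
#endif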
31826 /* Target hook for scalar_mode_supported_p. */
31828 ix86_scalar_mode_supported_p (enum machine_mode mode)
31830 if (DECIMAL_FLOAT_MODE_P (mode))
31831 return default_decimal_float_supported_p ();
31832 else if (mode == TFmode)
31835 return default_scalar_mode_supported_p (mode);
31838 /* Implements target hook vector_mode_supported_p. */
31840 ix86_vector_mode_supported_p (enum machine_mode mode)
31842 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
31844 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
31846 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
31848 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
31850 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
31855 /* Target hook for c_mode_for_suffix. */
31856 static enum machine_mode
31857 ix86_c_mode_for_suffix (char suffix)
31867 /* Worker function for TARGET_MD_ASM_CLOBBERS.
31869 We do this in the new i386 backend to maintain source compatibility
31870 with the old cc0-based compiler. */
31873 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
31874 tree inputs ATTRIBUTE_UNUSED,
31877 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
31879 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
31884 /* Implements the target hook targetm.asm.encode_section_info. This
31885 is not used by NetWare. */
31887 static void ATTRIBUTE_UNUSED
31888 ix86_encode_section_info (tree decl, rtx rtl, int first)
31890 default_encode_section_info (decl, rtl, first);
31892 if (TREE_CODE (decl) == VAR_DECL
31893 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
31894 && ix86_in_large_data_p (decl))
31895 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
31898 /* Worker function for REVERSE_CONDITION. */
31901 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
31903 return (mode != CCFPmode && mode != CCFPUmode
31904 ? reverse_condition (code)
31905 : reverse_condition_maybe_unordered (code));
31908 /* Output code to perform an x87 FP register move, from OPERANDS[1] to OPERANDS[0]. */
31912 output_387_reg_move (rtx insn, rtx *operands)
31914 if (REG_P (operands[0]))
31916 if (REG_P (operands[1])
31917 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
31919 if (REGNO (operands[0]) == FIRST_STACK_REG)
31920 return output_387_ffreep (operands, 0);
31921 return "fstp\t%y0";
31923 if (STACK_TOP_P (operands[0]))
31924 return "fld%Z1\t%y1";
31927 else if (MEM_P (operands[0]))
31929 gcc_assert (REG_P (operands[1]));
31930 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
31931 return "fstp%Z0\t%y0";
31934 /* There is no non-popping store to memory for XFmode.
31935 So if we need one, follow the store with a load. */
31936 if (GET_MODE (operands[0]) == XFmode)
31937 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
31939 return "fst%Z0\t%y0";
31946 /* Output code to perform a conditional jump to LABEL, if C2 flag in
31947 FP status register is set. */
31950 ix86_emit_fp_unordered_jump (rtx label)
31952 rtx reg = gen_reg_rtx (HImode);
31955 emit_insn (gen_x86_fnstsw_1 (reg));
31957 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
31959 emit_insn (gen_x86_sahf_1 (reg));
31961 temp = gen_rtx_REG (CCmode, FLAGS_REG);
31962 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
31966 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
31968 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
31969 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
31972 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
31973 gen_rtx_LABEL_REF (VOIDmode, label),
31975 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
31977 emit_jump_insn (temp);
31978 predict_jump (REG_BR_PROB_BASE * 10 / 100);
31981 /* Output code to perform a log1p XFmode calculation. */
31983 void ix86_emit_i387_log1p (rtx op0, rtx op1)
31985 rtx label1 = gen_label_rtx ();
31986 rtx label2 = gen_label_rtx ();
31988 rtx tmp = gen_reg_rtx (XFmode);
31989 rtx tmp2 = gen_reg_rtx (XFmode);
31992 emit_insn (gen_absxf2 (tmp, op1));
31993 test = gen_rtx_GE (VOIDmode, tmp,
31994 CONST_DOUBLE_FROM_REAL_VALUE (
31995 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
31997 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
31999 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
32000 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
32001 emit_jump (label2);
32003 emit_label (label1);
32004 emit_move_insn (tmp, CONST1_RTX (XFmode));
32005 emit_insn (gen_addxf3 (tmp, op1, tmp));
32006 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
32007 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
32009 emit_label (label2);
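/* Illustrative sketch (not part of the port): the branch structure
   emitted above, using GCC builtins to model the x87 instructions.
   fyl2xp1 is only accurate for small |x|, hence the 1 - sqrt(2)/2
   cutoff; larger inputs go through an explicit 1 + x and fyl2x.  */
#if 0
static double
log1p_sketch (double x)
{
  const double ln2 = 0.69314718055994530942;	/* fldln2 */

  if (__builtin_fabs (x) < 0.29289321881345247561)
    return __builtin_log1p (x);			/* fyl2xp1 path: no cancellation */
  return ln2 * __builtin_log2 (1.0 + x);	/* fyl2x path: 1 + x is safe here */
}
#endif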
32012 /* Output code to perform a Newton-Raphson approximation of a single precision
32013 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
32015 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
32017 rtx x0, x1, e0, e1;
32019 x0 = gen_reg_rtx (mode);
32020 e0 = gen_reg_rtx (mode);
32021 e1 = gen_reg_rtx (mode);
32022 x1 = gen_reg_rtx (mode);
32024 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
32026 /* x0 = rcp(b) estimate */
32027 emit_insn (gen_rtx_SET (VOIDmode, x0,
32028 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
32031 emit_insn (gen_rtx_SET (VOIDmode, e0,
32032 gen_rtx_MULT (mode, x0, b)));
32035 emit_insn (gen_rtx_SET (VOIDmode, e0,
32036 gen_rtx_MULT (mode, x0, e0)));
32039 emit_insn (gen_rtx_SET (VOIDmode, e1,
32040 gen_rtx_PLUS (mode, x0, x0)));
32043 emit_insn (gen_rtx_SET (VOIDmode, x1,
32044 gen_rtx_MINUS (mode, e1, e0)));
32047 emit_insn (gen_rtx_SET (VOIDmode, res,
32048 gen_rtx_MULT (mode, a, x1)));
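/* Illustrative sketch (not part of the port): one Newton-Raphson
   refinement mirroring the RTL above; 1.0f/b stands in for the ~12-bit
   rcpss estimate.  */
#if 0
static float
swdiv_sketch (float a, float b)
{
  float x0 = 1.0f / b;		/* x0 = rcp(b) estimate */
  float e0 = x0 * b;
  float e1;

  e0 = x0 * e0;			/* e0 = b * x0 * x0 */
  e1 = x0 + x0;			/* e1 = 2 * x0 */
  return a * (e1 - e0);		/* res = a * (2*x0 - b*x0*x0) */
}
#endif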
32051 /* Output code to perform a Newton-Raphson approximation of a
32052 single precision floating point [reciprocal] square root. */
32054 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
32057 rtx x0, e0, e1, e2, e3, mthree, mhalf;
32060 x0 = gen_reg_rtx (mode);
32061 e0 = gen_reg_rtx (mode);
32062 e1 = gen_reg_rtx (mode);
32063 e2 = gen_reg_rtx (mode);
32064 e3 = gen_reg_rtx (mode);
32066 real_from_integer (&r, VOIDmode, -3, -1, 0);
32067 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
32069 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
32070 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
32072 if (VECTOR_MODE_P (mode))
32074 mthree = ix86_build_const_vector (mode, true, mthree);
32075 mhalf = ix86_build_const_vector (mode, true, mhalf);
32078 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
32079 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
32081 /* x0 = rsqrt(a) estimate */
32082 emit_insn (gen_rtx_SET (VOIDmode, x0,
32083 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
32086 /* If a == 0.0, filter out the infinite estimate to prevent a NaN for sqrt(0.0). */
32091 zero = gen_reg_rtx (mode);
32092 mask = gen_reg_rtx (mode);
32094 zero = force_reg (mode, CONST0_RTX(mode));
32095 emit_insn (gen_rtx_SET (VOIDmode, mask,
32096 gen_rtx_NE (mode, zero, a)));
32098 emit_insn (gen_rtx_SET (VOIDmode, x0,
32099 gen_rtx_AND (mode, x0, mask)));
32103 emit_insn (gen_rtx_SET (VOIDmode, e0,
32104 gen_rtx_MULT (mode, x0, a)));
32106 emit_insn (gen_rtx_SET (VOIDmode, e1,
32107 gen_rtx_MULT (mode, e0, x0)));
32110 mthree = force_reg (mode, mthree);
32111 emit_insn (gen_rtx_SET (VOIDmode, e2,
32112 gen_rtx_PLUS (mode, e1, mthree)));
32114 mhalf = force_reg (mode, mhalf);
32116 /* e3 = -.5 * x0 */
32117 emit_insn (gen_rtx_SET (VOIDmode, e3,
32118 gen_rtx_MULT (mode, x0, mhalf)));
32120 /* e3 = -.5 * e0 */
32121 emit_insn (gen_rtx_SET (VOIDmode, e3,
32122 gen_rtx_MULT (mode, e0, mhalf)));
32123 /* ret = e2 * e3 */
32124 emit_insn (gen_rtx_SET (VOIDmode, res,
32125 gen_rtx_MULT (mode, e2, e3)));
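/* Illustrative sketch (not part of the port): the refinement above in
   scalar form, with 1/sqrtf standing in for the rsqrtss estimate.  */
#if 0
static float
swsqrt_sketch (float a, int recip)
{
  float x0 = 1.0f / __builtin_sqrtf (a);	/* rsqrt(a) estimate */
  float e0 = x0 * a;
  float e1 = e0 * x0;			/* e1 = a * x0 * x0 */
  float e2 = e1 - 3.0f;			/* e2 = e1 + mthree */
  float e3 = (recip ? x0 : e0) * -0.5f;	/* e3 = -.5 * x0 (or e0) */

  return e2 * e3;			/* rsqrt(a), or sqrt(a) when !recip */
}
#endif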
32128 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
32130 static void ATTRIBUTE_UNUSED
32131 i386_solaris_elf_named_section (const char *name, unsigned int flags,
32134 /* With Binutils 2.15, the "@unwind" marker must be specified on
32135 every occurrence of the ".eh_frame" section, not just the first one. */
32138 && strcmp (name, ".eh_frame") == 0)
32140 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
32141 flags & SECTION_WRITE ? "aw" : "a");
32144 default_elf_asm_named_section (name, flags, decl);
32147 /* Return the mangling of TYPE if it is an extended fundamental type. */
32149 static const char *
32150 ix86_mangle_type (const_tree type)
32152 type = TYPE_MAIN_VARIANT (type);
32154 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
32155 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
32158 switch (TYPE_MODE (type))
32161 /* __float128 is "g". */
32164 /* "long double" or __float80 is "e". */
32171 /* For 32-bit code we can save PIC register setup by using
32172 __stack_chk_fail_local hidden function instead of calling
32173 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
32174 register, so it is better to call __stack_chk_fail directly. */
32177 ix86_stack_protect_fail (void)
32179 return TARGET_64BIT
32180 ? default_external_stack_protect_fail ()
32181 : default_hidden_stack_protect_fail ();
32184 /* Select a format to encode pointers in exception handling data. CODE
32185 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
32186 true if the symbol may be affected by dynamic relocations.
32188 ??? All x86 object file formats are capable of representing this.
32189 After all, the relocation needed is the same as for the call insn.
32190 Whether or not a particular assembler allows us to enter such, I
32191 guess we'll have to see. */
32193 asm_preferred_eh_data_format (int code, int global)
32197 int type = DW_EH_PE_sdata8;
32199 || ix86_cmodel == CM_SMALL_PIC
32200 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
32201 type = DW_EH_PE_sdata4;
32202 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
32204 if (ix86_cmodel == CM_SMALL
32205 || (ix86_cmodel == CM_MEDIUM && code))
32206 return DW_EH_PE_udata4;
32207 return DW_EH_PE_absptr;
32210 /* Expand copysign from SIGN to the positive value ABS_VALUE
32211 storing in RESULT. If MASK is non-null, it shall be a mask to mask out the sign bit. */
32214 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
32216 enum machine_mode mode = GET_MODE (sign);
32217 rtx sgn = gen_reg_rtx (mode);
32218 if (mask == NULL_RTX)
32220 enum machine_mode vmode;
32222 if (mode == SFmode)
32224 else if (mode == DFmode)
32229 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
32230 if (!VECTOR_MODE_P (mode))
32232 /* We need to generate a scalar mode mask in this case. */
32233 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
32234 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
32235 mask = gen_reg_rtx (mode);
32236 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
32240 mask = gen_rtx_NOT (mode, mask);
32241 emit_insn (gen_rtx_SET (VOIDmode, sgn,
32242 gen_rtx_AND (mode, mask, sign)));
32243 emit_insn (gen_rtx_SET (VOIDmode, result,
32244 gen_rtx_IOR (mode, abs_value, sgn)));
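/* Illustrative sketch (not part of the port): the same bit manipulation
   on a scalar float, type-punned through memcpy.  */
#if 0
static float
copysign_to_positive_sketch (float abs_value, float sign)
{
  unsigned int ai, si;

  __builtin_memcpy (&ai, &abs_value, sizeof ai);
  __builtin_memcpy (&si, &sign, sizeof si);
  ai |= si & 0x80000000u;	/* sgn = mask & sign; result = abs | sgn */
  __builtin_memcpy (&abs_value, &ai, sizeof ai);
  return abs_value;
}
#endif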
32247 /* Expand fabs (OP0) and return a new rtx that holds the result. The
32248 mask for masking out the sign-bit is stored in *SMASK, if that is non-null. */
32251 ix86_expand_sse_fabs (rtx op0, rtx *smask)
32253 enum machine_mode vmode, mode = GET_MODE (op0);
32256 xa = gen_reg_rtx (mode);
32257 if (mode == SFmode)
32259 else if (mode == DFmode)
32263 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
32264 if (!VECTOR_MODE_P (mode))
32266 /* We need to generate a scalar mode mask in this case. */
32267 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
32268 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
32269 mask = gen_reg_rtx (mode);
32270 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
32272 emit_insn (gen_rtx_SET (VOIDmode, xa,
32273 gen_rtx_AND (mode, op0, mask)));
32281 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
32282 swapping the operands if SWAP_OPERANDS is true. The expanded
32283 code is a forward jump to a newly created label in case the
32284 comparison is true. The generated label rtx is returned. */
32286 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
32287 bool swap_operands)
32298 label = gen_label_rtx ();
32299 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
32300 emit_insn (gen_rtx_SET (VOIDmode, tmp,
32301 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
32302 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
32303 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
32304 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
32305 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
32306 JUMP_LABEL (tmp) = label;
32311 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
32312 using comparison code CODE. Operands are swapped for the comparison if
32313 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
32315 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
32316 bool swap_operands)
32318 rtx (*insn)(rtx, rtx, rtx, rtx);
32319 enum machine_mode mode = GET_MODE (op0);
32320 rtx mask = gen_reg_rtx (mode);
32329 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
32331 emit_insn (insn (mask, op0, op1,
32332 gen_rtx_fmt_ee (code, mode, op0, op1)));
32336 /* Generate and return a rtx of mode MODE for 2**n where n is the number
32337 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
32339 ix86_gen_TWO52 (enum machine_mode mode)
32341 REAL_VALUE_TYPE TWO52r;
32344 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
32345 TWO52 = const_double_from_real_value (TWO52r, mode);
32346 TWO52 = force_reg (mode, TWO52);
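/* Why 2**52 (2**23 for SFmode): the double mantissa has 52 bits, so for
   0 <= x < 2**52 the sum x + 2**52 has no representable fraction bits left
   and is therefore x rounded to an integer; subtracting 2**52 again recovers
   that integer.  A sketch of the trick (illustration only; assumes
   round-to-nearest and no excess precision):

     static double
     round_magnitude (double x)   /* valid for 0 <= x < 0x1p52 */
     {
       double t = x + 0x1p52;
       return t - 0x1p52;
     }
*/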
32351 /* Expand SSE sequence for computing lround from OP1 storing
32352 into OP0. */
32354 ix86_expand_lround (rtx op0, rtx op1)
32356 /* C code for the stuff we're doing below:
32357 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
32358 return (long)tmp; */
32360 enum machine_mode mode = GET_MODE (op1);
32361 const struct real_format *fmt;
32362 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
32365 /* load nextafter (0.5, 0.0) */
32366 fmt = REAL_MODE_FORMAT (mode);
32367 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
32368 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
32370 /* adj = copysign (0.5, op1) */
32371 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
32372 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
32374 /* adj = op1 + adj */
32375 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
32377 /* op0 = (imode)adj */
32378 expand_fix (op0, adj, 0);
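/* Why nextafter (0.5, 0.0) instead of plain 0.5 (illustration only): for
   x == 0.49999999999999994, the largest double below 0.5, the sum x + 0.5
   rounds up to exactly 1.0 under round-to-nearest-even, so (long)(x + 0.5)
   would yield 1.  With the predecessor of 0.5 the sum is 1 - 2**-53, which is
   exactly representable and below 1, so the cast correctly yields 0.  */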
32381 /* Expand SSE2 sequence for computing lfloor or lceil (per DO_FLOOR)
32382 from OP1, storing into OP0. */
32384 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
32386 /* C code for the stuff we're doing below (for do_floor):
32387 xi = (long)op1;
32388 xi -= (double)xi > op1 ? 1 : 0;
32389 return xi; */
32391 enum machine_mode fmode = GET_MODE (op1);
32392 enum machine_mode imode = GET_MODE (op0);
32393 rtx ireg, freg, label, tmp;
32395 /* reg = (long)op1 */
32396 ireg = gen_reg_rtx (imode);
32397 expand_fix (ireg, op1, 0);
32399 /* freg = (double)reg */
32400 freg = gen_reg_rtx (fmode);
32401 expand_float (freg, ireg, 0);
32403 /* ireg = (freg > op1) ? ireg - 1 : ireg */
32404 label = ix86_expand_sse_compare_and_jump (UNLE,
32405 freg, op1, !do_floor);
32406 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
32407 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
32408 emit_move_insn (ireg, tmp);
32410 emit_label (label);
32411 LABEL_NUSES (label) = 1;
32413 emit_move_insn (op0, ireg);
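/* A scalar model of the compensation above (illustration only; assumes the
   value fits in a long):

     static long
     lfloor_sketch (double x)
     {
       long i = (long) x;     /* truncates toward zero */
       if ((double) i > x)    /* true only for negative non-integers */
         i -= 1;
       return i;
     }

   lceil is the mirror image: add 1 when (double) i < x.  */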
32416 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
32417 result in OPERAND0. */
32419 ix86_expand_rint (rtx operand0, rtx operand1)
32421 /* C code for the stuff we're doing below:
32422 xa = fabs (operand1);
32423 if (!isless (xa, 2**52))
32424 return operand1;
32425 xa = xa + 2**52 - 2**52;
32426 return copysign (xa, operand1); */
32428 enum machine_mode mode = GET_MODE (operand0);
32429 rtx res, xa, label, TWO52, mask;
32431 res = gen_reg_rtx (mode);
32432 emit_move_insn (res, operand1);
32434 /* xa = abs (operand1) */
32435 xa = ix86_expand_sse_fabs (res, &mask);
32437 /* if (!isless (xa, TWO52)) goto label; */
32438 TWO52 = ix86_gen_TWO52 (mode);
32439 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32441 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
32442 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
32444 ix86_sse_copysign_to_positive (res, xa, res, mask);
32446 emit_label (label);
32447 LABEL_NUSES (label) = 1;
32449 emit_move_insn (operand0, res);
32452 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
32453 into OPERAND0. */
32455 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
32457 /* C code for the stuff we expand below.
32458 double xa = fabs (x), x2;
32459 if (!isless (xa, TWO52))
32460 return x;
32461 xa = xa + TWO52 - TWO52;
32462 x2 = copysign (xa, x);
32463 Compensate. Floor:
32464 if (x2 > x)
32465 x2 -= 1;
32466 Ceil:
32467 if (x2 < x)
32468 x2 += 1;
32469 return x2;
32470 */
32471 enum machine_mode mode = GET_MODE (operand0);
32472 rtx xa, TWO52, tmp, label, one, res, mask;
32474 TWO52 = ix86_gen_TWO52 (mode);
32476 /* Temporary for holding the result, initialized to the input
32477 operand to ease control flow. */
32478 res = gen_reg_rtx (mode);
32479 emit_move_insn (res, operand1);
32481 /* xa = abs (operand1) */
32482 xa = ix86_expand_sse_fabs (res, &mask);
32484 /* if (!isless (xa, TWO52)) goto label; */
32485 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32487 /* xa = xa + TWO52 - TWO52; */
32488 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
32489 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
32491 /* xa = copysign (xa, operand1) */
32492 ix86_sse_copysign_to_positive (xa, xa, res, mask);
32494 /* generate 1.0 or -1.0 */
32495 one = force_reg (mode,
32496 const_double_from_real_value (do_floor
32497 ? dconst1 : dconstm1, mode));
32499 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
32500 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
32501 emit_insn (gen_rtx_SET (VOIDmode, tmp,
32502 gen_rtx_AND (mode, one, tmp)));
32503 /* We always need to subtract here to preserve signed zero. */
32504 tmp = expand_simple_binop (mode, MINUS,
32505 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
32506 emit_move_insn (res, tmp);
32508 emit_label (label);
32509 LABEL_NUSES (label) = 1;
32511 emit_move_insn (operand0, res);
32514 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
32515 into OPERAND0. */
32517 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
32519 /* C code for the stuff we expand below.
32520 double xa = fabs (x), x2;
32521 if (!isless (xa, TWO52))
32522 return x;
32523 x2 = (double)(long)x;
32524 Compensate. Floor:
32525 if (x2 > x)
32526 x2 -= 1;
32527 Ceil:
32528 if (x2 < x)
32529 x2 += 1;
32530 if (HONOR_SIGNED_ZEROS (mode))
32531 return copysign (x2, x);
32532 return x2; */
32534 enum machine_mode mode = GET_MODE (operand0);
32535 rtx xa, xi, TWO52, tmp, label, one, res, mask;
32537 TWO52 = ix86_gen_TWO52 (mode);
32539 /* Temporary for holding the result, initialized to the input
32540 operand to ease control flow. */
32541 res = gen_reg_rtx (mode);
32542 emit_move_insn (res, operand1);
32544 /* xa = abs (operand1) */
32545 xa = ix86_expand_sse_fabs (res, &mask);
32547 /* if (!isless (xa, TWO52)) goto label; */
32548 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32550 /* xa = (double)(long)x */
32551 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
32552 expand_fix (xi, res, 0);
32553 expand_float (xa, xi, 0);
32556 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
32558 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
32559 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
32560 emit_insn (gen_rtx_SET (VOIDmode, tmp,
32561 gen_rtx_AND (mode, one, tmp)));
32562 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
32563 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
32564 emit_move_insn (res, tmp);
32566 if (HONOR_SIGNED_ZEROS (mode))
32567 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
32569 emit_label (label);
32570 LABEL_NUSES (label) = 1;
32572 emit_move_insn (operand0, res);
32575 /* Expand SSE sequence for computing round from OPERAND1 storing
32576 into OPERAND0. Sequence that works without relying on DImode truncation
32577 via cvttsd2siq, which is only available on 64-bit targets. */
32579 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
32581 /* C code for the stuff we expand below.
32582 double xa = fabs (x), xa2, x2;
32583 if (!isless (xa, TWO52))
32584 return x;
32585 Using the absolute value and copying back sign makes
32586 -0.0 -> -0.0 correct.
32587 xa2 = xa + TWO52 - TWO52;
32588 Compensate:
32589 dxa = xa2 - xa;
32590 if (dxa <= -0.5)
32591 xa2 += 1;
32592 else if (dxa > 0.5)
32593 xa2 -= 1;
32594 x2 = copysign (xa2, x);
32595 return x2; */
32597 enum machine_mode mode = GET_MODE (operand0);
32598 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
32600 TWO52 = ix86_gen_TWO52 (mode);
32602 /* Temporary for holding the result, initialized to the input
32603 operand to ease control flow. */
32604 res = gen_reg_rtx (mode);
32605 emit_move_insn (res, operand1);
32607 /* xa = abs (operand1) */
32608 xa = ix86_expand_sse_fabs (res, &mask);
32610 /* if (!isless (xa, TWO52)) goto label; */
32611 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32613 /* xa2 = xa + TWO52 - TWO52; */
32614 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
32615 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
32617 /* dxa = xa2 - xa; */
32618 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
32620 /* generate 0.5, 1.0 and -0.5 */
32621 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
32622 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
32623 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
32627 tmp = gen_reg_rtx (mode);
32628 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
32629 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
32630 emit_insn (gen_rtx_SET (VOIDmode, tmp,
32631 gen_rtx_AND (mode, one, tmp)));
32632 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
32633 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
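/* dxa <= -0.5 is tested as -0.5 >= dxa, i.e. UNGE (mhalf, dxa), so the same
   mask-generating compare helper serves both compensations.  */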
32634 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
32635 emit_insn (gen_rtx_SET (VOIDmode, tmp,
32636 gen_rtx_AND (mode, one, tmp)));
32637 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
32639 /* res = copysign (xa2, operand1) */
32640 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
32642 emit_label (label);
32643 LABEL_NUSES (label) = 1;
32645 emit_move_insn (operand0, res);
32648 /* Expand SSE sequence for computing trunc from OPERAND1 storing
32649 into OPERAND0. */
32651 ix86_expand_trunc (rtx operand0, rtx operand1)
32653 /* C code for SSE variant we expand below.
32654 double xa = fabs (x), x2;
32655 if (!isless (xa, TWO52))
32656 return x;
32657 x2 = (double)(long)x;
32658 if (HONOR_SIGNED_ZEROS (mode))
32659 return copysign (x2, x);
32660 return x2; */
32662 enum machine_mode mode = GET_MODE (operand0);
32663 rtx xa, xi, TWO52, label, res, mask;
32665 TWO52 = ix86_gen_TWO52 (mode);
32667 /* Temporary for holding the result, initialized to the input
32668 operand to ease control flow. */
32669 res = gen_reg_rtx (mode);
32670 emit_move_insn (res, operand1);
32672 /* xa = abs (operand1) */
32673 xa = ix86_expand_sse_fabs (res, &mask);
32675 /* if (!isless (xa, TWO52)) goto label; */
32676 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32678 /* x = (double)(long)x */
32679 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
32680 expand_fix (xi, res, 0);
32681 expand_float (res, xi, 0);
32683 if (HONOR_SIGNED_ZEROS (mode))
32684 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
32686 emit_label (label);
32687 LABEL_NUSES (label) = 1;
32689 emit_move_insn (operand0, res);
32692 /* Expand SSE sequence for computing trunc from OPERAND1 storing
32693 into OPERAND0. The sequence works without relying on DImode truncation
32694 via cvttsd2siq, which is only available on 64-bit targets. */
32695 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
32697 enum machine_mode mode = GET_MODE (operand0);
32698 rtx xa, mask, TWO52, label, one, res, smask, tmp;
32700 /* C code for SSE variant we expand below.
32701 double xa = fabs (x), x2;
32702 if (!isless (xa, TWO52))
32703 return x;
32704 xa2 = xa + TWO52 - TWO52;
32705 Compensate:
32706 if (xa2 > xa)
32707 xa2 -= 1.0;
32708 x2 = copysign (xa2, x);
32709 return x2; */
32712 TWO52 = ix86_gen_TWO52 (mode);
32714 /* Temporary for holding the result, initialized to the input
32715 operand to ease control flow. */
32716 res = gen_reg_rtx (mode);
32717 emit_move_insn (res, operand1);
32719 /* xa = abs (operand1) */
32720 xa = ix86_expand_sse_fabs (res, &smask);
32722 /* if (!isless (xa, TWO52)) goto label; */
32723 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32725 /* res = xa + TWO52 - TWO52; */
32726 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
32727 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
32728 emit_move_insn (res, tmp);
32731 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
32733 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
32734 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
32735 emit_insn (gen_rtx_SET (VOIDmode, mask,
32736 gen_rtx_AND (mode, mask, one)));
32737 tmp = expand_simple_binop (mode, MINUS,
32738 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
32739 emit_move_insn (res, tmp);
32741 /* res = copysign (res, operand1) */
32742 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
32744 emit_label (label);
32745 LABEL_NUSES (label) = 1;
32747 emit_move_insn (operand0, res);
32750 /* Expand SSE sequence for computing round from OPERAND1 storing
32751 into OPERAND0. */
32753 ix86_expand_round (rtx operand0, rtx operand1)
32755 /* C code for the stuff we're doing below:
32756 double xa = fabs (x);
32757 if (!isless (xa, TWO52))
32758 return x;
32759 xa = (double)(long)(xa + nextafter (0.5, 0.0));
32760 return copysign (xa, x);
32761 */
32762 enum machine_mode mode = GET_MODE (operand0);
32763 rtx res, TWO52, xa, label, xi, half, mask;
32764 const struct real_format *fmt;
32765 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
32767 /* Temporary for holding the result, initialized to the input
32768 operand to ease control flow. */
32769 res = gen_reg_rtx (mode);
32770 emit_move_insn (res, operand1);
32772 TWO52 = ix86_gen_TWO52 (mode);
32773 xa = ix86_expand_sse_fabs (res, &mask);
32774 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32776 /* load nextafter (0.5, 0.0) */
32777 fmt = REAL_MODE_FORMAT (mode);
32778 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
32779 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
32781 /* xa = xa + 0.5 */
32782 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
32783 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
32785 /* xa = (double)(int64_t)xa */
32786 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
32787 expand_fix (xi, xa, 0);
32788 expand_float (xa, xi, 0);
32790 /* res = copysign (xa, operand1) */
32791 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
32793 emit_label (label);
32794 LABEL_NUSES (label) = 1;
32796 emit_move_insn (operand0, res);
32800 /* Table of valid machine attributes. */
32801 static const struct attribute_spec ix86_attribute_table[] =
32803 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
32804 affects_type_identity } */
32805 /* Stdcall attribute says callee is responsible for popping arguments
32806 if they are not variable. */
32807 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
32809 /* Fastcall attribute says callee is responsible for popping arguments
32810 if they are not variable. */
32811 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
32813 /* Thiscall attribute says callee is responsible for popping arguments
32814 if they are not variable. */
32815 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
32817 /* Cdecl attribute says the callee is a normal C declaration. */
32818 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
32820 /* Regparm attribute specifies how many integer arguments are to be
32821 passed in registers. */
32822 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
32824 /* Sseregparm attribute says we are using x86_64 calling conventions
32825 for FP arguments. */
32826 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
32828 /* force_align_arg_pointer says this function realigns the stack at entry. */
32829 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
32830 false, true, true, ix86_handle_cconv_attribute, false },
32831 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
32832 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
32833 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
32834 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
32837 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
32839 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
32841 #ifdef SUBTARGET_ATTRIBUTE_TABLE
32842 SUBTARGET_ATTRIBUTE_TABLE,
32844 /* ms_abi and sysv_abi calling convention function attributes. */
32845 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
32846 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
32847 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
32849 { "callee_pop_aggregate_return", 1, 1, false, true, true,
32850 ix86_handle_callee_pop_aggregate_return, true },
32852 { NULL, 0, 0, false, false, false, NULL, false }
32855 /* Implement targetm.vectorize.builtin_vectorization_cost. */
32857 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
32858 tree vectype ATTRIBUTE_UNUSED,
32859 int misalign ATTRIBUTE_UNUSED)
32861 switch (type_of_cost)
32864 return ix86_cost->scalar_stmt_cost;
32867 return ix86_cost->scalar_load_cost;
32870 return ix86_cost->scalar_store_cost;
32873 return ix86_cost->vec_stmt_cost;
32876 return ix86_cost->vec_align_load_cost;
32879 return ix86_cost->vec_store_cost;
32881 case vec_to_scalar:
32882 return ix86_cost->vec_to_scalar_cost;
32884 case scalar_to_vec:
32885 return ix86_cost->scalar_to_vec_cost;
32887 case unaligned_load:
32888 case unaligned_store:
32889 return ix86_cost->vec_unalign_load_cost;
32891 case cond_branch_taken:
32892 return ix86_cost->cond_taken_branch_cost;
32894 case cond_branch_not_taken:
32895 return ix86_cost->cond_not_taken_branch_cost;
32901 gcc_unreachable ();
32906 /* Implement targetm.vectorize.builtin_vec_perm. */
32909 ix86_vectorize_builtin_vec_perm (tree vec_type, tree *mask_type)
32911 tree itype = TREE_TYPE (vec_type);
32912 bool u = TYPE_UNSIGNED (itype);
32913 enum machine_mode vmode = TYPE_MODE (vec_type);
32914 enum ix86_builtins fcode;
32915 bool ok = TARGET_SSE2;
32921 fcode = IX86_BUILTIN_VEC_PERM_V4DF;
32924 fcode = IX86_BUILTIN_VEC_PERM_V2DF;
32926 itype = ix86_get_builtin_type (IX86_BT_DI);
32931 fcode = IX86_BUILTIN_VEC_PERM_V8SF;
32935 fcode = IX86_BUILTIN_VEC_PERM_V4SF;
32937 itype = ix86_get_builtin_type (IX86_BT_SI);
32941 fcode = u ? IX86_BUILTIN_VEC_PERM_V2DI_U : IX86_BUILTIN_VEC_PERM_V2DI;
32944 fcode = u ? IX86_BUILTIN_VEC_PERM_V4SI_U : IX86_BUILTIN_VEC_PERM_V4SI;
32947 fcode = u ? IX86_BUILTIN_VEC_PERM_V8HI_U : IX86_BUILTIN_VEC_PERM_V8HI;
32950 fcode = u ? IX86_BUILTIN_VEC_PERM_V16QI_U : IX86_BUILTIN_VEC_PERM_V16QI;
32960 *mask_type = itype;
32961 return ix86_builtins[(int) fcode];
32964 /* Return a vector mode with twice as many elements as VMODE. */
32965 /* ??? Consider moving this to a table generated by genmodes.c. */
32967 static enum machine_mode
32968 doublesize_vector_mode (enum machine_mode vmode)
32972 case V2SFmode: return V4SFmode;
32973 case V1DImode: return V2DImode;
32974 case V2SImode: return V4SImode;
32975 case V4HImode: return V8HImode;
32976 case V8QImode: return V16QImode;
32978 case V2DFmode: return V4DFmode;
32979 case V4SFmode: return V8SFmode;
32980 case V2DImode: return V4DImode;
32981 case V4SImode: return V8SImode;
32982 case V8HImode: return V16HImode;
32983 case V16QImode: return V32QImode;
32985 case V4DFmode: return V8DFmode;
32986 case V8SFmode: return V16SFmode;
32987 case V4DImode: return V8DImode;
32988 case V8SImode: return V16SImode;
32989 case V16HImode: return V32HImode;
32990 case V32QImode: return V64QImode;
32993 gcc_unreachable ();
32997 /* Construct (set target (vec_select op0 (parallel perm))) and
32998 return true if that's a valid instruction in the active ISA. */
33001 expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
33003 rtx rperm[MAX_VECT_LEN], x;
33006 for (i = 0; i < nelt; ++i)
33007 rperm[i] = GEN_INT (perm[i]);
33009 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
33010 x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
33011 x = gen_rtx_SET (VOIDmode, target, x);
33014 if (recog_memoized (x) < 0)
33022 /* Similar, but generate a vec_concat from op0 and op1 as well. */
33025 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
33026 const unsigned char *perm, unsigned nelt)
33028 enum machine_mode v2mode;
33031 v2mode = doublesize_vector_mode (GET_MODE (op0));
33032 x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
33033 return expand_vselect (target, x, perm, nelt);
33036 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
33037 in terms of blendp[sd] / pblendw / pblendvb. */
33040 expand_vec_perm_blend (struct expand_vec_perm_d *d)
33042 enum machine_mode vmode = d->vmode;
33043 unsigned i, mask, nelt = d->nelt;
33044 rtx target, op0, op1, x;
33046 if (!TARGET_SSE4_1 || d->op0 == d->op1)
33048 if (!(GET_MODE_SIZE (vmode) == 16 || vmode == V4DFmode || vmode == V8SFmode))
33051 /* This is a blend, not a permute. Elements must stay in their
33052 respective lanes. */
33053 for (i = 0; i < nelt; ++i)
33055 unsigned e = d->perm[i];
33056 if (!(e == i || e == i + nelt))
33063 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
33064 decision should be extracted elsewhere, so that we only try that
33065 sequence once all budget==3 options have been tried. */
33067 /* For bytes, see if bytes move in pairs so we can use pblendw with
33068 an immediate argument, rather than pblendvb with a vector argument. */
33069 if (vmode == V16QImode)
33071 bool pblendw_ok = true;
33072 for (i = 0; i < 16 && pblendw_ok; i += 2)
33073 pblendw_ok = (d->perm[i] + 1 == d->perm[i + 1]);
33077 rtx rperm[16], vperm;
33079 for (i = 0; i < nelt; ++i)
33080 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
33082 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
33083 vperm = force_reg (V16QImode, vperm);
33085 emit_insn (gen_sse4_1_pblendvb (d->target, d->op0, d->op1, vperm));
33090 target = d->target;
33102 for (i = 0; i < nelt; ++i)
33103 mask |= (d->perm[i] >= nelt) << i;
33107 for (i = 0; i < 2; ++i)
33108 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
33112 for (i = 0; i < 4; ++i)
33113 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
33117 for (i = 0; i < 8; ++i)
33118 mask |= (d->perm[i * 2] >= 16) << i;
33122 target = gen_lowpart (vmode, target);
33123 op0 = gen_lowpart (vmode, op0);
33124 op1 = gen_lowpart (vmode, op1);
33128 gcc_unreachable ();
33131 /* This matches five different patterns, one for each of the modes. */
33132 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
33133 x = gen_rtx_SET (VOIDmode, target, x);
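/* Example (illustration): for V4SF and perm {0, 5, 2, 7} the per-element
   loop above yields mask == 0b1010, i.e. blendps takes elements 1 and 3
   from OP1 and elements 0 and 2 from OP0.  */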
33139 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
33140 in terms of the variable form of vpermilps.
33142 Note that we will have already failed the immediate input vpermilps,
33143 which requires that the high and low part shuffle be identical; the
33144 variable form doesn't require that. */
33147 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
33149 rtx rperm[8], vperm;
33152 if (!TARGET_AVX || d->vmode != V8SFmode || d->op0 != d->op1)
33155 /* We can only permute within the 128-bit lane. */
33156 for (i = 0; i < 8; ++i)
33158 unsigned e = d->perm[i];
33159 if (i < 4 ? e >= 4 : e < 4)
33166 for (i = 0; i < 8; ++i)
33168 unsigned e = d->perm[i];
33170 /* Within each 128-bit lane, the elements of op0 are numbered
33171 from 0 and the elements of op1 are numbered from 4. */
33177 rperm[i] = GEN_INT (e);
33180 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
33181 vperm = force_reg (V8SImode, vperm);
33182 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
33187 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
33188 in terms of pshufb or vpperm. */
33191 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
33193 unsigned i, nelt, eltsz;
33194 rtx rperm[16], vperm, target, op0, op1;
33196 if (!(d->op0 == d->op1 ? TARGET_SSSE3 : TARGET_XOP))
33198 if (GET_MODE_SIZE (d->vmode) != 16)
33205 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
33207 for (i = 0; i < nelt; ++i)
33209 unsigned j, e = d->perm[i];
33210 for (j = 0; j < eltsz; ++j)
33211 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
33214 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
33215 vperm = force_reg (V16QImode, vperm);
33217 target = gen_lowpart (V16QImode, d->target);
33218 op0 = gen_lowpart (V16QImode, d->op0);
33219 if (d->op0 == d->op1)
33220 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
33223 op1 = gen_lowpart (V16QImode, d->op1);
33224 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
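/* Example (illustration): the V4SI permutation {2, 1, 0, 3} expands to the
   byte-level control
     { 8,9,10,11, 4,5,6,7, 0,1,2,3, 12,13,14,15 }
   since each element e of size 4 contributes bytes e*4 .. e*4+3.  */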
33230 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
33231 in a single instruction. */
33234 expand_vec_perm_1 (struct expand_vec_perm_d *d)
33236 unsigned i, nelt = d->nelt;
33237 unsigned char perm2[MAX_VECT_LEN];
33239 /* Check plain VEC_SELECT first, because AVX has instructions that could
33240 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
33241 input where SEL+CONCAT may not. */
33242 if (d->op0 == d->op1)
33244 int mask = nelt - 1;
33246 for (i = 0; i < nelt; i++)
33247 perm2[i] = d->perm[i] & mask;
33249 if (expand_vselect (d->target, d->op0, perm2, nelt))
33252 /* There are plenty of patterns in sse.md that are written for
33253 SEL+CONCAT and are not replicated for a single op. Perhaps
33254 that should be changed, to avoid the nastiness here. */
33256 /* Recognize interleave style patterns, which means incrementing
33257 every other permutation operand. */
33258 for (i = 0; i < nelt; i += 2)
33260 perm2[i] = d->perm[i] & mask;
33261 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
33263 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
33266 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
33269 for (i = 0; i < nelt; i += 4)
33271 perm2[i + 0] = d->perm[i + 0] & mask;
33272 perm2[i + 1] = d->perm[i + 1] & mask;
33273 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
33274 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
33277 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
33282 /* Finally, try the fully general two operand permute. */
33283 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
33286 /* Recognize interleave style patterns with reversed operands. */
33287 if (d->op0 != d->op1)
33289 for (i = 0; i < nelt; ++i)
33291 unsigned e = d->perm[i];
33299 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
33303 /* Try the SSE4.1 blend variable merge instructions. */
33304 if (expand_vec_perm_blend (d))
33307 /* Try one of the AVX vpermil variable permutations. */
33308 if (expand_vec_perm_vpermil (d))
33311 /* Try the SSSE3 pshufb or XOP vpperm variable permutation. */
33312 if (expand_vec_perm_pshufb (d))
33318 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
33319 in terms of a pair of pshuflw + pshufhw instructions. */
33322 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
33324 unsigned char perm2[MAX_VECT_LEN];
33328 if (d->vmode != V8HImode || d->op0 != d->op1)
33331 /* The two permutations only operate in 64-bit lanes. */
33332 for (i = 0; i < 4; ++i)
33333 if (d->perm[i] >= 4)
33335 for (i = 4; i < 8; ++i)
33336 if (d->perm[i] < 4)
33342 /* Emit the pshuflw. */
33343 memcpy (perm2, d->perm, 4);
33344 for (i = 4; i < 8; ++i)
33346 ok = expand_vselect (d->target, d->op0, perm2, 8);
33349 /* Emit the pshufhw. */
33350 memcpy (perm2 + 4, d->perm + 4, 4);
33351 for (i = 0; i < 4; ++i)
33353 ok = expand_vselect (d->target, d->target, perm2, 8);
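/* Example (illustration): the V8HI permutation {2,0,3,1,6,4,7,5} becomes a
   pshuflw with control {2,0,3,1} on the low quadword followed by a pshufhw
   with control {6,4,7,5} on the high one.  */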
33359 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
33360 the permutation using the SSSE3 palignr instruction. This succeeds
33361 when all of the elements in PERM fit within one vector and we merely
33362 need to shift them down so that a single vector permutation has a
33363 chance to succeed. */
33366 expand_vec_perm_palignr (struct expand_vec_perm_d *d)
33368 unsigned i, nelt = d->nelt;
33373 /* Even with AVX, palignr only operates on 128-bit vectors. */
33374 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
33377 min = nelt, max = 0;
33378 for (i = 0; i < nelt; ++i)
33380 unsigned e = d->perm[i];
33386 if (min == 0 || max - min >= nelt)
33389 /* Given that we have SSSE3, we know we'll be able to implement the
33390 single operand permutation after the palignr with pshufb. */
33394 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
33395 emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode, d->target),
33396 gen_lowpart (TImode, d->op1),
33397 gen_lowpart (TImode, d->op0), shift));
33399 d->op0 = d->op1 = d->target;
33402 for (i = 0; i < nelt; ++i)
33404 unsigned e = d->perm[i] - min;
33410 /* Test for the degenerate case where the alignment by itself
33411 produces the desired permutation. */
33415 ok = expand_vec_perm_1 (d);
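/* Example (illustration): for the V4SI permutation {3, 4, 5, 6}, min == 3,
   so the palignr above shifts the OP1:OP0 concatenation right by 12 bytes;
   the residual permutation is then the identity {0, 1, 2, 3} and the
   degenerate-case test succeeds immediately.  */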
33421 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
33422 a two vector permutation into a single vector permutation by using
33423 an interleave operation to merge the vectors. */
33426 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
33428 struct expand_vec_perm_d dremap, dfinal;
33429 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
33430 unsigned contents, h1, h2, h3, h4;
33431 unsigned char remap[2 * MAX_VECT_LEN];
33435 if (d->op0 == d->op1)
33438 /* The 256-bit unpck[lh]p[sd] instructions only operate within the 128-bit
33439 lanes. We can use similar techniques with the vperm2f128 instruction,
33440 but it requires slightly different logic. */
33441 if (GET_MODE_SIZE (d->vmode) != 16)
33444 /* Examine from whence the elements come. */
33446 for (i = 0; i < nelt; ++i)
33447 contents |= 1u << d->perm[i];
33449 /* Split the two input vectors into 4 halves. */
33450 h1 = (1u << nelt2) - 1;
33455 memset (remap, 0xff, sizeof (remap));
33458 /* If the elements are all from the low halves, use interleave low;
33459 similarly for interleave high. If the elements are from mismatched halves, we
33460 can use shufps for V4SF/V4SI or do a DImode shuffle. */
33461 if ((contents & (h1 | h3)) == contents)
33463 for (i = 0; i < nelt2; ++i)
33466 remap[i + nelt] = i * 2 + 1;
33467 dremap.perm[i * 2] = i;
33468 dremap.perm[i * 2 + 1] = i + nelt;
33471 else if ((contents & (h2 | h4)) == contents)
33473 for (i = 0; i < nelt2; ++i)
33475 remap[i + nelt2] = i * 2;
33476 remap[i + nelt + nelt2] = i * 2 + 1;
33477 dremap.perm[i * 2] = i + nelt2;
33478 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
33481 else if ((contents & (h1 | h4)) == contents)
33483 for (i = 0; i < nelt2; ++i)
33486 remap[i + nelt + nelt2] = i + nelt2;
33487 dremap.perm[i] = i;
33488 dremap.perm[i + nelt2] = i + nelt + nelt2;
33492 dremap.vmode = V2DImode;
33494 dremap.perm[0] = 0;
33495 dremap.perm[1] = 3;
33498 else if ((contents & (h2 | h3)) == contents)
33500 for (i = 0; i < nelt2; ++i)
33502 remap[i + nelt2] = i;
33503 remap[i + nelt] = i + nelt2;
33504 dremap.perm[i] = i + nelt2;
33505 dremap.perm[i + nelt2] = i + nelt;
33509 dremap.vmode = V2DImode;
33511 dremap.perm[0] = 1;
33512 dremap.perm[1] = 2;
33518 /* Use the remapping array set up above to move the elements from their
33519 swizzled locations into their final destinations. */
33521 for (i = 0; i < nelt; ++i)
33523 unsigned e = remap[d->perm[i]];
33524 gcc_assert (e < nelt);
33525 dfinal.perm[i] = e;
33527 dfinal.op0 = gen_reg_rtx (dfinal.vmode);
33528 dfinal.op1 = dfinal.op0;
33529 dremap.target = dfinal.op0;
33531 /* Test if the final remap can be done with a single insn. For V4SFmode or
33532 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
33534 ok = expand_vec_perm_1 (&dfinal);
33535 seq = get_insns ();
33541 if (dremap.vmode != dfinal.vmode)
33543 dremap.target = gen_lowpart (dremap.vmode, dremap.target);
33544 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
33545 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
33548 ok = expand_vec_perm_1 (&dremap);
33555 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
33556 permutation with two pshufb insns and an ior. We should have already
33557 failed all two-instruction sequences. */
33560 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
33562 rtx rperm[2][16], vperm, l, h, op, m128;
33563 unsigned int i, nelt, eltsz;
33565 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
33567 gcc_assert (d->op0 != d->op1);
33570 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
33572 /* Generate two permutation masks. If the required element is within
33573 the given vector it is shuffled into the proper lane. If the required
33574 element is in the other vector, force a zero into the lane by setting
33575 bit 7 in the permutation mask. */
33576 m128 = GEN_INT (-128);
33577 for (i = 0; i < nelt; ++i)
33579 unsigned j, e = d->perm[i];
33580 unsigned which = (e >= nelt);
33584 for (j = 0; j < eltsz; ++j)
33586 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
33587 rperm[1-which][i*eltsz + j] = m128;
33591 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
33592 vperm = force_reg (V16QImode, vperm);
33594 l = gen_reg_rtx (V16QImode);
33595 op = gen_lowpart (V16QImode, d->op0);
33596 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
33598 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
33599 vperm = force_reg (V16QImode, vperm);
33601 h = gen_reg_rtx (V16QImode);
33602 op = gen_lowpart (V16QImode, d->op1);
33603 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
33605 op = gen_lowpart (V16QImode, d->target);
33606 emit_insn (gen_iorv16qi3 (op, l, h));
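/* Example (illustration): each byte wanted from the other operand gets 0x80
   (bit 7 set) in its mask, making pshufb write zero into that lane; the por
   above then merges the two half-results, e.g.
     L = { a0, 0, a2, 0, ... }   H = { 0, b1, 0, b3, ... }
     L | H = { a0, b1, a2, b3, ... }.  */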
33611 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
33612 and extract-odd permutations. */
33615 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
33622 t1 = gen_reg_rtx (V4DFmode);
33623 t2 = gen_reg_rtx (V4DFmode);
33625 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
33626 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
33627 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
33629 /* Now an unpck[lh]pd will produce the result required. */
33631 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
33633 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
33639 int mask = odd ? 0xdd : 0x88;
33641 t1 = gen_reg_rtx (V8SFmode);
33642 t2 = gen_reg_rtx (V8SFmode);
33643 t3 = gen_reg_rtx (V8SFmode);
33645 /* Shuffle within the 128-bit lanes to produce:
33646 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
33647 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
33650 /* Shuffle the lanes around to produce:
33651 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
33652 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
33655 /* Shuffle within the 128-bit lanes to produce:
33656 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
33657 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
33659 /* Shuffle within the 128-bit lanes to produce:
33660 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
33661 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
33663 /* Shuffle the lanes around to produce:
33664 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
33665 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
33674 /* These are always directly implementable by expand_vec_perm_1. */
33675 gcc_unreachable ();
33679 return expand_vec_perm_pshufb2 (d);
33682 /* We need 2*log2(N)-1 operations to achieve odd/even
33683 with interleave. */
33684 t1 = gen_reg_rtx (V8HImode);
33685 t2 = gen_reg_rtx (V8HImode);
33686 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
33687 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
33688 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
33689 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
33691 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
33693 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
33700 return expand_vec_perm_pshufb2 (d);
33703 t1 = gen_reg_rtx (V16QImode);
33704 t2 = gen_reg_rtx (V16QImode);
33705 t3 = gen_reg_rtx (V16QImode);
33706 emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
33707 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
33708 emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
33709 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
33710 emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
33711 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
33713 t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
33715 t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
33721 gcc_unreachable ();
33727 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
33728 extract-even and extract-odd permutations. */
33731 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
33733 unsigned i, odd, nelt = d->nelt;
33736 if (odd != 0 && odd != 1)
33739 for (i = 1; i < nelt; ++i)
33740 if (d->perm[i] != 2 * i + odd)
33743 return expand_vec_perm_even_odd_1 (d, odd);
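/* E.g. perm {0, 2, 4, 6} on a pair of V4SI operands is the even extraction
   (odd == 0) and {1, 3, 5, 7} the odd one.  */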
33746 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
33747 permutations. We assume that expand_vec_perm_1 has already failed. */
33750 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
33752 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
33753 enum machine_mode vmode = d->vmode;
33754 unsigned char perm2[4];
33762 /* These are special-cased in sse.md so that we can optionally
33763 use the vbroadcast instruction. They expand to two insns
33764 if the input happens to be in a register. */
33765 gcc_unreachable ();
33771 /* These are always implementable using standard shuffle patterns. */
33772 gcc_unreachable ();
33776 /* These can be implemented via interleave. We save one insn by
33777 stopping once we have promoted to V4SImode and then use pshufd. */
33780 optab otab = vec_interleave_low_optab;
33784 otab = vec_interleave_high_optab;
33789 op0 = expand_binop (vmode, otab, op0, op0, NULL, 0, OPTAB_DIRECT);
33790 vmode = get_mode_wider_vector (vmode);
33791 op0 = gen_lowpart (vmode, op0);
33793 while (vmode != V4SImode);
33795 memset (perm2, elt, 4);
33796 ok = expand_vselect (gen_lowpart (V4SImode, d->target), op0, perm2, 4);
33801 gcc_unreachable ();
33805 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
33806 broadcast permutations. */
33809 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
33811 unsigned i, elt, nelt = d->nelt;
33813 if (d->op0 != d->op1)
33817 for (i = 1; i < nelt; ++i)
33818 if (d->perm[i] != elt)
33821 return expand_vec_perm_broadcast_1 (d);
33824 /* The guts of ix86_expand_vec_perm_builtin, also used by the ok hook.
33825 With all of the interface bits taken care of, perform the expansion
33826 in D and return true on success. */
33829 ix86_expand_vec_perm_builtin_1 (struct expand_vec_perm_d *d)
33831 /* Try a single instruction expansion. */
33832 if (expand_vec_perm_1 (d))
33835 /* Try sequences of two instructions. */
33837 if (expand_vec_perm_pshuflw_pshufhw (d))
33840 if (expand_vec_perm_palignr (d))
33843 if (expand_vec_perm_interleave2 (d))
33846 if (expand_vec_perm_broadcast (d))
33849 /* Try sequences of three instructions. */
33851 if (expand_vec_perm_pshufb2 (d))
33854 /* ??? Look for narrow permutations whose element orderings would
33855 allow the promotion to a wider mode. */
33857 /* ??? Look for sequences of interleave or a wider permute that place
33858 the data into the correct lanes for a half-vector shuffle like
33859 pshuf[lh]w or vpermilps. */
33861 /* ??? Look for sequences of interleave that produce the desired results.
33862 The combinatorics of punpck[lh] get pretty ugly... */
33864 if (expand_vec_perm_even_odd (d))
33870 /* Extract the values from the vector CST into the permutation array in D.
33871 Return 0 on error, 1 if all values from the permutation come from the
33872 first vector, 2 if all values from the second vector, and 3 otherwise. */
33875 extract_vec_perm_cst (struct expand_vec_perm_d *d, tree cst)
33877 tree list = TREE_VECTOR_CST_ELTS (cst);
33878 unsigned i, nelt = d->nelt;
33881 for (i = 0; i < nelt; ++i, list = TREE_CHAIN (list))
33883 unsigned HOST_WIDE_INT e;
33885 if (!host_integerp (TREE_VALUE (list), 1))
33887 e = tree_low_cst (TREE_VALUE (list), 1);
33891 ret |= (e < nelt ? 1 : 2);
33894 gcc_assert (list == NULL);
33896 /* If all elements are from the second vector, fold them into the first. */
33898 for (i = 0; i < nelt; ++i)
33899 d->perm[i] -= nelt;
33905 ix86_expand_vec_perm_builtin (tree exp)
33907 struct expand_vec_perm_d d;
33908 tree arg0, arg1, arg2;
33910 arg0 = CALL_EXPR_ARG (exp, 0);
33911 arg1 = CALL_EXPR_ARG (exp, 1);
33912 arg2 = CALL_EXPR_ARG (exp, 2);
33914 d.vmode = TYPE_MODE (TREE_TYPE (arg0));
33915 d.nelt = GET_MODE_NUNITS (d.vmode);
33916 d.testing_p = false;
33917 gcc_assert (VECTOR_MODE_P (d.vmode));
33919 if (TREE_CODE (arg2) != VECTOR_CST)
33921 error_at (EXPR_LOCATION (exp),
33922 "vector permutation requires vector constant");
33926 switch (extract_vec_perm_cst (&d, arg2))
33932 error_at (EXPR_LOCATION (exp), "invalid vector permutation constant");
33936 if (!operand_equal_p (arg0, arg1, 0))
33938 d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
33939 d.op0 = force_reg (d.vmode, d.op0);
33940 d.op1 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
33941 d.op1 = force_reg (d.vmode, d.op1);
33945 /* The elements of PERM do not suggest that only the first operand
33946 is used, but both operands are identical. Allow easier matching
33947 of the permutation by folding the permutation into the single
33948 input vector. */
33950 unsigned i, nelt = d.nelt;
33951 for (i = 0; i < nelt; ++i)
33952 if (d.perm[i] >= nelt)
33958 d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
33959 d.op0 = force_reg (d.vmode, d.op0);
33964 d.op0 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
33965 d.op0 = force_reg (d.vmode, d.op0);
33970 d.target = gen_reg_rtx (d.vmode);
33971 if (ix86_expand_vec_perm_builtin_1 (&d))
33974 /* For compiler-generated permutations we should never get here, because
33975 the compiler should also be checking the ok hook. But since this is a
33976 builtin the user has access to, don't abort. */
33980 sorry ("vector permutation (%d %d)", d.perm[0], d.perm[1]);
33983 sorry ("vector permutation (%d %d %d %d)",
33984 d.perm[0], d.perm[1], d.perm[2], d.perm[3]);
33987 sorry ("vector permutation (%d %d %d %d %d %d %d %d)",
33988 d.perm[0], d.perm[1], d.perm[2], d.perm[3],
33989 d.perm[4], d.perm[5], d.perm[6], d.perm[7]);
33992 sorry ("vector permutation "
33993 "(%d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d)",
33994 d.perm[0], d.perm[1], d.perm[2], d.perm[3],
33995 d.perm[4], d.perm[5], d.perm[6], d.perm[7],
33996 d.perm[8], d.perm[9], d.perm[10], d.perm[11],
33997 d.perm[12], d.perm[13], d.perm[14], d.perm[15]);
34000 gcc_unreachable ();
34003 return CONST0_RTX (d.vmode);
34006 /* Implement targetm.vectorize.builtin_vec_perm_ok. */
34009 ix86_vectorize_builtin_vec_perm_ok (tree vec_type, tree mask)
34011 struct expand_vec_perm_d d;
34015 d.vmode = TYPE_MODE (vec_type);
34016 d.nelt = GET_MODE_NUNITS (d.vmode);
34017 d.testing_p = true;
34019 /* Given sufficient ISA support we can just return true here
34020 for selected vector modes. */
34021 if (GET_MODE_SIZE (d.vmode) == 16)
34023 /* All implementable with a single vpperm insn. */
34026 /* All implementable with 2 pshufb + 1 ior. */
34029 /* All implementable with shufpd or unpck[lh]pd. */
34034 vec_mask = extract_vec_perm_cst (&d, mask);
34036 /* This hook cannot be called in response to something that the
34037 user does (unlike the builtin expander), so we should never see
34038 an error generated from the extract. */
34039 gcc_assert (vec_mask > 0 && vec_mask <= 3);
34040 one_vec = (vec_mask != 3);
34042 /* Implementable with shufps or pshufd. */
34043 if (one_vec && (d.vmode == V4SFmode || d.vmode == V4SImode))
34046 /* Otherwise we have to go through the motions and see if we can
34047 figure out how to generate the requested permutation. */
34048 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
34049 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
34051 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
34054 ret = ix86_expand_vec_perm_builtin_1 (&d);
34061 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
34063 struct expand_vec_perm_d d;
34069 d.vmode = GET_MODE (targ);
34070 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
34071 d.testing_p = false;
34073 for (i = 0; i < nelt; ++i)
34074 d.perm[i] = i * 2 + odd;
34076 /* We'll either be able to implement the permutation directly... */
34077 if (expand_vec_perm_1 (&d))
34080 /* ... or we use the special-case patterns. */
34081 expand_vec_perm_even_odd_1 (&d, odd);
34084 /* Expand an insert into a vector register through pinsr insn.
34085 Return true if successful. */
34088 ix86_expand_pinsr (rtx *operands)
34090 rtx dst = operands[0];
34091 rtx src = operands[3];
34093 unsigned int size = INTVAL (operands[1]);
34094 unsigned int pos = INTVAL (operands[2]);
34096 if (GET_CODE (dst) == SUBREG)
34098 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
34099 dst = SUBREG_REG (dst);
34102 if (GET_CODE (src) == SUBREG)
34103 src = SUBREG_REG (src);
34105 switch (GET_MODE (dst))
34112 enum machine_mode srcmode, dstmode;
34113 rtx (*pinsr)(rtx, rtx, rtx, rtx);
34115 srcmode = mode_for_size (size, MODE_INT, 0);
34120 if (!TARGET_SSE4_1)
34122 dstmode = V16QImode;
34123 pinsr = gen_sse4_1_pinsrb;
34129 dstmode = V8HImode;
34130 pinsr = gen_sse2_pinsrw;
34134 if (!TARGET_SSE4_1)
34136 dstmode = V4SImode;
34137 pinsr = gen_sse4_1_pinsrd;
34141 gcc_assert (TARGET_64BIT);
34142 if (!TARGET_SSE4_1)
34144 dstmode = V2DImode;
34145 pinsr = gen_sse4_1_pinsrq;
34152 dst = gen_lowpart (dstmode, dst);
34153 src = gen_lowpart (srcmode, src);
34157 emit_insn (pinsr (dst, dst, src, GEN_INT (1 << pos)));
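/* Example (illustration, with POS scaled from bits to elements before the
   emit): inserting a 16-bit value into bits 48..63 of a V8HI destination
   selects element 3, i.e. pinsrw with immediate 1 << 3 == 8.  */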
34166 /* This function returns the calling-ABI-specific va_list type node,
34167 i.e. the va_list type specific to FNDECL. */
34170 ix86_fn_abi_va_list (tree fndecl)
34173 return va_list_type_node;
34174 gcc_assert (fndecl != NULL_TREE);
34176 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
34177 return ms_va_list_type_node;
34179 return sysv_va_list_type_node;
34182 /* Returns the canonical va_list type specified by TYPE. If there
34183 is no valid TYPE provided, it returns NULL_TREE. */
34186 ix86_canonical_va_list_type (tree type)
34190 /* Resolve references and pointers to va_list type. */
34191 if (TREE_CODE (type) == MEM_REF)
34192 type = TREE_TYPE (type);
34193 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
34194 type = TREE_TYPE (type);
34195 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
34196 type = TREE_TYPE (type);
34198 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
34200 wtype = va_list_type_node;
34201 gcc_assert (wtype != NULL_TREE);
34203 if (TREE_CODE (wtype) == ARRAY_TYPE)
34205 /* If va_list is an array type, the argument may have decayed
34206 to a pointer type, e.g. by being passed to another function.
34207 In that case, unwrap both types so that we can compare the
34208 underlying records. */
34209 if (TREE_CODE (htype) == ARRAY_TYPE
34210 || POINTER_TYPE_P (htype))
34212 wtype = TREE_TYPE (wtype);
34213 htype = TREE_TYPE (htype);
34216 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
34217 return va_list_type_node;
34218 wtype = sysv_va_list_type_node;
34219 gcc_assert (wtype != NULL_TREE);
34221 if (TREE_CODE (wtype) == ARRAY_TYPE)
34223 /* If va_list is an array type, the argument may have decayed
34224 to a pointer type, e.g. by being passed to another function.
34225 In that case, unwrap both types so that we can compare the
34226 underlying records. */
34227 if (TREE_CODE (htype) == ARRAY_TYPE
34228 || POINTER_TYPE_P (htype))
34230 wtype = TREE_TYPE (wtype);
34231 htype = TREE_TYPE (htype);
34234 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
34235 return sysv_va_list_type_node;
34236 wtype = ms_va_list_type_node;
34237 gcc_assert (wtype != NULL_TREE);
34239 if (TREE_CODE (wtype) == ARRAY_TYPE)
34241 /* If va_list is an array type, the argument may have decayed
34242 to a pointer type, e.g. by being passed to another function.
34243 In that case, unwrap both types so that we can compare the
34244 underlying records. */
34245 if (TREE_CODE (htype) == ARRAY_TYPE
34246 || POINTER_TYPE_P (htype))
34248 wtype = TREE_TYPE (wtype);
34249 htype = TREE_TYPE (htype);
34252 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
34253 return ms_va_list_type_node;
34256 return std_canonical_va_list_type (type);
34259 /* Iterate through the target-specific builtin types for va_list.
34260 IDX denotes the iterator, *PTREE is set to the result type of
34261 the va_list builtin, and *PNAME to its internal type.
34262 Returns zero if there is no element for this index, otherwise
34263 IDX should be increased upon the next call.
34264 Note that a base builtin's name like __builtin_va_list is not iterated.
34265 Used from c_common_nodes_and_builtins. */
34268 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
34278 *ptree = ms_va_list_type_node;
34279 *pname = "__builtin_ms_va_list";
34283 *ptree = sysv_va_list_type_node;
34284 *pname = "__builtin_sysv_va_list";
34292 #undef TARGET_SCHED_DISPATCH
34293 #define TARGET_SCHED_DISPATCH has_dispatch
34294 #undef TARGET_SCHED_DISPATCH_DO
34295 #define TARGET_SCHED_DISPATCH_DO do_dispatch
34297 /* The size of the dispatch window is the total number of bytes of
34298 object code allowed in a window. */
34299 #define DISPATCH_WINDOW_SIZE 16
34301 /* Number of dispatch windows considered for scheduling. */
34302 #define MAX_DISPATCH_WINDOWS 3
34304 /* Maximum number of instructions in a window. */
34307 /* Maximum number of immediate operands in a window. */
34310 /* Maximum number of immediate bits allowed in a window. */
34311 #define MAX_IMM_SIZE 128
34313 /* Maximum number of 32 bit immediates allowed in a window. */
34314 #define MAX_IMM_32 4
34316 /* Maximum number of 64 bit immediates allowed in a window. */
34317 #define MAX_IMM_64 2
34319 /* Maximum total of loads or prefetches allowed in a window. */
34322 /* Maximum total of stores allowed in a window. */
34323 #define MAX_STORE 1
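/* Arithmetic check (illustration): two 64-bit immediates use 2 * 64 == 128
   bits, exactly exhausting MAX_IMM_SIZE as well as MAX_IMM_64; four 32-bit
   immediates use 4 * 32 == 128 bits and likewise exhaust both MAX_IMM_32
   and MAX_IMM_SIZE.  */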
34329 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
34330 enum dispatch_group {
34345 /* Number of allowable groups in a dispatch window. It is an array
34346 indexed by dispatch_group enum. 100 is used as a big number,
34347 because the number of these kinds of operations does not have any
34348 effect in a dispatch window, but we need them for other reasons in
34349 the table. */
34350 static unsigned int num_allowable_groups[disp_last] = {
34351 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
34354 char group_name[disp_last + 1][16] = {
34355 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
34356 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
34357 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
34360 /* Instruction path. */
34363 path_single, /* Single micro op. */
34364 path_double, /* Double micro op. */
34365 path_multi, /* Instructions with more than 2 micro ops. */
34369 /* sched_insn_info defines a window to the instructions scheduled in
34370 the basic block. It contains a pointer to the insn_info table and
34371 the instruction scheduled.
34373 Windows are allocated for each basic block and are linked
34374 together. */
34375 typedef struct sched_insn_info_s {
34377 enum dispatch_group group;
34378 enum insn_path path;
34383 /* Linked list of dispatch windows. This is a two way list of
34384 dispatch windows of a basic block. It contains information about
34385 the number of uops in the window and the total number of
34386 instructions and of bytes in the object code for this dispatch
34387 window. */
34388 typedef struct dispatch_windows_s {
34389 int num_insn; /* Number of insn in the window. */
34390 int num_uops; /* Number of uops in the window. */
34391 int window_size; /* Number of bytes in the window. */
34392 int window_num; /* Window number, either 0 or 1. */
34393 int num_imm; /* Number of immediates in an insn. */
34394 int num_imm_32; /* Number of 32 bit immediates in an insn. */
34395 int num_imm_64; /* Number of 64 bit immediates in an insn. */
34396 int imm_size; /* Total immediates in the window. */
34397 int num_loads; /* Total memory loads in the window. */
34398 int num_stores; /* Total memory stores in the window. */
34399 int violation; /* Violation exists in window. */
34400 sched_insn_info *window; /* Pointer to the window. */
34401 struct dispatch_windows_s *next;
34402 struct dispatch_windows_s *prev;
34403 } dispatch_windows;
34405 /* Immediate values used in an insn. */
34406 typedef struct imm_info_s
34413 static dispatch_windows *dispatch_window_list;
34414 static dispatch_windows *dispatch_window_list1;
34416 /* Get dispatch group of insn. */
34418 static enum dispatch_group
34419 get_mem_group (rtx insn)
34421 enum attr_memory memory;
34423 if (INSN_CODE (insn) < 0)
34424 return disp_no_group;
34425 memory = get_attr_memory (insn);
34426 if (memory == MEMORY_STORE)
34429 if (memory == MEMORY_LOAD)
34432 if (memory == MEMORY_BOTH)
34433 return disp_load_store;
34435 return disp_no_group;
34438 /* Return true if insn is a compare instruction. */
34443 enum attr_type type;
34445 type = get_attr_type (insn);
34446 return (type == TYPE_TEST
34447 || type == TYPE_ICMP
34448 || type == TYPE_FCMP
34449 || GET_CODE (PATTERN (insn)) == COMPARE);
34452 /* Return true if a dispatch violation was encountered. */
34455 dispatch_violation (void)
34457 if (dispatch_window_list->next)
34458 return dispatch_window_list->next->violation;
34459 return dispatch_window_list->violation;
34462 /* Return true if insn is a branch instruction. */
34465 is_branch (rtx insn)
34467 return (CALL_P (insn) || JUMP_P (insn));
34470 /* Return true if insn is a prefetch instruction. */
34473 is_prefetch (rtx insn)
34475 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
34478 /* This function initializes a dispatch window and the list container holding a
34479 pointer to the window. */
34482 init_window (int window_num)
34485 dispatch_windows *new_list;
34487 if (window_num == 0)
34488 new_list = dispatch_window_list;
34490 new_list = dispatch_window_list1;
34492 new_list->num_insn = 0;
34493 new_list->num_uops = 0;
34494 new_list->window_size = 0;
34495 new_list->next = NULL;
34496 new_list->prev = NULL;
34497 new_list->window_num = window_num;
34498 new_list->num_imm = 0;
34499 new_list->num_imm_32 = 0;
34500 new_list->num_imm_64 = 0;
34501 new_list->imm_size = 0;
34502 new_list->num_loads = 0;
34503 new_list->num_stores = 0;
34504 new_list->violation = false;
34506 for (i = 0; i < MAX_INSN; i++)
34508 new_list->window[i].insn = NULL;
34509 new_list->window[i].group = disp_no_group;
34510 new_list->window[i].path = no_path;
34511 new_list->window[i].byte_len = 0;
34512 new_list->window[i].imm_bytes = 0;
34517 /* This function allocates and initializes a dispatch window and the
34518 list container holding a pointer to the window. */
34520 static dispatch_windows *
34521 allocate_window (void)
34523 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
34524 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
34529 /* This routine initializes the dispatch scheduling information. It
34530 initiates building dispatch scheduler tables and constructs the
34531 first dispatch window. */
34534 init_dispatch_sched (void)
34536 /* Allocate a dispatch list and a window. */
34537 dispatch_window_list = allocate_window ();
34538 dispatch_window_list1 = allocate_window ();
34543 /* This function returns true if a branch is detected. End of a basic block
34544 does not have to be a branch, but here we assume only branches end a
34545 window. */
34548 is_end_basic_block (enum dispatch_group group)
34550 return group == disp_branch;
34553 /* This function is called when the end of a window processing is reached. */
34555 static void
34556 process_end_window (void)
34557 {
34558   gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
34559   if (dispatch_window_list->next)
34560     {
34561       gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
34562       gcc_assert (dispatch_window_list->window_size
34563 		  + dispatch_window_list1->window_size <= 48);
34564       init_window (1);
34565     }
34566   init_window (0);
34567 }
34569 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
34570    WINDOW_NUM is either 0 or 1.  A maximum of two windows are generated
34571    for 48 bytes of instructions.  Note that these windows are not dispatch
34572    windows whose size is DISPATCH_WINDOW_SIZE.  */
34574 static dispatch_windows *
34575 allocate_next_window (int window_num)
34576 {
34577   if (window_num == 0)
34578     {
34579       if (dispatch_window_list->next)
34580 	init_window (1);
34581       init_window (0);
34582       return dispatch_window_list;
34583     }
34585   dispatch_window_list->next = dispatch_window_list1;
34586   dispatch_window_list1->prev = dispatch_window_list;
34588   return dispatch_window_list1;
34589 }
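/* Illustrative sketch (added commentary, not part of the original sources)
   of how the two windows chain together: window 0 is always the head of
   the list; allocate_next_window (1) links window 1 behind it via the
   next/prev pointers, and process_end_window asserts that the pair never
   holds more than 48 bytes of instructions between them.  */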
34591 /* Increment the number of immediate operands of an instruction.  */
34593 static int
34594 find_constant_1 (rtx *in_rtx, imm_info *imm_values)
34595 {
34596   if (!*in_rtx)
34597     return 0;
34599   switch ( GET_CODE (*in_rtx))
34600     {
34601     case CONST:
34602     case SYMBOL_REF:
34603     case CONST_INT:
34604       (imm_values->imm)++;
34605       if (x86_64_immediate_operand (*in_rtx, SImode))
34606 	(imm_values->imm32)++;
34607       else
34608 	(imm_values->imm64)++;
34609       break;
34611     case CONST_DOUBLE:
34612       (imm_values->imm)++;
34613       (imm_values->imm64)++;
34614       break;
34616     case CODE_LABEL:
34617       if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
34618 	{
34619 	  (imm_values->imm)++;
34620 	  (imm_values->imm32)++;
34621 	}
34622       break;
34624     default:
34625       break;
34626     }
34628   return 0;
34629 }
34631 /* Compute number of immediate operands of an instruction. */
34633 static void
34634 find_constant (rtx in_rtx, imm_info *imm_values)
34635 {
34636   for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
34637 		(rtx_function) find_constant_1, (void *) imm_values);
34638 }
34640 /* Return total size of immediate operands of an instruction along with number
34641    of corresponding immediate-operands.  It initializes its parameters to zero
34642    before calling FIND_CONSTANT.
34643    INSN is the input instruction.  IMM is the total of immediates.
34644    IMM32 is the number of 32 bit immediates.  IMM64 is the number of 64
34645    bit immediates.  */
34647 static int
34648 get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
34649 {
34650 imm_info imm_values = {0, 0, 0};
34652 find_constant (insn, &imm_values);
34653 *imm = imm_values.imm;
34654 *imm32 = imm_values.imm32;
34655 *imm64 = imm_values.imm64;
34656   return imm_values.imm32 * 4 + imm_values.imm64 * 8;
34657 }
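/* Worked example (added commentary, not from the original sources): an
   insn with a single immediate that x86_64_immediate_operand accepts in
   SImode yields *imm = 1, *imm32 = 1, *imm64 = 0, so the function
   returns 1 * 4 + 0 * 8 = 4 bytes; a single 64bit immediate instead
   yields 0 * 4 + 1 * 8 = 8 bytes.  */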
34659 /* This function indicates if an operand of an instruction is an
34660    immediate.  */
34662 static bool
34663 has_immediate (rtx insn)
34664 {
34665   int num_imm_operand;
34666   int num_imm32_operand;
34667   int num_imm64_operand;
34669   if (insn)
34670     return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
34671 			       &num_imm64_operand);
34672   return false;
34673 }
34675 /* Return single or double path for instructions.  */
34677 static enum insn_path
34678 get_insn_path (rtx insn)
34679 {
34680   enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
34682   if ((int)path == 0)
34683     return path_single;
34685   if ((int)path == 1)
34686     return path_double;
34688   return path_multi;
34689 }
34691 /* Return insn dispatch group.  */
34693 static enum dispatch_group
34694 get_insn_group (rtx insn)
34695 {
34696   enum dispatch_group group = get_mem_group (insn);
34697   if (group)
34698     return group;
34700   if (is_branch (insn))
34701     return disp_branch;
34703   if (is_cmp (insn))
34704     return disp_cmp;
34706   if (has_immediate (insn))
34707     return disp_imm;
34709   if (is_prefetch (insn))
34710     return disp_prefetch;
34712   return disp_no_group;
34713 }
34715 /* Count number of GROUP restricted instructions in a dispatch
34716 window WINDOW_LIST. */
34718 static int
34719 count_num_restricted (rtx insn, dispatch_windows *window_list)
34720 {
34721   enum dispatch_group group = get_insn_group (insn);
34722   int imm_size;
34723   int num_imm_operand;
34724 int num_imm32_operand;
34725 int num_imm64_operand;
34727   if (group == disp_no_group)
34728     return 0;
34730   if (group == disp_imm)
34731     {
34732 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
34733 &num_imm64_operand);
34734 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
34735 || num_imm_operand + window_list->num_imm > MAX_IMM
34736 || (num_imm32_operand > 0
34737 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
34738 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
34739 || (num_imm64_operand > 0
34740 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
34741 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
34742 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
34743 && num_imm64_operand > 0
34744 && ((window_list->num_imm_64 > 0
34745 && window_list->num_insn >= 2)
34746 		  || window_list->num_insn >= 3)))
34747 	return BIG;
34749       return 1;
34750     }
34752   if ((group == disp_load_store
34753 && (window_list->num_loads >= MAX_LOAD
34754 || window_list->num_stores >= MAX_STORE))
34755 || ((group == disp_load
34756 || group == disp_prefetch)
34757 && window_list->num_loads >= MAX_LOAD)
34758 || (group == disp_store
34759 	  && window_list->num_stores >= MAX_STORE))
34760     return BIG;
34762   return 1;
34763 }
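/* Illustrative trace (added commentary, not from the original sources):
   once a window has already counted MAX_LOAD loads, another disp_load or
   disp_prefetch insn makes this function return BIG, which exceeds any
   entry of num_allowable_groups and therefore forces
   fits_dispatch_window to reject the insn for this window.  */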
34765 /* This function returns true if insn satisfies dispatch rules on the
34766 last window scheduled. */
34768 static bool
34769 fits_dispatch_window (rtx insn)
34770 {
34771 dispatch_windows *window_list = dispatch_window_list;
34772 dispatch_windows *window_list_next = dispatch_window_list->next;
34773 unsigned int num_restrict;
34774 enum dispatch_group group = get_insn_group (insn);
34775   enum insn_path path = get_insn_path (insn);
34776   int sum;
34778   /* Make disp_cmp and disp_jcc get scheduled at the latest.  These
34779      instructions should be given the lowest priority in the
34780      scheduling process in the Haifa scheduler to make sure they will
34781      be scheduled in the same dispatch window as the reference to them.  */
34782   if (group == disp_jcc || group == disp_cmp)
34783     return false;
34785   /* Check nonrestricted.  */
34786   if (group == disp_no_group || group == disp_branch)
34787     return true;
34789 /* Get last dispatch window. */
34790 if (window_list_next)
34791 window_list = window_list_next;
34793   if (window_list->window_num == 1)
34794     {
34795       sum = window_list->prev->window_size + window_list->window_size;
34797       if (sum == 32
34798 	  || (min_insn_size (insn) + sum) >= 48)
34799 	/* Window 1 is full.  Go for next window.  */
34800 	return true;
34801     }
34803 num_restrict = count_num_restricted (insn, window_list);
34805   if (num_restrict > num_allowable_groups[group])
34806     return false;
34808 /* See if it fits in the first window. */
34809   if (window_list->window_num == 0)
34810     {
34811       /* The first window should have only single and double path
34812 	 uop instructions.  */
34813       if (path == path_double
34814 	  && (window_list->num_uops + 2) > MAX_INSN)
34815 	return false;
34816       else if (path != path_single)
34817 	return false;
34818     }
34819   return true;
34820 }
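/* Summary of the checks above (added commentary, not from the original
   sources): (1) compares and conditional jumps are deliberately reported
   as not fitting, so the Haifa scheduler keeps them back; (2)
   unrestricted insns and branches always fit; (3) otherwise the
   restricted-group count is checked against num_allowable_groups; and
   (4) window 0 additionally accepts only single and double path
   uops.  */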
34822 /* Add an instruction INSN with NUM_UOPS micro-operations to the
34823 dispatch window WINDOW_LIST. */
34825 static void
34826 add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
34827 {
34828   int byte_len = min_insn_size (insn);
34829   int num_insn = window_list->num_insn;
34830   int imm_size;
34831   sched_insn_info *window = window_list->window;
34832 enum dispatch_group group = get_insn_group (insn);
34833 enum insn_path path = get_insn_path (insn);
34834 int num_imm_operand;
34835 int num_imm32_operand;
34836 int num_imm64_operand;
34838 if (!window_list->violation && group != disp_cmp
34839 && !fits_dispatch_window (insn))
34840 window_list->violation = true;
34842 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
34843 &num_imm64_operand);
34845 /* Initialize window with new instruction. */
34846 window[num_insn].insn = insn;
34847 window[num_insn].byte_len = byte_len;
34848 window[num_insn].group = group;
34849 window[num_insn].path = path;
34850 window[num_insn].imm_bytes = imm_size;
34852 window_list->window_size += byte_len;
34853 window_list->num_insn = num_insn + 1;
34854 window_list->num_uops = window_list->num_uops + num_uops;
34855 window_list->imm_size += imm_size;
34856 window_list->num_imm += num_imm_operand;
34857 window_list->num_imm_32 += num_imm32_operand;
34858 window_list->num_imm_64 += num_imm64_operand;
34860 if (group == disp_store)
34861 window_list->num_stores += 1;
34862 else if (group == disp_load
34863 || group == disp_prefetch)
34864 window_list->num_loads += 1;
34865   else if (group == disp_load_store)
34866     {
34867       window_list->num_stores += 1;
34868       window_list->num_loads += 1;
34869     }
34870 }
34872 /* Adds a scheduled instruction, INSN, to the current dispatch window.
34873    If the total bytes of instructions or the number of instructions in
34874    the window exceeds the allowable limits, it allocates a new window.  */
34876 static void
34877 add_to_dispatch_window (rtx insn)
34878 {
34879   int byte_len;
34880   dispatch_windows *window_list;
34881   dispatch_windows *next_list;
34882   dispatch_windows *window0_list;
34883   enum insn_path path;
34884   enum dispatch_group insn_group;
34885   bool insn_fits;
34886   int num_insn;
34887   int num_uops;
34888   int window_num;
34889   int insn_num_uops;
34890   int sum;
34892   if (INSN_CODE (insn) < 0)
34893     return;
34895 byte_len = min_insn_size (insn);
34896 window_list = dispatch_window_list;
34897 next_list = window_list->next;
34898 path = get_insn_path (insn);
34899 insn_group = get_insn_group (insn);
34901   /* Get the last dispatch window.  */
34902   if (next_list)
34903     window_list = dispatch_window_list->next;
34905   if (path == path_single)
34906     insn_num_uops = 1;
34907   else if (path == path_double)
34908     insn_num_uops = 2;
34909   else
34910     insn_num_uops = (int) path;
34912   /* If current window is full, get a new window.
34913      Window number zero is full, if MAX_INSN uops are scheduled in it.
34914      Window number one is full, if window zero's bytes plus window
34915      one's bytes total 32, or if the bytes of the new instruction
34916      added to that total reach or exceed 48, or if it already has
34917      MAX_INSN instructions in it.  */
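  /* Worked example (added commentary, not from the original sources):
     if window 0 ends with 20 bytes and window 1 holds 12 bytes, the sum
     is 32 and window 1 is treated as full; similarly a 5 byte insn
     added to a 44 byte pair reaches the 48 byte cap, and
     process_end_window starts a fresh window 0.  */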
34918 num_insn = window_list->num_insn;
34919 num_uops = window_list->num_uops;
34920 window_num = window_list->window_num;
34921 insn_fits = fits_dispatch_window (insn);
34923   if (num_insn >= MAX_INSN
34924       || num_uops + insn_num_uops > MAX_INSN
34925       || !(insn_fits))
34926     {
34927       window_num = ~window_num & 1;
34928       window_list = allocate_next_window (window_num);
34929     }
34931   if (window_num == 0)
34932     {
34933       add_insn_window (insn, window_list, insn_num_uops);
34934       if (window_list->num_insn >= MAX_INSN
34935 	  && insn_group == disp_branch)
34936 	{
34937 	  process_end_window ();
34938 	  return;
34939 	}
34940     }
34941   else if (window_num == 1)
34942     {
34943       window0_list = window_list->prev;
34944       sum = window0_list->window_size + window_list->window_size;
34945       if (sum == 32
34946 	  || (byte_len + sum) >= 48)
34947 	{
34948 	  process_end_window ();
34949 	  window_list = dispatch_window_list;
34950 	}
34952       add_insn_window (insn, window_list, insn_num_uops);
34953     }
34954   else
34955     gcc_unreachable ();
34957   if (is_end_basic_block (insn_group))
34958     {
34959       /* End of basic block is reached; do end-basic-block processing.  */
34960       process_end_window ();
34961       return;
34962     }
34963 }
34965 /* Print the dispatch window, WINDOW_NUM, to FILE. */
34967 DEBUG_FUNCTION static void
34968 debug_dispatch_window_file (FILE *file, int window_num)
34969 {
34970   dispatch_windows *list;
34971   int i;
34973   if (window_num == 0)
34974     list = dispatch_window_list;
34975   else
34976     list = dispatch_window_list1;
34978 fprintf (file, "Window #%d:\n", list->window_num);
34979 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
34980 list->num_insn, list->num_uops, list->window_size);
34981 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
34982 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
34984   fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
34985 	   list->num_stores);
34986 fprintf (file, " insn info:\n");
34988   for (i = 0; i < MAX_INSN; i++)
34989     {
34990       if (!list->window[i].insn)
34991 	break;
34992 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
34993 i, group_name[list->window[i].group],
34994 i, (void *)list->window[i].insn,
34995 i, list->window[i].path,
34996 i, list->window[i].byte_len,
34997 	       i, list->window[i].imm_bytes);
34998     }
34999 }
35001 /* Print to stdout a dispatch window. */
35003 DEBUG_FUNCTION void
35004 debug_dispatch_window (int window_num)
35005 {
35006   debug_dispatch_window_file (stdout, window_num);
35007 }
35009 /* Print INSN dispatch information to FILE. */
35011 DEBUG_FUNCTION static void
35012 debug_insn_dispatch_info_file (FILE *file, rtx insn)
35013 {
35014   int byte_len;
35015   enum insn_path path;
35016   enum dispatch_group group;
35017   int imm_size;
35018   int num_imm_operand;
35019   int num_imm32_operand;
35020   int num_imm64_operand;
35022   if (INSN_CODE (insn) < 0)
35023     return;
35025 byte_len = min_insn_size (insn);
35026 path = get_insn_path (insn);
35027 group = get_insn_group (insn);
35028 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
35029 &num_imm64_operand);
35031 fprintf (file, " insn info:\n");
35032 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
35033 group_name[group], path, byte_len);
35034 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
35035 	   num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
35036 }
35038 /* Print to stdout the status of the ready list with respect to
35039    dispatch windows.  */
35041 DEBUG_FUNCTION void
35042 debug_ready_dispatch (void)
35043 {
35044   int i;
35045   int no_ready = number_in_ready ();
35047 fprintf (stdout, "Number of ready: %d\n", no_ready);
35049 for (i = 0; i < no_ready; i++)
35050     debug_insn_dispatch_info_file (stdout, get_ready_element (i));
35051 }
35053 /* This routine is the driver of the dispatch scheduler. */
35055 static void
35056 do_dispatch (rtx insn, int mode)
35057 {
35058 if (mode == DISPATCH_INIT)
35059 init_dispatch_sched ();
35060 else if (mode == ADD_TO_DISPATCH_WINDOW)
35061     add_to_dispatch_window (insn);
35062 }
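/* Usage sketch (added commentary, not from the original sources): the
   scheduler is expected to call do_dispatch (insn, DISPATCH_INIT) once
   to build the tables, then do_dispatch (insn, ADD_TO_DISPATCH_WINDOW)
   for each scheduled insn, querying has_dispatch below to learn whether
   dispatch scheduling is active, whether an insn fits the current
   window, and whether a violation has occurred.  */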
35064 /* Return TRUE if Dispatch Scheduling is supported. */
35066 static bool
35067 has_dispatch (rtx insn, int action)
35068 {
35069   if (ix86_tune == PROCESSOR_BDVER1 && flag_dispatch_scheduler)
35070     switch (action)
35071       {
35072       default:
35073 	return false;
35075       case IS_DISPATCH_ON:
35076 	return true;
35079       case IS_CMP:
35080 	return is_cmp (insn);
35082       case DISPATCH_VIOLATION:
35083 	return dispatch_violation ();
35085       case FITS_DISPATCH_WINDOW:
35086 	return fits_dispatch_window (insn);
35087       }
35089   return false;
35090 }
35092 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
35093 place emms and femms instructions. */
35095 static enum machine_mode
35096 ix86_preferred_simd_mode (enum machine_mode mode)
35097 {
35098   if (!TARGET_SSE)
35099     return word_mode;
35101   switch (mode)
35102     {
35103     case QImode:
35104       return V16QImode;
35105     case HImode:
35106       return V8HImode;
35107     case SImode:
35108       return V4SImode;
35109     case DImode:
35110       return V2DImode;
35112     case SFmode:
35113       if (TARGET_AVX && !flag_prefer_avx128)
35114 	return V8SFmode;
35115       else
35116 	return V4SFmode;
35118     case DFmode:
35119       if (!TARGET_VECTORIZE_DOUBLE)
35120 	return word_mode;
35121       else if (TARGET_AVX && !flag_prefer_avx128)
35122 	return V4DFmode;
35123       else if (TARGET_SSE2)
35124 	return V2DFmode;
35125       /* FALLTHRU */
35127     default:
35128       return word_mode;
35129     }
35130 }
35132 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
35133    vectors.  */
35135 static unsigned int
35136 ix86_autovectorize_vector_sizes (void)
35138   return (TARGET_AVX && !flag_prefer_avx128) ? 32 | 16 : 0;
35139 }
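/* Example (added commentary, not from the original sources): with
   TARGET_AVX and without -mprefer-avx128 this returns the bit mask
   32 | 16, i.e. the vectorizer should try 32 byte (256bit) vectors
   first and fall back to 16 byte (128bit) vectors; a return of 0 means
   only the preferred SIMD mode is tried.  */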
35141 /* Initialize the GCC target structure. */
35142 #undef TARGET_RETURN_IN_MEMORY
35143 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
35145 #undef TARGET_LEGITIMIZE_ADDRESS
35146 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
35148 #undef TARGET_ATTRIBUTE_TABLE
35149 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
35150 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
35151 # undef TARGET_MERGE_DECL_ATTRIBUTES
35152 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
35153 #endif
35155 #undef TARGET_COMP_TYPE_ATTRIBUTES
35156 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
35158 #undef TARGET_INIT_BUILTINS
35159 #define TARGET_INIT_BUILTINS ix86_init_builtins
35160 #undef TARGET_BUILTIN_DECL
35161 #define TARGET_BUILTIN_DECL ix86_builtin_decl
35162 #undef TARGET_EXPAND_BUILTIN
35163 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
35165 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
35166 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
35167 ix86_builtin_vectorized_function
35169 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
35170 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
35172 #undef TARGET_BUILTIN_RECIPROCAL
35173 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
35175 #undef TARGET_ASM_FUNCTION_EPILOGUE
35176 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
35178 #undef TARGET_ENCODE_SECTION_INFO
35179 #ifndef SUBTARGET_ENCODE_SECTION_INFO
35180 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
35181 #else
35182 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
35183 #endif
35185 #undef TARGET_ASM_OPEN_PAREN
35186 #define TARGET_ASM_OPEN_PAREN ""
35187 #undef TARGET_ASM_CLOSE_PAREN
35188 #define TARGET_ASM_CLOSE_PAREN ""
35190 #undef TARGET_ASM_BYTE_OP
35191 #define TARGET_ASM_BYTE_OP ASM_BYTE
35193 #undef TARGET_ASM_ALIGNED_HI_OP
35194 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
35195 #undef TARGET_ASM_ALIGNED_SI_OP
35196 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
35198 #undef TARGET_ASM_ALIGNED_DI_OP
35199 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
35202 #undef TARGET_PROFILE_BEFORE_PROLOGUE
35203 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
35205 #undef TARGET_ASM_UNALIGNED_HI_OP
35206 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
35207 #undef TARGET_ASM_UNALIGNED_SI_OP
35208 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
35209 #undef TARGET_ASM_UNALIGNED_DI_OP
35210 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
35212 #undef TARGET_PRINT_OPERAND
35213 #define TARGET_PRINT_OPERAND ix86_print_operand
35214 #undef TARGET_PRINT_OPERAND_ADDRESS
35215 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
35216 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
35217 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
35218 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
35219 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
35221 #undef TARGET_SCHED_INIT_GLOBAL
35222 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
35223 #undef TARGET_SCHED_ADJUST_COST
35224 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
35225 #undef TARGET_SCHED_ISSUE_RATE
35226 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
35227 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
35228 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
35229 ia32_multipass_dfa_lookahead
35231 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
35232 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
35235 #undef TARGET_HAVE_TLS
35236 #define TARGET_HAVE_TLS true
35238 #undef TARGET_CANNOT_FORCE_CONST_MEM
35239 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
35240 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
35241 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
35243 #undef TARGET_DELEGITIMIZE_ADDRESS
35244 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
35246 #undef TARGET_MS_BITFIELD_LAYOUT_P
35247 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
35249 #if TARGET_MACHO
35250 #undef TARGET_BINDS_LOCAL_P
35251 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
35252 #endif
35253 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
35254 #undef TARGET_BINDS_LOCAL_P
35255 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
35256 #endif
35258 #undef TARGET_ASM_OUTPUT_MI_THUNK
35259 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
35260 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
35261 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
35263 #undef TARGET_ASM_FILE_START
35264 #define TARGET_ASM_FILE_START x86_file_start
35266 #undef TARGET_DEFAULT_TARGET_FLAGS
35267 #define TARGET_DEFAULT_TARGET_FLAGS \
35268   (TARGET_DEFAULT \
35269    | TARGET_SUBTARGET_DEFAULT \
35270 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
35272 #undef TARGET_HANDLE_OPTION
35273 #define TARGET_HANDLE_OPTION ix86_handle_option
35275 #undef TARGET_OPTION_OVERRIDE
35276 #define TARGET_OPTION_OVERRIDE ix86_option_override
35277 #undef TARGET_OPTION_OPTIMIZATION_TABLE
35278 #define TARGET_OPTION_OPTIMIZATION_TABLE ix86_option_optimization_table
35279 #undef TARGET_OPTION_INIT_STRUCT
35280 #define TARGET_OPTION_INIT_STRUCT ix86_option_init_struct
35282 #undef TARGET_REGISTER_MOVE_COST
35283 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
35284 #undef TARGET_MEMORY_MOVE_COST
35285 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
35286 #undef TARGET_RTX_COSTS
35287 #define TARGET_RTX_COSTS ix86_rtx_costs
35288 #undef TARGET_ADDRESS_COST
35289 #define TARGET_ADDRESS_COST ix86_address_cost
35291 #undef TARGET_FIXED_CONDITION_CODE_REGS
35292 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
35293 #undef TARGET_CC_MODES_COMPATIBLE
35294 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
35296 #undef TARGET_MACHINE_DEPENDENT_REORG
35297 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
35299 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
35300 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
35302 #undef TARGET_BUILD_BUILTIN_VA_LIST
35303 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
35305 #undef TARGET_ENUM_VA_LIST_P
35306 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
35308 #undef TARGET_FN_ABI_VA_LIST
35309 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
35311 #undef TARGET_CANONICAL_VA_LIST_TYPE
35312 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
35314 #undef TARGET_EXPAND_BUILTIN_VA_START
35315 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
35317 #undef TARGET_MD_ASM_CLOBBERS
35318 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
35320 #undef TARGET_PROMOTE_PROTOTYPES
35321 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
35322 #undef TARGET_STRUCT_VALUE_RTX
35323 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
35324 #undef TARGET_SETUP_INCOMING_VARARGS
35325 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
35326 #undef TARGET_MUST_PASS_IN_STACK
35327 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
35328 #undef TARGET_FUNCTION_ARG_ADVANCE
35329 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
35330 #undef TARGET_FUNCTION_ARG
35331 #define TARGET_FUNCTION_ARG ix86_function_arg
35332 #undef TARGET_FUNCTION_ARG_BOUNDARY
35333 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
35334 #undef TARGET_PASS_BY_REFERENCE
35335 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
35336 #undef TARGET_INTERNAL_ARG_POINTER
35337 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
35338 #undef TARGET_UPDATE_STACK_BOUNDARY
35339 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
35340 #undef TARGET_GET_DRAP_RTX
35341 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
35342 #undef TARGET_STRICT_ARGUMENT_NAMING
35343 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
35344 #undef TARGET_STATIC_CHAIN
35345 #define TARGET_STATIC_CHAIN ix86_static_chain
35346 #undef TARGET_TRAMPOLINE_INIT
35347 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
35348 #undef TARGET_RETURN_POPS_ARGS
35349 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
35351 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
35352 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
35354 #undef TARGET_SCALAR_MODE_SUPPORTED_P
35355 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
35357 #undef TARGET_VECTOR_MODE_SUPPORTED_P
35358 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
35360 #undef TARGET_C_MODE_FOR_SUFFIX
35361 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
35363 #ifdef HAVE_AS_TLS
35364 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
35365 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
35366 #endif
35368 #ifdef SUBTARGET_INSERT_ATTRIBUTES
35369 #undef TARGET_INSERT_ATTRIBUTES
35370 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
35371 #endif
35373 #undef TARGET_MANGLE_TYPE
35374 #define TARGET_MANGLE_TYPE ix86_mangle_type
35376 #undef TARGET_STACK_PROTECT_FAIL
35377 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
35379 #undef TARGET_SUPPORTS_SPLIT_STACK
35380 #define TARGET_SUPPORTS_SPLIT_STACK ix86_supports_split_stack
35382 #undef TARGET_FUNCTION_VALUE
35383 #define TARGET_FUNCTION_VALUE ix86_function_value
35385 #undef TARGET_FUNCTION_VALUE_REGNO_P
35386 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
35388 #undef TARGET_SECONDARY_RELOAD
35389 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
35391 #undef TARGET_PREFERRED_RELOAD_CLASS
35392 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
35393 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
35394 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
35395 #undef TARGET_CLASS_LIKELY_SPILLED_P
35396 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
35398 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
35399 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
35400 ix86_builtin_vectorization_cost
35401 #undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
35402 #define TARGET_VECTORIZE_BUILTIN_VEC_PERM \
35403 ix86_vectorize_builtin_vec_perm
35404 #undef TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK
35405 #define TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK \
35406 ix86_vectorize_builtin_vec_perm_ok
35407 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
35408 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
35409 ix86_preferred_simd_mode
35410 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
35411 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
35412 ix86_autovectorize_vector_sizes
35414 #undef TARGET_SET_CURRENT_FUNCTION
35415 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
35417 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
35418 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
35420 #undef TARGET_OPTION_SAVE
35421 #define TARGET_OPTION_SAVE ix86_function_specific_save
35423 #undef TARGET_OPTION_RESTORE
35424 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
35426 #undef TARGET_OPTION_PRINT
35427 #define TARGET_OPTION_PRINT ix86_function_specific_print
35429 #undef TARGET_CAN_INLINE_P
35430 #define TARGET_CAN_INLINE_P ix86_can_inline_p
35432 #undef TARGET_EXPAND_TO_RTL_HOOK
35433 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
35435 #undef TARGET_LEGITIMATE_ADDRESS_P
35436 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
35438 #undef TARGET_LEGITIMATE_CONSTANT_P
35439 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
35441 #undef TARGET_FRAME_POINTER_REQUIRED
35442 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
35444 #undef TARGET_CAN_ELIMINATE
35445 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
35447 #undef TARGET_EXTRA_LIVE_ON_ENTRY
35448 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
35450 #undef TARGET_ASM_CODE_END
35451 #define TARGET_ASM_CODE_END ix86_code_end
35453 #undef TARGET_CONDITIONAL_REGISTER_USAGE
35454 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
35456 #if TARGET_MACHO
35457 #undef TARGET_INIT_LIBFUNCS
35458 #define TARGET_INIT_LIBFUNCS darwin_rename_builtins
35459 #endif
35461 struct gcc_target targetm = TARGET_INITIALIZER;
35463 #include "gt-i386.h"