/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "diagnostic-core.h"
#include "toplev.h"
#include "basic-block.h"
#include "ggc.h"
#include "target.h"
#include "target-def.h"
#include "langhooks.h"
#include "cgraph.h"
#include "gimple.h"
#include "dwarf2.h"
#include "df.h"
#include "tm-constrs.h"
#include "params.h"
#include "cselib.h"
#include "debug.h"
#include "dwarf2out.h"
#include "sched-int.h"
#include "sbitmap.h"
#include "fibheap.h"
enum upper_128bits_state
{
  unknown = 0,
  unused,
  used
};

typedef struct block_info_def
{
  /* State of the upper 128bits of AVX registers at exit.  */
  enum upper_128bits_state state;
  /* TRUE if state of the upper 128bits of AVX registers is unchanged
     in this block.  */
  bool unchanged;
  /* TRUE if block has been processed.  */
  bool processed;
  /* TRUE if block has been scanned.  */
  bool scanned;
  /* Previous state of the upper 128bits of AVX registers at entry.  */
  enum upper_128bits_state prev;
} *block_info;

#define BLOCK_INFO(B)   ((block_info) (B)->aux)
enum call_avx256_state
{
  /* Callee returns 256bit AVX register.  */
  callee_return_avx256 = -1,
  /* Callee returns and passes 256bit AVX register.  */
  callee_return_pass_avx256,
  /* Callee passes 256bit AVX register.  */
  callee_pass_avx256,
  /* Callee doesn't return nor pass 256bit AVX register, or no
     256bit AVX register in function return.  */
  call_no_avx256,
  /* vzeroupper intrinsic.  */
  vzeroupper_intrinsic
};
/* Check if a 256bit AVX register is referenced in stores.  */

static void
check_avx256_stores (rtx dest, const_rtx set, void *data)
{
  if ((REG_P (dest)
       && VALID_AVX256_REG_MODE (GET_MODE (dest)))
      || (GET_CODE (set) == SET
          && REG_P (SET_SRC (set))
          && VALID_AVX256_REG_MODE (GET_MODE (SET_SRC (set)))))
    {
      enum upper_128bits_state *state
        = (enum upper_128bits_state *) data;
      *state = used;
    }
}
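
/* check_avx256_stores is only ever invoked through note_stores; a
   minimal sketch of the calling convention (mirroring the use in
   move_or_delete_vzeroupper_2 below):

     enum upper_128bits_state state = unused;
     note_stores (PATTERN (insn), check_avx256_stores, &state);

   After the call, STATE has been promoted to USED iff INSN stores to
   a 256bit AVX register, or copies one via the SET_SRC check above.  */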
/* Helper function for move_or_delete_vzeroupper_1.  Look for vzeroupper
   in basic block BB.  Delete it if upper 128bit AVX registers are
   unused.  If it isn't deleted, move it to just before a jump insn.

   STATE is state of the upper 128bits of AVX registers at entry.  */

static void
move_or_delete_vzeroupper_2 (basic_block bb,
                             enum upper_128bits_state state)
{
  rtx insn, bb_end;
  rtx vzeroupper_insn = NULL_RTX;
  rtx pat;
  int avx256;
  bool unchanged;

  if (BLOCK_INFO (bb)->unchanged)
    {
      if (dump_file)
        fprintf (dump_file, " [bb %i] unchanged: upper 128bits: %d\n",
                 bb->index, state);

      BLOCK_INFO (bb)->state = state;
      return;
    }

  if (BLOCK_INFO (bb)->scanned && BLOCK_INFO (bb)->prev == state)
    {
      if (dump_file)
        fprintf (dump_file, " [bb %i] scanned: upper 128bits: %d\n",
                 bb->index, BLOCK_INFO (bb)->state);
      return;
    }

  BLOCK_INFO (bb)->prev = state;

  if (dump_file)
    fprintf (dump_file, " [bb %i] entry: upper 128bits: %d\n",
             bb->index, state);

  unchanged = true;

  /* BB_END changes when it is deleted.  */
  bb_end = BB_END (bb);
  insn = BB_HEAD (bb);
  while (insn != bb_end)
    {
      insn = NEXT_INSN (insn);

      if (!NONDEBUG_INSN_P (insn))
        continue;

      /* Move vzeroupper before jump/call.  */
      if (JUMP_P (insn) || CALL_P (insn))
        {
          if (!vzeroupper_insn)
            continue;

          if (PREV_INSN (insn) != vzeroupper_insn)
            {
              if (dump_file)
                {
                  fprintf (dump_file, "Move vzeroupper after:\n");
                  print_rtl_single (dump_file, PREV_INSN (insn));
                  fprintf (dump_file, "before:\n");
                  print_rtl_single (dump_file, insn);
                }
              reorder_insns_nobb (vzeroupper_insn, vzeroupper_insn,
                                  PREV_INSN (insn));
            }
          vzeroupper_insn = NULL_RTX;
          continue;
        }

      pat = PATTERN (insn);

      /* Check insn for vzeroupper intrinsic.  */
      if (GET_CODE (pat) == UNSPEC_VOLATILE
          && XINT (pat, 1) == UNSPECV_VZEROUPPER)
        {
          if (dump_file)
            {
              /* Found vzeroupper intrinsic.  */
              fprintf (dump_file, "Found vzeroupper:\n");
              print_rtl_single (dump_file, insn);
            }
        }
      else
        {
          /* Check insn for vzeroall intrinsic.  */
          if (GET_CODE (pat) == PARALLEL
              && GET_CODE (XVECEXP (pat, 0, 0)) == UNSPEC_VOLATILE
              && XINT (XVECEXP (pat, 0, 0), 1) == UNSPECV_VZEROALL)
            {
              state = unused;
              unchanged = false;

              /* Delete pending vzeroupper insertion.  */
              if (vzeroupper_insn)
                {
                  delete_insn (vzeroupper_insn);
                  vzeroupper_insn = NULL_RTX;
                }
            }
          else if (state != used)
            {
              note_stores (pat, check_avx256_stores, &state);
              if (state == used)
                unchanged = false;
            }
          continue;
        }

      /* Process vzeroupper intrinsic.  */
      avx256 = INTVAL (XVECEXP (pat, 0, 0));

      if (state == unused)
        {
          /* Since the upper 128bits are cleared, callee must not pass
             256bit AVX register.  We only need to check if callee
             returns 256bit AVX register.  */
          if (avx256 == callee_return_avx256)
            {
              state = used;
              unchanged = false;
            }

          /* Remove unnecessary vzeroupper since upper 128bits are
             cleared.  */
          if (dump_file)
            {
              fprintf (dump_file, "Delete redundant vzeroupper:\n");
              print_rtl_single (dump_file, insn);
            }
          delete_insn (insn);
        }
      else
        {
          /* Set state to UNUSED if callee doesn't return 256bit AVX
             register.  */
          if (avx256 != callee_return_pass_avx256)
            state = unused;

          if (avx256 == callee_return_pass_avx256
              || avx256 == callee_pass_avx256)
            {
              /* Must remove vzeroupper since callee passes in 256bit
                 AVX register.  */
              if (dump_file)
                {
                  fprintf (dump_file, "Delete callee pass vzeroupper:\n");
                  print_rtl_single (dump_file, insn);
                }
              delete_insn (insn);
            }
          else
            {
              vzeroupper_insn = insn;
              unchanged = false;
            }
        }
    }

  BLOCK_INFO (bb)->state = state;
  BLOCK_INFO (bb)->unchanged = unchanged;
  BLOCK_INFO (bb)->scanned = true;

  if (dump_file)
    fprintf (dump_file, " [bb %i] exit: %s: upper 128bits: %d\n",
             bb->index, unchanged ? "unchanged" : "changed",
             state);
}
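
/* Schematically, the two patterns recognized above look like this in
   the RTL stream (operand details elided):

     (unspec_volatile [(const_int N)] UNSPECV_VZEROUPPER)
       -- vzeroupper; N is the call_avx256_state value read into
          AVX256 above.
     (parallel [(unspec_volatile [...] UNSPECV_VZEROALL) ...])
       -- vzeroall; it clears all upper 128bits, so STATE drops to
          UNUSED.  */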
/* Helper function for move_or_delete_vzeroupper.  Process vzeroupper
   in BLOCK and check its predecessor blocks.  Treat UNKNOWN state
   as UNUSED if UNKNOWN_IS_UNUSED is true.  Return TRUE if the exit
   state is changed.  */

static bool
move_or_delete_vzeroupper_1 (basic_block block, bool unknown_is_unused)
{
  edge e;
  edge_iterator ei;
  enum upper_128bits_state state, old_state, new_state;
  bool seen_unknown;

  if (dump_file)
    fprintf (dump_file, " Process [bb %i]: status: %d\n",
             block->index, BLOCK_INFO (block)->processed);

  if (BLOCK_INFO (block)->processed)
    return false;

  state = unused;

  /* Check all predecessor edges of this block.  */
  seen_unknown = false;
  FOR_EACH_EDGE (e, ei, block->preds)
    {
      if (e->src == block)
        continue;
      switch (BLOCK_INFO (e->src)->state)
        {
        case unknown:
          if (!unknown_is_unused)
            seen_unknown = true;
          break;
        case unused:
          break;
        case used:
          state = used;
          goto done;
        }
    }

  if (seen_unknown)
    state = unknown;

 done:
  old_state = BLOCK_INFO (block)->state;
  move_or_delete_vzeroupper_2 (block, state);
  new_state = BLOCK_INFO (block)->state;

  if (state != unknown || new_state == used)
    BLOCK_INFO (block)->processed = true;

  /* Need to rescan if the upper 128bits of AVX registers are changed
     to USED at exit.  */
  if (new_state != old_state)
    {
      if (new_state == used)
        cfun->machine->rescan_vzeroupper_p = 1;
      return true;
    }
  else
    return false;
}
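
/* The merge over predecessor exit states above behaves like a small
   lattice:

     predecessors' exit states      merged entry state
     -------------------------      ------------------
     all UNUSED                     UNUSED
     any USED                       USED
     otherwise any UNKNOWN          UNKNOWN, or UNUSED when
                                    UNKNOWN_IS_UNUSED (final pass)

   Only an UNUSED entry state lets move_or_delete_vzeroupper_2 delete
   a vzeroupper outright.  */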
/* Go through the instruction stream looking for vzeroupper.  Delete
   it if upper 128bit AVX registers are unused.  If it isn't deleted,
   move it to just before a jump insn.  */

static void
move_or_delete_vzeroupper (void)
{
  edge e;
  edge_iterator ei;
  basic_block bb;
  fibheap_t worklist, pending, fibheap_swap;
  sbitmap visited, in_worklist, in_pending, sbitmap_swap;
  int *bb_order;
  int *rc_order;
  int i;

  /* Set up block info for each basic block.  */
  alloc_aux_for_blocks (sizeof (struct block_info_def));

  /* Process outgoing edges of entry point.  */
  if (dump_file)
    fprintf (dump_file, "Process outgoing edges of entry point\n");

  FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR->succs)
    {
      move_or_delete_vzeroupper_2 (e->dest,
                                   cfun->machine->caller_pass_avx256_p
                                   ? used : unused);
      BLOCK_INFO (e->dest)->processed = true;
    }

  /* Compute reverse completion order of depth first search of the CFG
     so that the data-flow runs faster.  */
  rc_order = XNEWVEC (int, n_basic_blocks - NUM_FIXED_BLOCKS);
  bb_order = XNEWVEC (int, last_basic_block);
  pre_and_rev_post_order_compute (NULL, rc_order, false);
  for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
    bb_order[rc_order[i]] = i;
  free (rc_order);

  worklist = fibheap_new ();
  pending = fibheap_new ();
  visited = sbitmap_alloc (last_basic_block);
  in_worklist = sbitmap_alloc (last_basic_block);
  in_pending = sbitmap_alloc (last_basic_block);
  sbitmap_zero (in_worklist);

  /* Don't check outgoing edges of entry point.  */
  sbitmap_ones (in_pending);
  FOR_EACH_BB (bb)
    if (BLOCK_INFO (bb)->processed)
      RESET_BIT (in_pending, bb->index);
    else
      {
        move_or_delete_vzeroupper_1 (bb, false);
        fibheap_insert (pending, bb_order[bb->index], bb);
      }

  if (dump_file)
    fprintf (dump_file, "Check remaining basic blocks\n");

  while (!fibheap_empty (pending))
    {
      fibheap_swap = pending;
      pending = worklist;
      worklist = fibheap_swap;
      sbitmap_swap = in_pending;
      in_pending = in_worklist;
      in_worklist = sbitmap_swap;

      sbitmap_zero (visited);

      cfun->machine->rescan_vzeroupper_p = 0;

      while (!fibheap_empty (worklist))
        {
          bb = (basic_block) fibheap_extract_min (worklist);
          RESET_BIT (in_worklist, bb->index);
          gcc_assert (!TEST_BIT (visited, bb->index));
          if (!TEST_BIT (visited, bb->index))
            {
              edge_iterator ei;

              SET_BIT (visited, bb->index);

              if (move_or_delete_vzeroupper_1 (bb, false))
                FOR_EACH_EDGE (e, ei, bb->succs)
                  {
                    if (e->dest == EXIT_BLOCK_PTR
                        || BLOCK_INFO (e->dest)->processed)
                      continue;

                    if (TEST_BIT (visited, e->dest->index))
                      {
                        if (!TEST_BIT (in_pending, e->dest->index))
                          {
                            /* Send E->DEST to next round.  */
                            SET_BIT (in_pending, e->dest->index);
                            fibheap_insert (pending,
                                            bb_order[e->dest->index],
                                            e->dest);
                          }
                      }
                    else if (!TEST_BIT (in_worklist, e->dest->index))
                      {
                        /* Add E->DEST to current round.  */
                        SET_BIT (in_worklist, e->dest->index);
                        fibheap_insert (worklist, bb_order[e->dest->index],
                                        e->dest);
                      }
                  }
            }
        }

      if (!cfun->machine->rescan_vzeroupper_p)
        break;
    }

  free (bb_order);
  fibheap_delete (worklist);
  fibheap_delete (pending);
  sbitmap_free (visited);
  sbitmap_free (in_worklist);
  sbitmap_free (in_pending);

  if (dump_file)
    fprintf (dump_file, "Process remaining basic blocks\n");

  FOR_EACH_BB (bb)
    move_or_delete_vzeroupper_1 (bb, true);

  free_aux_for_blocks ();
}
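
/* For example (illustrative only), with -mavx a function that uses
   256bit vectors and then calls legacy SSE code:

     __m256d v = f ();
     g ();                      (g compiled without AVX)

   gets a vzeroupper before the call to g to avoid the AVX/SSE
   transition penalty.  This pass deletes that vzeroupper when every
   path reaching the call already has the upper 128bits clear, and
   otherwise sinks it to immediately before the call or jump insn.  */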
static rtx legitimize_dllimport_symbol (rtx, bool);

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode)                                \
  ((mode) == QImode ? 0                                 \
   : (mode) == HImode ? 1                               \
   : (mode) == SImode ? 2                               \
   : (mode) == DImode ? 3                               \
   : 4)
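
/* For instance, the multiply and divide cost arrays below have five
   entries (QI, HI, SI, DI, other); a sketch of the intended lookup,
   assuming the mult_init field declared for struct processor_costs in
   i386.h:

     cost = ix86_cost->mult_init[MODE_INDEX (SImode)];   (index 2)

   Any mode wider than DImode falls into the final "other" slot.  */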
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)
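
/* With COSTS_N_INSNS (N) == (N) * 4 and a typical addition encoding in
   2 bytes, COSTS_N_BYTES keeps the size table on the same scale as the
   speed tables: COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1), so a
   2-byte insn counts as "one add" and a 3-byte insn
   (COSTS_N_BYTES (3) == 6) is proportionally more expensive when
   tuning for size.  */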
#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
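
/* Each stringop_algs initializer below has the shape

     {alg_for_unknown_size, {{max_size, alg}, ..., {-1, alg}}}

   read as: use ALG_FOR_UNKNOWN_SIZE when the block length is not a
   compile-time constant; otherwise use the ALG of the first pair whose
   MAX_SIZE covers the length, with -1 meaning "everything larger".
   Each cost table carries two such entries apiece for memcpy and
   memset (32bit and 64bit flavors); DUMMY_STRINGOP_ALGS fills the slot
   a tuning never exercises, e.g. the 64bit slot of a 32bit-only
   processor.  */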
static const
struct processor_costs ix86_size_cost = {/* costs for tuning for size */
  COSTS_N_BYTES (2),            /* cost of an add instruction */
  COSTS_N_BYTES (3),            /* cost of a lea instruction */
  COSTS_N_BYTES (2),            /* variable shift costs */
  COSTS_N_BYTES (3),            /* constant shift costs */
  {COSTS_N_BYTES (3),           /* cost of starting multiply for QI */
   COSTS_N_BYTES (3),           /* HI */
   COSTS_N_BYTES (3),           /* SI */
   COSTS_N_BYTES (3),           /* DI */
   COSTS_N_BYTES (5)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_BYTES (3),           /* cost of a divide/mod for QI */
   COSTS_N_BYTES (3),           /* HI */
   COSTS_N_BYTES (3),           /* SI */
   COSTS_N_BYTES (3),           /* DI */
   COSTS_N_BYTES (5)},          /* other */
  COSTS_N_BYTES (3),            /* cost of movsx */
  COSTS_N_BYTES (3),            /* cost of movzx */
  0,                            /* "large" insn */
  3,                            /* MOVE_RATIO */
  2,                            /* cost for loading QImode using movzbl */
  {2, 2, 2},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 2, 2},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {2, 2, 2},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {2, 2, 2},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  3,                            /* cost of moving MMX register */
  {3, 3},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {3, 3},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  3,                            /* cost of moving SSE register */
  {3, 3, 3},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {3, 3, 3},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
  0,                            /* size of l1 cache */
  0,                            /* size of l2 cache */
  0,                            /* size of prefetch block */
  0,                            /* number of parallel prefetches */
  1,                            /* Branch cost */
  COSTS_N_BYTES (2),            /* cost of FADD and FSUB insns.  */
  COSTS_N_BYTES (2),            /* cost of FMUL instruction.  */
  COSTS_N_BYTES (2),            /* cost of FDIV instruction.  */
  COSTS_N_BYTES (2),            /* cost of FABS instruction.  */
  COSTS_N_BYTES (2),            /* cost of FCHS instruction.  */
  COSTS_N_BYTES (2),            /* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  1,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  1,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {    /* 386 specific costs */
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (3),            /* variable shift costs */
  COSTS_N_INSNS (2),            /* constant shift costs */
  {COSTS_N_INSNS (6),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (6),           /* HI */
   COSTS_N_INSNS (6),           /* SI */
   COSTS_N_INSNS (6),           /* DI */
   COSTS_N_INSNS (6)},          /* other */
  COSTS_N_INSNS (1),            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (23),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),          /* HI */
   COSTS_N_INSNS (23),          /* SI */
   COSTS_N_INSNS (23),          /* DI */
   COSTS_N_INSNS (23)},         /* other */
  COSTS_N_INSNS (3),            /* cost of movsx */
  COSTS_N_INSNS (2),            /* cost of movzx */
  15,                           /* "large" insn */
  3,                            /* MOVE_RATIO */
  4,                            /* cost for loading QImode using movzbl */
  {2, 4, 2},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 4, 2},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {8, 8, 8},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {8, 8, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {4, 8},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 8},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 8, 16},                   /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 8, 16},                   /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
  0,                            /* size of l1 cache */
  0,                            /* size of l2 cache */
  0,                            /* size of prefetch block */
  0,                            /* number of parallel prefetches */
  1,                            /* Branch cost */
  COSTS_N_INSNS (23),           /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (27),           /* cost of FMUL instruction.  */
  COSTS_N_INSNS (88),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (22),           /* cost of FABS instruction.  */
  COSTS_N_INSNS (24),           /* cost of FCHS instruction.  */
  COSTS_N_INSNS (122),          /* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs i486_cost = {    /* 486 specific costs */
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (3),            /* variable shift costs */
  COSTS_N_INSNS (2),            /* constant shift costs */
  {COSTS_N_INSNS (12),          /* cost of starting multiply for QI */
   COSTS_N_INSNS (12),          /* HI */
   COSTS_N_INSNS (12),          /* SI */
   COSTS_N_INSNS (12),          /* DI */
   COSTS_N_INSNS (12)},         /* other */
  1,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (40),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (40),          /* HI */
   COSTS_N_INSNS (40),          /* SI */
   COSTS_N_INSNS (40),          /* DI */
   COSTS_N_INSNS (40)},         /* other */
  COSTS_N_INSNS (3),            /* cost of movsx */
  COSTS_N_INSNS (2),            /* cost of movzx */
  15,                           /* "large" insn */
  3,                            /* MOVE_RATIO */
  4,                            /* cost for loading QImode using movzbl */
  {2, 4, 2},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 4, 2},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {8, 8, 8},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {8, 8, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {4, 8},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 8},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 8, 16},                   /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 8, 16},                   /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
  4,                            /* size of l1 cache.  486 has 8kB cache
                                   shared for code and data, so 4kB is
                                   not really precise.  */
  4,                            /* size of l2 cache */
  0,                            /* size of prefetch block */
  0,                            /* number of parallel prefetches */
  1,                            /* Branch cost */
  COSTS_N_INSNS (8),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (16),           /* cost of FMUL instruction.  */
  COSTS_N_INSNS (73),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (3),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (83),           /* cost of FSQRT instruction.  */
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (4),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (11),          /* cost of starting multiply for QI */
   COSTS_N_INSNS (11),          /* HI */
   COSTS_N_INSNS (11),          /* SI */
   COSTS_N_INSNS (11),          /* DI */
   COSTS_N_INSNS (11)},         /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (25),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (25),          /* HI */
   COSTS_N_INSNS (25),          /* SI */
   COSTS_N_INSNS (25),          /* DI */
   COSTS_N_INSNS (25)},         /* other */
  COSTS_N_INSNS (3),            /* cost of movsx */
  COSTS_N_INSNS (2),            /* cost of movzx */
  8,                            /* "large" insn */
  6,                            /* MOVE_RATIO */
  6,                            /* cost for loading QImode using movzbl */
  {2, 4, 2},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 4, 2},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {2, 2, 6},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 4, 6},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  8,                            /* cost of moving MMX register */
  {8, 8},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {8, 8},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 8, 16},                   /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 8, 16},                   /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
  8,                            /* size of l1 cache.  */
  8,                            /* size of l2 cache */
  0,                            /* size of prefetch block */
  0,                            /* number of parallel prefetches */
  2,                            /* Branch cost */
  COSTS_N_INSNS (3),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (3),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (39),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (70),           /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (4),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (4),           /* SI */
   COSTS_N_INSNS (4),           /* DI */
   COSTS_N_INSNS (4)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (17),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (17),          /* HI */
   COSTS_N_INSNS (17),          /* SI */
   COSTS_N_INSNS (17),          /* DI */
   COSTS_N_INSNS (17)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  6,                            /* MOVE_RATIO */
  2,                            /* cost for loading QImode using movzbl */
  {4, 4, 4},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 2, 2},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {2, 2, 6},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 4, 6},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {2, 2},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {2, 2},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {2, 2, 8},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {2, 2, 8},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
  8,                            /* size of l1 cache.  */
  256,                          /* size of l2 cache */
  32,                           /* size of prefetch block */
  6,                            /* number of parallel prefetches */
  2,                            /* Branch cost */
  COSTS_N_INSNS (3),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),           /* cost of FSQRT instruction.  */
  /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
     (we ensure the alignment).  For small blocks an inline loop is still a
     noticeable win; for bigger blocks either rep movsl or rep movsb is the
     way to go.  Rep movsb apparently has a more expensive startup time in
     the CPU, but after 4K the difference is down in the noise.  */
  {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
                        {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{1024, unrolled_loop},
                        {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
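
/* Concretely, the first (memcpy) entry above says: for sizes unknown
   at compile time use rep movsl; for known sizes use an inline loop up
   to 128 bytes, an unrolled loop up to 1K, rep movsl up to 8K, and rep
   movsb beyond that.  */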
static const
struct processor_costs geode_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (2),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (3),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (7),           /* SI */
   COSTS_N_INSNS (7),           /* DI */
   COSTS_N_INSNS (7)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (15),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),          /* HI */
   COSTS_N_INSNS (39),          /* SI */
   COSTS_N_INSNS (39),          /* DI */
   COSTS_N_INSNS (39)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  4,                            /* MOVE_RATIO */
  1,                            /* cost for loading QImode using movzbl */
  {1, 1, 1},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {1, 1, 1},                    /* cost of storing integer registers */
  1,                            /* cost of reg,reg fld/fst */
  {1, 1, 1},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 6, 6},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */

  1,                            /* cost of moving MMX register */
  {1, 1},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {1, 1},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  1,                            /* cost of moving SSE register */
  {1, 1, 1},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {1, 1, 1},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  1,                            /* MMX or SSE register to integer */
  64,                           /* size of l1 cache.  */
  128,                          /* size of l2 cache.  */
  32,                           /* size of prefetch block */
  1,                            /* number of parallel prefetches */
  1,                            /* Branch cost */
  COSTS_N_INSNS (6),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (11),           /* cost of FMUL instruction.  */
  COSTS_N_INSNS (47),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (54),           /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs k6_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (2),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (3),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (3),           /* HI */
   COSTS_N_INSNS (3),           /* SI */
   COSTS_N_INSNS (3),           /* DI */
   COSTS_N_INSNS (3)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (18),          /* HI */
   COSTS_N_INSNS (18),          /* SI */
   COSTS_N_INSNS (18),          /* DI */
   COSTS_N_INSNS (18)},         /* other */
  COSTS_N_INSNS (2),            /* cost of movsx */
  COSTS_N_INSNS (2),            /* cost of movzx */
  8,                            /* "large" insn */
  4,                            /* MOVE_RATIO */
  3,                            /* cost for loading QImode using movzbl */
  {4, 5, 4},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 3, 2},                    /* cost of storing integer registers */
  4,                            /* cost of reg,reg fld/fst */
  {6, 6, 6},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 4, 4},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {2, 2},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {2, 2},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {2, 2, 8},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {2, 2, 8},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  6,                            /* MMX or SSE register to integer */
  32,                           /* size of l1 cache.  */
  32,                           /* size of l2 cache.  Some models
                                   have integrated l2 cache, but
                                   optimizing for k6 is not important
                                   enough to worry about that.  */
  32,                           /* size of prefetch block */
  1,                            /* number of parallel prefetches */
  1,                            /* Branch cost */
  COSTS_N_INSNS (2),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (2),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),           /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (2),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (5),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (5),           /* HI */
   COSTS_N_INSNS (5),           /* SI */
   COSTS_N_INSNS (5),           /* DI */
   COSTS_N_INSNS (5)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),          /* HI */
   COSTS_N_INSNS (42),          /* SI */
   COSTS_N_INSNS (74),          /* DI */
   COSTS_N_INSNS (74)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  9,                            /* MOVE_RATIO */
  4,                            /* cost for loading QImode using movzbl */
  {3, 4, 3},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {3, 4, 3},                    /* cost of storing integer registers */
  4,                            /* cost of reg,reg fld/fst */
  {4, 4, 12},                   /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {6, 6, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {4, 4},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 4},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 4, 6},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 4, 5},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  5,                            /* MMX or SSE register to integer */
  64,                           /* size of l1 cache.  */
  256,                          /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  6,                            /* number of parallel prefetches */
  5,                            /* Branch cost */
  COSTS_N_INSNS (4),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (24),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),           /* cost of FSQRT instruction.  */
  /* For some reason, Athlon deals better with REP prefix (relative to loops)
     compared to K8.  Alignment becomes important after 8 bytes for memcpy and
     128 bytes for memset.  */
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs k8_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (2),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (3),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (3),           /* SI */
   COSTS_N_INSNS (4),           /* DI */
   COSTS_N_INSNS (5)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),          /* HI */
   COSTS_N_INSNS (42),          /* SI */
   COSTS_N_INSNS (74),          /* DI */
   COSTS_N_INSNS (74)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  9,                            /* MOVE_RATIO */
  4,                            /* cost for loading QImode using movzbl */
  {3, 4, 3},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {3, 4, 3},                    /* cost of storing integer registers */
  4,                            /* cost of reg,reg fld/fst */
  {4, 4, 12},                   /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {6, 6, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {3, 3},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 4},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 3, 6},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 4, 5},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  5,                            /* MMX or SSE register to integer */
  64,                           /* size of l1 cache.  */
  512,                          /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                          /* number of parallel prefetches */
  3,                            /* Branch cost */
  COSTS_N_INSNS (4),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),           /* cost of FSQRT instruction.  */
  /* K8 has optimized REP instruction for medium sized blocks, but for very
     small blocks it is better to use loop.  For large blocks, libcall can
     do nontemporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                            /* scalar_stmt_cost.  */
  2,                            /* scalar load_cost.  */
  2,                            /* scalar_store_cost.  */
  5,                            /* vec_stmt_cost.  */
  0,                            /* vec_to_scalar_cost.  */
  2,                            /* scalar_to_vec_cost.  */
  2,                            /* vec_align_load_cost.  */
  3,                            /* vec_unalign_load_cost.  */
  3,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  2,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs amdfam10_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (2),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (3),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (3),           /* SI */
   COSTS_N_INSNS (4),           /* DI */
   COSTS_N_INSNS (5)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),          /* HI */
   COSTS_N_INSNS (51),          /* SI */
   COSTS_N_INSNS (83),          /* DI */
   COSTS_N_INSNS (83)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  9,                            /* MOVE_RATIO */
  4,                            /* cost for loading QImode using movzbl */
  {3, 4, 3},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {3, 4, 3},                    /* cost of storing integer registers */
  4,                            /* cost of reg,reg fld/fst */
  {4, 4, 12},                   /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {6, 6, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {3, 3},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 4},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 4, 3},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 4, 5},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
                                /* On K8:
                                    MOVD reg64, xmmreg Double FSTORE 4
                                    MOVD reg32, xmmreg Double FSTORE 4
                                   On AMDFAM10:
                                    MOVD reg64, xmmreg Double FADD 3
                                                       1/1  1/1
                                    MOVD reg32, xmmreg Double FADD 3
                                                       1/1  1/1 */
  64,                           /* size of l1 cache.  */
  512,                          /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                          /* number of parallel prefetches */
  2,                            /* Branch cost */
  COSTS_N_INSNS (4),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),           /* cost of FSQRT instruction.  */

  /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall can
     do nontemporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                            /* scalar_stmt_cost.  */
  2,                            /* scalar load_cost.  */
  2,                            /* scalar_store_cost.  */
  6,                            /* vec_stmt_cost.  */
  0,                            /* vec_to_scalar_cost.  */
  2,                            /* scalar_to_vec_cost.  */
  2,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  2,                            /* vec_store_cost.  */
  2,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs bdver1_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (4),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (4),           /* SI */
   COSTS_N_INSNS (6),           /* DI */
   COSTS_N_INSNS (6)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),          /* HI */
   COSTS_N_INSNS (51),          /* SI */
   COSTS_N_INSNS (83),          /* DI */
   COSTS_N_INSNS (83)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  9,                            /* MOVE_RATIO */
  4,                            /* cost for loading QImode using movzbl */
  {5, 5, 4},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {4, 4, 4},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {5, 5, 12},                   /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 4, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {4, 4},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 4},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 4, 4},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 4, 4},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  2,                            /* MMX or SSE register to integer */
                                /* On K8:
                                    MOVD reg64, xmmreg Double FSTORE 4
                                    MOVD reg32, xmmreg Double FSTORE 4
                                   On AMDFAM10:
                                    MOVD reg64, xmmreg Double FADD 3
                                                       1/1  1/1
                                    MOVD reg32, xmmreg Double FADD 3
                                                       1/1  1/1 */
  16,                           /* size of l1 cache.  */
  2048,                         /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                          /* number of parallel prefetches */
  2,                            /* Branch cost */
  COSTS_N_INSNS (6),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),           /* cost of FSQRT instruction.  */

  /* BDVER1 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall
     can do nontemporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  6,                            /* scalar_stmt_cost.  */
  4,                            /* scalar load_cost.  */
  4,                            /* scalar_store_cost.  */
  6,                            /* vec_stmt_cost.  */
  0,                            /* vec_to_scalar_cost.  */
  2,                            /* scalar_to_vec_cost.  */
  4,                            /* vec_align_load_cost.  */
  4,                            /* vec_unalign_load_cost.  */
  4,                            /* vec_store_cost.  */
  2,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs btver1_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (2),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (3),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (3),           /* SI */
   COSTS_N_INSNS (4),           /* DI */
   COSTS_N_INSNS (5)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),          /* HI */
   COSTS_N_INSNS (51),          /* SI */
   COSTS_N_INSNS (83),          /* DI */
   COSTS_N_INSNS (83)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  9,                            /* MOVE_RATIO */
  4,                            /* cost for loading QImode using movzbl */
  {3, 4, 3},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {3, 4, 3},                    /* cost of storing integer registers */
  4,                            /* cost of reg,reg fld/fst */
  {4, 4, 12},                   /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {6, 6, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {3, 3},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 4},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 4, 3},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 4, 5},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
                                /* On K8:
                                    MOVD reg64, xmmreg Double FSTORE 4
                                    MOVD reg32, xmmreg Double FSTORE 4
                                   On AMDFAM10:
                                    MOVD reg64, xmmreg Double FADD 3
                                                       1/1  1/1
                                    MOVD reg32, xmmreg Double FADD 3
                                                       1/1  1/1 */
  32,                           /* size of l1 cache.  */
  512,                          /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  100,                          /* number of parallel prefetches */
  2,                            /* Branch cost */
  COSTS_N_INSNS (4),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),           /* cost of FSQRT instruction.  */

  /* BTVER1 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall can
     do nontemporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                            /* scalar_stmt_cost.  */
  2,                            /* scalar load_cost.  */
  2,                            /* scalar_store_cost.  */
  6,                            /* vec_stmt_cost.  */
  0,                            /* vec_to_scalar_cost.  */
  2,                            /* scalar_to_vec_cost.  */
  2,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  2,                            /* vec_store_cost.  */
  2,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs pentium4_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (3),            /* cost of a lea instruction */
  COSTS_N_INSNS (4),            /* variable shift costs */
  COSTS_N_INSNS (4),            /* constant shift costs */
  {COSTS_N_INSNS (15),          /* cost of starting multiply for QI */
   COSTS_N_INSNS (15),          /* HI */
   COSTS_N_INSNS (15),          /* SI */
   COSTS_N_INSNS (15),          /* DI */
   COSTS_N_INSNS (15)},         /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (56),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (56),          /* HI */
   COSTS_N_INSNS (56),          /* SI */
   COSTS_N_INSNS (56),          /* DI */
   COSTS_N_INSNS (56)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  16,                           /* "large" insn */
  6,                            /* MOVE_RATIO */
  2,                            /* cost for loading QImode using movzbl */
  {4, 5, 4},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 3, 2},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {2, 2, 6},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 4, 6},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {2, 2},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {2, 2},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  12,                           /* cost of moving SSE register */
  {12, 12, 12},                 /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {2, 2, 8},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  10,                           /* MMX or SSE register to integer */
  8,                            /* size of l1 cache.  */
  256,                          /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  6,                            /* number of parallel prefetches */
  2,                            /* Branch cost */
  COSTS_N_INSNS (5),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (7),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (43),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (43),           /* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
              {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs nocona_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (10),          /* cost of starting multiply for QI */
   COSTS_N_INSNS (10),          /* HI */
   COSTS_N_INSNS (10),          /* SI */
   COSTS_N_INSNS (10),          /* DI */
   COSTS_N_INSNS (10)},         /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (66),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (66),          /* HI */
   COSTS_N_INSNS (66),          /* SI */
   COSTS_N_INSNS (66),          /* DI */
   COSTS_N_INSNS (66)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  16,                           /* "large" insn */
  17,                           /* MOVE_RATIO */
  4,                            /* cost for loading QImode using movzbl */
  {4, 4, 4},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {4, 4, 4},                    /* cost of storing integer registers */
  3,                            /* cost of reg,reg fld/fst */
  {12, 12, 12},                 /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 4, 4},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  6,                            /* cost of moving MMX register */
  {12, 12},                     /* cost of loading MMX registers
                                   in SImode and DImode */
  {12, 12},                     /* cost of storing MMX registers
                                   in SImode and DImode */
  6,                            /* cost of moving SSE register */
  {12, 12, 12},                 /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {12, 12, 12},                 /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  8,                            /* MMX or SSE register to integer */
  8,                            /* size of l1 cache.  */
  1024,                         /* size of l2 cache.  */
  128,                          /* size of prefetch block */
  8,                            /* number of parallel prefetches */
  1,                            /* Branch cost */
  COSTS_N_INSNS (6),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (40),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (3),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (44),           /* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
              {100000, unrolled_loop}, {-1, libcall}}}},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
              {-1, libcall}}},
   {libcall, {{24, loop}, {64, unrolled_loop},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs atom_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,        /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (3),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (3),           /* SI */
   COSTS_N_INSNS (4),           /* DI */
   COSTS_N_INSNS (2)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),          /* HI */
   COSTS_N_INSNS (42),          /* SI */
   COSTS_N_INSNS (74),          /* DI */
   COSTS_N_INSNS (74)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  17,                           /* MOVE_RATIO */
  2,                            /* cost for loading QImode using movzbl */
  {4, 4, 4},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {4, 4, 4},                    /* cost of storing integer registers */
  4,                            /* cost of reg,reg fld/fst */
  {12, 12, 12},                 /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {6, 6, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {8, 8},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {8, 8},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {8, 8, 8},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {8, 8, 8},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  5,                            /* MMX or SSE register to integer */
  32,                           /* size of l1 cache.  */
  256,                          /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  6,                            /* number of parallel prefetches */
  3,                            /* Branch cost */
  COSTS_N_INSNS (8),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (8),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),           /* cost of FSQRT instruction.  */
  {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {64, rep_prefix_4_byte},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {15, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{24, loop}, {32, unrolled_loop},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
1639 /* Generic64 should produce code tuned for Nocona and K8. */
1641 struct processor_costs generic64_cost = {
1642 COSTS_N_INSNS (1), /* cost of an add instruction */
1643 /* On all chips taken into consideration lea is 2 cycles and more. With
1644 this cost however our current implementation of synth_mult results in
1645 use of unnecessary temporary registers causing regression on several
1646 SPECfp benchmarks. */
1647 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1648 COSTS_N_INSNS (1), /* variable shift costs */
1649 COSTS_N_INSNS (1), /* constant shift costs */
1650 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1651 COSTS_N_INSNS (4), /* HI */
1652 COSTS_N_INSNS (3), /* SI */
1653 COSTS_N_INSNS (4), /* DI */
1654 COSTS_N_INSNS (2)}, /* other */
1655 0, /* cost of multiply per each bit set */
1656 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1657 COSTS_N_INSNS (26), /* HI */
1658 COSTS_N_INSNS (42), /* SI */
1659 COSTS_N_INSNS (74), /* DI */
1660 COSTS_N_INSNS (74)}, /* other */
1661 COSTS_N_INSNS (1), /* cost of movsx */
1662 COSTS_N_INSNS (1), /* cost of movzx */
1663 8, /* "large" insn */
1664 17, /* MOVE_RATIO */
1665 4, /* cost for loading QImode using movzbl */
1666 {4, 4, 4}, /* cost of loading integer registers
1667 in QImode, HImode and SImode.
1668 Relative to reg-reg move (2). */
1669 {4, 4, 4}, /* cost of storing integer registers */
1670 4, /* cost of reg,reg fld/fst */
1671 {12, 12, 12}, /* cost of loading fp registers
1672 in SFmode, DFmode and XFmode */
1673 {6, 6, 8}, /* cost of storing fp registers
1674 in SFmode, DFmode and XFmode */
1675 2, /* cost of moving MMX register */
1676 {8, 8}, /* cost of loading MMX registers
1677 in SImode and DImode */
1678 {8, 8}, /* cost of storing MMX registers
1679 in SImode and DImode */
1680 2, /* cost of moving SSE register */
1681 {8, 8, 8}, /* cost of loading SSE registers
1682 in SImode, DImode and TImode */
1683 {8, 8, 8}, /* cost of storing SSE registers
1684 in SImode, DImode and TImode */
1685 5, /* MMX or SSE register to integer */
1686 32, /* size of l1 cache. */
1687 512, /* size of l2 cache. */
1688 64, /* size of prefetch block */
1689 6, /* number of parallel prefetches */
  /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
     value is increased to the perhaps more appropriate value of 5.  */
1692 3, /* Branch cost */
1693 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1694 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1695 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1696 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1697 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1698 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1699 {DUMMY_STRINGOP_ALGS,
1700 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1701 {DUMMY_STRINGOP_ALGS,
1702 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1703 1, /* scalar_stmt_cost. */
  1,					/* scalar_load_cost.  */
1705 1, /* scalar_store_cost. */
1706 1, /* vec_stmt_cost. */
1707 1, /* vec_to_scalar_cost. */
1708 1, /* scalar_to_vec_cost. */
1709 1, /* vec_align_load_cost. */
1710 2, /* vec_unalign_load_cost. */
1711 1, /* vec_store_cost. */
1712 3, /* cond_taken_branch_cost. */
1713 1, /* cond_not_taken_branch_cost. */
1716 /* Generic32 should produce code tuned for PPro, Pentium4, Nocona,
1719 struct processor_costs generic32_cost = {
1720 COSTS_N_INSNS (1), /* cost of an add instruction */
1721 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1722 COSTS_N_INSNS (1), /* variable shift costs */
1723 COSTS_N_INSNS (1), /* constant shift costs */
1724 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1725 COSTS_N_INSNS (4), /* HI */
1726 COSTS_N_INSNS (3), /* SI */
1727 COSTS_N_INSNS (4), /* DI */
1728 COSTS_N_INSNS (2)}, /* other */
1729 0, /* cost of multiply per each bit set */
1730 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1731 COSTS_N_INSNS (26), /* HI */
1732 COSTS_N_INSNS (42), /* SI */
1733 COSTS_N_INSNS (74), /* DI */
1734 COSTS_N_INSNS (74)}, /* other */
1735 COSTS_N_INSNS (1), /* cost of movsx */
1736 COSTS_N_INSNS (1), /* cost of movzx */
1737 8, /* "large" insn */
1738 17, /* MOVE_RATIO */
1739 4, /* cost for loading QImode using movzbl */
1740 {4, 4, 4}, /* cost of loading integer registers
1741 in QImode, HImode and SImode.
1742 Relative to reg-reg move (2). */
1743 {4, 4, 4}, /* cost of storing integer registers */
1744 4, /* cost of reg,reg fld/fst */
1745 {12, 12, 12}, /* cost of loading fp registers
1746 in SFmode, DFmode and XFmode */
1747 {6, 6, 8}, /* cost of storing fp registers
1748 in SFmode, DFmode and XFmode */
1749 2, /* cost of moving MMX register */
1750 {8, 8}, /* cost of loading MMX registers
1751 in SImode and DImode */
1752 {8, 8}, /* cost of storing MMX registers
1753 in SImode and DImode */
1754 2, /* cost of moving SSE register */
1755 {8, 8, 8}, /* cost of loading SSE registers
1756 in SImode, DImode and TImode */
1757 {8, 8, 8}, /* cost of storing SSE registers
1758 in SImode, DImode and TImode */
1759 5, /* MMX or SSE register to integer */
1760 32, /* size of l1 cache. */
1761 256, /* size of l2 cache. */
1762 64, /* size of prefetch block */
1763 6, /* number of parallel prefetches */
1764 3, /* Branch cost */
1765 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1766 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1767 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1768 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1769 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1770 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1771 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1772 DUMMY_STRINGOP_ALGS},
1773 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1774 DUMMY_STRINGOP_ALGS},
1775 1, /* scalar_stmt_cost. */
  1,					/* scalar_load_cost.  */
1777 1, /* scalar_store_cost. */
1778 1, /* vec_stmt_cost. */
1779 1, /* vec_to_scalar_cost. */
1780 1, /* scalar_to_vec_cost. */
1781 1, /* vec_align_load_cost. */
1782 2, /* vec_unalign_load_cost. */
1783 1, /* vec_store_cost. */
1784 3, /* cond_taken_branch_cost. */
1785 1, /* cond_not_taken_branch_cost. */
1788 const struct processor_costs *ix86_cost = &pentium_cost;
1790 /* Processor feature/optimization bitmasks. */
1791 #define m_386 (1<<PROCESSOR_I386)
1792 #define m_486 (1<<PROCESSOR_I486)
1793 #define m_PENT (1<<PROCESSOR_PENTIUM)
1794 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1795 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1796 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1797 #define m_CORE2_32 (1<<PROCESSOR_CORE2_32)
1798 #define m_CORE2_64 (1<<PROCESSOR_CORE2_64)
1799 #define m_COREI7_32 (1<<PROCESSOR_COREI7_32)
1800 #define m_COREI7_64 (1<<PROCESSOR_COREI7_64)
1801 #define m_COREI7 (m_COREI7_32 | m_COREI7_64)
1802 #define m_CORE2I7_32 (m_CORE2_32 | m_COREI7_32)
1803 #define m_CORE2I7_64 (m_CORE2_64 | m_COREI7_64)
1804 #define m_CORE2I7 (m_CORE2I7_32 | m_CORE2I7_64)
1805 #define m_ATOM (1<<PROCESSOR_ATOM)
1807 #define m_GEODE (1<<PROCESSOR_GEODE)
1808 #define m_K6 (1<<PROCESSOR_K6)
1809 #define m_K6_GEODE (m_K6 | m_GEODE)
1810 #define m_K8 (1<<PROCESSOR_K8)
1811 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1812 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1813 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1814 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
1815 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
1816 #define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10 | m_BDVER1 | m_BTVER1)
1818 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1819 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
/* Generic instruction choice should be a common subset of supported CPUs
   (PPro/PENT4/NOCONA/CORE2/Athlon/K8).  */
1823 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1825 /* Feature tests against the various tunings. */
1826 unsigned char ix86_tune_features[X86_TUNE_LAST];
1828 /* Feature tests against the various tunings used to create ix86_tune_features
1829 based on the processor mask. */
1830 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
  /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
     negatively, so enabling it for Generic64 seems like a good code-size
     tradeoff.  We can't enable it for 32bit generic because it does not
     work well with PPro-based chips.  */
1835 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2I7_64 | m_GENERIC64,
1837 /* X86_TUNE_PUSH_MEMORY */
1838 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1839 | m_NOCONA | m_CORE2I7 | m_GENERIC,
1841 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1844 /* X86_TUNE_UNROLL_STRLEN */
1845 m_486 | m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_K6
1846 | m_CORE2I7 | m_GENERIC,
1848 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1849 m_ATOM | m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1850 | m_CORE2I7 | m_GENERIC,
  /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
     on simulation results.  But after the P4 was made, no performance
     benefit was observed with branch hints.  They also increase code size.
     As a result, icc never generates branch hints.  */
1858 /* X86_TUNE_DOUBLE_WITH_ADD */
1861 /* X86_TUNE_USE_SAHF */
1862 m_ATOM | m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER1 | m_BTVER1
1863 | m_PENT4 | m_NOCONA | m_CORE2I7 | m_GENERIC,
1865 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1866 partial dependencies. */
1867 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA
1868 | m_CORE2I7 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
  /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
     register stalls in the Generic32 compilation setting as well.  However,
     in the current implementation partial register stalls are not eliminated
     very well: they can be introduced via subregs synthesized by combine
     and can occur in caller/callee-saving sequences.  Because this option
     pays back little on PPro-based chips and conflicts with the partial
     register dependencies used by Athlon/P4-based chips, it is better to
     leave it off for generic32 for now.  */
1880 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1881 m_CORE2I7 | m_GENERIC,
1883 /* X86_TUNE_USE_HIMODE_FIOP */
1884 m_386 | m_486 | m_K6_GEODE,
1886 /* X86_TUNE_USE_SIMODE_FIOP */
1887 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_ATOM | m_CORE2I7 | m_GENERIC),
1889 /* X86_TUNE_USE_MOV0 */
1892 /* X86_TUNE_USE_CLTD */
1893 ~(m_PENT | m_ATOM | m_K6 | m_CORE2I7 | m_GENERIC),
1895 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1898 /* X86_TUNE_SPLIT_LONG_MOVES */
1901 /* X86_TUNE_READ_MODIFY_WRITE */
1904 /* X86_TUNE_READ_MODIFY */
1907 /* X86_TUNE_PROMOTE_QIMODE */
1908 m_K6_GEODE | m_PENT | m_ATOM | m_386 | m_486 | m_AMD_MULTIPLE
1909 | m_CORE2I7 | m_GENERIC /* | m_PENT4 ? */,
1911 /* X86_TUNE_FAST_PREFIX */
1912 ~(m_PENT | m_486 | m_386),
1914 /* X86_TUNE_SINGLE_STRINGOP */
1915 m_386 | m_PENT4 | m_NOCONA,
1917 /* X86_TUNE_QIMODE_MATH */
  /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
     register stalls.  Just like X86_TUNE_PARTIAL_REG_STALL, this option
     might be considered for Generic32 if our scheme for avoiding partial
     stalls were more effective.  */
1926 /* X86_TUNE_PROMOTE_QI_REGS */
1929 /* X86_TUNE_PROMOTE_HI_REGS */
  /* X86_TUNE_SINGLE_POP: Enable if a single pop insn is preferred
     over esp addition.  */
  m_386 | m_486 | m_PENT | m_PPRO,

  /* X86_TUNE_DOUBLE_POP: Enable if a double pop insn is preferred
     over esp addition.  */

  /* X86_TUNE_SINGLE_PUSH: Enable if a single push insn is preferred
     over esp subtraction.  */
  m_386 | m_486 | m_PENT | m_K6_GEODE,

  /* X86_TUNE_DOUBLE_PUSH: Enable if a double push insn is preferred
     over esp subtraction.  */
  m_PENT | m_K6_GEODE,

  /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
     for DFmode copies.  */
1950 ~(m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2I7
1951 | m_GENERIC | m_GEODE),
1953 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1954 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2I7 | m_GENERIC,
  /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
     conflict here between PPro/Pentium4-based chips that treat 128bit
     SSE registers as single units and K8-based chips that divide SSE
     registers into two 64bit halves.  This knob promotes all store
     destinations to be 128bit so as to allow register renaming on 128bit
     SSE units, but usually results in one extra microop on 64bit SSE units.
     Experimental results show that disabling this option on P4 brings over a
     20% SPECfp regression, while enabling it on K8 brings roughly a 2.4%
     regression that can be partly masked by careful scheduling of moves.  */
1965 m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2I7 | m_GENERIC
1966 | m_AMDFAM10 | m_BDVER1,
1968 /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
1969 m_AMDFAM10 | m_BDVER1 | m_BTVER1 | m_COREI7,
1971 /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
1972 m_BDVER1 | m_COREI7,
1974 /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
  /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where types and dependencies
     are resolved on SSE register parts instead of whole registers, so we may
     maintain just the lower part of scalar values in the proper format,
     leaving the upper part undefined.  */
1983 /* X86_TUNE_SSE_TYPELESS_STORES */
1986 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1987 m_PPRO | m_PENT4 | m_NOCONA,
1989 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1990 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2I7 | m_GENERIC,
1992 /* X86_TUNE_PROLOGUE_USING_MOVE */
1993 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2I7 | m_GENERIC,
1995 /* X86_TUNE_EPILOGUE_USING_MOVE */
1996 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2I7 | m_GENERIC,
1998 /* X86_TUNE_SHIFT1 */
2001 /* X86_TUNE_USE_FFREEP */
2004 /* X86_TUNE_INTER_UNIT_MOVES */
2005 ~(m_AMD_MULTIPLE | m_GENERIC),
2007 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
2008 ~(m_AMDFAM10 | m_BDVER1),
  /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
     than 4 branch instructions in a 16-byte window.  */
2012 m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2I7
2015 /* X86_TUNE_SCHEDULE */
2016 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_ATOM | m_CORE2I7
2019 /* X86_TUNE_USE_BT */
2020 m_AMD_MULTIPLE | m_ATOM | m_CORE2I7 | m_GENERIC,
2022 /* X86_TUNE_USE_INCDEC */
2023 ~(m_PENT4 | m_NOCONA | m_CORE2I7 | m_GENERIC | m_ATOM),
2025 /* X86_TUNE_PAD_RETURNS */
2026 m_AMD_MULTIPLE | m_CORE2I7 | m_GENERIC,
  /* X86_TUNE_PAD_SHORT_FUNCTION: Pad short functions.  */
2031 /* X86_TUNE_EXT_80387_CONSTANTS */
2032 m_K6_GEODE | m_ATHLON_K8 | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO
2033 | m_CORE2I7 | m_GENERIC,
2035 /* X86_TUNE_SHORTEN_X87_SSE */
2038 /* X86_TUNE_AVOID_VECTOR_DECODE */
2039 m_K8 | m_CORE2I7_64 | m_GENERIC64,
  /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for
     HImode and SImode multiply, but the 386 and 486 do HImode multiply
     faster.  */
  /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of a 32-bit constant and memory is a
     vector path on AMD machines.  */
2047 m_K8 | m_CORE2I7_64 | m_GENERIC64 | m_AMDFAM10 | m_BDVER1 | m_BTVER1,
  /* X86_TUNE_SLOW_IMUL_IMM8: Imul of an 8-bit constant is a vector path on
     AMD machines.  */
2051 m_K8 | m_CORE2I7_64 | m_GENERIC64 | m_AMDFAM10 | m_BDVER1 | m_BTVER1,
  /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
     than a mov.  */
2057 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
2058 but one byte longer. */
  /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with a memory
     operand that cannot be represented using a modRM byte.  The XOR
     replacement is long decoded, so this split helps here as well.  */
  /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
     from FP to FP.  */
2068 m_AMDFAM10 | m_CORE2I7 | m_GENERIC,
2070 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
2071 from integer to FP. */
2074 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
2075 with a subsequent conditional jump instruction into a single
2076 compare-and-branch uop. */
2079 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
2080 will impact LEA instruction selection. */
  /* X86_TUNE_VECTORIZE_DOUBLE: Enable double precision vector
     instructions.  */
2088 /* Feature tests against the various architecture variations. */
2089 unsigned char ix86_arch_features[X86_ARCH_LAST];
2091 /* Feature tests against the various architecture variations, used to create
2092 ix86_arch_features based on the processor mask. */
2093 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2094 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
2095 ~(m_386 | m_486 | m_PENT | m_K6),
2097 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2100 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2103 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2106 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2110 static const unsigned int x86_accumulate_outgoing_args
2111 = m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2I7
2114 static const unsigned int x86_arch_always_fancy_math_387
2115 = m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4
2116 | m_NOCONA | m_CORE2I7 | m_GENERIC;
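/* A minimal sketch (an assumption about code elsewhere in this file, not a
   verbatim quote): ix86_option_override_internal materializes the two
   feature arrays from the initial_* masks above roughly like this:

     ix86_tune_mask = 1u << ix86_tune;
     for (i = 0; i < X86_TUNE_LAST; ++i)
       ix86_tune_features[i]
	 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);

     ix86_arch_mask = 1u << ix86_arch;
     for (i = 0; i < X86_ARCH_LAST; ++i)
       ix86_arch_features[i]
	 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);

   so each per-processor bit in a mask becomes one boolean feature flag.  */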
2118 static enum stringop_alg stringop_alg = no_stringop;
/* In case the average insn count for a single function invocation is
   lower than this constant, emit a fast (but longer) prologue and
   epilogue.  */
2123 #define FAST_PROLOGUE_INSN_COUNT 20
2125 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
2126 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2127 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2128 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2130 /* Array of the smallest class containing reg number REGNO, indexed by
2131 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2133 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2135 /* ax, dx, cx, bx */
2136 AREG, DREG, CREG, BREG,
2137 /* si, di, bp, sp */
2138 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2140 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2141 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2144 /* flags, fpsr, fpcr, frame */
2145 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2147 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2150 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2153 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2154 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2155 /* SSE REX registers */
2156 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2160 /* The "default" register map used in 32bit mode. */
2162 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2164 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2165 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2166 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2167 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2168 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2169 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2170 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2173 /* The "default" register map used in 64bit mode. */
2175 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2177 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2178 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2179 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2180 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2181 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2182 8,9,10,11,12,13,14,15, /* extended integer registers */
2183 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
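/* Note the contrast with the 32-bit map above: the extended integer
   registers (r8-r15, DWARF 8-15) and the extended SSE registers exist
   only in 64-bit mode, which is why dbx_register_map holds -1 for those
   slots while dbx64_register_map assigns them real numbers.  */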
2186 /* Define the register numbers to be used in Dwarf debugging information.
2187 The SVR4 reference port C compiler uses the following register numbers
2188 in its Dwarf output code:
2189 0 for %eax (gcc regno = 0)
2190 1 for %ecx (gcc regno = 2)
2191 2 for %edx (gcc regno = 1)
2192 3 for %ebx (gcc regno = 3)
2193 4 for %esp (gcc regno = 7)
2194 5 for %ebp (gcc regno = 6)
2195 6 for %esi (gcc regno = 4)
2196 7 for %edi (gcc regno = 5)
2197 The following three DWARF register numbers are never generated by
2198 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2199 believes these numbers have these meanings.
2200 8 for %eip (no gcc equivalent)
2201 9 for %eflags (gcc regno = 17)
2202 10 for %trapno (no gcc equivalent)
2203 It is not at all clear how we should number the FP stack registers
2204 for the x86 architecture. If the version of SDB on x86/svr4 were
2205 a bit less brain dead with respect to floating-point then we would
2206 have a precedent to follow with respect to DWARF register numbers
2207 for x86 FP registers, but the SDB on x86/svr4 is so completely
2208 broken with respect to FP registers that it is hardly worth thinking
2209 of it as something to strive for compatibility with.
2210 The version of x86/svr4 SDB I have at the moment does (partially)
2211 seem to believe that DWARF register number 11 is associated with
2212 the x86 register %st(0), but that's about all. Higher DWARF
2213 register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seems to
   understand that it should say that a variable lives in %st(0) (when
2216 asked via an `=' command) if we said it was in DWARF regno 11,
2217 but SDB still prints garbage when asked for the value of the
2218 variable in question (via a `/' command).
2219 (Also note that the labels SDB prints for various FP stack regs
2220 when doing an `x' command are all wrong.)
2221 Note that these problems generally don't affect the native SVR4
2222 C compiler because it doesn't allow the use of -O with -g and
2223 because when it is *not* optimizing, it allocates a memory
2224 location for each floating-point variable, and the memory
2225 location is what gets described in the DWARF AT_location
2226 attribute for the variable in question.
2227 Regardless of the severe mental illness of the x86/svr4 SDB, we
2228 do something sensible here and we use the following DWARF
2229 register numbers. Note that these are all stack-top-relative
2231 11 for %st(0) (gcc regno = 8)
2232 12 for %st(1) (gcc regno = 9)
2233 13 for %st(2) (gcc regno = 10)
2234 14 for %st(3) (gcc regno = 11)
2235 15 for %st(4) (gcc regno = 12)
2236 16 for %st(5) (gcc regno = 13)
2237 17 for %st(6) (gcc regno = 14)
2238 18 for %st(7) (gcc regno = 15)
2240 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2242 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2243 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2244 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2245 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2246 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2247 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2248 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
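/* Worked example of the numbering above: %ebp is gcc regno 6 and maps to
   DWARF regno 5 (svr4_dbx_register_map[6] == 5), while %esp is gcc regno 7
   and maps to DWARF regno 4, exactly as listed in the SVR4 table in the
   comment preceding this map.  */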
2251 /* Define parameter passing and return registers. */
2253 static int const x86_64_int_parameter_registers[6] =
2255 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2258 static int const x86_64_ms_abi_int_parameter_registers[4] =
2260 CX_REG, DX_REG, R8_REG, R9_REG
2263 static int const x86_64_int_return_registers[4] =
2265 AX_REG, DX_REG, DI_REG, SI_REG
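/* For example: under the SysV ABI the first two integer arguments arrive
   in %rdi and %rsi, while the MS ABI passes them in %rcx and %rdx, as the
   two parameter-register tables above encode.  */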
2268 /* Define the structure for the machine field in struct function. */
2270 struct GTY(()) stack_local_entry {
2271 unsigned short mode;
2274 struct stack_local_entry *next;
/* Structure describing stack frame layout.
   Stack grows downward:

   saved static chain			if ix86_static_chain_on_stack

   saved frame pointer			if frame_pointer_needed
					<- HARD_FRAME_POINTER

					<- sse_regs_save_offset

   [va_arg registers]	|
   [padding2]		| = to_allocate
*/
2309 int outgoing_arguments_size;
2310 HOST_WIDE_INT frame;
2312 /* The offsets relative to ARG_POINTER. */
2313 HOST_WIDE_INT frame_pointer_offset;
2314 HOST_WIDE_INT hard_frame_pointer_offset;
2315 HOST_WIDE_INT stack_pointer_offset;
2316 HOST_WIDE_INT hfp_save_offset;
2317 HOST_WIDE_INT reg_save_offset;
2318 HOST_WIDE_INT sse_reg_save_offset;
2320 /* When save_regs_using_mov is set, emit prologue using
2321 move instead of push instructions. */
2322 bool save_regs_using_mov;
2325 /* Code model option. */
2326 enum cmodel ix86_cmodel;
2328 enum asm_dialect ix86_asm_dialect = ASM_ATT;
2330 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
2332 /* Which unit we are generating floating point math for. */
2333 enum fpmath_unit ix86_fpmath;
/* Which CPU we are scheduling for.  */
enum attr_cpu ix86_schedule;

/* Which CPU we are optimizing for.  */
enum processor_type ix86_tune;
2341 /* Which instruction set architecture to use. */
2342 enum processor_type ix86_arch;
/* True if the SSE prefetch instruction is not a NOP.  */
2345 int x86_prefetch_sse;
2347 /* ix86_regparm_string as a number */
2348 static int ix86_regparm;
2350 /* -mstackrealign option */
2351 static const char ix86_force_align_arg_pointer_string[]
2352 = "force_align_arg_pointer";
2354 static rtx (*ix86_gen_leave) (void);
2355 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2356 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2357 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2358 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2359 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2360 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2361 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2362 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2363 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2365 /* Preferred alignment for stack boundary in bits. */
2366 unsigned int ix86_preferred_stack_boundary;
/* Alignment for incoming stack boundary in bits specified at
   the command line.  */
2370 static unsigned int ix86_user_incoming_stack_boundary;
2372 /* Default alignment for incoming stack boundary in bits. */
2373 static unsigned int ix86_default_incoming_stack_boundary;
2375 /* Alignment for incoming stack boundary in bits. */
2376 unsigned int ix86_incoming_stack_boundary;
/* The ABI used by the target.  */
2379 enum calling_abi ix86_abi;
2381 /* Values 1-5: see jump.c */
2382 int ix86_branch_cost;
/* Calling-ABI-specific va_list type nodes.  */
2385 static GTY(()) tree sysv_va_list_type_node;
2386 static GTY(()) tree ms_va_list_type_node;
2388 /* Variables which are this size or smaller are put in the data/bss
2389 or ldata/lbss sections. */
2391 int ix86_section_threshold = 65536;
2393 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2394 char internal_label_prefix[16];
2395 int internal_label_prefix_len;
2397 /* Fence to use after loop using movnt. */
/* Register class used for passing a given 64bit part of the argument.
   These represent classes as documented by the psABI, with the exception
   of the SSESF and SSEDF classes, which are basically the SSE class: gcc
   will just use an SF or DFmode move instead of a DImode one to avoid
   reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (the upper half does contain padding).  */
2407 enum x86_64_reg_class
2410 X86_64_INTEGER_CLASS,
2411 X86_64_INTEGERSI_CLASS,
2418 X86_64_COMPLEX_X87_CLASS,
2422 #define MAX_CLASSES 4
/* Table of constants used by fldpi, fldln2, etc.  */
2425 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2426 static bool ext_80387_constants_init = 0;
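/* An assumption worth stating: the five slots presumably correspond to the
   x87 load-constant instructions fldl2t, fldl2e, fldpi, fldlg2 and fldln2
   (log2(10), log2(e), pi, log10(2) and ln(2)); fldz and fld1 need no
   table entry.  */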
2429 static struct machine_function * ix86_init_machine_status (void);
2430 static rtx ix86_function_value (const_tree, const_tree, bool);
2431 static bool ix86_function_value_regno_p (const unsigned int);
2432 static unsigned int ix86_function_arg_boundary (enum machine_mode,
2434 static rtx ix86_static_chain (const_tree, bool);
2435 static int ix86_function_regparm (const_tree, const_tree);
2436 static void ix86_compute_frame_layout (struct ix86_frame *);
2437 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
2439 static void ix86_add_new_builtins (int);
2440 static rtx ix86_expand_vec_perm_builtin (tree);
2441 static tree ix86_canonical_va_list_type (tree);
2442 static void predict_jump (int);
2443 static unsigned int split_stack_prologue_scratch_regno (void);
2444 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2446 enum ix86_function_specific_strings
2448 IX86_FUNCTION_SPECIFIC_ARCH,
2449 IX86_FUNCTION_SPECIFIC_TUNE,
2450 IX86_FUNCTION_SPECIFIC_FPMATH,
2451 IX86_FUNCTION_SPECIFIC_MAX
2454 static char *ix86_target_string (int, int, const char *, const char *,
2455 const char *, bool);
2456 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
2457 static void ix86_function_specific_save (struct cl_target_option *);
2458 static void ix86_function_specific_restore (struct cl_target_option *);
2459 static void ix86_function_specific_print (FILE *, int,
2460 struct cl_target_option *);
2461 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2462 static bool ix86_valid_target_attribute_inner_p (tree, char *[]);
2463 static bool ix86_can_inline_p (tree, tree);
2464 static void ix86_set_current_function (tree);
2465 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2467 static enum calling_abi ix86_function_abi (const_tree);
2470 #ifndef SUBTARGET32_DEFAULT_CPU
2471 #define SUBTARGET32_DEFAULT_CPU "i386"
/* The svr4 ABI for the i386 says that records and unions are returned
   in memory.  */
2476 #ifndef DEFAULT_PCC_STRUCT_RETURN
2477 #define DEFAULT_PCC_STRUCT_RETURN 1
2480 /* Whether -mtune= or -march= were specified */
2481 static int ix86_tune_defaulted;
2482 static int ix86_arch_specified;
2484 /* A mask of ix86_isa_flags that includes bit X if X
2485 was set or cleared on the command line. */
2486 static int ix86_isa_flags_explicit;
2488 /* Define a set of ISAs which are available when a given ISA is
2489 enabled. MMX and SSE ISAs are handled separately. */
2491 #define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
2492 #define OPTION_MASK_ISA_3DNOW_SET \
2493 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
2495 #define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
2496 #define OPTION_MASK_ISA_SSE2_SET \
2497 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
2498 #define OPTION_MASK_ISA_SSE3_SET \
2499 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
2500 #define OPTION_MASK_ISA_SSSE3_SET \
2501 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
2502 #define OPTION_MASK_ISA_SSE4_1_SET \
2503 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
2504 #define OPTION_MASK_ISA_SSE4_2_SET \
2505 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
2506 #define OPTION_MASK_ISA_AVX_SET \
2507 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_SSE4_2_SET)
2508 #define OPTION_MASK_ISA_FMA_SET \
2509 (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_AVX_SET)
/* SSE4 includes both SSE4.1 and SSE4.2.  -msse4 should be the same
   as -msse4.2.  */
2513 #define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
2515 #define OPTION_MASK_ISA_SSE4A_SET \
2516 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
2517 #define OPTION_MASK_ISA_FMA4_SET \
2518 (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_SSE4A_SET \
2519 | OPTION_MASK_ISA_AVX_SET)
2520 #define OPTION_MASK_ISA_XOP_SET \
2521 (OPTION_MASK_ISA_XOP | OPTION_MASK_ISA_FMA4_SET)
2522 #define OPTION_MASK_ISA_LWP_SET \
2525 /* AES and PCLMUL need SSE2 because they use xmm registers */
2526 #define OPTION_MASK_ISA_AES_SET \
2527 (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET)
2528 #define OPTION_MASK_ISA_PCLMUL_SET \
2529 (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET)
2531 #define OPTION_MASK_ISA_ABM_SET \
2532 (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
2534 #define OPTION_MASK_ISA_BMI_SET OPTION_MASK_ISA_BMI
2535 #define OPTION_MASK_ISA_TBM_SET OPTION_MASK_ISA_TBM
2536 #define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
2537 #define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
2538 #define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF
2539 #define OPTION_MASK_ISA_MOVBE_SET OPTION_MASK_ISA_MOVBE
2540 #define OPTION_MASK_ISA_CRC32_SET OPTION_MASK_ISA_CRC32
2542 #define OPTION_MASK_ISA_FSGSBASE_SET OPTION_MASK_ISA_FSGSBASE
2543 #define OPTION_MASK_ISA_RDRND_SET OPTION_MASK_ISA_RDRND
2544 #define OPTION_MASK_ISA_F16C_SET \
2545 (OPTION_MASK_ISA_F16C | OPTION_MASK_ISA_AVX_SET)
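/* Illustration, derived purely from the macros above: because each _SET
   macro ORs in its prerequisite's _SET mask, OPTION_MASK_ISA_AVX_SET
   expands transitively to

     OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1
     | OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2
     | OPTION_MASK_ISA_SSE

   so a single -mavx records the whole SSE dependency chain.  */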
2547 /* Define a set of ISAs which aren't available when a given ISA is
2548 disabled. MMX and SSE ISAs are handled separately. */
2550 #define OPTION_MASK_ISA_MMX_UNSET \
2551 (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
2552 #define OPTION_MASK_ISA_3DNOW_UNSET \
2553 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
2554 #define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
2556 #define OPTION_MASK_ISA_SSE_UNSET \
2557 (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
2558 #define OPTION_MASK_ISA_SSE2_UNSET \
2559 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
2560 #define OPTION_MASK_ISA_SSE3_UNSET \
2561 (OPTION_MASK_ISA_SSE3 \
2562 | OPTION_MASK_ISA_SSSE3_UNSET \
2563 | OPTION_MASK_ISA_SSE4A_UNSET )
2564 #define OPTION_MASK_ISA_SSSE3_UNSET \
2565 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
2566 #define OPTION_MASK_ISA_SSE4_1_UNSET \
2567 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
2568 #define OPTION_MASK_ISA_SSE4_2_UNSET \
2569 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET )
2570 #define OPTION_MASK_ISA_AVX_UNSET \
2571 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET \
2572 | OPTION_MASK_ISA_FMA4_UNSET | OPTION_MASK_ISA_F16C_UNSET)
2573 #define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA
/* SSE4 includes both SSE4.1 and SSE4.2.  -mno-sse4 should be the same
   as -mno-sse4.1.  */
2577 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
2579 #define OPTION_MASK_ISA_SSE4A_UNSET \
2580 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_FMA4_UNSET)
2582 #define OPTION_MASK_ISA_FMA4_UNSET \
2583 (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_XOP_UNSET)
2584 #define OPTION_MASK_ISA_XOP_UNSET OPTION_MASK_ISA_XOP
2585 #define OPTION_MASK_ISA_LWP_UNSET OPTION_MASK_ISA_LWP
2587 #define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
2588 #define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
2589 #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
2590 #define OPTION_MASK_ISA_BMI_UNSET OPTION_MASK_ISA_BMI
2591 #define OPTION_MASK_ISA_TBM_UNSET OPTION_MASK_ISA_TBM
2592 #define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
2593 #define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
2594 #define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
2595 #define OPTION_MASK_ISA_MOVBE_UNSET OPTION_MASK_ISA_MOVBE
2596 #define OPTION_MASK_ISA_CRC32_UNSET OPTION_MASK_ISA_CRC32
2598 #define OPTION_MASK_ISA_FSGSBASE_UNSET OPTION_MASK_ISA_FSGSBASE
2599 #define OPTION_MASK_ISA_RDRND_UNSET OPTION_MASK_ISA_RDRND
2600 #define OPTION_MASK_ISA_F16C_UNSET OPTION_MASK_ISA_F16C
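/* Illustration, again derived from the macros above: the _UNSET chain runs
   in the opposite direction, so OPTION_MASK_ISA_SSE2_UNSET transitively
   clears SSE3, SSSE3, SSE4.1, SSE4.2, SSE4A, AVX, FMA, FMA4, XOP and F16C;
   i.e. -mno-sse2 turns off everything that depends on SSE2.  */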
2602 /* Vectorization library interface and handlers. */
2603 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2605 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2606 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2608 /* Processor target table, indexed by processor number */
2611 const struct processor_costs *cost; /* Processor costs */
2612 const int align_loop; /* Default alignments. */
2613 const int align_loop_max_skip;
2614 const int align_jump;
2615 const int align_jump_max_skip;
2616 const int align_func;
2619 static const struct ptt processor_target_table[PROCESSOR_max] =
2621 {&i386_cost, 4, 3, 4, 3, 4},
2622 {&i486_cost, 16, 15, 16, 15, 16},
2623 {&pentium_cost, 16, 7, 16, 7, 16},
2624 {&pentiumpro_cost, 16, 15, 16, 10, 16},
2625 {&geode_cost, 0, 0, 0, 0, 0},
2626 {&k6_cost, 32, 7, 32, 7, 32},
2627 {&athlon_cost, 16, 7, 16, 7, 16},
2628 {&pentium4_cost, 0, 0, 0, 0, 0},
2629 {&k8_cost, 16, 7, 16, 7, 16},
2630 {&nocona_cost, 0, 0, 0, 0, 0},
2631 /* Core 2 32-bit. */
2632 {&generic32_cost, 16, 10, 16, 10, 16},
2633 /* Core 2 64-bit. */
2634 {&generic64_cost, 16, 10, 16, 10, 16},
2635 /* Core i7 32-bit. */
2636 {&generic32_cost, 16, 10, 16, 10, 16},
2637 /* Core i7 64-bit. */
2638 {&generic64_cost, 16, 10, 16, 10, 16},
2639 {&generic32_cost, 16, 7, 16, 7, 16},
2640 {&generic64_cost, 16, 10, 16, 10, 16},
2641 {&amdfam10_cost, 32, 24, 32, 7, 32},
2642 {&bdver1_cost, 32, 24, 32, 7, 32},
2643 {&btver1_cost, 32, 24, 32, 7, 32},
2644 {&atom_cost, 16, 7, 16, 7, 16}
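/* A sketch of how these entries are consumed (an assumption about
   ix86_option_override_internal below, shown here for orientation):

     if (align_loops == 0)
       {
	 align_loops = processor_target_table[ix86_tune].align_loop;
	 align_loops_max_skip
	   = processor_target_table[ix86_tune].align_loop_max_skip;
       }

   and similarly for align_jumps and align_functions.  */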
2647 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
2676 /* Return true if a red-zone is in use. */
2679 ix86_using_red_zone (void)
2681 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
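/* Background: the SysV x86-64 psABI guarantees a 128-byte red zone below
   the stack pointer that leaf code may use without adjusting %rsp; the
   Microsoft x64 ABI provides no such zone, hence the TARGET_64BIT_MS_ABI
   exclusion above.  */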
2684 /* Implement TARGET_HANDLE_OPTION. */
2687 ix86_handle_option (struct gcc_options *opts, struct gcc_options *opts_set,
2688 const struct cl_decoded_option *decoded,
2689 location_t loc ATTRIBUTE_UNUSED)
2691 size_t code = decoded->opt_index;
2692 int value = decoded->value;
2694 gcc_assert (opts == &global_options);
2695 gcc_assert (opts_set == &global_options_set);
2702 ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
2703 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
2707 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
2708 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
2715 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
2716 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
2720 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
2721 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
2731 ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
2732 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
2736 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
2737 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
2744 ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
2745 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
2749 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
2750 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
2757 ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
2758 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
2762 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
2763 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
2770 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
2771 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
2775 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
2776 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
2783 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
2784 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
2788 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
2789 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
2796 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
2797 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
2801 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
2802 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
2809 ix86_isa_flags |= OPTION_MASK_ISA_AVX_SET;
2810 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_SET;
2814 ix86_isa_flags &= ~OPTION_MASK_ISA_AVX_UNSET;
2815 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_UNSET;
2822 ix86_isa_flags |= OPTION_MASK_ISA_FMA_SET;
2823 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_SET;
2827 ix86_isa_flags &= ~OPTION_MASK_ISA_FMA_UNSET;
2828 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_UNSET;
2833 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
2834 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
2838 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
2839 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
2845 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
2846 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
2850 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
2851 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
2858 ix86_isa_flags |= OPTION_MASK_ISA_FMA4_SET;
2859 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_SET;
2863 ix86_isa_flags &= ~OPTION_MASK_ISA_FMA4_UNSET;
2864 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_UNSET;
2871 ix86_isa_flags |= OPTION_MASK_ISA_XOP_SET;
2872 ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_SET;
2876 ix86_isa_flags &= ~OPTION_MASK_ISA_XOP_UNSET;
2877 ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_UNSET;
2884 ix86_isa_flags |= OPTION_MASK_ISA_LWP_SET;
2885 ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_SET;
2889 ix86_isa_flags &= ~OPTION_MASK_ISA_LWP_UNSET;
2890 ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_UNSET;
2897 ix86_isa_flags |= OPTION_MASK_ISA_ABM_SET;
2898 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_SET;
2902 ix86_isa_flags &= ~OPTION_MASK_ISA_ABM_UNSET;
2903 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_UNSET;
2910 ix86_isa_flags |= OPTION_MASK_ISA_BMI_SET;
2911 ix86_isa_flags_explicit |= OPTION_MASK_ISA_BMI_SET;
2915 ix86_isa_flags &= ~OPTION_MASK_ISA_BMI_UNSET;
2916 ix86_isa_flags_explicit |= OPTION_MASK_ISA_BMI_UNSET;
2923 ix86_isa_flags |= OPTION_MASK_ISA_TBM_SET;
2924 ix86_isa_flags_explicit |= OPTION_MASK_ISA_TBM_SET;
2928 ix86_isa_flags &= ~OPTION_MASK_ISA_TBM_UNSET;
2929 ix86_isa_flags_explicit |= OPTION_MASK_ISA_TBM_UNSET;
2936 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT_SET;
2937 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_SET;
2941 ix86_isa_flags &= ~OPTION_MASK_ISA_POPCNT_UNSET;
2942 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_UNSET;
2949 ix86_isa_flags |= OPTION_MASK_ISA_SAHF_SET;
2950 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_SET;
2954 ix86_isa_flags &= ~OPTION_MASK_ISA_SAHF_UNSET;
2955 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_UNSET;
2962 ix86_isa_flags |= OPTION_MASK_ISA_CX16_SET;
2963 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_SET;
2967 ix86_isa_flags &= ~OPTION_MASK_ISA_CX16_UNSET;
2968 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_UNSET;
2975 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE_SET;
2976 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_SET;
2980 ix86_isa_flags &= ~OPTION_MASK_ISA_MOVBE_UNSET;
2981 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_UNSET;
2988 ix86_isa_flags |= OPTION_MASK_ISA_CRC32_SET;
2989 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_SET;
2993 ix86_isa_flags &= ~OPTION_MASK_ISA_CRC32_UNSET;
2994 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_UNSET;
3001 ix86_isa_flags |= OPTION_MASK_ISA_AES_SET;
3002 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_SET;
3006 ix86_isa_flags &= ~OPTION_MASK_ISA_AES_UNSET;
3007 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_UNSET;
3014 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL_SET;
3015 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_SET;
3019 ix86_isa_flags &= ~OPTION_MASK_ISA_PCLMUL_UNSET;
3020 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_UNSET;
3027 ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE_SET;
3028 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FSGSBASE_SET;
3032 ix86_isa_flags &= ~OPTION_MASK_ISA_FSGSBASE_UNSET;
3033 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FSGSBASE_UNSET;
3040 ix86_isa_flags |= OPTION_MASK_ISA_RDRND_SET;
3041 ix86_isa_flags_explicit |= OPTION_MASK_ISA_RDRND_SET;
3045 ix86_isa_flags &= ~OPTION_MASK_ISA_RDRND_UNSET;
3046 ix86_isa_flags_explicit |= OPTION_MASK_ISA_RDRND_UNSET;
3053 ix86_isa_flags |= OPTION_MASK_ISA_F16C_SET;
3054 ix86_isa_flags_explicit |= OPTION_MASK_ISA_F16C_SET;
3058 ix86_isa_flags &= ~OPTION_MASK_ISA_F16C_UNSET;
3059 ix86_isa_flags_explicit |= OPTION_MASK_ISA_F16C_UNSET;
3068 /* Return a string that documents the current -m options. The caller is
3069 responsible for freeing the string. */
3072 ix86_target_string (int isa, int flags, const char *arch, const char *tune,
3073 const char *fpmath, bool add_nl_p)
3075 struct ix86_target_opts
3077 const char *option; /* option string */
3078 int mask; /* isa mask options */
  /* This table is ordered so that options like -msse4.2 that imply
     preceding options are matched first.  */
3083 static struct ix86_target_opts isa_opts[] =
3085 { "-m64", OPTION_MASK_ISA_64BIT },
3086 { "-mfma4", OPTION_MASK_ISA_FMA4 },
3087 { "-mfma", OPTION_MASK_ISA_FMA },
3088 { "-mxop", OPTION_MASK_ISA_XOP },
3089 { "-mlwp", OPTION_MASK_ISA_LWP },
3090 { "-msse4a", OPTION_MASK_ISA_SSE4A },
3091 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
3092 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
3093 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
3094 { "-msse3", OPTION_MASK_ISA_SSE3 },
3095 { "-msse2", OPTION_MASK_ISA_SSE2 },
3096 { "-msse", OPTION_MASK_ISA_SSE },
3097 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
3098 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
3099 { "-mmmx", OPTION_MASK_ISA_MMX },
3100 { "-mabm", OPTION_MASK_ISA_ABM },
3101 { "-mbmi", OPTION_MASK_ISA_BMI },
3102 { "-mtbm", OPTION_MASK_ISA_TBM },
3103 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
3104 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
3105 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
3106 { "-maes", OPTION_MASK_ISA_AES },
3107 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
3108 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
3109 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
3110 { "-mf16c", OPTION_MASK_ISA_F16C },
3114 static struct ix86_target_opts flag_opts[] =
3116 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
3117 { "-m80387", MASK_80387 },
3118 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
3119 { "-malign-double", MASK_ALIGN_DOUBLE },
3120 { "-mcld", MASK_CLD },
3121 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
3122 { "-mieee-fp", MASK_IEEE_FP },
3123 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
3124 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
3125 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
3126 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
3127 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
3128 { "-mno-push-args", MASK_NO_PUSH_ARGS },
3129 { "-mno-red-zone", MASK_NO_RED_ZONE },
3130 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
3131 { "-mrecip", MASK_RECIP },
3132 { "-mrtd", MASK_RTD },
3133 { "-msseregparm", MASK_SSEREGPARM },
3134 { "-mstack-arg-probe", MASK_STACK_PROBE },
3135 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
3136 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
3137 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
3138 { "-mvzeroupper", MASK_VZEROUPPER },
3141 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
3144 char target_other[40];
3153 memset (opts, '\0', sizeof (opts));
3155 /* Add -march= option. */
3158 opts[num][0] = "-march=";
3159 opts[num++][1] = arch;
3162 /* Add -mtune= option. */
3165 opts[num][0] = "-mtune=";
3166 opts[num++][1] = tune;
3169 /* Pick out the options in isa options. */
3170 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
3172 if ((isa & isa_opts[i].mask) != 0)
3174 opts[num++][0] = isa_opts[i].option;
3175 isa &= ~ isa_opts[i].mask;
3179 if (isa && add_nl_p)
3181 opts[num++][0] = isa_other;
3182 sprintf (isa_other, "(other isa: %#x)", isa);
3185 /* Add flag options. */
3186 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
3188 if ((flags & flag_opts[i].mask) != 0)
3190 opts[num++][0] = flag_opts[i].option;
3191 flags &= ~ flag_opts[i].mask;
3195 if (flags && add_nl_p)
3197 opts[num++][0] = target_other;
3198 sprintf (target_other, "(other flags: %#x)", flags);
3201 /* Add -fpmath= option. */
3204 opts[num][0] = "-mfpmath=";
3205 opts[num++][1] = fpmath;
3212 gcc_assert (num < ARRAY_SIZE (opts));
3214 /* Size the string. */
3216 sep_len = (add_nl_p) ? 3 : 1;
3217 for (i = 0; i < num; i++)
3220 for (j = 0; j < 2; j++)
3222 len += strlen (opts[i][j]);
3225 /* Build the string. */
3226 ret = ptr = (char *) xmalloc (len);
3229 for (i = 0; i < num; i++)
3233 for (j = 0; j < 2; j++)
3234 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
3241 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
3249 for (j = 0; j < 2; j++)
3252 memcpy (ptr, opts[i][j], len2[j]);
3254 line_len += len2[j];
3259 gcc_assert (ret + len >= ptr);
/* Return TRUE if software prefetching is beneficial for the
   CPU we are tuning for.  */
3268 software_prefetching_beneficial_p (void)
3272 case PROCESSOR_GEODE:
3274 case PROCESSOR_ATHLON:
3276 case PROCESSOR_AMDFAM10:
3277 case PROCESSOR_BTVER1:
/* Return true if profiling code should be emitted before the prologue,
   and false otherwise.
   Note: for x86 with "hotfix", sorry () is issued.  */
3289 ix86_profile_before_prologue (void)
3291 return flag_fentry != 0;
/* Function that is callable from the debugger to print the current
   options.  */
3297 ix86_debug_options (void)
3299 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
3300 ix86_arch_string, ix86_tune_string,
3301 ix86_fpmath_string, true);
3305 fprintf (stderr, "%s\n\n", opts);
3309 fputs ("<no options>\n\n", stderr);
/* Override various settings based on options.  If MAIN_ARGS_P, the
   options are from the command line, otherwise they are from
   attribute(target).  */
3319 ix86_option_override_internal (bool main_args_p)
3322 unsigned int ix86_arch_mask, ix86_tune_mask;
3323 const bool ix86_tune_specified = (ix86_tune_string != NULL);
3328 /* Comes from final.c -- no real reason to change it. */
3329 #define MAX_CODE_ALIGN 16
3337 PTA_PREFETCH_SSE = 1 << 4,
3339 PTA_3DNOW_A = 1 << 6,
3343 PTA_POPCNT = 1 << 10,
3345 PTA_SSE4A = 1 << 12,
3346 PTA_NO_SAHF = 1 << 13,
3347 PTA_SSE4_1 = 1 << 14,
3348 PTA_SSE4_2 = 1 << 15,
3350 PTA_PCLMUL = 1 << 17,
3353 PTA_MOVBE = 1 << 20,
3357 PTA_FSGSBASE = 1 << 24,
3358 PTA_RDRND = 1 << 25,
3362 /* if this reaches 32, need to widen struct pta flags below */
3367 const char *const name; /* processor name or nickname. */
3368 const enum processor_type processor;
3369 const enum attr_cpu schedule;
3370 const unsigned /*enum pta_flags*/ flags;
3372 const processor_alias_table[] =
3374 {"i386", PROCESSOR_I386, CPU_NONE, 0},
3375 {"i486", PROCESSOR_I486, CPU_NONE, 0},
3376 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3377 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3378 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
3379 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
3380 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
3381 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
3382 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
3383 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3384 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3385 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
3386 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3388 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3390 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3391 PTA_MMX | PTA_SSE | PTA_SSE2},
3392 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
3393 PTA_MMX |PTA_SSE | PTA_SSE2},
3394 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
3395 PTA_MMX | PTA_SSE | PTA_SSE2},
3396 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
3397 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
3398 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
3399 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3400 | PTA_CX16 | PTA_NO_SAHF},
3401 {"core2", PROCESSOR_CORE2_64, CPU_CORE2,
3402 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3403 | PTA_SSSE3 | PTA_CX16},
3404 {"corei7", PROCESSOR_COREI7_64, CPU_COREI7,
3405 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3406 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_CX16},
3407 {"corei7-avx", PROCESSOR_COREI7_64, CPU_COREI7,
3408 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3409 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX
3410 | PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL},
3411 {"atom", PROCESSOR_ATOM, CPU_ATOM,
3412 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3413 | PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
3414 {"geode", PROCESSOR_GEODE, CPU_GEODE,
3415 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A |PTA_PREFETCH_SSE},
3416 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3417 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
3418 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
3419 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3420 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3421 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3422 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3423 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3424 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
3425 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3426 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
3427 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3428 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
3429 {"x86-64", PROCESSOR_K8, CPU_K8,
3430 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
3431 {"k8", PROCESSOR_K8, CPU_K8,
3432 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3433 | PTA_SSE2 | PTA_NO_SAHF},
3434 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3435 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3436 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3437 {"opteron", PROCESSOR_K8, CPU_K8,
3438 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3439 | PTA_SSE2 | PTA_NO_SAHF},
3440 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3441 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3442 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3443 {"athlon64", PROCESSOR_K8, CPU_K8,
3444 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3445 | PTA_SSE2 | PTA_NO_SAHF},
3446 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3447 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3448 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3449 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3450 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3451 | PTA_SSE2 | PTA_NO_SAHF},
3452 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3453 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3454 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
3455 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3456 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3457 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
3458 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3459 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3460 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3461 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3462 | PTA_XOP | PTA_LWP},
3463 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC64,
3464 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3465 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16},
3466 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
3467 0 /* flags are only used for -march switch. */ },
3468 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
3469 PTA_64BIT /* flags are only used for -march switch. */ },
3472 int const pta_size = ARRAY_SIZE (processor_alias_table);
3474 /* Set up prefix/suffix so the error messages refer to either the command
3475 line argument, or the attribute(target). */
3484 prefix = "option(\"";
3489 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3490 SUBTARGET_OVERRIDE_OPTIONS;
3493 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3494 SUBSUBTARGET_OVERRIDE_OPTIONS;
3497 /* -fPIC is the default for x86_64. */
3498 if (TARGET_MACHO && TARGET_64BIT)
3501 /* Need to check -mtune=generic first. */
3502 if (ix86_tune_string)
3504 if (!strcmp (ix86_tune_string, "generic")
3505 || !strcmp (ix86_tune_string, "i686")
3506 /* As special support for cross compilers we read -mtune=native
3507 as -mtune=generic. With native compilers we won't see the
3508 -mtune=native, as it was changed by the driver. */
3509 || !strcmp (ix86_tune_string, "native"))
3512 ix86_tune_string = "generic64";
3514 ix86_tune_string = "generic32";
3516 /* If this call is for setting the option attribute, allow the
3517 generic32/generic64 that was previously set. */
3518 else if (!main_args_p
3519 && (!strcmp (ix86_tune_string, "generic32")
3520 || !strcmp (ix86_tune_string, "generic64")))
3522 else if (!strncmp (ix86_tune_string, "generic", 7))
3523 error ("bad value (%s) for %stune=%s %s",
3524 ix86_tune_string, prefix, suffix, sw);
3525 else if (!strcmp (ix86_tune_string, "x86-64"))
3526 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3527 "%stune=k8%s or %stune=generic%s instead as appropriate",
3528 prefix, suffix, prefix, suffix, prefix, suffix);
3532 if (ix86_arch_string)
3533 ix86_tune_string = ix86_arch_string;
3534 if (!ix86_tune_string)
3536 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
3537 ix86_tune_defaulted = 1;
3540 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
3541 need to use a sensible tune option. */
3542 if (!strcmp (ix86_tune_string, "generic")
3543 || !strcmp (ix86_tune_string, "x86-64")
3544 || !strcmp (ix86_tune_string, "i686"))
3547 ix86_tune_string = "generic64";
3549 ix86_tune_string = "generic32";
3553 if (ix86_stringop_string)
3555 if (!strcmp (ix86_stringop_string, "rep_byte"))
3556 stringop_alg = rep_prefix_1_byte;
3557 else if (!strcmp (ix86_stringop_string, "libcall"))
3558 stringop_alg = libcall;
3559 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
3560 stringop_alg = rep_prefix_4_byte;
3561 else if (!strcmp (ix86_stringop_string, "rep_8byte")
3563 /* rep; movq isn't available in 32-bit code. */
3564 stringop_alg = rep_prefix_8_byte;
3565 else if (!strcmp (ix86_stringop_string, "byte_loop"))
3566 stringop_alg = loop_1_byte;
3567 else if (!strcmp (ix86_stringop_string, "loop"))
3568 stringop_alg = loop;
3569 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
3570 stringop_alg = unrolled_loop;
3572 error ("bad value (%s) for %sstringop-strategy=%s %s",
3573 ix86_stringop_string, prefix, suffix, sw);
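  /* Usage note (illustrative): -mstringop-strategy=rep_8byte selects
     rep_prefix_8_byte for inlined string operations; per the comment above,
     rep; movq is unavailable in 32-bit code, so this choice presumably only
     applies in 64-bit mode.  */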
3576 if (!ix86_arch_string)
3577 ix86_arch_string = TARGET_64BIT ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3579 ix86_arch_specified = 1;
3581 /* Validate -mabi= value. */
3582 if (ix86_abi_string)
3584 if (strcmp (ix86_abi_string, "sysv") == 0)
3585 ix86_abi = SYSV_ABI;
3586 else if (strcmp (ix86_abi_string, "ms") == 0)
3589 error ("unknown ABI (%s) for %sabi=%s %s",
3590 ix86_abi_string, prefix, suffix, sw);
3593 ix86_abi = DEFAULT_ABI;
3595 if (ix86_cmodel_string != 0)
3597 if (!strcmp (ix86_cmodel_string, "small"))
3598 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
3599 else if (!strcmp (ix86_cmodel_string, "medium"))
3600 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
3601 else if (!strcmp (ix86_cmodel_string, "large"))
3602 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
3604 error ("code model %s does not support PIC mode", ix86_cmodel_string);
3605 else if (!strcmp (ix86_cmodel_string, "32"))
3606 ix86_cmodel = CM_32;
3607 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
3608 ix86_cmodel = CM_KERNEL;
3610 error ("bad value (%s) for %scmodel=%s %s",
3611 ix86_cmodel_string, prefix, suffix, sw);
3612 }
3613 else
3614 {
3615 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3616 use of rip-relative addressing. This eliminates fixups that
3617 would otherwise be needed if this object is to be placed in a
3618 DLL, and is essentially just as efficient as direct addressing. */
3619 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
3620 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
3621 else if (TARGET_64BIT)
3622 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
3623 else
3624 ix86_cmodel = CM_32;
3625 }
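/* Illustrative example (editorial, not from the original source):
   "-mcmodel=medium -fpic" yields CM_MEDIUM_PIC; 64-bit compilation without
   -mcmodel defaults to CM_SMALL (CM_SMALL_PIC under -fpic), and 32-bit
   targets always end up with CM_32. */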
3626 if (ix86_asm_string != 0)
3627 {
3628 if (! TARGET_MACHO
3629 && !strcmp (ix86_asm_string, "intel"))
3630 ix86_asm_dialect = ASM_INTEL;
3631 else if (!strcmp (ix86_asm_string, "att"))
3632 ix86_asm_dialect = ASM_ATT;
3633 else
3634 error ("bad value (%s) for %sasm=%s %s",
3635 ix86_asm_string, prefix, suffix, sw);
3636 }
3637 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
3638 error ("code model %qs not supported in the %s bit mode",
3639 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
3640 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3641 sorry ("%i-bit mode not compiled in",
3642 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3644 for (i = 0; i < pta_size; i++)
3645 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
3646 {
3647 ix86_schedule = processor_alias_table[i].schedule;
3648 ix86_arch = processor_alias_table[i].processor;
3649 /* Default cpu tuning to the architecture. */
3650 ix86_tune = ix86_arch;
3652 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3653 error ("CPU you selected does not support x86-64 "
3654 "instruction set");
3656 if (processor_alias_table[i].flags & PTA_MMX
3657 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3658 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3659 if (processor_alias_table[i].flags & PTA_3DNOW
3660 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3661 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3662 if (processor_alias_table[i].flags & PTA_3DNOW_A
3663 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3664 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3665 if (processor_alias_table[i].flags & PTA_SSE
3666 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3667 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3668 if (processor_alias_table[i].flags & PTA_SSE2
3669 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3670 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3671 if (processor_alias_table[i].flags & PTA_SSE3
3672 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3673 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3674 if (processor_alias_table[i].flags & PTA_SSSE3
3675 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3676 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3677 if (processor_alias_table[i].flags & PTA_SSE4_1
3678 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3679 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3680 if (processor_alias_table[i].flags & PTA_SSE4_2
3681 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3682 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3683 if (processor_alias_table[i].flags & PTA_AVX
3684 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3685 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3686 if (processor_alias_table[i].flags & PTA_FMA
3687 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3688 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3689 if (processor_alias_table[i].flags & PTA_SSE4A
3690 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3691 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3692 if (processor_alias_table[i].flags & PTA_FMA4
3693 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3694 ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3695 if (processor_alias_table[i].flags & PTA_XOP
3696 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3697 ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3698 if (processor_alias_table[i].flags & PTA_LWP
3699 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3700 ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3701 if (processor_alias_table[i].flags & PTA_ABM
3702 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3703 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3704 if (processor_alias_table[i].flags & PTA_BMI
3705 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3706 ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3707 if (processor_alias_table[i].flags & PTA_TBM
3708 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3709 ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3710 if (processor_alias_table[i].flags & PTA_CX16
3711 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3712 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3713 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3714 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3715 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3716 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
3717 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3718 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3719 if (processor_alias_table[i].flags & PTA_MOVBE
3720 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3721 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3722 if (processor_alias_table[i].flags & PTA_AES
3723 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3724 ix86_isa_flags |= OPTION_MASK_ISA_AES;
3725 if (processor_alias_table[i].flags & PTA_PCLMUL
3726 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3727 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3728 if (processor_alias_table[i].flags & PTA_FSGSBASE
3729 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3730 ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3731 if (processor_alias_table[i].flags & PTA_RDRND
3732 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3733 ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3734 if (processor_alias_table[i].flags & PTA_F16C
3735 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3736 ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3737 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3738 x86_prefetch_sse = true;
3740 break;
3741 }
3743 if (!strcmp (ix86_arch_string, "generic"))
3744 error ("generic CPU can be used only for %stune=%s %s",
3745 prefix, suffix, sw);
3746 else if (!strncmp (ix86_arch_string, "generic", 7) || i == pta_size)
3747 error ("bad value (%s) for %sarch=%s %s",
3748 ix86_arch_string, prefix, suffix, sw);
3750 ix86_arch_mask = 1u << ix86_arch;
3751 for (i = 0; i < X86_ARCH_LAST; ++i)
3752 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
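/* Illustrative note (editorial, not from the original source):
   initial_ix86_arch_features[] stores one bitmask per feature with one bit
   per processor, so flattening it through ix86_arch_mask turns each later
   feature query into a plain array load instead of a shift-and-mask
   against 1u << ix86_arch. */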
3754 for (i = 0; i < pta_size; i++)
3755 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
3756 {
3757 ix86_schedule = processor_alias_table[i].schedule;
3758 ix86_tune = processor_alias_table[i].processor;
3759 if (TARGET_64BIT)
3760 {
3761 if (!(processor_alias_table[i].flags & PTA_64BIT))
3762 {
3763 if (ix86_tune_defaulted)
3764 {
3765 ix86_tune_string = "x86-64";
3766 for (i = 0; i < pta_size; i++)
3767 if (! strcmp (ix86_tune_string,
3768 processor_alias_table[i].name))
3769 break;
3770 ix86_schedule = processor_alias_table[i].schedule;
3771 ix86_tune = processor_alias_table[i].processor;
3772 }
3773 else
3774 error ("CPU you selected does not support x86-64 "
3775 "instruction set");
3776 }
3777 }
3778 else
3779 {
3780 /* Adjust tuning when compiling for 32-bit ABI. */
3781 switch (ix86_tune)
3782 {
3783 case PROCESSOR_GENERIC64:
3784 ix86_tune = PROCESSOR_GENERIC32;
3785 ix86_schedule = CPU_PENTIUMPRO;
3786 break;
3788 case PROCESSOR_CORE2_64:
3789 ix86_tune = PROCESSOR_CORE2_32;
3790 break;
3792 case PROCESSOR_COREI7_64:
3793 ix86_tune = PROCESSOR_COREI7_32;
3794 break;
3796 default:
3797 break;
3798 }
3799 }
3800 /* Intel CPUs have always interpreted SSE prefetch instructions as
3801 NOPs; so, we can enable SSE prefetch instructions even when
3802 -mtune (rather than -march) points us to a processor that has them.
3803 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3804 higher processors. */
3805 if (TARGET_CMOVE
3806 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3807 x86_prefetch_sse = true;
3808 break;
3809 }
3811 if (ix86_tune_specified && i == pta_size)
3812 error ("bad value (%s) for %stune=%s %s",
3813 ix86_tune_string, prefix, suffix, sw);
3815 ix86_tune_mask = 1u << ix86_tune;
3816 for (i = 0; i < X86_TUNE_LAST; ++i)
3817 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3819 #ifndef USE_IX86_FRAME_POINTER
3820 #define USE_IX86_FRAME_POINTER 0
3821 #endif
3823 #ifndef USE_X86_64_FRAME_POINTER
3824 #define USE_X86_64_FRAME_POINTER 0
3825 #endif
3827 /* Set the default values for switches whose default depends on TARGET_64BIT
3828 in case they weren't overwritten by command line options. */
3829 if (TARGET_64BIT)
3830 {
3831 if (optimize > 1 && !global_options_set.x_flag_zee)
3832 flag_zee = 1;
3833 if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
3834 flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3835 if (flag_asynchronous_unwind_tables == 2)
3836 flag_unwind_tables = flag_asynchronous_unwind_tables = 1;
3837 if (flag_pcc_struct_return == 2)
3838 flag_pcc_struct_return = 0;
3839 }
3840 else
3841 {
3842 if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
3843 flag_omit_frame_pointer = !(USE_IX86_FRAME_POINTER || optimize_size);
3844 if (flag_asynchronous_unwind_tables == 2)
3845 flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3846 if (flag_pcc_struct_return == 2)
3847 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3848 }
3850 if (optimize_size)
3851 ix86_cost = &ix86_size_cost;
3852 else
3853 ix86_cost = processor_target_table[ix86_tune].cost;
3855 /* Arrange to set up i386_stack_locals for all functions. */
3856 init_machine_status = ix86_init_machine_status;
3858 /* Validate -mregparm= value. */
3859 if (ix86_regparm_string)
3860 {
3861 if (TARGET_64BIT)
3862 warning (0, "%sregparm%s is ignored in 64-bit mode", prefix, suffix);
3863 i = atoi (ix86_regparm_string);
3864 if (i < 0 || i > REGPARM_MAX)
3865 error ("%sregparm=%d%s is not between 0 and %d",
3866 prefix, i, suffix, REGPARM_MAX);
3867 else
3868 ix86_regparm = i;
3869 }
3870 if (TARGET_64BIT)
3871 ix86_regparm = REGPARM_MAX;
3873 /* If the user has provided any of the -malign-* options,
3874 warn and use that value only if -falign-* is not set.
3875 Remove this code in GCC 3.2 or later. */
3876 if (ix86_align_loops_string)
3877 {
3878 warning (0, "%salign-loops%s is obsolete, use -falign-loops%s",
3879 prefix, suffix, suffix);
3880 if (align_loops == 0)
3881 {
3882 i = atoi (ix86_align_loops_string);
3883 if (i < 0 || i > MAX_CODE_ALIGN)
3884 error ("%salign-loops=%d%s is not between 0 and %d",
3885 prefix, i, suffix, MAX_CODE_ALIGN);
3886 else
3887 align_loops = 1 << i;
3888 }
3889 }
3891 if (ix86_align_jumps_string)
3892 {
3893 warning (0, "%salign-jumps%s is obsolete, use -falign-jumps%s",
3894 prefix, suffix, suffix);
3895 if (align_jumps == 0)
3896 {
3897 i = atoi (ix86_align_jumps_string);
3898 if (i < 0 || i > MAX_CODE_ALIGN)
3899 error ("%salign-jumps=%d%s is not between 0 and %d",
3900 prefix, i, suffix, MAX_CODE_ALIGN);
3901 else
3902 align_jumps = 1 << i;
3903 }
3904 }
3906 if (ix86_align_funcs_string)
3907 {
3908 warning (0, "%salign-functions%s is obsolete, use -falign-functions%s",
3909 prefix, suffix, suffix);
3910 if (align_functions == 0)
3911 {
3912 i = atoi (ix86_align_funcs_string);
3913 if (i < 0 || i > MAX_CODE_ALIGN)
3914 error ("%salign-functions=%d%s is not between 0 and %d",
3915 prefix, i, suffix, MAX_CODE_ALIGN);
3916 else
3917 align_functions = 1 << i;
3918 }
3919 }
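/* Worked example (editorial, not from the original source): the -malign-*
   values are log2 byte counts, so "-malign-loops=4" (with -falign-loops
   unset) gives align_loops = 1 << 4, i.e. 16-byte loop alignment. */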
3921 /* Default align_* from the processor table. */
3922 if (align_loops == 0)
3923 {
3924 align_loops = processor_target_table[ix86_tune].align_loop;
3925 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3926 }
3927 if (align_jumps == 0)
3928 {
3929 align_jumps = processor_target_table[ix86_tune].align_jump;
3930 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3931 }
3932 if (align_functions == 0)
3933 {
3934 align_functions = processor_target_table[ix86_tune].align_func;
3935 }
3937 /* Validate -mbranch-cost= value, or provide default. */
3938 ix86_branch_cost = ix86_cost->branch_cost;
3939 if (ix86_branch_cost_string)
3940 {
3941 i = atoi (ix86_branch_cost_string);
3942 if (i < 0 || i > 5)
3943 error ("%sbranch-cost=%d%s is not between 0 and 5", prefix, i, suffix);
3944 else
3945 ix86_branch_cost = i;
3946 }
3947 if (ix86_section_threshold_string)
3948 {
3949 i = atoi (ix86_section_threshold_string);
3950 if (i < 0)
3951 error ("%slarge-data-threshold=%d%s is negative", prefix, i, suffix);
3952 else
3953 ix86_section_threshold = i;
3954 }
3956 if (ix86_tls_dialect_string)
3957 {
3958 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
3959 ix86_tls_dialect = TLS_DIALECT_GNU;
3960 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
3961 ix86_tls_dialect = TLS_DIALECT_GNU2;
3962 else
3963 error ("bad value (%s) for %stls-dialect=%s %s",
3964 ix86_tls_dialect_string, prefix, suffix, sw);
3965 }
3967 if (ix87_precision_string)
3968 {
3969 i = atoi (ix87_precision_string);
3970 if (i != 32 && i != 64 && i != 80)
3971 error ("pc%d is not valid precision setting (32, 64 or 80)", i);
3972 }
3974 if (TARGET_64BIT)
3975 {
3976 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3978 /* Enable by default the SSE and MMX builtins. Do allow the user to
3979 explicitly disable any of these. In particular, disabling SSE and
3980 MMX for kernel code is extremely useful. */
3981 if (!ix86_arch_specified)
3982 ix86_isa_flags
3983 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3984 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3986 if (TARGET_RTD)
3987 warning (0, "%srtd%s is ignored in 64-bit mode", prefix, suffix);
3988 }
3989 else
3990 {
3991 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3993 if (!ix86_arch_specified)
3994 ix86_isa_flags
3995 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3997 /* The i386 ABI does not specify a red zone. It still makes sense to use
3998 one when the programmer takes care to keep the stack from being destroyed. */
3999 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
4000 target_flags |= MASK_NO_RED_ZONE;
4001 }
4003 /* Keep nonleaf frame pointers. */
4004 if (flag_omit_frame_pointer)
4005 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
4006 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
4007 flag_omit_frame_pointer = 1;
4009 /* If we're doing fast math, we don't care about comparison order
4010 wrt NaNs. This lets us use a shorter comparison sequence. */
4011 if (flag_finite_math_only)
4012 target_flags &= ~MASK_IEEE_FP;
4014 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
4015 since the insns won't need emulation. */
4016 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
4017 target_flags &= ~MASK_NO_FANCY_MATH_387;
4019 /* Likewise, if the target doesn't have a 387, or we've specified
4020 software floating point, don't use 387 inline intrinsics. */
4021 if (!TARGET_80387)
4022 target_flags |= MASK_NO_FANCY_MATH_387;
4024 /* Turn on MMX builtins for -msse. */
4025 if (TARGET_SSE)
4026 {
4027 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
4028 x86_prefetch_sse = true;
4029 }
4031 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
4032 if (TARGET_SSE4_2 || TARGET_ABM)
4033 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
4035 /* Validate -mpreferred-stack-boundary= value or default it to
4036 PREFERRED_STACK_BOUNDARY_DEFAULT. */
4037 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
4038 if (ix86_preferred_stack_boundary_string)
4039 {
4040 int min = (TARGET_64BIT ? 4 : 2);
4041 int max = (TARGET_SEH ? 4 : 12);
4043 i = atoi (ix86_preferred_stack_boundary_string);
4044 if (i < min || i > max)
4045 {
4046 if (min == max)
4047 error ("%spreferred-stack-boundary%s is not supported "
4048 "for this target", prefix, suffix);
4049 else
4050 error ("%spreferred-stack-boundary=%d%s is not between %d and %d",
4051 prefix, i, suffix, min, max);
4052 }
4053 else
4054 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
4055 }
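/* Worked example (editorial, not from the original source):
   "-mpreferred-stack-boundary=4" gives (1 << 4) * BITS_PER_UNIT = 128 bits,
   i.e. the 16-byte alignment the psABI expects for SSE; the accepted
   minimum is 4 in 64-bit mode and 2 in 32-bit mode. */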
4057 /* Set the default value for -mstackrealign. */
4058 if (ix86_force_align_arg_pointer == -1)
4059 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
4061 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
4063 /* Validate -mincoming-stack-boundary= value or default it to
4064 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
4065 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
4066 if (ix86_incoming_stack_boundary_string)
4067 {
4068 i = atoi (ix86_incoming_stack_boundary_string);
4069 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
4070 error ("-mincoming-stack-boundary=%d is not between %d and 12",
4071 i, TARGET_64BIT ? 4 : 2);
4072 else
4073 {
4074 ix86_user_incoming_stack_boundary = (1 << i) * BITS_PER_UNIT;
4075 ix86_incoming_stack_boundary
4076 = ix86_user_incoming_stack_boundary;
4077 }
4078 }
4080 /* Accept -msseregparm only if at least SSE support is enabled. */
4081 if (TARGET_SSEREGPARM
4082 && ! TARGET_SSE)
4083 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
4085 ix86_fpmath = TARGET_FPMATH_DEFAULT;
4086 if (ix86_fpmath_string != 0)
4087 {
4088 if (! strcmp (ix86_fpmath_string, "387"))
4089 ix86_fpmath = FPMATH_387;
4090 else if (! strcmp (ix86_fpmath_string, "sse"))
4091 {
4092 if (!TARGET_SSE)
4093 {
4094 warning (0, "SSE instruction set disabled, using 387 arithmetics");
4095 ix86_fpmath = FPMATH_387;
4096 }
4097 else
4098 ix86_fpmath = FPMATH_SSE;
4099 }
4100 else if (! strcmp (ix86_fpmath_string, "387,sse")
4101 || ! strcmp (ix86_fpmath_string, "387+sse")
4102 || ! strcmp (ix86_fpmath_string, "sse,387")
4103 || ! strcmp (ix86_fpmath_string, "sse+387")
4104 || ! strcmp (ix86_fpmath_string, "both"))
4105 {
4106 if (!TARGET_SSE)
4107 {
4108 warning (0, "SSE instruction set disabled, using 387 arithmetics");
4109 ix86_fpmath = FPMATH_387;
4110 }
4111 else if (!TARGET_80387)
4112 {
4113 warning (0, "387 instruction set disabled, using SSE arithmetics");
4114 ix86_fpmath = FPMATH_SSE;
4115 }
4116 else
4117 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
4118 }
4119 else
4120 error ("bad value (%s) for %sfpmath=%s %s",
4121 ix86_fpmath_string, prefix, suffix, sw);
4122 }
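/* Illustrative example (editorial, not from the original source):
   "-mfpmath=sse,387" (or "both") sets ix86_fpmath to
   FPMATH_SSE | FPMATH_387, allowing scalar floating point in both x87 and
   SSE registers, provided both units are enabled. */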
4124 /* If the i387 is disabled, then do not return values in it. */
4125 if (!TARGET_80387)
4126 target_flags &= ~MASK_FLOAT_RETURNS;
4128 /* Use external vectorized library in vectorizing intrinsics. */
4129 if (ix86_veclibabi_string)
4130 {
4131 if (strcmp (ix86_veclibabi_string, "svml") == 0)
4132 ix86_veclib_handler = ix86_veclibabi_svml;
4133 else if (strcmp (ix86_veclibabi_string, "acml") == 0)
4134 ix86_veclib_handler = ix86_veclibabi_acml;
4135 else
4136 error ("unknown vectorization library ABI type (%s) for "
4137 "%sveclibabi=%s %s", ix86_veclibabi_string,
4138 prefix, suffix, sw);
4139 }
4141 if ((!USE_IX86_FRAME_POINTER
4142 || (x86_accumulate_outgoing_args & ix86_tune_mask))
4143 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
4144 && !optimize_size)
4145 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4147 /* ??? Unwind info is not correct around the CFG unless either a frame
4148 pointer is present or M_A_O_A is set. Fixing this requires rewriting
4149 unwind info generation to be aware of the CFG and propagating states
4150 around edges. */
4151 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
4152 || flag_exceptions || flag_non_call_exceptions)
4153 && flag_omit_frame_pointer
4154 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4155 {
4156 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
4157 warning (0, "unwind tables currently require either a frame pointer "
4158 "or %saccumulate-outgoing-args%s for correctness",
4159 prefix, suffix);
4160 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4161 }
4163 /* If stack probes are required, the space used for large function
4164 arguments on the stack must also be probed, so enable
4165 -maccumulate-outgoing-args so this happens in the prologue. */
4166 if (TARGET_STACK_PROBE
4167 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4168 {
4169 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
4170 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
4171 "for correctness", prefix, suffix);
4172 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4173 }
4175 /* For sane SSE instruction set generation we need fcomi instruction.
4176 It is safe to enable all CMOVE instructions. */
4177 if (TARGET_SSE)
4178 TARGET_CMOVE = 1;
4180 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
4181 {
4182 char *p;
4183 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
4184 p = strchr (internal_label_prefix, 'X');
4185 internal_label_prefix_len = p - internal_label_prefix;
4186 *p = '\0';
4187 }
4189 /* When scheduling description is not available, disable scheduler pass
4190 so it won't slow down the compilation and make x87 code slower. */
4191 if (!TARGET_SCHEDULE)
4192 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
4194 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4195 ix86_cost->simultaneous_prefetches,
4196 global_options.x_param_values,
4197 global_options_set.x_param_values);
4198 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, ix86_cost->prefetch_block,
4199 global_options.x_param_values,
4200 global_options_set.x_param_values);
4201 maybe_set_param_value (PARAM_L1_CACHE_SIZE, ix86_cost->l1_cache_size,
4202 global_options.x_param_values,
4203 global_options_set.x_param_values);
4204 maybe_set_param_value (PARAM_L2_CACHE_SIZE, ix86_cost->l2_cache_size,
4205 global_options.x_param_values,
4206 global_options_set.x_param_values);
4208 /* Enable software prefetching at -O3 for CPUs where prefetching is helpful. */
4209 if (flag_prefetch_loop_arrays < 0
4210 && HAVE_prefetch
4211 && optimize >= 3
4212 && software_prefetching_beneficial_p ())
4213 flag_prefetch_loop_arrays = 1;
4215 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
4216 can be optimized to ap = __builtin_next_arg (0). */
4217 if (!TARGET_64BIT && !flag_split_stack)
4218 targetm.expand_builtin_va_start = NULL;
4220 if (TARGET_64BIT)
4221 {
4222 ix86_gen_leave = gen_leave_rex64;
4223 ix86_gen_add3 = gen_adddi3;
4224 ix86_gen_sub3 = gen_subdi3;
4225 ix86_gen_sub3_carry = gen_subdi3_carry;
4226 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
4227 ix86_gen_monitor = gen_sse3_monitor64;
4228 ix86_gen_andsp = gen_anddi3;
4229 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
4230 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
4231 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
4232 }
4233 else
4234 {
4235 ix86_gen_leave = gen_leave;
4236 ix86_gen_add3 = gen_addsi3;
4237 ix86_gen_sub3 = gen_subsi3;
4238 ix86_gen_sub3_carry = gen_subsi3_carry;
4239 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
4240 ix86_gen_monitor = gen_sse3_monitor;
4241 ix86_gen_andsp = gen_andsi3;
4242 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
4243 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
4244 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
4245 }
4247 #ifdef USE_IX86_CLD
4248 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
4249 if (!TARGET_64BIT)
4250 target_flags |= MASK_CLD & ~target_flags_explicit;
4251 #endif
4253 if (!TARGET_64BIT && flag_pic)
4254 {
4255 if (flag_fentry > 0)
4256 sorry ("-mfentry isn%'t supported for 32-bit in combination "
4257 "with -fpic");
4258 flag_fentry = 0;
4259 }
4260 else if (TARGET_SEH)
4261 {
4262 if (flag_fentry == 0)
4263 sorry ("-mno-fentry isn%'t compatible with SEH");
4264 flag_fentry = 1;
4265 }
4266 else if (flag_fentry < 0)
4267 {
4268 #if defined(PROFILE_BEFORE_PROLOGUE)
4269 flag_fentry = 1;
4270 #else
4271 flag_fentry = 0;
4272 #endif
4273 }
4275 /* Save the initial options in case the user does function specific options. */
4276 if (main_args_p)
4277 target_option_default_node = target_option_current_node
4278 = build_target_option_node ();
4280 if (TARGET_AVX)
4281 {
4282 /* When not optimizing for size, enable vzeroupper optimization for
4283 TARGET_AVX with -fexpensive-optimizations. */
4284 if (!optimize_size
4285 && flag_expensive_optimizations
4286 && !(target_flags_explicit & MASK_VZEROUPPER))
4287 target_flags |= MASK_VZEROUPPER;
4288 }
4289 else
4290 {
4291 /* Disable vzeroupper pass if TARGET_AVX is disabled. */
4292 target_flags &= ~MASK_VZEROUPPER;
4293 }
4294 }
4296 /* Return TRUE if VAL is passed in register with 256bit AVX modes. */
4298 static bool
4299 function_pass_avx256_p (const_rtx val)
4300 {
4301 if (!val)
4302 return false;
4304 if (REG_P (val) && VALID_AVX256_REG_MODE (GET_MODE (val)))
4305 return true;
4307 if (GET_CODE (val) == PARALLEL)
4308 {
4309 int i;
4310 rtx r;
4312 for (i = XVECLEN (val, 0) - 1; i >= 0; i--)
4313 {
4314 r = XVECEXP (val, 0, i);
4315 if (GET_CODE (r) == EXPR_LIST
4316 && XEXP (r, 0)
4317 && REG_P (XEXP (r, 0))
4318 && (GET_MODE (XEXP (r, 0)) == OImode
4319 || VALID_AVX256_REG_MODE (GET_MODE (XEXP (r, 0)))))
4320 return true;
4321 }
4322 }
4324 return false;
4325 }
4327 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4329 static void
4330 ix86_option_override (void)
4331 {
4332 ix86_option_override_internal (true);
4333 }
4335 /* Update register usage after having seen the compiler flags. */
4337 static void
4338 ix86_conditional_register_usage (void)
4339 {
4340 int i;
4341 unsigned int j;
4343 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4344 {
4345 if (fixed_regs[i] > 1)
4346 fixed_regs[i] = (fixed_regs[i] == (TARGET_64BIT ? 3 : 2));
4347 if (call_used_regs[i] > 1)
4348 call_used_regs[i] = (call_used_regs[i] == (TARGET_64BIT ? 3 : 2));
4349 }
4351 /* The PIC register, if it exists, is fixed. */
4352 j = PIC_OFFSET_TABLE_REGNUM;
4353 if (j != INVALID_REGNUM)
4354 fixed_regs[j] = call_used_regs[j] = 1;
4356 /* The MS_ABI changes the set of call-used registers. */
4357 if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
4358 {
4359 call_used_regs[SI_REG] = 0;
4360 call_used_regs[DI_REG] = 0;
4361 call_used_regs[XMM6_REG] = 0;
4362 call_used_regs[XMM7_REG] = 0;
4363 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4364 call_used_regs[i] = 0;
4365 }
4367 /* The default setting of CLOBBERED_REGS is for 32-bit; add in the
4368 other call-clobbered regs for 64-bit. */
4369 if (TARGET_64BIT)
4370 {
4371 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
4373 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4374 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
4375 && call_used_regs[i])
4376 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
4377 }
4379 /* If MMX is disabled, squash the registers. */
4380 if (! TARGET_MMX)
4381 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4382 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
4383 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4385 /* If SSE is disabled, squash the registers. */
4386 if (! TARGET_SSE)
4387 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4388 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
4389 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4391 /* If the FPU is disabled, squash the registers. */
4392 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
4393 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4394 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
4395 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4397 /* If 32-bit, squash the 64-bit registers. */
4398 if (! TARGET_64BIT)
4399 {
4400 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
4401 reg_names[i] = "";
4402 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4403 reg_names[i] = "";
4404 }
4405 }
4408 /* Save the current options. */
4410 static void
4411 ix86_function_specific_save (struct cl_target_option *ptr)
4412 {
4413 ptr->arch = ix86_arch;
4414 ptr->schedule = ix86_schedule;
4415 ptr->tune = ix86_tune;
4416 ptr->fpmath = ix86_fpmath;
4417 ptr->branch_cost = ix86_branch_cost;
4418 ptr->tune_defaulted = ix86_tune_defaulted;
4419 ptr->arch_specified = ix86_arch_specified;
4420 ptr->ix86_isa_flags_explicit = ix86_isa_flags_explicit;
4421 ptr->ix86_target_flags_explicit = target_flags_explicit;
4423 /* The fields are char but the variables are not; make sure the
4424 values fit in the fields. */
4425 gcc_assert (ptr->arch == ix86_arch);
4426 gcc_assert (ptr->schedule == ix86_schedule);
4427 gcc_assert (ptr->tune == ix86_tune);
4428 gcc_assert (ptr->fpmath == ix86_fpmath);
4429 gcc_assert (ptr->branch_cost == ix86_branch_cost);
4430 }
4432 /* Restore the current options. */
4434 static void
4435 ix86_function_specific_restore (struct cl_target_option *ptr)
4436 {
4437 enum processor_type old_tune = ix86_tune;
4438 enum processor_type old_arch = ix86_arch;
4439 unsigned int ix86_arch_mask, ix86_tune_mask;
4440 int i;
4442 ix86_arch = (enum processor_type) ptr->arch;
4443 ix86_schedule = (enum attr_cpu) ptr->schedule;
4444 ix86_tune = (enum processor_type) ptr->tune;
4445 ix86_fpmath = (enum fpmath_unit) ptr->fpmath;
4446 ix86_branch_cost = ptr->branch_cost;
4447 ix86_tune_defaulted = ptr->tune_defaulted;
4448 ix86_arch_specified = ptr->arch_specified;
4449 ix86_isa_flags_explicit = ptr->ix86_isa_flags_explicit;
4450 target_flags_explicit = ptr->ix86_target_flags_explicit;
4452 /* Recreate the arch feature tests if the arch changed */
4453 if (old_arch != ix86_arch)
4454 {
4455 ix86_arch_mask = 1u << ix86_arch;
4456 for (i = 0; i < X86_ARCH_LAST; ++i)
4457 ix86_arch_features[i]
4458 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4459 }
4461 /* Recreate the tune optimization tests. */
4462 if (old_tune != ix86_tune)
4463 {
4464 ix86_tune_mask = 1u << ix86_tune;
4465 for (i = 0; i < X86_TUNE_LAST; ++i)
4466 ix86_tune_features[i]
4467 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
4468 }
4469 }
4471 /* Print the current options. */
4473 static void
4474 ix86_function_specific_print (FILE *file, int indent,
4475 struct cl_target_option *ptr)
4476 {
4477 char *target_string
4478 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
4479 NULL, NULL, NULL, false);
4481 fprintf (file, "%*sarch = %d (%s)\n",
4482 indent, "",
4483 ptr->arch,
4484 ((ptr->arch < TARGET_CPU_DEFAULT_max)
4485 ? cpu_names[ptr->arch]
4486 : "<unknown>"));
4488 fprintf (file, "%*stune = %d (%s)\n",
4489 indent, "",
4490 ptr->tune,
4491 ((ptr->tune < TARGET_CPU_DEFAULT_max)
4492 ? cpu_names[ptr->tune]
4493 : "<unknown>"));
4495 fprintf (file, "%*sfpmath = %d%s%s\n", indent, "", ptr->fpmath,
4496 (ptr->fpmath & FPMATH_387) ? ", 387" : "",
4497 (ptr->fpmath & FPMATH_SSE) ? ", sse" : "");
4498 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4500 if (target_string)
4501 {
4502 fprintf (file, "%*s%s\n", indent, "", target_string);
4503 free (target_string);
4504 }
4505 }
4508 /* Inner function to process the attribute((target(...))), take an argument and
4509 set the current options from the argument. If we have a list, recursively go
4510 over the list. */
4512 static bool
4513 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[])
4514 {
4515 char *next_optstr;
4516 bool ret = true;
4518 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4519 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4520 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4521 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4523 enum ix86_opt_type
4524 {
4525 ix86_opt_unknown,
4526 ix86_opt_yes,
4527 ix86_opt_no,
4528 ix86_opt_str,
4529 ix86_opt_isa
4530 };
4532 static const struct
4533 {
4534 const char *string;
4535 size_t len;
4536 enum ix86_opt_type type;
4537 int opt;
4538 int mask;
4539 } attrs[] = {
4540 /* isa options */
4541 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4542 IX86_ATTR_ISA ("abm", OPT_mabm),
4543 IX86_ATTR_ISA ("bmi", OPT_mbmi),
4544 IX86_ATTR_ISA ("tbm", OPT_mtbm),
4545 IX86_ATTR_ISA ("aes", OPT_maes),
4546 IX86_ATTR_ISA ("avx", OPT_mavx),
4547 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4548 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4549 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4550 IX86_ATTR_ISA ("sse", OPT_msse),
4551 IX86_ATTR_ISA ("sse2", OPT_msse2),
4552 IX86_ATTR_ISA ("sse3", OPT_msse3),
4553 IX86_ATTR_ISA ("sse4", OPT_msse4),
4554 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4555 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4556 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4557 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4558 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4559 IX86_ATTR_ISA ("xop", OPT_mxop),
4560 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4561 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4562 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4563 IX86_ATTR_ISA ("f16c", OPT_mf16c),
4565 /* string options */
4566 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4567 IX86_ATTR_STR ("fpmath=", IX86_FUNCTION_SPECIFIC_FPMATH),
4568 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
4570 /* flag options */
4571 IX86_ATTR_YES ("cld",
4572 OPT_mcld,
4573 MASK_CLD),
4575 IX86_ATTR_NO ("fancy-math-387",
4576 OPT_mfancy_math_387,
4577 MASK_NO_FANCY_MATH_387),
4579 IX86_ATTR_YES ("ieee-fp",
4580 OPT_mieee_fp,
4581 MASK_IEEE_FP),
4583 IX86_ATTR_YES ("inline-all-stringops",
4584 OPT_minline_all_stringops,
4585 MASK_INLINE_ALL_STRINGOPS),
4587 IX86_ATTR_YES ("inline-stringops-dynamically",
4588 OPT_minline_stringops_dynamically,
4589 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4591 IX86_ATTR_NO ("align-stringops",
4592 OPT_mno_align_stringops,
4593 MASK_NO_ALIGN_STRINGOPS),
4595 IX86_ATTR_YES ("recip",
4596 OPT_mrecip,
4597 MASK_RECIP),
4599 };
4601 /* If this is a list, recurse to get the options. */
4602 if (TREE_CODE (args) == TREE_LIST)
4603 {
4604 bool ret = true;
4606 for (; args; args = TREE_CHAIN (args))
4607 if (TREE_VALUE (args)
4608 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args), p_strings))
4609 ret = false;
4611 return ret;
4612 }
4614 else if (TREE_CODE (args) != STRING_CST)
4615 gcc_unreachable ();
4617 /* Handle multiple arguments separated by commas. */
4618 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4620 while (next_optstr && *next_optstr != '\0')
4621 {
4622 char *p = next_optstr;
4623 char *orig_p = p;
4624 char *comma = strchr (next_optstr, ',');
4625 const char *opt_string;
4626 size_t len, opt_len;
4627 int opt;
4628 bool opt_set_p;
4629 char ch;
4630 unsigned i;
4631 enum ix86_opt_type type = ix86_opt_unknown;
4632 int mask = 0;
4634 if (comma)
4635 {
4636 *comma = '\0';
4637 len = comma - next_optstr;
4638 next_optstr = comma + 1;
4639 }
4640 else
4641 {
4642 len = strlen (p);
4643 next_optstr = NULL;
4644 }
4646 /* Recognize no-xxx. */
4647 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4648 {
4649 opt_set_p = false;
4650 p += 3;
4651 len -= 3;
4652 }
4653 else
4654 opt_set_p = true;
4656 /* Find the option. */
4657 ch = *p;
4658 opt = N_OPTS;
4659 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4660 {
4661 type = attrs[i].type;
4662 opt_len = attrs[i].len;
4663 if (ch == attrs[i].string[0]
4664 && ((type != ix86_opt_str) ? len == opt_len : len > opt_len)
4665 && memcmp (p, attrs[i].string, opt_len) == 0)
4666 {
4667 opt = attrs[i].opt;
4668 mask = attrs[i].mask;
4669 opt_string = attrs[i].string;
4670 break;
4671 }
4672 }
4674 /* Process the option. */
4675 if (opt == N_OPTS)
4676 {
4677 error ("attribute(target(\"%s\")) is unknown", orig_p);
4678 ret = false;
4679 }
4681 else if (type == ix86_opt_isa)
4682 {
4683 struct cl_decoded_option decoded;
4685 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
4686 ix86_handle_option (&global_options, &global_options_set,
4687 &decoded, input_location);
4688 }
4690 else if (type == ix86_opt_yes || type == ix86_opt_no)
4691 {
4692 if (type == ix86_opt_no)
4693 opt_set_p = !opt_set_p;
4695 if (opt_set_p)
4696 target_flags |= mask;
4697 else
4698 target_flags &= ~mask;
4699 }
4701 else if (type == ix86_opt_str)
4702 {
4703 if (p_strings[opt])
4704 {
4705 error ("option(\"%s\") was already specified", opt_string);
4706 ret = false;
4707 }
4708 else
4709 p_strings[opt] = xstrdup (p + opt_len);
4710 }
4711 }
4713 return ret;
4714 }
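/* Illustrative example (editorial, not from the original source):
   __attribute__((target("sse4.2,no-abm,arch=core2"))) is parsed here as
   three comma-separated entries: "sse4.2" is an ix86_opt_isa entry turned
   on, the "no-" prefix turns "abm" off, and "arch=" is an ix86_opt_str
   entry whose value "core2" is saved in p_strings[] for
   ix86_valid_target_attribute_tree below. */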
4719 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4721 static tree
4722 ix86_valid_target_attribute_tree (tree args)
4723 {
4724 const char *orig_arch_string = ix86_arch_string;
4725 const char *orig_tune_string = ix86_tune_string;
4726 const char *orig_fpmath_string = ix86_fpmath_string;
4727 int orig_tune_defaulted = ix86_tune_defaulted;
4728 int orig_arch_specified = ix86_arch_specified;
4729 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL, NULL };
4730 int i;
4731 tree t;
4732 struct cl_target_option *def
4733 = TREE_TARGET_OPTION (target_option_default_node);
4735 /* Process each of the options on the chain. */
4736 if (! ix86_valid_target_attribute_inner_p (args, option_strings))
4737 return NULL_TREE;
4739 /* If the changed options are different from the default, rerun
4740 ix86_option_override_internal, and then save the options away.
4741 The string options are attribute options, and will be undone
4742 when we copy the save structure. */
4743 if (ix86_isa_flags != def->x_ix86_isa_flags
4744 || target_flags != def->x_target_flags
4745 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4746 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4747 || option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
4748 {
4749 /* If we are using the default tune= or arch=, undo the string assigned,
4750 and use the default. */
4751 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4752 ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4753 else if (!orig_arch_specified)
4754 ix86_arch_string = NULL;
4756 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4757 ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4758 else if (orig_tune_defaulted)
4759 ix86_tune_string = NULL;
4761 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4762 if (option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
4763 ix86_fpmath_string = option_strings[IX86_FUNCTION_SPECIFIC_FPMATH];
4764 else if (!TARGET_64BIT && TARGET_SSE)
4765 ix86_fpmath_string = "sse,387";
4767 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4768 ix86_option_override_internal (false);
4770 /* Add any builtin functions with the new isa if any. */
4771 ix86_add_new_builtins (ix86_isa_flags);
4773 /* Save the current options unless we are validating options for
4774 #pragma. */
4775 t = build_target_option_node ();
4777 ix86_arch_string = orig_arch_string;
4778 ix86_tune_string = orig_tune_string;
4779 ix86_fpmath_string = orig_fpmath_string;
4781 /* Free up memory allocated to hold the strings */
4782 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4783 if (option_strings[i])
4784 free (option_strings[i]);
4786 return t;
4787 }
4790 /* Hook to validate attribute((target("string"))). */
4792 static bool
4793 ix86_valid_target_attribute_p (tree fndecl,
4794 tree ARG_UNUSED (name),
4796 int ARG_UNUSED (flags))
4797 {
4798 struct cl_target_option cur_target;
4799 bool ret = true;
4800 tree old_optimize = build_optimization_node ();
4801 tree new_target, new_optimize;
4802 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4804 /* If the function changed the optimization levels as well as setting target
4805 options, start with the optimizations specified. */
4806 if (func_optimize && func_optimize != old_optimize)
4807 cl_optimization_restore (&global_options,
4808 TREE_OPTIMIZATION (func_optimize));
4810 /* The target attributes may also change some optimization flags, so update
4811 the optimization options if necessary. */
4812 cl_target_option_save (&cur_target, &global_options);
4813 new_target = ix86_valid_target_attribute_tree (args);
4814 new_optimize = build_optimization_node ();
4816 if (!new_target)
4817 ret = false;
4819 else if (fndecl)
4820 {
4821 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4823 if (old_optimize != new_optimize)
4824 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4825 }
4827 cl_target_option_restore (&global_options, &cur_target);
4829 if (old_optimize != new_optimize)
4830 cl_optimization_restore (&global_options,
4831 TREE_OPTIMIZATION (old_optimize));
4833 return ret;
4834 }
4837 /* Hook to determine if one function can safely inline another. */
4839 static bool
4840 ix86_can_inline_p (tree caller, tree callee)
4841 {
4842 bool ret = false;
4843 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
4844 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
4846 /* If callee has no option attributes, then it is ok to inline. */
4847 if (!callee_tree)
4848 ret = true;
4850 /* If caller has no option attributes, but callee does then it is not ok to
4851 inline. */
4852 else if (!caller_tree)
4853 ret = false;
4855 else
4856 {
4857 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
4858 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
4860 /* Callee's isa options should be a subset of the caller's, i.e. an SSE4
4861 function can inline an SSE2 function but an SSE2 function can't
4862 inline an SSE4 function. */
4863 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
4864 != callee_opts->x_ix86_isa_flags)
4865 ret = false;
4868 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
4871 /* See if arch, tune, etc. are the same. */
4872 else if (caller_opts->arch != callee_opts->arch)
4875 else if (caller_opts->tune != callee_opts->tune)
4878 else if (caller_opts->fpmath != callee_opts->fpmath)
4881 else if (caller_opts->branch_cost != callee_opts->branch_cost)
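/* Illustrative example (editorial, not from the original source): a caller
   built with target("sse4.2") may inline a callee built with
   target("sse2"), because the callee's ISA flags are a subset of the
   caller's; the reverse is refused, since the inlined body could emit
   instructions the caller's context does not allow. */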
4892 /* Remember the last target of ix86_set_current_function. */
4893 static GTY(()) tree ix86_previous_fndecl;
4895 /* Establish appropriate back-end context for processing the function
4896 FNDECL. The argument might be NULL to indicate processing at top
4897 level, outside of any function scope. */
4898 static void
4899 ix86_set_current_function (tree fndecl)
4900 {
4901 /* Only change the context if the function changes. This hook is called
4902 several times in the course of compiling a function, and we don't want to
4903 slow things down too much or call target_reinit when it isn't safe. */
4904 if (fndecl && fndecl != ix86_previous_fndecl)
4906 tree old_tree = (ix86_previous_fndecl
4907 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
4908 : NULL_TREE);
4910 tree new_tree = (fndecl
4911 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
4912 : NULL_TREE);
4914 ix86_previous_fndecl = fndecl;
4915 if (old_tree == new_tree)
4916 ;
4918 else if (new_tree)
4919 {
4920 cl_target_option_restore (&global_options,
4921 TREE_TARGET_OPTION (new_tree));
4922 target_reinit ();
4923 }
4925 else if (old_tree)
4926 {
4927 struct cl_target_option *def
4928 = TREE_TARGET_OPTION (target_option_current_node);
4930 cl_target_option_restore (&global_options, def);
4931 target_reinit ();
4932 }
4933 }
4934 }
4937 /* Return true if this goes in large data/bss. */
4939 static bool
4940 ix86_in_large_data_p (tree exp)
4941 {
4942 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
4943 return false;
4945 /* Functions are never large data. */
4946 if (TREE_CODE (exp) == FUNCTION_DECL)
4947 return false;
4949 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
4950 {
4951 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
4952 if (strcmp (section, ".ldata") == 0
4953 || strcmp (section, ".lbss") == 0)
4954 return true;
4955 return false;
4956 }
4957 else
4958 {
4959 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
4961 /* If this is an incomplete type with size 0, then we can't put it
4962 in data because it might be too big when completed. */
4963 if (!size || size > ix86_section_threshold)
4964 return true;
4965 }
4967 return false;
4968 }
4970 /* Switch to the appropriate section for output of DECL.
4971 DECL is either a `VAR_DECL' node or a constant of some sort.
4972 RELOC indicates whether forming the initial value of DECL requires
4973 link-time relocations. */
4975 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
4976 ATTRIBUTE_UNUSED;
4978 static section *
4979 x86_64_elf_select_section (tree decl, int reloc,
4980 unsigned HOST_WIDE_INT align)
4981 {
4982 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4983 && ix86_in_large_data_p (decl))
4985 const char *sname = NULL;
4986 unsigned int flags = SECTION_WRITE;
4987 switch (categorize_decl_for_section (decl, reloc))
4988 {
4989 case SECCAT_DATA:
4990 sname = ".ldata";
4991 break;
4992 case SECCAT_DATA_REL:
4993 sname = ".ldata.rel";
4994 break;
4995 case SECCAT_DATA_REL_LOCAL:
4996 sname = ".ldata.rel.local";
4997 break;
4998 case SECCAT_DATA_REL_RO:
4999 sname = ".ldata.rel.ro";
5000 break;
5001 case SECCAT_DATA_REL_RO_LOCAL:
5002 sname = ".ldata.rel.ro.local";
5003 break;
5004 case SECCAT_BSS:
5005 sname = ".lbss";
5006 flags |= SECTION_BSS;
5007 break;
5008 case SECCAT_RODATA:
5009 case SECCAT_RODATA_MERGE_STR:
5010 case SECCAT_RODATA_MERGE_STR_INIT:
5011 case SECCAT_RODATA_MERGE_CONST:
5012 sname = ".lrodata";
5013 flags = 0;
5014 break;
5015 case SECCAT_SRODATA:
5016 case SECCAT_SDATA:
5017 case SECCAT_SBSS:
5018 gcc_unreachable ();
5019 case SECCAT_TEXT:
5020 case SECCAT_TDATA:
5021 case SECCAT_TBSS:
5022 /* We don't split these for medium model. Place them into
5023 default sections and hope for the best. */
5024 break;
5025 }
5026 if (sname)
5027 {
5028 /* We might get called with string constants, but get_named_section
5029 doesn't like them as they are not DECLs. Also, we need to set
5030 flags in that case. */
5031 if (!DECL_P (decl))
5032 return get_section (sname, flags, NULL);
5033 return get_named_section (decl, sname, reloc);
5034 }
5035 }
5036 return default_elf_select_section (decl, reloc, align);
5039 /* Build up a unique section name, expressed as a
5040 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
5041 RELOC indicates whether the initial value of EXP requires
5042 link-time relocations. */
5044 static void ATTRIBUTE_UNUSED
5045 x86_64_elf_unique_section (tree decl, int reloc)
5046 {
5047 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5048 && ix86_in_large_data_p (decl))
5050 const char *prefix = NULL;
5051 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
5052 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
5054 switch (categorize_decl_for_section (decl, reloc))
5055 {
5056 case SECCAT_DATA:
5057 case SECCAT_DATA_REL:
5058 case SECCAT_DATA_REL_LOCAL:
5059 case SECCAT_DATA_REL_RO:
5060 case SECCAT_DATA_REL_RO_LOCAL:
5061 prefix = one_only ? ".ld" : ".ldata";
5062 break;
5063 case SECCAT_BSS:
5064 prefix = one_only ? ".lb" : ".lbss";
5065 break;
5066 case SECCAT_RODATA:
5067 case SECCAT_RODATA_MERGE_STR:
5068 case SECCAT_RODATA_MERGE_STR_INIT:
5069 case SECCAT_RODATA_MERGE_CONST:
5070 prefix = one_only ? ".lr" : ".lrodata";
5071 break;
5072 case SECCAT_SRODATA:
5073 case SECCAT_SDATA:
5074 case SECCAT_SBSS:
5075 gcc_unreachable ();
5076 case SECCAT_TEXT:
5077 case SECCAT_TDATA:
5078 case SECCAT_TBSS:
5079 /* We don't split these for medium model. Place them into
5080 default sections and hope for the best. */
5081 break;
5082 }
5083 if (prefix)
5084 {
5085 const char *name, *linkonce;
5086 char *string;
5088 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
5089 name = targetm.strip_name_encoding (name);
5091 /* If we're using one_only, then there needs to be a .gnu.linkonce
5092 prefix to the section name. */
5093 linkonce = one_only ? ".gnu.linkonce" : "";
5095 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
5097 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
5098 return;
5099 }
5100 }
5101 default_unique_section (decl, reloc);
5102 }
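/* Illustrative example (editorial, not from the original source): under
   -mcmodel=medium, a large one-only variable "foo" (without COMDAT group
   support) is placed in ".gnu.linkonce.ld.foo" -- the linkonce prefix, the
   ".ld" prefix chosen above, a dot, and the stripped assembler name. */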
5104 #ifdef COMMON_ASM_OP
5105 /* This says how to output assembler code to declare an
5106 uninitialized external linkage data object.
5108 For medium model x86-64 we need to use .largecomm opcode for
5109 large objects. */
5110 void
5111 x86_elf_aligned_common (FILE *file,
5112 const char *name, unsigned HOST_WIDE_INT size,
5113 int align)
5114 {
5115 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5116 && size > (unsigned int)ix86_section_threshold)
5117 fputs (".largecomm\t", file);
5118 else
5119 fputs (COMMON_ASM_OP, file);
5120 assemble_name (file, name);
5121 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
5122 size, align / BITS_PER_UNIT);
5123 }
5124 #endif
5126 /* Utility function for targets to use in implementing
5127 ASM_OUTPUT_ALIGNED_BSS. */
5129 void
5130 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
5131 const char *name, unsigned HOST_WIDE_INT size,
5132 int align)
5133 {
5134 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5135 && size > (unsigned int)ix86_section_threshold)
5136 switch_to_section (get_named_section (decl, ".lbss", 0));
5137 else
5138 switch_to_section (bss_section);
5139 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
5140 #ifdef ASM_DECLARE_OBJECT_NAME
5141 last_assemble_variable_decl = decl;
5142 ASM_DECLARE_OBJECT_NAME (file, name, decl);
5143 #else
5144 /* Standard thing is just output label for the object. */
5145 ASM_OUTPUT_LABEL (file, name);
5146 #endif /* ASM_DECLARE_OBJECT_NAME */
5147 ASM_OUTPUT_SKIP (file, size ? size : 1);
5148 }
5150 static const struct default_options ix86_option_optimization_table[] =
5152 /* Turn off -fschedule-insns by default. It tends to make the
5153 problem with not enough registers even worse. */
5154 #ifdef INSN_SCHEDULING
5155 { OPT_LEVELS_ALL, OPT_fschedule_insns, NULL, 0 },
5156 #endif
5158 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
5159 SUBTARGET_OPTIMIZATION_OPTIONS,
5160 #endif
5161 { OPT_LEVELS_NONE, 0, NULL, 0 }
5162 };
5164 /* Implement TARGET_OPTION_INIT_STRUCT. */
5166 static void
5167 ix86_option_init_struct (struct gcc_options *opts)
5168 {
5169 if (TARGET_MACHO)
5170 /* The Darwin libraries never set errno, so we might as well
5171 avoid calling them when that's the only reason we would. */
5172 opts->x_flag_errno_math = 0;
5174 opts->x_flag_pcc_struct_return = 2;
5175 opts->x_flag_asynchronous_unwind_tables = 2;
5176 opts->x_flag_vect_cost_model = 1;
5177 }
5179 /* Decide whether we must probe the stack before any space allocation
5180 on this target. It's essentially TARGET_STACK_PROBE except when
5181 -fstack-check causes the stack to be already probed differently. */
5183 static bool
5184 ix86_target_stack_probe (void)
5185 {
5186 /* Do not probe the stack twice if static stack checking is enabled. */
5187 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5188 return false;
5190 return TARGET_STACK_PROBE;
5191 }
5193 /* Decide whether we can make a sibling call to a function. DECL is the
5194 declaration of the function being targeted by the call and EXP is the
5195 CALL_EXPR representing the call. */
5197 static bool
5198 ix86_function_ok_for_sibcall (tree decl, tree exp)
5199 {
5200 tree type, decl_or_type;
5201 rtx a, b;
5203 /* If we are generating position-independent code, we cannot sibcall
5204 optimize any indirect call, or a direct call to a global function,
5205 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
5206 if (!TARGET_MACHO
5207 && !TARGET_64BIT
5208 && flag_pic
5209 && (!decl || !targetm.binds_local_p (decl)))
5210 return false;
5212 /* If we need to align the outgoing stack, then sibcalling would
5213 unalign the stack, which may break the called function. */
5214 if (ix86_minimum_incoming_stack_boundary (true)
5215 < PREFERRED_STACK_BOUNDARY)
5216 return false;
5218 if (decl)
5219 {
5220 decl_or_type = decl;
5221 type = TREE_TYPE (decl);
5222 }
5223 else
5224 {
5225 /* We're looking at the CALL_EXPR, we need the type of the function. */
5226 type = CALL_EXPR_FN (exp); /* pointer expression */
5227 type = TREE_TYPE (type); /* pointer type */
5228 type = TREE_TYPE (type); /* function type */
5229 decl_or_type = type;
5230 }
5232 /* Check that the return value locations are the same. Like
5233 if we are returning floats on the 80387 register stack, we cannot
5234 make a sibcall from a function that doesn't return a float to a
5235 function that does or, conversely, from a function that does return
5236 a float to a function that doesn't; the necessary stack adjustment
5237 would not be executed. This is also the place we notice
5238 differences in the return value ABI. Note that it is ok for one
5239 of the functions to have void return type as long as the return
5240 value of the other is passed in a register. */
5241 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
5242 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5243 cfun->decl, false);
5244 if (STACK_REG_P (a) || STACK_REG_P (b))
5245 {
5246 if (!rtx_equal_p (a, b))
5247 return false;
5248 }
5249 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5250 {
5251 /* Disable sibcall if we need to generate vzeroupper after
5252 returning AVX register. */
5253 if (TARGET_VZEROUPPER
5254 && cfun->machine->callee_return_avx256_p
5255 && !cfun->machine->caller_return_avx256_p)
5256 return false;
5257 }
5258 else if (!rtx_equal_p (a, b))
5259 return false;
5261 if (TARGET_64BIT)
5262 {
5263 /* The SYSV ABI has more call-clobbered registers;
5264 disallow sibcalls from MS to SYSV. */
5265 if (cfun->machine->call_abi == MS_ABI
5266 && ix86_function_type_abi (type) == SYSV_ABI)
5267 return false;
5268 }
5269 else
5270 {
5271 /* If this call is indirect, we'll need to be able to use a
5272 call-clobbered register for the address of the target function.
5273 Make sure that all such registers are not used for passing
5274 parameters. Note that DLLIMPORT functions are indirect. */
5275 if (!decl
5276 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
5277 {
5278 if (ix86_function_regparm (type, NULL) >= 3)
5279 {
5280 /* ??? Need to count the actual number of registers to be used,
5281 not the possible number of registers. Fix later. */
5282 return false;
5283 }
5284 }
5285 }
5287 /* Otherwise okay. That also includes certain types of indirect calls. */
5288 return true;
5289 }
5291 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
5292 and "sseregparm" calling convention attributes;
5293 arguments as in struct attribute_spec.handler. */
5296 ix86_handle_cconv_attribute (tree *node, tree name,
5298 int flags ATTRIBUTE_UNUSED,
5301 if (TREE_CODE (*node) != FUNCTION_TYPE
5302 && TREE_CODE (*node) != METHOD_TYPE
5303 && TREE_CODE (*node) != FIELD_DECL
5304 && TREE_CODE (*node) != TYPE_DECL)
5305 {
5306 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5307 name);
5308 *no_add_attrs = true;
5310 return NULL_TREE;
5311 }
5312 /* Can combine regparm with all attributes but fastcall. */
5313 if (is_attribute_p ("regparm", name))
5314 {
5315 tree cst;
5317 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5318 {
5319 error ("fastcall and regparm attributes are not compatible");
5320 }
5322 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5323 {
5324 error ("regparm and thiscall attributes are not compatible");
5325 }
5327 cst = TREE_VALUE (args);
5328 if (TREE_CODE (cst) != INTEGER_CST)
5329 {
5330 warning (OPT_Wattributes,
5331 "%qE attribute requires an integer constant argument",
5332 name);
5333 *no_add_attrs = true;
5334 }
5335 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
5336 {
5337 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
5338 name, REGPARM_MAX);
5339 *no_add_attrs = true;
5340 }
5342 return NULL_TREE;
5343 }
5345 if (TARGET_64BIT)
5346 {
5347 /* Do not warn when emulating the MS ABI. */
5348 if ((TREE_CODE (*node) != FUNCTION_TYPE
5349 && TREE_CODE (*node) != METHOD_TYPE)
5350 || ix86_function_type_abi (*node) != MS_ABI)
5351 warning (OPT_Wattributes, "%qE attribute ignored",
5352 name);
5353 *no_add_attrs = true;
5354 return NULL_TREE;
5355 }
5357 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
5358 if (is_attribute_p ("fastcall", name))
5359 {
5360 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5361 {
5362 error ("fastcall and cdecl attributes are not compatible");
5363 }
5364 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5365 {
5366 error ("fastcall and stdcall attributes are not compatible");
5367 }
5368 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
5369 {
5370 error ("fastcall and regparm attributes are not compatible");
5371 }
5372 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5373 {
5374 error ("fastcall and thiscall attributes are not compatible");
5375 }
5376 }
5378 /* Can combine stdcall with fastcall (redundant), regparm and
5379 sseregparm. */
5380 else if (is_attribute_p ("stdcall", name))
5381 {
5382 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5383 {
5384 error ("stdcall and cdecl attributes are not compatible");
5385 }
5386 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5387 {
5388 error ("stdcall and fastcall attributes are not compatible");
5389 }
5390 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5391 {
5392 error ("stdcall and thiscall attributes are not compatible");
5393 }
5394 }
5396 /* Can combine cdecl with regparm and sseregparm. */
5397 else if (is_attribute_p ("cdecl", name))
5398 {
5399 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5400 {
5401 error ("stdcall and cdecl attributes are not compatible");
5402 }
5403 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5404 {
5405 error ("fastcall and cdecl attributes are not compatible");
5406 }
5407 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5408 {
5409 error ("cdecl and thiscall attributes are not compatible");
5410 }
5411 }
5412 else if (is_attribute_p ("thiscall", name))
5413 {
5414 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
5415 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
5416 name);
5417 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5418 {
5419 error ("stdcall and thiscall attributes are not compatible");
5420 }
5421 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5422 {
5423 error ("fastcall and thiscall attributes are not compatible");
5424 }
5425 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5426 {
5427 error ("cdecl and thiscall attributes are not compatible");
5428 }
5429 }
5431 /* Can combine sseregparm with all attributes. */
5433 return NULL_TREE;
5434 }
5436 /* Return 0 if the attributes for two types are incompatible, 1 if they
5437 are compatible, and 2 if they are nearly compatible (which causes a
5438 warning to be generated). */
5440 static int
5441 ix86_comp_type_attributes (const_tree type1, const_tree type2)
5442 {
5443 /* Check for mismatch of non-default calling convention. */
5444 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
5446 if (TREE_CODE (type1) != FUNCTION_TYPE
5447 && TREE_CODE (type1) != METHOD_TYPE)
5448 return 1;
5450 /* Check for mismatched fastcall/regparm types. */
5451 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
5452 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
5453 || (ix86_function_regparm (type1, NULL)
5454 != ix86_function_regparm (type2, NULL)))
5455 return 0;
5457 /* Check for mismatched sseregparm types. */
5458 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
5459 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
5460 return 0;
5462 /* Check for mismatched thiscall types. */
5463 if (!lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type1))
5464 != !lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type2)))
5465 return 0;
5467 /* Check for mismatched return types (cdecl vs stdcall). */
5468 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
5469 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
5470 return 0;
5472 return 1;
5473 }
5475 /* Return the regparm value for a function with the indicated TYPE and DECL.
5476 DECL may be NULL when calling function indirectly
5477 or considering a libcall. */
5479 static int
5480 ix86_function_regparm (const_tree type, const_tree decl)
5481 {
5482 tree attr;
5483 int regparm;
5485 if (TARGET_64BIT)
5486 return (ix86_function_type_abi (type) == SYSV_ABI
5487 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5489 regparm = ix86_regparm;
5490 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5491 if (attr)
5492 {
5493 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5494 return regparm;
5495 }
5497 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
5498 return 2;
5500 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type)))
5501 return 1;
5503 /* Use register calling convention for local functions when possible. */
5504 if (decl
5505 && TREE_CODE (decl) == FUNCTION_DECL
5506 && optimize
5507 && !(profile_flag && !flag_fentry))
5508 {
5509 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5510 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
5511 if (i && i->local && i->can_change_signature)
5512 {
5513 int local_regparm, globals = 0, regno;
5515 /* Make sure no regparm register is taken by a
5516 fixed register variable. */
5517 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
5518 if (fixed_regs[local_regparm])
5519 break;
5521 /* We don't want to use regparm(3) for nested functions as
5522 these use a static chain pointer in the third argument. */
5523 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
5524 local_regparm = 2;
5526 /* In 32-bit mode save a register for the split stack. */
5527 if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
5528 local_regparm = 2;
5530 /* Each fixed register usage increases register pressure,
5531 so fewer registers should be used for argument passing.
5532 This functionality can be overridden by an explicit
5533 regparm value. */
5534 for (regno = 0; regno <= DI_REG; regno++)
5535 if (fixed_regs[regno])
5536 globals++;
5538 local_regparm
5539 = globals < local_regparm ? local_regparm - globals : 0;
5541 if (local_regparm > regparm)
5542 regparm = local_regparm;
5543 }
5544 }
5546 return regparm;
5547 }
5549 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5550 DFmode (2) arguments in SSE registers for a function with the
5551 indicated TYPE and DECL. DECL may be NULL when calling function
5552 indirectly or considering a libcall. Otherwise return 0. */
5554 static int
5555 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
5556 {
5557 gcc_assert (!TARGET_64BIT);
5559 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5560 by the sseregparm attribute. */
5561 if (TARGET_SSEREGPARM
5562 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
5563 {
5564 if (!TARGET_SSE)
5565 {
5566 if (warn)
5567 {
5568 if (decl)
5569 error ("calling %qD with attribute sseregparm without "
5570 "SSE/SSE2 enabled", decl);
5571 else
5572 error ("calling %qT with attribute sseregparm without "
5573 "SSE/SSE2 enabled", type);
5574 }
5575 return 0;
5576 }
5578 return 2;
5579 }
5581 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5582 (and DFmode for SSE2) arguments in SSE registers. */
5583 if (decl && TARGET_SSE_MATH && optimize
5584 && !(profile_flag && !flag_fentry))
5585 {
5586 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5587 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
5588 if (i && i->local && i->can_change_signature)
5589 return TARGET_SSE2 ? 2 : 1;
5590 }
5592 return 0;
5593 }
5595 /* Return true if EAX is live at the start of the function. Used by
5596 ix86_expand_prologue to determine if we need special help before
5597 calling allocate_stack_worker. */
5599 static bool
5600 ix86_eax_live_at_start_p (void)
5601 {
5602 /* Cheat. Don't bother working forward from ix86_function_regparm
5603 to the function type to whether an actual argument is located in
5604 eax. Instead just look at cfg info, which is still close enough
5605 to correct at this point. This gives false positives for broken
5606 functions that might use uninitialized data that happens to be
5607 allocated in eax, but who cares? */
5608 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
5609 }
5611 static bool
5612 ix86_keep_aggregate_return_pointer (tree fntype)
5613 {
5614 tree attr;
5616 attr = lookup_attribute ("callee_pop_aggregate_return",
5617 TYPE_ATTRIBUTES (fntype));
5618 if (attr)
5619 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
5621 return KEEP_AGGREGATE_RETURN_POINTER != 0;
5622 }
5624 /* Value is the number of bytes of arguments automatically
5625 popped when returning from a subroutine call.
5626 FUNDECL is the declaration node of the function (as a tree),
5627 FUNTYPE is the data type of the function (as a tree),
5628 or for a library call it is an identifier node for the subroutine name.
5629 SIZE is the number of bytes of arguments passed on the stack.
5631 On the 80386, the RTD insn may be used to pop them if the number
5632 of args is fixed, but if the number is variable then the caller
5633 must pop them all. RTD can't be used for library calls now
5634 because the library is compiled with the Unix compiler.
5635 Use of RTD is a selectable option, since it is incompatible with
5636 standard Unix calling sequences. If the option is not selected,
5637 the caller must always pop the args.
5639 The attribute stdcall is equivalent to RTD on a per module basis. */
5642 ix86_return_pops_args (tree fundecl, tree funtype, int size)
5646 /* None of the 64-bit ABIs pop arguments. */
5650 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
5652 /* Cdecl functions override -mrtd, and never pop the stack. */
5653 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
/* Stdcall and fastcall functions will pop the stack if not
   variable args.  */
5657 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
5658 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype))
5659 || lookup_attribute ("thiscall", TYPE_ATTRIBUTES (funtype)))
5662 if (rtd && ! stdarg_p (funtype))
5666 /* Lose any fake structure return argument if it is passed on the stack. */
5667 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
5668 && !ix86_keep_aggregate_return_pointer (funtype))
5670 int nregs = ix86_function_regparm (funtype, fundecl);
5672 return GET_MODE_SIZE (Pmode);
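/* Illustrative sketch (added for exposition, not from the original
   sources): a stdcall callee pops its own arguments with "ret imm",
   so for

       int __attribute__ ((stdcall)) f (int a, int b);

   a call pushes 8 bytes of arguments and f returns with "ret $8",
   making ix86_return_pops_args report 8; a cdecl function leaves the
   popping to the caller and 0 is reported instead.  */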
5678 /* Argument support functions. */
/* Return true when a register may be used to pass function parameters.  */
5682 ix86_function_arg_regno_p (int regno)
5685 const int *parm_regs;
5690 return (regno < REGPARM_MAX
5691 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
5693 return (regno < REGPARM_MAX
5694 || (TARGET_MMX && MMX_REGNO_P (regno)
5695 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
5696 || (TARGET_SSE && SSE_REGNO_P (regno)
5697 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
5702 if (SSE_REGNO_P (regno) && TARGET_SSE)
5707 if (TARGET_SSE && SSE_REGNO_P (regno)
5708 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
/* TODO: The function should depend on current function ABI but
   builtins.c would need updating then.  Therefore we use the
   default ABI.  */
5716 /* RAX is used as hidden argument to va_arg functions. */
5717 if (ix86_abi == SYSV_ABI && regno == AX_REG)
5720 if (ix86_abi == MS_ABI)
5721 parm_regs = x86_64_ms_abi_int_parameter_registers;
5723 parm_regs = x86_64_int_parameter_registers;
5724 for (i = 0; i < (ix86_abi == MS_ABI
5725 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
5726 if (regno == parm_regs[i])
/* Return true if we do not know how to pass TYPE solely in registers.  */
5734 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
5736 if (must_pass_in_stack_var_size_or_pad (mode, type))
5739 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5740 The layout_type routine is crafty and tries to trick us into passing
5741 currently unsupported vector types on the stack by using TImode. */
5742 return (!TARGET_64BIT && mode == TImode
5743 && type && TREE_CODE (type) != VECTOR_TYPE);
/* Return the size, in bytes, of the area reserved for arguments passed
   in registers for the function represented by FNDECL, depending on the
   ABI used.  */
5750 ix86_reg_parm_stack_space (const_tree fndecl)
5752 enum calling_abi call_abi = SYSV_ABI;
5753 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
5754 call_abi = ix86_function_abi (fndecl);
5756 call_abi = ix86_function_type_abi (fndecl);
5757 if (call_abi == MS_ABI)
/* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
   call abi used.  */
5765 ix86_function_type_abi (const_tree fntype)
5767 if (TARGET_64BIT && fntype != NULL)
5769 enum calling_abi abi = ix86_abi;
5770 if (abi == SYSV_ABI)
5772 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
5775 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
5783 ix86_function_ms_hook_prologue (const_tree fn)
5785 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
5787 if (decl_function_context (fn) != NULL_TREE)
5788 error_at (DECL_SOURCE_LOCATION (fn),
5789 "ms_hook_prologue is not compatible with nested function");
5796 static enum calling_abi
5797 ix86_function_abi (const_tree fndecl)
5801 return ix86_function_type_abi (TREE_TYPE (fndecl));
/* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
   call abi used.  */
5807 ix86_cfun_abi (void)
5809 if (! cfun || ! TARGET_64BIT)
5811 return cfun->machine->call_abi;
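/* Illustrative sketch (added for exposition, not from the original
   sources): on 64-bit targets the ms_abi and sysv_abi attributes pick
   the calling convention per function, e.g.

       void __attribute__ ((ms_abi)) g (int a, int b);

   would pass a and b in %rcx and %rdx, while the same prototype under
   the SYSV ABI uses %rdi and %rsi.  */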
5814 /* Write the extra assembler code needed to declare a function properly. */
5817 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
5820 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
5824 int i, filler_count = (TARGET_64BIT ? 32 : 16);
5825 unsigned int filler_cc = 0xcccccccc;
5827 for (i = 0; i < filler_count; i += 4)
5828 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
5831 #ifdef SUBTARGET_ASM_UNWIND_INIT
5832 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
5835 ASM_OUTPUT_LABEL (asm_out_file, fname);
5837 /* Output magic byte marker, if hot-patch attribute is set. */
5842 /* leaq [%rsp + 0], %rsp */
5843 asm_fprintf (asm_out_file, ASM_BYTE
5844 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
5848 /* movl.s %edi, %edi
5850 movl.s %esp, %ebp */
5851 asm_fprintf (asm_out_file, ASM_BYTE
5852 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
5858 extern void init_regs (void);
/* Implementation of the call abi switching target hook.  The call
   register sets specific to FNDECL are set up.  See also
   ix86_conditional_register_usage for more details.  */
5864 ix86_call_abi_override (const_tree fndecl)
5866 if (fndecl == NULL_TREE)
5867 cfun->machine->call_abi = ix86_abi;
5869 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
/* MS and SYSV ABI have different sets of call-used registers.  Avoid
   expensive re-initialization of init_regs each time we switch function
   context since this is needed only during RTL expansion.  */
5876 ix86_maybe_switch_abi (void)
5879 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
5883 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5884 for a call to a function whose data type is FNTYPE.
5885 For a library call, FNTYPE is 0. */
5888 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
5889 tree fntype, /* tree ptr for function decl */
5890 rtx libname, /* SYMBOL_REF of library name or 0 */
5894 struct cgraph_local_info *i;
5897 memset (cum, 0, sizeof (*cum));
5899 /* Initialize for the current callee. */
5902 cfun->machine->callee_pass_avx256_p = false;
5903 cfun->machine->callee_return_avx256_p = false;
5908 i = cgraph_local_info (fndecl);
5909 cum->call_abi = ix86_function_abi (fndecl);
5910 fnret_type = TREE_TYPE (TREE_TYPE (fndecl));
5915 cum->call_abi = ix86_function_type_abi (fntype);
5917 fnret_type = TREE_TYPE (fntype);
5922 if (TARGET_VZEROUPPER && fnret_type)
5924 rtx fnret_value = ix86_function_value (fnret_type, fntype,
5926 if (function_pass_avx256_p (fnret_value))
5928 /* The return value of this function uses 256bit AVX modes. */
5930 cfun->machine->callee_return_avx256_p = true;
5932 cfun->machine->caller_return_avx256_p = true;
5936 cum->caller = caller;
5938 /* Set up the number of registers to use for passing arguments. */
5940 if (cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
5941 sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
5942 "or subtarget optimization implying it");
5943 cum->nregs = ix86_regparm;
5946 cum->nregs = (cum->call_abi == SYSV_ABI
5947 ? X86_64_REGPARM_MAX
5948 : X86_64_MS_REGPARM_MAX);
5952 cum->sse_nregs = SSE_REGPARM_MAX;
5955 cum->sse_nregs = (cum->call_abi == SYSV_ABI
5956 ? X86_64_SSE_REGPARM_MAX
5957 : X86_64_MS_SSE_REGPARM_MAX);
5961 cum->mmx_nregs = MMX_REGPARM_MAX;
5962 cum->warn_avx = true;
5963 cum->warn_sse = true;
5964 cum->warn_mmx = true;
/* Because types might mismatch between caller and callee, we need to
   use the actual type of the function for local calls.
   FIXME: cgraph_analyze can be told to actually record if function uses
   va_start so for local functions maybe_vaarg can be made aggressive,
   minimizing cost.
   FIXME: once the type system is fixed, we won't need this code anymore.  */
5972 if (i && i->local && i->can_change_signature)
5973 fntype = TREE_TYPE (fndecl);
5974 cum->maybe_vaarg = (fntype
5975 ? (!prototype_p (fntype) || stdarg_p (fntype))
5980 /* If there are variable arguments, then we won't pass anything
5981 in registers in 32-bit mode. */
5982 if (stdarg_p (fntype))
/* Use ecx and edx registers if the function has the fastcall attribute,
   else look for regparm information.  */
5997 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)))
6000 cum->fastcall = 1; /* Same first register as in fastcall. */
6002 else if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
6008 cum->nregs = ix86_function_regparm (fntype, fndecl);
6011 /* Set up the number of SSE registers used for passing SFmode
6012 and DFmode arguments. Warn for mismatching ABI. */
6013 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
6017 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
6018 But in the case of vector types, it is some vector mode.
6020 When we have only some of our vector isa extensions enabled, then there
6021 are some modes for which vector_mode_supported_p is false. For these
6022 modes, the generic vector support in gcc will choose some non-vector mode
6023 in order to implement the type. By computing the natural mode, we'll
6024 select the proper ABI location for the operand and not depend on whatever
6025 the middle-end decides to do with these vector types.
   The middle-end can't deal with vector types larger than 16 bytes.  In
   this case, we return the original mode and warn ABI change if CUM isn't
   NULL.  */
6031 static enum machine_mode
6032 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum)
6034 enum machine_mode mode = TYPE_MODE (type);
6036 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
6038 HOST_WIDE_INT size = int_size_in_bytes (type);
6039 if ((size == 8 || size == 16 || size == 32)
6040 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
6041 && TYPE_VECTOR_SUBPARTS (type) > 1)
6043 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
6045 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6046 mode = MIN_MODE_VECTOR_FLOAT;
6048 mode = MIN_MODE_VECTOR_INT;
6050 /* Get the mode which has this inner mode and number of units. */
6051 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
6052 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
6053 && GET_MODE_INNER (mode) == innermode)
6055 if (size == 32 && !TARGET_AVX)
6057 static bool warnedavx;
6064 warning (0, "AVX vector argument without AVX "
6065 "enabled changes the ABI");
6067 return TYPE_MODE (type);
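/* Illustrative sketch (added for exposition, not from the original
   sources): given

       typedef int v4si __attribute__ ((vector_size (16)));

   type_natural_mode returns V4SImode even when -msse is not in effect,
   so the ABI slot is chosen consistently; only 32-byte vectors without
   AVX fall back to TYPE_MODE with the warning above.  */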
6080 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
6081 this may not agree with the mode that the type system has chosen for the
6082 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
6083 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
6086 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
6091 if (orig_mode != BLKmode)
6092 tmp = gen_rtx_REG (orig_mode, regno);
6095 tmp = gen_rtx_REG (mode, regno);
6096 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
6097 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
6103 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
6104 of this code is to classify each 8bytes of incoming argument by the register
6105 class and assign registers accordingly. */
6107 /* Return the union class of CLASS1 and CLASS2.
6108 See the x86-64 PS ABI for details. */
6110 static enum x86_64_reg_class
6111 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
6113 /* Rule #1: If both classes are equal, this is the resulting class. */
6114 if (class1 == class2)
/* Rule #2: If one of the classes is NO_CLASS, the resulting class is
   the other class.  */
6119 if (class1 == X86_64_NO_CLASS)
6121 if (class2 == X86_64_NO_CLASS)
6124 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
6125 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
6126 return X86_64_MEMORY_CLASS;
6128 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
6129 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
6130 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
6131 return X86_64_INTEGERSI_CLASS;
6132 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
6133 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
6134 return X86_64_INTEGER_CLASS;
/* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
   MEMORY is used.  */
6138 if (class1 == X86_64_X87_CLASS
6139 || class1 == X86_64_X87UP_CLASS
6140 || class1 == X86_64_COMPLEX_X87_CLASS
6141 || class2 == X86_64_X87_CLASS
6142 || class2 == X86_64_X87UP_CLASS
6143 || class2 == X86_64_COMPLEX_X87_CLASS)
6144 return X86_64_MEMORY_CLASS;
6146 /* Rule #6: Otherwise class SSE is used. */
6147 return X86_64_SSE_CLASS;
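/* Illustrative sketch (added for exposition, not from the original
   sources): for

       union u { int i; float f; };

   the int member classifies as INTEGERSI and the float as SSESF;
   rule #4 above merges the two into INTEGERSI, so the union is passed
   in a general purpose register rather than an SSE register.  */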
6150 /* Classify the argument of type TYPE and mode MODE.
6151 CLASSES will be filled by the register class used to pass each word
6152 of the operand. The number of words is returned. In case the parameter
6153 should be passed in memory, 0 is returned. As a special case for zero
6154 sized containers, classes[0] will be NO_CLASS and 1 is returned.
   BIT_OFFSET is used internally for handling records and specifies the
   offset in bits modulo 256 to avoid overflow cases.
6159 See the x86-64 PS ABI for details.
6163 classify_argument (enum machine_mode mode, const_tree type,
6164 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
6166 HOST_WIDE_INT bytes =
6167 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6168 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6170 /* Variable sized entities are always passed/returned in memory. */
6174 if (mode != VOIDmode
6175 && targetm.calls.must_pass_in_stack (mode, type))
6178 if (type && AGGREGATE_TYPE_P (type))
6182 enum x86_64_reg_class subclasses[MAX_CLASSES];
6184 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
6188 for (i = 0; i < words; i++)
6189 classes[i] = X86_64_NO_CLASS;
/* Zero sized arrays or structures are NO_CLASS.  We return 0 to
   signal memory class, so handle it as a special case.  */
6195 classes[0] = X86_64_NO_CLASS;
/* Classify each field of the record and merge classes.  */
6200 switch (TREE_CODE (type))
6203 /* And now merge the fields of structure. */
6204 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6206 if (TREE_CODE (field) == FIELD_DECL)
6210 if (TREE_TYPE (field) == error_mark_node)
6213 /* Bitfields are always classified as integer. Handle them
6214 early, since later code would consider them to be
6215 misaligned integers. */
6216 if (DECL_BIT_FIELD (field))
6218 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
6219 i < ((int_bit_position (field) + (bit_offset % 64))
6220 + tree_low_cst (DECL_SIZE (field), 0)
6223 merge_classes (X86_64_INTEGER_CLASS,
6230 type = TREE_TYPE (field);
6232 /* Flexible array member is ignored. */
6233 if (TYPE_MODE (type) == BLKmode
6234 && TREE_CODE (type) == ARRAY_TYPE
6235 && TYPE_SIZE (type) == NULL_TREE
6236 && TYPE_DOMAIN (type) != NULL_TREE
6237 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
6242 if (!warned && warn_psabi)
6245 inform (input_location,
6246 "the ABI of passing struct with"
6247 " a flexible array member has"
6248 " changed in GCC 4.4");
6252 num = classify_argument (TYPE_MODE (type), type,
6254 (int_bit_position (field)
6255 + bit_offset) % 256);
6258 pos = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
6259 for (i = 0; i < num && (i + pos) < words; i++)
6261 merge_classes (subclasses[i], classes[i + pos]);
6268 /* Arrays are handled as small records. */
6271 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
6272 TREE_TYPE (type), subclasses, bit_offset);
6276 /* The partial classes are now full classes. */
6277 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
6278 subclasses[0] = X86_64_SSE_CLASS;
6279 if (subclasses[0] == X86_64_INTEGERSI_CLASS
6280 && !((bit_offset % 64) == 0 && bytes == 4))
6281 subclasses[0] = X86_64_INTEGER_CLASS;
6283 for (i = 0; i < words; i++)
6284 classes[i] = subclasses[i % num];
6289 case QUAL_UNION_TYPE:
/* Unions are similar to RECORD_TYPE but offset is always 0.  */
6292 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6294 if (TREE_CODE (field) == FIELD_DECL)
6298 if (TREE_TYPE (field) == error_mark_node)
6301 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
6302 TREE_TYPE (field), subclasses,
6306 for (i = 0; i < num; i++)
6307 classes[i] = merge_classes (subclasses[i], classes[i]);
/* When size > 16 bytes, if the first one isn't
   X86_64_SSE_CLASS or any other ones aren't
   X86_64_SSEUP_CLASS, everything should be passed in
   memory.  */
6322 if (classes[0] != X86_64_SSE_CLASS)
6325 for (i = 1; i < words; i++)
6326 if (classes[i] != X86_64_SSEUP_CLASS)
6330 /* Final merger cleanup. */
6331 for (i = 0; i < words; i++)
/* If one class is MEMORY, everything should be passed in
   memory.  */
6335 if (classes[i] == X86_64_MEMORY_CLASS)
/* The X86_64_SSEUP_CLASS should always be preceded by
   X86_64_SSE_CLASS or X86_64_SSEUP_CLASS.  */
6340 if (classes[i] == X86_64_SSEUP_CLASS
6341 && classes[i - 1] != X86_64_SSE_CLASS
6342 && classes[i - 1] != X86_64_SSEUP_CLASS)
6344 /* The first one should never be X86_64_SSEUP_CLASS. */
6345 gcc_assert (i != 0);
6346 classes[i] = X86_64_SSE_CLASS;
6349 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6350 everything should be passed in memory. */
6351 if (classes[i] == X86_64_X87UP_CLASS
6352 && (classes[i - 1] != X86_64_X87_CLASS))
6356 /* The first one should never be X86_64_X87UP_CLASS. */
6357 gcc_assert (i != 0);
6358 if (!warned && warn_psabi)
6361 inform (input_location,
6362 "the ABI of passing union with long double"
6363 " has changed in GCC 4.4");
/* Compute alignment needed.  We align all types to natural boundaries
   with the exception of XFmode, which is aligned to 64 bits.  */
6373 if (mode != VOIDmode && mode != BLKmode)
6375 int mode_alignment = GET_MODE_BITSIZE (mode);
6378 mode_alignment = 128;
6379 else if (mode == XCmode)
6380 mode_alignment = 256;
6381 if (COMPLEX_MODE_P (mode))
6382 mode_alignment /= 2;
6383 /* Misaligned fields are always returned in memory. */
6384 if (bit_offset % mode_alignment)
/* For V1xx modes, just use the base mode.  */
6389 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
6390 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
6391 mode = GET_MODE_INNER (mode);
6393 /* Classification of atomic types. */
6398 classes[0] = X86_64_SSE_CLASS;
6401 classes[0] = X86_64_SSE_CLASS;
6402 classes[1] = X86_64_SSEUP_CLASS;
int size = (bit_offset % 64) + (int) GET_MODE_BITSIZE (mode);
6416 classes[0] = X86_64_INTEGERSI_CLASS;
6419 else if (size <= 64)
6421 classes[0] = X86_64_INTEGER_CLASS;
6424 else if (size <= 64+32)
6426 classes[0] = X86_64_INTEGER_CLASS;
6427 classes[1] = X86_64_INTEGERSI_CLASS;
6430 else if (size <= 64+64)
6432 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6440 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6444 /* OImode shouldn't be used directly. */
6449 if (!(bit_offset % 64))
6450 classes[0] = X86_64_SSESF_CLASS;
6452 classes[0] = X86_64_SSE_CLASS;
6455 classes[0] = X86_64_SSEDF_CLASS;
6458 classes[0] = X86_64_X87_CLASS;
6459 classes[1] = X86_64_X87UP_CLASS;
6462 classes[0] = X86_64_SSE_CLASS;
6463 classes[1] = X86_64_SSEUP_CLASS;
6466 classes[0] = X86_64_SSE_CLASS;
6467 if (!(bit_offset % 64))
6473 if (!warned && warn_psabi)
6476 inform (input_location,
6477 "the ABI of passing structure with complex float"
6478 " member has changed in GCC 4.4");
6480 classes[1] = X86_64_SSESF_CLASS;
6484 classes[0] = X86_64_SSEDF_CLASS;
6485 classes[1] = X86_64_SSEDF_CLASS;
6488 classes[0] = X86_64_COMPLEX_X87_CLASS;
/* This mode is larger than 16 bytes.  */
6499 classes[0] = X86_64_SSE_CLASS;
6500 classes[1] = X86_64_SSEUP_CLASS;
6501 classes[2] = X86_64_SSEUP_CLASS;
6502 classes[3] = X86_64_SSEUP_CLASS;
6510 classes[0] = X86_64_SSE_CLASS;
6511 classes[1] = X86_64_SSEUP_CLASS;
6519 classes[0] = X86_64_SSE_CLASS;
6525 gcc_assert (VECTOR_MODE_P (mode));
6530 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
6532 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
6533 classes[0] = X86_64_INTEGERSI_CLASS;
6535 classes[0] = X86_64_INTEGER_CLASS;
6536 classes[1] = X86_64_INTEGER_CLASS;
6537 return 1 + (bytes > 8);
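/* Illustrative sketch (added for exposition, not from the original
   sources): a worked example of the classification above.  For

       struct s { double d; long l; };

   (16 bytes on x86-64) the first eightbyte classifies as an SSE class
   and the second as INTEGER, so classify_argument returns 2 and the
   struct is passed with d in an SSE register and l in a general
   purpose register.  */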
/* Examine the argument and set the number of registers required in each
   class.  Return 0 iff the parameter should be passed in memory.  */
6544 examine_argument (enum machine_mode mode, const_tree type, int in_return,
6545 int *int_nregs, int *sse_nregs)
6547 enum x86_64_reg_class regclass[MAX_CLASSES];
6548 int n = classify_argument (mode, type, regclass, 0);
6554 for (n--; n >= 0; n--)
6555 switch (regclass[n])
6557 case X86_64_INTEGER_CLASS:
6558 case X86_64_INTEGERSI_CLASS:
6561 case X86_64_SSE_CLASS:
6562 case X86_64_SSESF_CLASS:
6563 case X86_64_SSEDF_CLASS:
6566 case X86_64_NO_CLASS:
6567 case X86_64_SSEUP_CLASS:
6569 case X86_64_X87_CLASS:
6570 case X86_64_X87UP_CLASS:
6574 case X86_64_COMPLEX_X87_CLASS:
6575 return in_return ? 2 : 0;
6576 case X86_64_MEMORY_CLASS:
6582 /* Construct container for the argument used by GCC interface. See
6583 FUNCTION_ARG for the detailed description. */
6586 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
6587 const_tree type, int in_return, int nintregs, int nsseregs,
6588 const int *intreg, int sse_regno)
6590 /* The following variables hold the static issued_error state. */
6591 static bool issued_sse_arg_error;
6592 static bool issued_sse_ret_error;
6593 static bool issued_x87_ret_error;
6595 enum machine_mode tmpmode;
6597 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6598 enum x86_64_reg_class regclass[MAX_CLASSES];
6602 int needed_sseregs, needed_intregs;
6603 rtx exp[MAX_CLASSES];
6606 n = classify_argument (mode, type, regclass, 0);
6609 if (!examine_argument (mode, type, in_return, &needed_intregs,
6612 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
6615 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
6616 some less clueful developer tries to use floating-point anyway. */
6617 if (needed_sseregs && !TARGET_SSE)
6621 if (!issued_sse_ret_error)
6623 error ("SSE register return with SSE disabled");
6624 issued_sse_ret_error = true;
6627 else if (!issued_sse_arg_error)
6629 error ("SSE register argument with SSE disabled");
6630 issued_sse_arg_error = true;
6635 /* Likewise, error if the ABI requires us to return values in the
6636 x87 registers and the user specified -mno-80387. */
6637 if (!TARGET_80387 && in_return)
6638 for (i = 0; i < n; i++)
6639 if (regclass[i] == X86_64_X87_CLASS
6640 || regclass[i] == X86_64_X87UP_CLASS
6641 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
6643 if (!issued_x87_ret_error)
6645 error ("x87 register return with x87 disabled");
6646 issued_x87_ret_error = true;
/* First construct simple cases.  Avoid SCmode, since we want to use
   a single register to pass this type.  */
6653 if (n == 1 && mode != SCmode)
6654 switch (regclass[0])
6656 case X86_64_INTEGER_CLASS:
6657 case X86_64_INTEGERSI_CLASS:
6658 return gen_rtx_REG (mode, intreg[0]);
6659 case X86_64_SSE_CLASS:
6660 case X86_64_SSESF_CLASS:
6661 case X86_64_SSEDF_CLASS:
6662 if (mode != BLKmode)
6663 return gen_reg_or_parallel (mode, orig_mode,
6664 SSE_REGNO (sse_regno));
6666 case X86_64_X87_CLASS:
6667 case X86_64_COMPLEX_X87_CLASS:
6668 return gen_rtx_REG (mode, FIRST_STACK_REG);
6669 case X86_64_NO_CLASS:
6670 /* Zero sized array, struct or class. */
6675 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
6676 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
6677 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
6679 && regclass[0] == X86_64_SSE_CLASS
6680 && regclass[1] == X86_64_SSEUP_CLASS
6681 && regclass[2] == X86_64_SSEUP_CLASS
6682 && regclass[3] == X86_64_SSEUP_CLASS
6684 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
6687 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
6688 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
6689 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
6690 && regclass[1] == X86_64_INTEGER_CLASS
6691 && (mode == CDImode || mode == TImode || mode == TFmode)
6692 && intreg[0] + 1 == intreg[1])
6693 return gen_rtx_REG (mode, intreg[0]);
6695 /* Otherwise figure out the entries of the PARALLEL. */
6696 for (i = 0; i < n; i++)
6700 switch (regclass[i])
6702 case X86_64_NO_CLASS:
6704 case X86_64_INTEGER_CLASS:
6705 case X86_64_INTEGERSI_CLASS:
6706 /* Merge TImodes on aligned occasions here too. */
6707 if (i * 8 + 8 > bytes)
6708 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
6709 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
/* We've requested 24 bytes for which we don't have a mode.  Use DImode.  */
6714 if (tmpmode == BLKmode)
6716 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6717 gen_rtx_REG (tmpmode, *intreg),
6721 case X86_64_SSESF_CLASS:
6722 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6723 gen_rtx_REG (SFmode,
6724 SSE_REGNO (sse_regno)),
6728 case X86_64_SSEDF_CLASS:
6729 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6730 gen_rtx_REG (DFmode,
6731 SSE_REGNO (sse_regno)),
6735 case X86_64_SSE_CLASS:
6743 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
6753 && regclass[1] == X86_64_SSEUP_CLASS
6754 && regclass[2] == X86_64_SSEUP_CLASS
6755 && regclass[3] == X86_64_SSEUP_CLASS);
6762 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6763 gen_rtx_REG (tmpmode,
6764 SSE_REGNO (sse_regno)),
6773 /* Empty aligned struct, union or class. */
6777 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
6778 for (i = 0; i < nexps; i++)
6779 XVECEXP (ret, 0, i) = exp [i];
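/* Illustrative sketch (added for exposition, not from the original
   sources): for a mixed two-eightbyte struct such as

       struct s { long l; double d; };

   construct_container builds roughly

       (parallel [(expr_list (reg:DI di) (const_int 0))
                  (expr_list (reg:DF xmm0) (const_int 8))])

   describing the integer eightbyte at offset 0 and the floating
   eightbyte at offset 8.  */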
6783 /* Update the data in CUM to advance over an argument of mode MODE
6784 and data type TYPE. (TYPE is null for libcalls where that information
6785 may not be available.) */
6788 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6789 const_tree type, HOST_WIDE_INT bytes,
6790 HOST_WIDE_INT words)
6806 cum->words += words;
6807 cum->nregs -= words;
6808 cum->regno += words;
6810 if (cum->nregs <= 0)
6818 /* OImode shouldn't be used directly. */
6822 if (cum->float_in_sse < 2)
6825 if (cum->float_in_sse < 1)
6842 if (!type || !AGGREGATE_TYPE_P (type))
6844 cum->sse_words += words;
6845 cum->sse_nregs -= 1;
6846 cum->sse_regno += 1;
6847 if (cum->sse_nregs <= 0)
6861 if (!type || !AGGREGATE_TYPE_P (type))
6863 cum->mmx_words += words;
6864 cum->mmx_nregs -= 1;
6865 cum->mmx_regno += 1;
6866 if (cum->mmx_nregs <= 0)
6877 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6878 const_tree type, HOST_WIDE_INT words, bool named)
6880 int int_nregs, sse_nregs;
/* Unnamed 256bit vector mode parameters are passed on the stack.  */
6883 if (!named && VALID_AVX256_REG_MODE (mode))
6886 if (examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
6887 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
6889 cum->nregs -= int_nregs;
6890 cum->sse_nregs -= sse_nregs;
6891 cum->regno += int_nregs;
6892 cum->sse_regno += sse_nregs;
6896 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
6897 cum->words = (cum->words + align - 1) & ~(align - 1);
6898 cum->words += words;
6903 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
6904 HOST_WIDE_INT words)
/* Otherwise, this should be passed indirectly.  */
6907 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
6909 cum->words += words;
6917 /* Update the data in CUM to advance over an argument of mode MODE and
6918 data type TYPE. (TYPE is null for libcalls where that information
6919 may not be available.) */
6922 ix86_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6923 const_tree type, bool named)
6925 HOST_WIDE_INT bytes, words;
6927 if (mode == BLKmode)
6928 bytes = int_size_in_bytes (type);
6930 bytes = GET_MODE_SIZE (mode);
6931 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6934 mode = type_natural_mode (type, NULL);
6936 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6937 function_arg_advance_ms_64 (cum, bytes, words);
6938 else if (TARGET_64BIT)
6939 function_arg_advance_64 (cum, mode, type, words, named);
6941 function_arg_advance_32 (cum, mode, type, bytes, words);
6944 /* Define where to put the arguments to a function.
6945 Value is zero to push the argument on the stack,
6946 or a hard register in which to store the argument.
6948 MODE is the argument's machine mode.
6949 TYPE is the data type of the argument (as a tree).
6950 This is null for libcalls where that information may
6952 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6953 the preceding args and about the function being called.
6954 NAMED is nonzero if this argument is a named parameter
6955 (otherwise it is an extra parameter matching an ellipsis). */
6958 function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6959 enum machine_mode orig_mode, const_tree type,
6960 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
6962 static bool warnedsse, warnedmmx;
6964 /* Avoid the AL settings for the Unix64 ABI. */
6965 if (mode == VOIDmode)
6981 if (words <= cum->nregs)
6983 int regno = cum->regno;
/* Fastcall allocates the first two DWORD (SImode) or
   smaller arguments to ECX and EDX if it isn't an
   aggregate type.  */
6992 || (type && AGGREGATE_TYPE_P (type)))
/* ECX, not EAX, is the first allocated register.  */
6996 if (regno == AX_REG)
6999 return gen_rtx_REG (mode, regno);
7004 if (cum->float_in_sse < 2)
7007 if (cum->float_in_sse < 1)
7011 /* In 32bit, we pass TImode in xmm registers. */
7018 if (!type || !AGGREGATE_TYPE_P (type))
7020 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
7023 warning (0, "SSE vector argument without SSE enabled "
7027 return gen_reg_or_parallel (mode, orig_mode,
7028 cum->sse_regno + FIRST_SSE_REG);
7033 /* OImode shouldn't be used directly. */
7042 if (!type || !AGGREGATE_TYPE_P (type))
7045 return gen_reg_or_parallel (mode, orig_mode,
7046 cum->sse_regno + FIRST_SSE_REG);
7056 if (!type || !AGGREGATE_TYPE_P (type))
7058 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
7061 warning (0, "MMX vector argument without MMX enabled "
7065 return gen_reg_or_parallel (mode, orig_mode,
7066 cum->mmx_regno + FIRST_MMX_REG);
7075 function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7076 enum machine_mode orig_mode, const_tree type, bool named)
/* Handle a hidden AL argument containing the number of registers
   for varargs x86-64 functions.  */
7080 if (mode == VOIDmode)
7081 return GEN_INT (cum->maybe_vaarg
7082 ? (cum->sse_nregs < 0
7083 ? X86_64_SSE_REGPARM_MAX
/* Unnamed 256bit vector mode parameters are passed on the stack.  */
7104 return construct_container (mode, orig_mode, type, 0, cum->nregs,
7106 &x86_64_int_parameter_registers [cum->regno],
7111 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7112 enum machine_mode orig_mode, bool named,
7113 HOST_WIDE_INT bytes)
/* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
   We use a value of -2 to specify that the current function call is MSABI.  */
7119 if (mode == VOIDmode)
7120 return GEN_INT (-2);
7122 /* If we've run out of registers, it goes on the stack. */
7123 if (cum->nregs == 0)
7126 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
7128 /* Only floating point modes are passed in anything but integer regs. */
7129 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
7132 regno = cum->regno + FIRST_SSE_REG;
7137 /* Unnamed floating parameters are passed in both the
7138 SSE and integer registers. */
7139 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
7140 t2 = gen_rtx_REG (mode, regno);
7141 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
7142 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
7143 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
/* Handle aggregate types passed in registers.  */
7147 if (orig_mode == BLKmode)
7149 if (bytes > 0 && bytes <= 8)
7150 mode = (bytes > 4 ? DImode : SImode);
7151 if (mode == BLKmode)
7155 return gen_reg_or_parallel (mode, orig_mode, regno);
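/* Illustrative sketch (added for exposition, not from the original
   sources): the MS 64-bit convention handled above gives each of the
   first four arguments a fixed slot, e.g. for

       double f (double a, int b, double c);

   a is passed in %xmm0, b in %edx and c in %xmm2; unlike the SYSV ABI,
   integer and SSE slots are not compacted independently.  */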
/* Return where to put the arguments to a function.
   Return zero to push the argument on the stack, or a hard register in
   which to store the argument.
7161 MODE is the argument's machine mode. TYPE is the data type of the
7162 argument. It is null for libcalls where that information may not be
7163 available. CUM gives information about the preceding args and about
7164 the function being called. NAMED is nonzero if this argument is a
7165 named parameter (otherwise it is an extra parameter matching an
7169 ix86_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
7170 const_tree type, bool named)
7172 enum machine_mode mode = omode;
7173 HOST_WIDE_INT bytes, words;
7176 if (mode == BLKmode)
7177 bytes = int_size_in_bytes (type);
7179 bytes = GET_MODE_SIZE (mode);
7180 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7182 /* To simplify the code below, represent vector types with a vector mode
7183 even if MMX/SSE are not active. */
7184 if (type && TREE_CODE (type) == VECTOR_TYPE)
7185 mode = type_natural_mode (type, cum);
7187 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7188 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
7189 else if (TARGET_64BIT)
7190 arg = function_arg_64 (cum, mode, omode, type, named);
7192 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
7194 if (TARGET_VZEROUPPER && function_pass_avx256_p (arg))
7196 /* This argument uses 256bit AVX modes. */
7198 cfun->machine->callee_pass_avx256_p = true;
7200 cfun->machine->caller_pass_avx256_p = true;
7206 /* A C expression that indicates when an argument must be passed by
7207 reference. If nonzero for an argument, a copy of that argument is
7208 made in memory and a pointer to the argument is passed instead of
7209 the argument itself. The pointer is passed in whatever way is
7210 appropriate for passing a pointer to that type. */
7213 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
7214 enum machine_mode mode ATTRIBUTE_UNUSED,
7215 const_tree type, bool named ATTRIBUTE_UNUSED)
7217 /* See Windows x64 Software Convention. */
7218 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7220 int msize = (int) GET_MODE_SIZE (mode);
7223 /* Arrays are passed by reference. */
7224 if (TREE_CODE (type) == ARRAY_TYPE)
7227 if (AGGREGATE_TYPE_P (type))
7229 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
7230 are passed by reference. */
7231 msize = int_size_in_bytes (type);
7235 /* __m128 is passed by reference. */
7237 case 1: case 2: case 4: case 8:
7243 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
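/* Illustrative sketch (added for exposition, not from the original
   sources): under the MS 64-bit rules above,

       struct tiny { char c[3]; };   three bytes, passed by reference
       struct word { long x; };      eight bytes, passed by value

   aggregates whose size is not 1, 2, 4 or 8 bytes are copied and their
   address is passed instead.  */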
7249 /* Return true when TYPE should be 128bit aligned for 32bit argument
7250 passing ABI. XXX: This function is obsolete and is only used for
7251 checking psABI compatibility with previous versions of GCC. */
7254 ix86_compat_aligned_value_p (const_tree type)
7256 enum machine_mode mode = TYPE_MODE (type);
7257 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
7261 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
7263 if (TYPE_ALIGN (type) < 128)
7266 if (AGGREGATE_TYPE_P (type))
7268 /* Walk the aggregates recursively. */
7269 switch (TREE_CODE (type))
7273 case QUAL_UNION_TYPE:
7277 /* Walk all the structure fields. */
7278 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7280 if (TREE_CODE (field) == FIELD_DECL
7281 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
/* Just for use if some language passes arrays by value.  */
7289 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
7300 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
7301 XXX: This function is obsolete and is only used for checking psABI
7302 compatibility with previous versions of GCC. */
7305 ix86_compat_function_arg_boundary (enum machine_mode mode,
7306 const_tree type, unsigned int align)
7308 /* In 32bit, only _Decimal128 and __float128 are aligned to their
7309 natural boundaries. */
7310 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
7312 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
   make an exception for SSE modes since these require 128bit
   alignment.
7316 The handling here differs from field_alignment. ICC aligns MMX
7317 arguments to 4 byte boundaries, while structure fields are aligned
7318 to 8 byte boundaries. */
7321 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
7322 align = PARM_BOUNDARY;
7326 if (!ix86_compat_aligned_value_p (type))
7327 align = PARM_BOUNDARY;
7330 if (align > BIGGEST_ALIGNMENT)
7331 align = BIGGEST_ALIGNMENT;
/* Return true when TYPE should be 128bit aligned for 32bit argument
   passing ABI.  */
7339 ix86_contains_aligned_value_p (const_tree type)
7341 enum machine_mode mode = TYPE_MODE (type);
7343 if (mode == XFmode || mode == XCmode)
7346 if (TYPE_ALIGN (type) < 128)
7349 if (AGGREGATE_TYPE_P (type))
7351 /* Walk the aggregates recursively. */
7352 switch (TREE_CODE (type))
7356 case QUAL_UNION_TYPE:
7360 /* Walk all the structure fields. */
7361 for (field = TYPE_FIELDS (type);
7363 field = DECL_CHAIN (field))
7365 if (TREE_CODE (field) == FIELD_DECL
7366 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
/* Just for use if some language passes arrays by value.  */
7374 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
7383 return TYPE_ALIGN (type) >= 128;
7388 /* Gives the alignment boundary, in bits, of an argument with the
7389 specified mode and type. */
7392 ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
/* Since the main variant type is used for the call, we convert it to
   the main variant type.  */
7399 type = TYPE_MAIN_VARIANT (type);
7400 align = TYPE_ALIGN (type);
7403 align = GET_MODE_ALIGNMENT (mode);
7404 if (align < PARM_BOUNDARY)
7405 align = PARM_BOUNDARY;
7409 unsigned int saved_align = align;
7413 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
7416 if (mode == XFmode || mode == XCmode)
7417 align = PARM_BOUNDARY;
7419 else if (!ix86_contains_aligned_value_p (type))
7420 align = PARM_BOUNDARY;
7423 align = PARM_BOUNDARY;
7428 && align != ix86_compat_function_arg_boundary (mode, type,
7432 inform (input_location,
7433 "The ABI for passing parameters with %d-byte"
7434 " alignment has changed in GCC 4.6",
7435 align / BITS_PER_UNIT);
/* Return true if N is a possible register number of a function value.  */
7445 ix86_function_value_regno_p (const unsigned int regno)
7452 case FIRST_FLOAT_REG:
/* TODO: The function should depend on current function ABI but
   builtins.c would need updating then.  Therefore we use the
   default ABI.  */
7456 if (TARGET_64BIT && ix86_abi == MS_ABI)
7458 return TARGET_FLOAT_RETURNS_IN_80387;
7464 if (TARGET_MACHO || TARGET_64BIT)
7472 /* Define how to find the value returned by a function.
7473 VALTYPE is the data type of the value (as a tree).
7474 If the precise function being called is known, FUNC is its FUNCTION_DECL;
7475 otherwise, FUNC is 0. */
7478 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
7479 const_tree fntype, const_tree fn)
7483 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
7484 we normally prevent this case when mmx is not available. However
7485 some ABIs may require the result to be returned like DImode. */
7486 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7487 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
7489 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
7490 we prevent this case when sse is not available. However some ABIs
7491 may require the result to be returned like integer TImode. */
7492 else if (mode == TImode
7493 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7494 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
7496 /* 32-byte vector modes in %ymm0. */
7497 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
7498 regno = TARGET_AVX ? FIRST_SSE_REG : 0;
7500 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
7501 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
7502 regno = FIRST_FLOAT_REG;
7504 /* Most things go in %eax. */
7507 /* Override FP return register with %xmm0 for local functions when
7508 SSE math is enabled or for functions with sseregparm attribute. */
7509 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
7511 int sse_level = ix86_function_sseregparm (fntype, fn, false);
7512 if ((sse_level >= 1 && mode == SFmode)
7513 || (sse_level == 2 && mode == DFmode))
7514 regno = FIRST_SSE_REG;
7517 /* OImode shouldn't be used directly. */
7518 gcc_assert (mode != OImode);
7520 return gen_rtx_REG (orig_mode, regno);
7524 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
7529 /* Handle libcalls, which don't provide a type node. */
7530 if (valtype == NULL)
7542 return gen_rtx_REG (mode, FIRST_SSE_REG);
7545 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
7549 return gen_rtx_REG (mode, AX_REG);
7553 ret = construct_container (mode, orig_mode, valtype, 1,
7554 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
7555 x86_64_int_return_registers, 0);
/* For zero sized structures, construct_container returns NULL, but we
   need to keep the rest of the compiler happy by returning a meaningful
   value.  */
7560 ret = gen_rtx_REG (orig_mode, AX_REG);
7566 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
7568 unsigned int regno = AX_REG;
7572 switch (GET_MODE_SIZE (mode))
7575 if((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7576 && !COMPLEX_MODE_P (mode))
7577 regno = FIRST_SSE_REG;
7581 if (mode == SFmode || mode == DFmode)
7582 regno = FIRST_SSE_REG;
7588 return gen_rtx_REG (orig_mode, regno);
7592 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
7593 enum machine_mode orig_mode, enum machine_mode mode)
7595 const_tree fn, fntype;
7598 if (fntype_or_decl && DECL_P (fntype_or_decl))
7599 fn = fntype_or_decl;
7600 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
7602 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
7603 return function_value_ms_64 (orig_mode, mode);
7604 else if (TARGET_64BIT)
7605 return function_value_64 (orig_mode, mode, valtype);
7607 return function_value_32 (orig_mode, mode, fntype, fn);
7611 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
7612 bool outgoing ATTRIBUTE_UNUSED)
7614 enum machine_mode mode, orig_mode;
7616 orig_mode = TYPE_MODE (valtype);
7617 mode = type_natural_mode (valtype, NULL);
7618 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
7622 ix86_libcall_value (enum machine_mode mode)
7624 return ix86_function_value_1 (NULL, NULL, mode, mode);
7627 /* Return true iff type is returned in memory. */
7629 static bool ATTRIBUTE_UNUSED
7630 return_in_memory_32 (const_tree type, enum machine_mode mode)
7634 if (mode == BLKmode)
7637 size = int_size_in_bytes (type);
7639 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
7642 if (VECTOR_MODE_P (mode) || mode == TImode)
7644 /* User-created vectors small enough to fit in EAX. */
/* MMX/3dNow values are returned in MM0,
   except when it doesn't exist or the ABI prescribes otherwise.  */
7651 return !TARGET_MMX || TARGET_VECT8_RETURNS;
7653 /* SSE values are returned in XMM0, except when it doesn't exist. */
7657 /* AVX values are returned in YMM0, except when it doesn't exist. */
7668 /* OImode shouldn't be used directly. */
7669 gcc_assert (mode != OImode);
7674 static bool ATTRIBUTE_UNUSED
7675 return_in_memory_64 (const_tree type, enum machine_mode mode)
7677 int needed_intregs, needed_sseregs;
7678 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
7681 static bool ATTRIBUTE_UNUSED
7682 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
7684 HOST_WIDE_INT size = int_size_in_bytes (type);
7686 /* __m128 is returned in xmm0. */
7687 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7688 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
7691 /* Otherwise, the size must be exactly in [1248]. */
7692 return size != 1 && size != 2 && size != 4 && size != 8;
7696 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7698 #ifdef SUBTARGET_RETURN_IN_MEMORY
7699 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
7701 const enum machine_mode mode = type_natural_mode (type, NULL);
7705 if (ix86_function_type_abi (fntype) == MS_ABI)
7706 return return_in_memory_ms_64 (type, mode);
7708 return return_in_memory_64 (type, mode);
7711 return return_in_memory_32 (type, mode);
7715 /* When returning SSE vector types, we have a choice of either
7716 (1) being abi incompatible with a -march switch, or
7717 (2) generating an error.
7718 Given no good solution, I think the safest thing is one warning.
7719 The user won't be able to use -Werror, but....
7721 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
7722 called in response to actually generating a caller or callee that
7723 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
7724 via aggregate_value_p for general type probing from tree-ssa. */
7727 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
7729 static bool warnedsse, warnedmmx;
7731 if (!TARGET_64BIT && type)
7733 /* Look at the return type of the function, not the function type. */
7734 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
7736 if (!TARGET_SSE && !warnedsse)
7739 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7742 warning (0, "SSE vector return without SSE enabled "
7747 if (!TARGET_MMX && !warnedmmx)
7749 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7752 warning (0, "MMX vector return without MMX enabled "
7762 /* Create the va_list data type. */
/* Returns the calling-convention-specific va_list data type.
   The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI.  */
7768 ix86_build_builtin_va_list_abi (enum calling_abi abi)
7770 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
/* For i386 we use a plain pointer to the argument area.  */
7773 if (!TARGET_64BIT || abi == MS_ABI)
7774 return build_pointer_type (char_type_node);
7776 record = lang_hooks.types.make_type (RECORD_TYPE);
7777 type_decl = build_decl (BUILTINS_LOCATION,
7778 TYPE_DECL, get_identifier ("__va_list_tag"), record);
7780 f_gpr = build_decl (BUILTINS_LOCATION,
7781 FIELD_DECL, get_identifier ("gp_offset"),
7782 unsigned_type_node);
7783 f_fpr = build_decl (BUILTINS_LOCATION,
7784 FIELD_DECL, get_identifier ("fp_offset"),
7785 unsigned_type_node);
7786 f_ovf = build_decl (BUILTINS_LOCATION,
7787 FIELD_DECL, get_identifier ("overflow_arg_area"),
7789 f_sav = build_decl (BUILTINS_LOCATION,
7790 FIELD_DECL, get_identifier ("reg_save_area"),
7793 va_list_gpr_counter_field = f_gpr;
7794 va_list_fpr_counter_field = f_fpr;
7796 DECL_FIELD_CONTEXT (f_gpr) = record;
7797 DECL_FIELD_CONTEXT (f_fpr) = record;
7798 DECL_FIELD_CONTEXT (f_ovf) = record;
7799 DECL_FIELD_CONTEXT (f_sav) = record;
7801 TYPE_STUB_DECL (record) = type_decl;
7802 TYPE_NAME (record) = type_decl;
7803 TYPE_FIELDS (record) = f_gpr;
7804 DECL_CHAIN (f_gpr) = f_fpr;
7805 DECL_CHAIN (f_fpr) = f_ovf;
7806 DECL_CHAIN (f_ovf) = f_sav;
7808 layout_type (record);
7810 /* The correct type is an array type of one element. */
7811 return build_array_type (record, build_index_type (size_zero_node));
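/* Illustrative sketch (added for exposition, not from the original
   sources): the record built above corresponds to

       typedef struct __va_list_tag {
         unsigned int gp_offset;
         unsigned int fp_offset;
         void *overflow_arg_area;
         void *reg_save_area;
       } __builtin_va_list[1];

   an array of one element, so a va_list decays to a pointer when
   handed to callees.  */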
/* Set up the builtin va_list data type and, for 64-bit, the additional
   calling-convention-specific va_list data types.  */
7818 ix86_build_builtin_va_list (void)
7820 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
7822 /* Initialize abi specific va_list builtin types. */
7826 if (ix86_abi == MS_ABI)
7828 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
7829 if (TREE_CODE (t) != RECORD_TYPE)
7830 t = build_variant_type_copy (t);
7831 sysv_va_list_type_node = t;
7836 if (TREE_CODE (t) != RECORD_TYPE)
7837 t = build_variant_type_copy (t);
7838 sysv_va_list_type_node = t;
7840 if (ix86_abi != MS_ABI)
7842 t = ix86_build_builtin_va_list_abi (MS_ABI);
7843 if (TREE_CODE (t) != RECORD_TYPE)
7844 t = build_variant_type_copy (t);
7845 ms_va_list_type_node = t;
7850 if (TREE_CODE (t) != RECORD_TYPE)
7851 t = build_variant_type_copy (t);
7852 ms_va_list_type_node = t;
7859 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
7862 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
7868 /* GPR size of varargs save area. */
7869 if (cfun->va_list_gpr_size)
7870 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
7872 ix86_varargs_gpr_size = 0;
7874 /* FPR size of varargs save area. We don't need it if we don't pass
7875 anything in SSE registers. */
7876 if (TARGET_SSE && cfun->va_list_fpr_size)
7877 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
7879 ix86_varargs_fpr_size = 0;
7881 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
7884 save_area = frame_pointer_rtx;
7885 set = get_varargs_alias_set ();
7887 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
7888 if (max > X86_64_REGPARM_MAX)
7889 max = X86_64_REGPARM_MAX;
7891 for (i = cum->regno; i < max; i++)
7893 mem = gen_rtx_MEM (Pmode,
7894 plus_constant (save_area, i * UNITS_PER_WORD));
7895 MEM_NOTRAP_P (mem) = 1;
7896 set_mem_alias_set (mem, set);
7897 emit_move_insn (mem, gen_rtx_REG (Pmode,
7898 x86_64_int_parameter_registers[i]));
7901 if (ix86_varargs_fpr_size)
7903 enum machine_mode smode;
7906 /* Now emit code to save SSE registers. The AX parameter contains number
7907 of SSE parameter registers used to call this function, though all we
7908 actually check here is the zero/non-zero status. */
7910 label = gen_label_rtx ();
7911 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
7912 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
7915 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
7916 we used movdqa (i.e. TImode) instead? Perhaps even better would
7917 be if we could determine the real mode of the data, via a hook
7918 into pass_stdarg. Ignore all that for now. */
7920 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
7921 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
7923 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
7924 if (max > X86_64_SSE_REGPARM_MAX)
7925 max = X86_64_SSE_REGPARM_MAX;
7927 for (i = cum->sse_regno; i < max; ++i)
7929 mem = plus_constant (save_area, i * 16 + ix86_varargs_gpr_size);
7930 mem = gen_rtx_MEM (smode, mem);
7931 MEM_NOTRAP_P (mem) = 1;
7932 set_mem_alias_set (mem, set);
7933 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
7935 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
7943 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
7945 alias_set_type set = get_varargs_alias_set ();
7948 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
7952 mem = gen_rtx_MEM (Pmode,
7953 plus_constant (virtual_incoming_args_rtx,
7954 i * UNITS_PER_WORD));
7955 MEM_NOTRAP_P (mem) = 1;
7956 set_mem_alias_set (mem, set);
7958 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
7959 emit_move_insn (mem, reg);
7964 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7965 tree type, int *pretend_size ATTRIBUTE_UNUSED,
7968 CUMULATIVE_ARGS next_cum;
7971 /* This argument doesn't appear to be used anymore. Which is good,
7972 because the old code here didn't suppress rtl generation. */
7973 gcc_assert (!no_rtl);
7978 fntype = TREE_TYPE (current_function_decl);
7980 /* For varargs, we do not want to skip the dummy va_dcl argument.
7981 For stdargs, we do want to skip the last named argument. */
7983 if (stdarg_p (fntype))
7984 ix86_function_arg_advance (&next_cum, mode, type, true);
7986 if (cum->call_abi == MS_ABI)
7987 setup_incoming_varargs_ms_64 (&next_cum);
7989 setup_incoming_varargs_64 (&next_cum);
/* Check if TYPE is a va_list of kind char *.  */
7995 is_va_list_char_pointer (tree type)
7999 /* For 32-bit it is always true. */
8002 canonic = ix86_canonical_va_list_type (type);
8003 return (canonic == ms_va_list_type_node
8004 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
8007 /* Implement va_start. */
8010 ix86_va_start (tree valist, rtx nextarg)
8012 HOST_WIDE_INT words, n_gpr, n_fpr;
8013 tree f_gpr, f_fpr, f_ovf, f_sav;
8014 tree gpr, fpr, ovf, sav, t;
8018 if (flag_split_stack
8019 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8021 unsigned int scratch_regno;
8023 /* When we are splitting the stack, we can't refer to the stack
8024 arguments using internal_arg_pointer, because they may be on
8025 the old stack. The split stack prologue will arrange to
8026 leave a pointer to the old stack arguments in a scratch
8027 register, which we here copy to a pseudo-register. The split
8028 stack prologue can't set the pseudo-register directly because
8029 it (the prologue) runs before any registers have been saved. */
8031 scratch_regno = split_stack_prologue_scratch_regno ();
8032 if (scratch_regno != INVALID_REGNUM)
8036 reg = gen_reg_rtx (Pmode);
8037 cfun->machine->split_stack_varargs_pointer = reg;
8040 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
8044 push_topmost_sequence ();
8045 emit_insn_after (seq, entry_of_function ());
8046 pop_topmost_sequence ();
/* Only the 64bit target needs something special.  */
8051 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8053 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8054 std_expand_builtin_va_start (valist, nextarg);
8059 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
8060 next = expand_binop (ptr_mode, add_optab,
8061 cfun->machine->split_stack_varargs_pointer,
8062 crtl->args.arg_offset_rtx,
8063 NULL_RTX, 0, OPTAB_LIB_WIDEN);
8064 convert_move (va_r, next, 0);
8069 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8070 f_fpr = DECL_CHAIN (f_gpr);
8071 f_ovf = DECL_CHAIN (f_fpr);
8072 f_sav = DECL_CHAIN (f_ovf);
8074 valist = build_simple_mem_ref (valist);
8075 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
8076 /* The following should be folded into the MEM_REF offset. */
8077 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
8079 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
8081 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
8083 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
8086 /* Count number of gp and fp argument registers used. */
8087 words = crtl->args.info.words;
8088 n_gpr = crtl->args.info.regno;
8089 n_fpr = crtl->args.info.sse_regno;
8091 if (cfun->va_list_gpr_size)
8093 type = TREE_TYPE (gpr);
8094 t = build2 (MODIFY_EXPR, type,
8095 gpr, build_int_cst (type, n_gpr * 8));
8096 TREE_SIDE_EFFECTS (t) = 1;
8097 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8100 if (TARGET_SSE && cfun->va_list_fpr_size)
8102 type = TREE_TYPE (fpr);
8103 t = build2 (MODIFY_EXPR, type, fpr,
8104 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
8105 TREE_SIDE_EFFECTS (t) = 1;
8106 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8109 /* Find the overflow area. */
8110 type = TREE_TYPE (ovf);
8111 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8112 ovf_rtx = crtl->args.internal_arg_pointer;
8114 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
8115 t = make_tree (type, ovf_rtx);
8117 t = build2 (POINTER_PLUS_EXPR, type, t,
8118 size_int (words * UNITS_PER_WORD));
8119 t = build2 (MODIFY_EXPR, type, ovf, t);
8120 TREE_SIDE_EFFECTS (t) = 1;
8121 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8123 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
/* Find the register save area.
   The function prologue saves it right above the stack frame.  */
8127 type = TREE_TYPE (sav);
8128 t = make_tree (type, frame_pointer_rtx);
8129 if (!ix86_varargs_gpr_size)
8130 t = build2 (POINTER_PLUS_EXPR, type, t,
8131 size_int (-8 * X86_64_REGPARM_MAX));
8132 t = build2 (MODIFY_EXPR, type, sav, t);
8133 TREE_SIDE_EFFECTS (t) = 1;
8134 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
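/* For reference, a sketch of the SysV AMD64 register save area whose
   address is stored in SAV above (editorial note; the sizes follow
   from the 8*X86_64_REGPARM_MAX and n_fpr*16 arithmetic above):

       sav +   0 ... sav +  40   six GP registers, 8 bytes each
       sav +  48 ... sav + 160   eight SSE registers, 16 bytes each

   When no GP registers need saving (!ix86_varargs_gpr_size), SAV is
   biased by -8*X86_64_REGPARM_MAX so that fp_offset, which already
   includes the 48-byte GP area, still indexes the saved SSE slots.  */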
8138 /* Implement va_arg. */
8141 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
8144 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
8145 tree f_gpr, f_fpr, f_ovf, f_sav;
8146 tree gpr, fpr, ovf, sav, t;
8148 tree lab_false, lab_over = NULL_TREE;
8153 enum machine_mode nat_mode;
8154 unsigned int arg_boundary;
8156 /* Only 64-bit targets need something special. */
8157 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8158 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
8160 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8161 f_fpr = DECL_CHAIN (f_gpr);
8162 f_ovf = DECL_CHAIN (f_fpr);
8163 f_sav = DECL_CHAIN (f_ovf);
8165 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
8166 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
8167 valist = build_va_arg_indirect_ref (valist);
8168 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
8169 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
8170 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
8172 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
8174 type = build_pointer_type (type);
8175 size = int_size_in_bytes (type);
8176 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
8178 nat_mode = type_natural_mode (type, NULL);
8187 /* Unnamed 256bit vector mode parameters are passed on the stack. */
8188 if (ix86_cfun_abi () == SYSV_ABI)
8195 container = construct_container (nat_mode, TYPE_MODE (type),
8196 type, 0, X86_64_REGPARM_MAX,
8197 X86_64_SSE_REGPARM_MAX, intreg,
8202 /* Pull the value out of the saved registers. */
8204 addr = create_tmp_var (ptr_type_node, "addr");
8208 int needed_intregs, needed_sseregs;
8210 tree int_addr, sse_addr;
8212 lab_false = create_artificial_label (UNKNOWN_LOCATION);
8213 lab_over = create_artificial_label (UNKNOWN_LOCATION);
8215 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
8217 need_temp = (!REG_P (container)
8218 && ((needed_intregs && TYPE_ALIGN (type) > 64)
8219 || TYPE_ALIGN (type) > 128));
8221 /* In case we are passing a structure, verify that it forms a consecutive
8222 block in the register save area. If not, we need to do moves. */
8223 if (!need_temp && !REG_P (container))
8225 /* Verify that all registers are strictly consecutive. */
8226 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
8230 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8232 rtx slot = XVECEXP (container, 0, i);
8233 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
8234 || INTVAL (XEXP (slot, 1)) != i * 16)
8242 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8244 rtx slot = XVECEXP (container, 0, i);
8245 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
8246 || INTVAL (XEXP (slot, 1)) != i * 8)
8258 int_addr = create_tmp_var (ptr_type_node, "int_addr");
8259 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
8262 /* First ensure that we fit completely in registers. */
8265 t = build_int_cst (TREE_TYPE (gpr),
8266 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
8267 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
8268 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8269 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8270 gimplify_and_add (t, pre_p);
8274 t = build_int_cst (TREE_TYPE (fpr),
8275 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
8276 + X86_64_REGPARM_MAX * 8);
8277 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
8278 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8279 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8280 gimplify_and_add (t, pre_p);
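/* Worked example (editorial note): for an argument needing two GP
   registers (needed_intregs == 2), the test above jumps to lab_false
   once gp_offset >= (6 - 2 + 1) * 8 == 40, i.e. as soon as fewer
   than two of the six 8-byte GP slots remain in the save area.  */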
8283 /* Compute index to start of area used for integer regs. */
8286 /* int_addr = gpr + sav; */
8287 t = fold_convert (sizetype, gpr);
8288 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
8289 gimplify_assign (int_addr, t, pre_p);
8293 /* sse_addr = fpr + sav; */
8294 t = fold_convert (sizetype, fpr);
8295 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
8296 gimplify_assign (sse_addr, t, pre_p);
8300 int i, prev_size = 0;
8301 tree temp = create_tmp_var (type, "va_arg_tmp");
8304 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
8305 gimplify_assign (addr, t, pre_p);
8307 for (i = 0; i < XVECLEN (container, 0); i++)
8309 rtx slot = XVECEXP (container, 0, i);
8310 rtx reg = XEXP (slot, 0);
8311 enum machine_mode mode = GET_MODE (reg);
8317 tree dest_addr, dest;
8318 int cur_size = GET_MODE_SIZE (mode);
8320 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
8321 prev_size = INTVAL (XEXP (slot, 1));
8322 if (prev_size + cur_size > size)
8324 cur_size = size - prev_size;
8325 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
8326 if (mode == BLKmode)
8329 piece_type = lang_hooks.types.type_for_mode (mode, 1);
8330 if (mode == GET_MODE (reg))
8331 addr_type = build_pointer_type (piece_type);
8333 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8335 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8338 if (SSE_REGNO_P (REGNO (reg)))
8340 src_addr = sse_addr;
8341 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
8345 src_addr = int_addr;
8346 src_offset = REGNO (reg) * 8;
8348 src_addr = fold_convert (addr_type, src_addr);
8349 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
8350 size_int (src_offset));
8352 dest_addr = fold_convert (daddr_type, addr);
8353 dest_addr = fold_build2 (POINTER_PLUS_EXPR, daddr_type, dest_addr,
8354 size_int (prev_size));
8355 if (cur_size == GET_MODE_SIZE (mode))
8357 src = build_va_arg_indirect_ref (src_addr);
8358 dest = build_va_arg_indirect_ref (dest_addr);
8360 gimplify_assign (dest, src, pre_p);
8365 = build_call_expr (implicit_built_in_decls[BUILT_IN_MEMCPY],
8366 3, dest_addr, src_addr,
8367 size_int (cur_size));
8368 gimplify_and_add (copy, pre_p);
8370 prev_size += cur_size;
8376 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
8377 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
8378 gimplify_assign (gpr, t, pre_p);
8383 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
8384 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
8385 gimplify_assign (fpr, t, pre_p);
8388 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
8390 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
8393 /* ... otherwise out of the overflow area. */
8395 /* When we align a parameter on the stack for the caller, if the parameter
8396 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
8397 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We match the callee
8398 with the caller here. */
8399 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
8400 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
8401 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
8403 /* Care for on-stack alignment if needed. */
8404 if (arg_boundary <= 64 || size == 0)
8408 HOST_WIDE_INT align = arg_boundary / 8;
8409 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
8410 size_int (align - 1));
8411 t = fold_convert (sizetype, t);
8412 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
8414 t = fold_convert (TREE_TYPE (ovf), t);
8417 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
8418 gimplify_assign (addr, t, pre_p);
8420 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
8421 size_int (rsize * UNITS_PER_WORD));
8422 gimplify_assign (unshare_expr (ovf), t, pre_p);
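/* The statements above implement the usual align-up idiom followed by
   a bump past the fetched argument.  A minimal C equivalent
   (editorial sketch; align must be a power of two):

     addr = (ovf + align - 1) & -align;
     ovf  = addr + rsize * UNITS_PER_WORD;                             */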
8425 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
8427 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
8428 addr = fold_convert (ptrtype, addr);
8431 addr = build_va_arg_indirect_ref (addr);
8432 return build_va_arg_indirect_ref (addr);
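/* Summary example (editorial note): for va_arg (ap, int) the code
   above emits gimple roughly equivalent to

     if (ap.gp_offset >= 48) goto lab_false;
     addr = ap.reg_save_area + ap.gp_offset;
     ap.gp_offset += 8;
     goto lab_over;
   lab_false:
     addr = ap.overflow_arg_area;
     ap.overflow_arg_area += 8;
   lab_over:
     result = *(int *) addr;                                           */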
8435 /* Return true if OPNUM's MEM should be matched
8436 in movabs* patterns. */
8439 ix86_check_movabs (rtx insn, int opnum)
8443 set = PATTERN (insn);
8444 if (GET_CODE (set) == PARALLEL)
8445 set = XVECEXP (set, 0, 0);
8446 gcc_assert (GET_CODE (set) == SET);
8447 mem = XEXP (set, opnum);
8448 while (GET_CODE (mem) == SUBREG)
8449 mem = SUBREG_REG (mem);
8450 gcc_assert (MEM_P (mem));
8451 return volatile_ok || !MEM_VOLATILE_P (mem);
8454 /* Initialize the table of extra 80387 mathematical constants. */
8457 init_ext_80387_constants (void)
8459 static const char * cst[5] =
8461 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
8462 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
8463 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
8464 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
8465 "3.1415926535897932385128089594061862044", /* 4: fldpi */
8469 for (i = 0; i < 5; i++)
8471 real_from_string (&ext_80387_constants_table[i], cst[i]);
8472 /* Ensure each constant is rounded to XFmode precision. */
8473 real_convert (&ext_80387_constants_table[i],
8474 XFmode, &ext_80387_constants_table[i]);
8477 ext_80387_constants_init = 1;
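/* Usage note (editorial): standard_80387_constant_p below maps a hit
   on table entry i to the return value i + 3, so e.g. pi (entry 4)
   yields 7 and is then emitted as a single fldpi instruction instead
   of a load from the constant pool.  */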
8480 /* Return non-zero if the constant is something that
8481 can be loaded with a special instruction. */
8484 standard_80387_constant_p (rtx x)
8486 enum machine_mode mode = GET_MODE (x);
8490 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
8493 if (x == CONST0_RTX (mode))
8495 if (x == CONST1_RTX (mode))
8498 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8500 /* For XFmode constants, try to find a special 80387 instruction when
8501 optimizing for size or on those CPUs that benefit from them. */
8503 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
8507 if (! ext_80387_constants_init)
8508 init_ext_80387_constants ();
8510 for (i = 0; i < 5; i++)
8511 if (real_identical (&r, &ext_80387_constants_table[i]))
8515 /* A load of the constant -0.0 or -1.0 will be split into an
8516 fldz;fchs or fld1;fchs sequence. */
8517 if (real_isnegzero (&r))
8519 if (real_identical (&r, &dconstm1))
8525 /* Return the opcode of the special instruction to be used to load the constant X. */
8529 standard_80387_constant_opcode (rtx x)
8531 switch (standard_80387_constant_p (x))
8555 /* Return the CONST_DOUBLE representing the 80387 constant that is
8556 loaded by the specified special instruction. The argument IDX
8557 matches the return value from standard_80387_constant_p. */
8560 standard_80387_constant_rtx (int idx)
8564 if (! ext_80387_constants_init)
8565 init_ext_80387_constants ();
8581 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
8585 /* Return 1 if X is all 0s and 2 if X is all 1s
8586 in a supported SSE vector mode. */
8589 standard_sse_constant_p (rtx x)
8591 enum machine_mode mode = GET_MODE (x);
8593 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
8595 if (vector_all_ones_operand (x, mode))
8611 /* Return the opcode of the special instruction to be used to load the constant X. */
8615 standard_sse_constant_opcode (rtx insn, rtx x)
8617 switch (standard_sse_constant_p (x))
8620 switch (get_attr_mode (insn))
8623 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
8625 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8626 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
8628 return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0";
8630 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8631 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
8633 return TARGET_AVX ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0";
8635 return "vxorps\t%x0, %x0, %x0";
8637 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8638 return "vxorps\t%x0, %x0, %x0";
8640 return "vxorpd\t%x0, %x0, %x0";
8642 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8643 return "vxorps\t%x0, %x0, %x0";
8645 return "vpxor\t%x0, %x0, %x0";
8650 return TARGET_AVX ? "vpcmpeqd\t%0, %0, %0" : "pcmpeqd\t%0, %0";
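/* Design note (editorial): xorps/xorpd/pxor of a register with itself
   and pcmpeqd of a register with itself are recognized by most modern
   CPUs as idioms producing all-zeros and all-ones respectively
   without reading the old register value, so these constants cost no
   memory access and create no false dependency.  */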
8657 /* Returns true if OP contains a symbol reference. */
8660 symbolic_reference_mentioned_p (rtx op)
8665 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
8668 fmt = GET_RTX_FORMAT (GET_CODE (op));
8669 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
8675 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
8676 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
8680 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
8687 /* Return true if it is appropriate to emit `ret' instructions in the
8688 body of a function. Do this only if the epilogue is simple, needing a
8689 couple of insns. Prior to reloading, we can't tell how many registers
8690 must be saved, so return false then. Return false if there is no frame
8691 marker to de-allocate. */
8694 ix86_can_use_return_insn_p (void)
8696 struct ix86_frame frame;
8698 if (! reload_completed || frame_pointer_needed)
8701 /* Don't allow more than 32k pop, since that's all we can do
8702 with one instruction. */
8703 if (crtl->args.pops_args && crtl->args.size >= 32768)
8706 ix86_compute_frame_layout (&frame);
8707 return (frame.stack_pointer_offset == UNITS_PER_WORD
8708 && (frame.nregs + frame.nsseregs) == 0);
8711 /* Value should be nonzero if functions must have frame pointers.
8712 Zero means the frame pointer need not be set up (and parms may
8713 be accessed via the stack pointer) in functions that seem suitable. */
8716 ix86_frame_pointer_required (void)
8718 /* If we accessed previous frames, then the generated code expects
8719 to be able to access the saved ebp value in our frame. */
8720 if (cfun->machine->accesses_prev_frame)
8723 /* Several x86 OSes need a frame pointer for other reasons,
8724 usually pertaining to setjmp. */
8725 if (SUBTARGET_FRAME_POINTER_REQUIRED)
8728 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
8729 turns off the frame pointer by default. Turn it back on now if
8730 we've not got a leaf function. */
8731 if (TARGET_OMIT_LEAF_FRAME_POINTER
8732 && (!current_function_is_leaf
8733 || ix86_current_function_calls_tls_descriptor))
8736 if (crtl->profile && !flag_fentry)
8742 /* Record that the current function accesses previous call frames. */
8745 ix86_setup_frame_addresses (void)
8747 cfun->machine->accesses_prev_frame = 1;
8750 #ifndef USE_HIDDEN_LINKONCE
8751 # if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
8752 # define USE_HIDDEN_LINKONCE 1
8754 # define USE_HIDDEN_LINKONCE 0
8758 static int pic_labels_used;
8760 /* Fills in the label name that should be used for a pc thunk for
8761 the given register. */
8764 get_pc_thunk_name (char name[32], unsigned int regno)
8766 gcc_assert (!TARGET_64BIT);
8768 if (USE_HIDDEN_LINKONCE)
8769 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
8771 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
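/* Example (editorial note): for the PIC register %ebx this yields the
   well-known hidden linkonce symbol "__i686.get_pc_thunk.bx"; without
   USE_HIDDEN_LINKONCE it falls back to an internal label built from
   the "LPR" prefix and the register number (3 for %ebx).  */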
8775 /* This function generates code for -fpic that loads %ebx with
8776 the return address of the caller and then returns. */
8779 ix86_code_end (void)
8784 for (regno = AX_REG; regno <= SP_REG; regno++)
8789 if (!(pic_labels_used & (1 << regno)))
8792 get_pc_thunk_name (name, regno);
8794 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
8795 get_identifier (name),
8796 build_function_type (void_type_node, void_list_node));
8797 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
8798 NULL_TREE, void_type_node);
8799 TREE_PUBLIC (decl) = 1;
8800 TREE_STATIC (decl) = 1;
8805 switch_to_section (darwin_sections[text_coal_section]);
8806 fputs ("\t.weak_definition\t", asm_out_file);
8807 assemble_name (asm_out_file, name);
8808 fputs ("\n\t.private_extern\t", asm_out_file);
8809 assemble_name (asm_out_file, name);
8810 putc ('\n', asm_out_file);
8811 ASM_OUTPUT_LABEL (asm_out_file, name);
8812 DECL_WEAK (decl) = 1;
8816 if (USE_HIDDEN_LINKONCE)
8818 DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
8820 targetm.asm_out.unique_section (decl, 0);
8821 switch_to_section (get_named_section (decl, NULL, 0));
8823 targetm.asm_out.globalize_label (asm_out_file, name);
8824 fputs ("\t.hidden\t", asm_out_file);
8825 assemble_name (asm_out_file, name);
8826 putc ('\n', asm_out_file);
8827 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
8831 switch_to_section (text_section);
8832 ASM_OUTPUT_LABEL (asm_out_file, name);
8835 DECL_INITIAL (decl) = make_node (BLOCK);
8836 current_function_decl = decl;
8837 init_function_start (decl);
8838 first_function_block_is_cold = false;
8839 /* Make sure unwind info is emitted for the thunk if needed. */
8840 final_start_function (emit_barrier (), asm_out_file, 1);
8842 /* Pad stack IP move with 4 instructions (two NOPs count
8843 as one instruction). */
8844 if (TARGET_PAD_SHORT_FUNCTION)
8849 fputs ("\tnop\n", asm_out_file);
8852 xops[0] = gen_rtx_REG (Pmode, regno);
8853 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
8854 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
8855 fputs ("\tret\n", asm_out_file);
8856 final_end_function ();
8857 init_insn_lengths ();
8858 free_after_compilation (cfun);
8860 current_function_decl = NULL;
8863 if (flag_split_stack)
8864 file_end_indicate_split_stack ();
8867 /* Emit code for the SET_GOT patterns. */
8870 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
8876 if (TARGET_VXWORKS_RTP && flag_pic)
8878 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
8879 xops[2] = gen_rtx_MEM (Pmode,
8880 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
8881 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
8883 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
8884 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
8885 an unadorned address. */
8886 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
8887 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
8888 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
8892 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
8894 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
8896 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
8899 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
8902 output_asm_insn ("call\t%a2", xops);
8903 #ifdef DWARF2_UNWIND_INFO
8904 /* The call to the next label acts as a push. */
8905 if (dwarf2out_do_frame ())
8909 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8910 gen_rtx_PLUS (Pmode,
8913 RTX_FRAME_RELATED_P (insn) = 1;
8914 dwarf2out_frame_debug (insn, true);
8921 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8922 is what will be referenced by the Mach-O PIC subsystem. */
8924 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8927 targetm.asm_out.internal_label (asm_out_file, "L",
8928 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
8932 output_asm_insn ("pop%z0\t%0", xops);
8933 #ifdef DWARF2_UNWIND_INFO
8934 /* The pop is a pop and clobbers dest, but doesn't restore it
8935 for unwind info purposes. */
8936 if (dwarf2out_do_frame ())
8940 insn = emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
8941 dwarf2out_frame_debug (insn, true);
8942 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8943 gen_rtx_PLUS (Pmode,
8946 RTX_FRAME_RELATED_P (insn) = 1;
8947 dwarf2out_frame_debug (insn, true);
8956 get_pc_thunk_name (name, REGNO (dest));
8957 pic_labels_used |= 1 << REGNO (dest);
8959 #ifdef DWARF2_UNWIND_INFO
8960 /* Ensure all queued register saves are flushed before the call. */
8962 if (dwarf2out_do_frame ())
8963 dwarf2out_flush_queued_reg_saves ();
8965 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
8966 xops[2] = gen_rtx_MEM (QImode, xops[2]);
8967 output_asm_insn ("call\t%X2", xops);
8968 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8969 is what will be referenced by the Mach-O PIC subsystem. */
8972 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8974 targetm.asm_out.internal_label (asm_out_file, "L",
8975 CODE_LABEL_NUMBER (label));
8982 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
8983 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
8985 output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
8990 /* Generate a "push" pattern for input ARG. */
8995 struct machine_function *m = cfun->machine;
8997 if (m->fs.cfa_reg == stack_pointer_rtx)
8998 m->fs.cfa_offset += UNITS_PER_WORD;
8999 m->fs.sp_offset += UNITS_PER_WORD;
9001 return gen_rtx_SET (VOIDmode,
9003 gen_rtx_PRE_DEC (Pmode,
9004 stack_pointer_rtx)),
9008 /* Generate a "pop" pattern for input ARG. */
9013 return gen_rtx_SET (VOIDmode,
9016 gen_rtx_POST_INC (Pmode,
9017 stack_pointer_rtx)));
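/* For reference (editorial note): gen_push builds RTL of the shape

     (set (mem (pre_dec (reg sp))) arg)

   and gen_pop the mirror image with post_inc; the m->fs.cfa_offset /
   sp_offset bookkeeping above tracks the UNITS_PER_WORD that the
   push adds to the frame.  */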
9020 /* Return >= 0 if there is an unused call-clobbered register available
9021 for the entire function. */
9024 ix86_select_alt_pic_regnum (void)
9026 if (current_function_is_leaf
9028 && !ix86_current_function_calls_tls_descriptor)
9031 /* Can't use the same register for both PIC and DRAP. */
9033 drap = REGNO (crtl->drap_reg);
9036 for (i = 2; i >= 0; --i)
9037 if (i != drap && !df_regs_ever_live_p (i))
9041 return INVALID_REGNUM;
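/* Note (editorial): hard register numbers 0..2 are %eax, %edx and
   %ecx, so the loop above tries %ecx first, then %edx, then %eax;
   all three are call-clobbered and thus free in a leaf function that
   makes no TLS descriptor calls.  */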
9044 /* Return 1 if we need to save REGNO. */
9046 ix86_save_reg (unsigned int regno, int maybe_eh_return)
9048 if (pic_offset_table_rtx
9049 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
9050 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
9052 || crtl->calls_eh_return
9053 || crtl->uses_const_pool))
9055 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
9060 if (crtl->calls_eh_return && maybe_eh_return)
9065 unsigned test = EH_RETURN_DATA_REGNO (i);
9066 if (test == INVALID_REGNUM)
9073 if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
9076 return (df_regs_ever_live_p (regno)
9077 && !call_used_regs[regno]
9078 && !fixed_regs[regno]
9079 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
9082 /* Return the number of saved general purpose registers. */
9085 ix86_nsaved_regs (void)
9090 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9091 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9096 /* Return the number of saved SSE registers. */
9099 ix86_nsaved_sseregs (void)
9104 if (ix86_cfun_abi () != MS_ABI)
9106 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9107 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9112 /* Given FROM and TO register numbers, say whether this elimination is
9113 allowed. If stack alignment is needed, we can only replace argument
9114 pointer with hard frame pointer, or replace frame pointer with stack
9115 pointer. Otherwise, frame pointer elimination is automatically
9116 handled and all other eliminations are valid. */
9119 ix86_can_eliminate (const int from, const int to)
9121 if (stack_realign_fp)
9122 return ((from == ARG_POINTER_REGNUM
9123 && to == HARD_FRAME_POINTER_REGNUM)
9124 || (from == FRAME_POINTER_REGNUM
9125 && to == STACK_POINTER_REGNUM));
9127 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
9130 /* Return the offset between two registers, one to be eliminated, and the other
9131 its replacement, at the start of a routine. */
9134 ix86_initial_elimination_offset (int from, int to)
9136 struct ix86_frame frame;
9137 ix86_compute_frame_layout (&frame);
9139 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9140 return frame.hard_frame_pointer_offset;
9141 else if (from == FRAME_POINTER_REGNUM
9142 && to == HARD_FRAME_POINTER_REGNUM)
9143 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
9146 gcc_assert (to == STACK_POINTER_REGNUM);
9148 if (from == ARG_POINTER_REGNUM)
9149 return frame.stack_pointer_offset;
9151 gcc_assert (from == FRAME_POINTER_REGNUM);
9152 return frame.stack_pointer_offset - frame.frame_pointer_offset;
9156 /* In a dynamically-aligned function, we can't know the offset from
9157 stack pointer to frame pointer, so we must ensure that setjmp
9158 eliminates fp against the hard fp (%ebp) rather than trying to
9159 index from %esp up to the top of the frame across a gap that is
9160 of unknown (at compile-time) size. */
9162 ix86_builtin_setjmp_frame_value (void)
9164 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
9167 /* On the x86 -fsplit-stack and -fstack-protector both use the same
9168 field in the TCB, so they cannot be used together. */
9171 ix86_supports_split_stack (bool report ATTRIBUTE_UNUSED,
9172 struct gcc_options *opts ATTRIBUTE_UNUSED)
9176 #ifndef TARGET_THREAD_SPLIT_STACK_OFFSET
9178 error ("%<-fsplit-stack%> currently only supported on GNU/Linux");
9181 if (!HAVE_GAS_CFI_PERSONALITY_DIRECTIVE)
9184 error ("%<-fsplit-stack%> requires "
9185 "assembler support for CFI directives");
9193 /* When using -fsplit-stack, the allocation routines set a field in
9194 the TCB to the bottom of the stack plus this much space, measured in bytes. */
9197 #define SPLIT_STACK_AVAILABLE 256
9199 /* Fill the ix86_frame structure with information about the frame of the current function. */
9202 ix86_compute_frame_layout (struct ix86_frame *frame)
9204 unsigned int stack_alignment_needed;
9205 HOST_WIDE_INT offset;
9206 unsigned int preferred_alignment;
9207 HOST_WIDE_INT size = get_frame_size ();
9208 HOST_WIDE_INT to_allocate;
9210 frame->nregs = ix86_nsaved_regs ();
9211 frame->nsseregs = ix86_nsaved_sseregs ();
9213 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
9214 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
9216 /* The MS ABI seems to require stack alignment to always be 16, except for
9217 function prologues and leaf functions. */
9218 if ((ix86_cfun_abi () == MS_ABI && preferred_alignment < 16)
9219 && (!current_function_is_leaf || cfun->calls_alloca != 0
9220 || ix86_current_function_calls_tls_descriptor))
9222 preferred_alignment = 16;
9223 stack_alignment_needed = 16;
9224 crtl->preferred_stack_boundary = 128;
9225 crtl->stack_alignment_needed = 128;
9228 gcc_assert (!size || stack_alignment_needed);
9229 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
9230 gcc_assert (preferred_alignment <= stack_alignment_needed);
9232 /* For SEH we have to limit the amount of code movement into the prologue.
9233 At present we do this via a BLOCKAGE, at which point there's very little
9234 scheduling that can be done, which means that there's very little point
9235 in doing anything except PUSHs. */
9237 cfun->machine->use_fast_prologue_epilogue = false;
9239 /* During a reload iteration the number of registers saved can change.
9240 Recompute the value as needed. Do not recompute when the number of registers
9241 didn't change, as reload makes multiple calls to this function and does not
9242 expect the decision to change within a single iteration. */
9243 else if (!optimize_function_for_size_p (cfun)
9244 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
9246 int count = frame->nregs;
9247 struct cgraph_node *node = cgraph_node (current_function_decl);
9249 cfun->machine->use_fast_prologue_epilogue_nregs = count;
9251 /* The fast prologue uses move instead of push to save registers. This
9252 is significantly longer, but also executes faster, as modern hardware
9253 can execute the moves in parallel but cannot do so for push/pop.
9255 Be careful about choosing which prologue to emit: when a function takes
9256 many instructions to execute, we may use the slow version, as we do when
9257 the function is known to be outside a hot spot (this is known only with
9258 feedback). Weight the size of the function by the number of registers
9259 to save, as it is cheap to use one or two push instructions but very
9260 slow to use many of them. */
9262 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
9263 if (node->frequency < NODE_FREQUENCY_NORMAL
9264 || (flag_branch_probabilities
9265 && node->frequency < NODE_FREQUENCY_HOT))
9266 cfun->machine->use_fast_prologue_epilogue = false;
9268 cfun->machine->use_fast_prologue_epilogue
9269 = !expensive_function_p (count);
9271 if (TARGET_PROLOGUE_USING_MOVE
9272 && cfun->machine->use_fast_prologue_epilogue)
9273 frame->save_regs_using_mov = true;
9275 frame->save_regs_using_mov = false;
9277 /* If static stack checking is enabled and done with probes, the registers
9278 need to be saved before allocating the frame. */
9279 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
9280 frame->save_regs_using_mov = false;
9282 /* Skip return address. */
9283 offset = UNITS_PER_WORD;
9285 /* Skip pushed static chain. */
9286 if (ix86_static_chain_on_stack)
9287 offset += UNITS_PER_WORD;
9289 /* Skip saved base pointer. */
9290 if (frame_pointer_needed)
9291 offset += UNITS_PER_WORD;
9292 frame->hfp_save_offset = offset;
9294 /* The traditional frame pointer location is at the top of the frame. */
9295 frame->hard_frame_pointer_offset = offset;
9297 /* Register save area */
9298 offset += frame->nregs * UNITS_PER_WORD;
9299 frame->reg_save_offset = offset;
9301 /* Align and set SSE register save area. */
9302 if (frame->nsseregs)
9304 /* The only ABI that has saved SSE registers (Win64) also has a
9305 16-byte aligned default stack, and thus we don't need to be
9306 within the re-aligned local stack frame to save them. */
9307 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
9308 offset = (offset + 16 - 1) & -16;
9309 offset += frame->nsseregs * 16;
9311 frame->sse_reg_save_offset = offset;
9313 /* The re-aligned stack starts here. Values before this point are not
9314 directly comparable with values below this point. In order to make
9315 sure that no value happens to be the same before and after, force
9316 the alignment computation below to add a non-zero value. */
9317 if (stack_realign_fp)
9318 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
9321 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
9322 offset += frame->va_arg_size;
9324 /* Align start of frame for local function. */
9325 if (stack_realign_fp
9326 || offset != frame->sse_reg_save_offset
9328 || !current_function_is_leaf
9329 || cfun->calls_alloca
9330 || ix86_current_function_calls_tls_descriptor)
9331 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
9333 /* Frame pointer points here. */
9334 frame->frame_pointer_offset = offset;
9338 /* Add the outgoing arguments area. It can be skipped if we eliminated
9339 all the function calls as dead code.
9340 Skipping is however impossible when the function calls alloca. The alloca
9341 expander assumes that the last crtl->outgoing_args_size bytes
9342 of the stack frame are unused. */
9343 if (ACCUMULATE_OUTGOING_ARGS
9344 && (!current_function_is_leaf || cfun->calls_alloca
9345 || ix86_current_function_calls_tls_descriptor))
9347 offset += crtl->outgoing_args_size;
9348 frame->outgoing_arguments_size = crtl->outgoing_args_size;
9351 frame->outgoing_arguments_size = 0;
9353 /* Align the stack boundary. Only needed if we're calling another function or using alloca. */
9355 if (!current_function_is_leaf || cfun->calls_alloca
9356 || ix86_current_function_calls_tls_descriptor)
9357 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
9359 /* We've reached end of stack frame. */
9360 frame->stack_pointer_offset = offset;
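/* A sketch of the resulting layout (editorial note): each offset
   computed above measures the distance from the CFA down to the end
   of the region named here:

     return address, optional static chain, saved %ebp
                                    -> hard_frame_pointer_offset
     GP register save area          -> reg_save_offset
     SSE register save area         -> sse_reg_save_offset (16-aligned)
     varargs save area + aligned local frame
                                    -> frame_pointer_offset
     outgoing arguments             -> stack_pointer_offset (aligned)   */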
9362 /* Size the prologue needs to allocate. */
9363 to_allocate = offset - frame->sse_reg_save_offset;
9365 if ((!to_allocate && frame->nregs <= 1)
9366 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
9367 frame->save_regs_using_mov = false;
9369 if (ix86_using_red_zone ()
9370 && current_function_sp_is_unchanging
9371 && current_function_is_leaf
9372 && !ix86_current_function_calls_tls_descriptor)
9374 frame->red_zone_size = to_allocate;
9375 if (frame->save_regs_using_mov)
9376 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
9377 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
9378 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
9381 frame->red_zone_size = 0;
9382 frame->stack_pointer_offset -= frame->red_zone_size;
9384 /* The SEH frame pointer location is near the bottom of the frame.
9385 This is enforced by the fact that the difference between the
9386 stack pointer and the frame pointer is limited to 240 bytes in
9387 the unwind data structure. */
9392 /* If we can leave the frame pointer where it is, do so. */
9393 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
9394 if (diff > 240 || (diff & 15) != 0)
9396 /* Ideally we'd determine what portion of the local stack frame
9397 (within the constraint of the lowest 240) is most heavily used.
9398 But without that complication, simply bias the frame pointer
9399 by 128 bytes so as to maximize the amount of the local stack
9400 frame that is addressable with 8-bit offsets. */
9401 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
9406 /* This is semi-inlined memory_address_length, but simplified
9407 since we know that we're always dealing with reg+offset, and
9408 to avoid having to create and discard all that rtl. */
9411 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
9417 /* EBP and R13 cannot be encoded without an offset. */
9418 len = (regno == BP_REG || regno == R13_REG);
9420 else if (IN_RANGE (offset, -128, 127))
9423 /* ESP and R12 must be encoded with a SIB byte. */
9424 if (regno == SP_REG || regno == R12_REG)
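/* Worked examples (editorial note): with a small offset such as 12,
   %eax costs 1 extra byte (the disp8), while %esp costs 2 (disp8 plus
   the mandatory SIB byte); %ebp with offset 0 still costs 1 because
   its encoding requires a displacement; offsets outside [-128, 127]
   cost a 4-byte disp32 instead of the disp8.  */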
9430 /* Return an RTX that points to CFA_OFFSET within the stack frame.
9431 The valid base registers are taken from CFUN->MACHINE->FS. */
9434 choose_baseaddr (HOST_WIDE_INT cfa_offset)
9436 const struct machine_function *m = cfun->machine;
9437 rtx base_reg = NULL;
9438 HOST_WIDE_INT base_offset = 0;
9440 if (m->use_fast_prologue_epilogue)
9442 /* Choose the base register most likely to allow the most scheduling
9443 opportunities. Generally FP is valid throughout the function,
9444 while DRAP must be reloaded within the epilogue. But choose either
9445 over the SP due to increased encoding size. */
9449 base_reg = hard_frame_pointer_rtx;
9450 base_offset = m->fs.fp_offset - cfa_offset;
9452 else if (m->fs.drap_valid)
9454 base_reg = crtl->drap_reg;
9455 base_offset = 0 - cfa_offset;
9457 else if (m->fs.sp_valid)
9459 base_reg = stack_pointer_rtx;
9460 base_offset = m->fs.sp_offset - cfa_offset;
9465 HOST_WIDE_INT toffset;
9468 /* Choose the base register with the smallest address encoding.
9469 With a tie, choose FP > DRAP > SP. */
9472 base_reg = stack_pointer_rtx;
9473 base_offset = m->fs.sp_offset - cfa_offset;
9474 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
9476 if (m->fs.drap_valid)
9478 toffset = 0 - cfa_offset;
9479 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
9482 base_reg = crtl->drap_reg;
9483 base_offset = toffset;
9489 toffset = m->fs.fp_offset - cfa_offset;
9490 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
9493 base_reg = hard_frame_pointer_rtx;
9494 base_offset = toffset;
9499 gcc_assert (base_reg != NULL);
9501 return plus_constant (base_reg, base_offset);
9504 /* Emit code to save registers in the prologue. */
9507 ix86_emit_save_regs (void)
9512 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
9513 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9515 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
9516 RTX_FRAME_RELATED_P (insn) = 1;
9520 /* Emit a single register save at CFA - CFA_OFFSET. */
9523 ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
9524 HOST_WIDE_INT cfa_offset)
9526 struct machine_function *m = cfun->machine;
9527 rtx reg = gen_rtx_REG (mode, regno);
9528 rtx mem, addr, base, insn;
9530 addr = choose_baseaddr (cfa_offset);
9531 mem = gen_frame_mem (mode, addr);
9533 /* For SSE saves, we need to indicate the 128-bit alignment. */
9534 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
9536 insn = emit_move_insn (mem, reg);
9537 RTX_FRAME_RELATED_P (insn) = 1;
9540 if (GET_CODE (base) == PLUS)
9541 base = XEXP (base, 0);
9542 gcc_checking_assert (REG_P (base));
9544 /* When saving registers into a re-aligned local stack frame, avoid
9545 any tricky guessing by dwarf2out. */
9546 if (m->fs.realigned)
9548 gcc_checking_assert (stack_realign_drap);
9550 if (regno == REGNO (crtl->drap_reg))
9552 /* A bit of a hack. We force the DRAP register to be saved in
9553 the re-aligned stack frame, which provides us with a copy
9554 of the CFA that will last past the prologue. Install it. */
9555 gcc_checking_assert (cfun->machine->fs.fp_valid);
9556 addr = plus_constant (hard_frame_pointer_rtx,
9557 cfun->machine->fs.fp_offset - cfa_offset);
9558 mem = gen_rtx_MEM (mode, addr);
9559 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
9563 /* The frame pointer is a stable reference within the
9564 aligned frame. Use it. */
9565 gcc_checking_assert (cfun->machine->fs.fp_valid);
9566 addr = plus_constant (hard_frame_pointer_rtx,
9567 cfun->machine->fs.fp_offset - cfa_offset);
9568 mem = gen_rtx_MEM (mode, addr);
9569 add_reg_note (insn, REG_CFA_EXPRESSION,
9570 gen_rtx_SET (VOIDmode, mem, reg));
9574 /* The memory may not be relative to the current CFA register,
9575 which means that we may need to generate a new pattern for
9576 use by the unwind info. */
9577 else if (base != m->fs.cfa_reg)
9579 addr = plus_constant (m->fs.cfa_reg, m->fs.cfa_offset - cfa_offset);
9580 mem = gen_rtx_MEM (mode, addr);
9581 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
9585 /* Emit code to save registers using MOV insns.
9586 First register is stored at CFA - CFA_OFFSET. */
9588 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
9592 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9593 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9595 ix86_emit_save_reg_using_mov (Pmode, regno, cfa_offset);
9596 cfa_offset -= UNITS_PER_WORD;
9600 /* Emit code to save SSE registers using MOV insns.
9601 First register is stored at CFA - CFA_OFFSET. */
9603 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
9607 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9608 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9610 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
9615 static GTY(()) rtx queued_cfa_restores;
9617 /* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
9618 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
9619 Don't add the note if the previously saved value will be left untouched
9620 within the stack red zone until return, as unwinders can find the same value
9621 in the register and on the stack. */
9624 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
9626 if (cfa_offset <= cfun->machine->fs.red_zone_offset)
9631 add_reg_note (insn, REG_CFA_RESTORE, reg);
9632 RTX_FRAME_RELATED_P (insn) = 1;
9636 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
9639 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
9642 ix86_add_queued_cfa_restore_notes (rtx insn)
9645 if (!queued_cfa_restores)
9647 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
9649 XEXP (last, 1) = REG_NOTES (insn);
9650 REG_NOTES (insn) = queued_cfa_restores;
9651 queued_cfa_restores = NULL_RTX;
9652 RTX_FRAME_RELATED_P (insn) = 1;
9655 /* Expand prologue or epilogue stack adjustment.
9656 The pattern exists to put a dependency on all ebp-based memory accesses.
9657 STYLE should be negative if instructions should be marked as frame related,
9658 zero if the %r11 register is live and cannot be freely used, and positive otherwise. */
9662 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
9663 int style, bool set_cfa)
9665 struct machine_function *m = cfun->machine;
9667 bool add_frame_related_expr = false;
9670 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
9671 else if (x86_64_immediate_operand (offset, DImode))
9672 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
9676 /* r11 is used by indirect sibcall return as well, set before the
9677 epilogue and used after the epilogue. */
9679 tmp = gen_rtx_REG (DImode, R11_REG);
9682 gcc_assert (src != hard_frame_pointer_rtx
9683 && dest != hard_frame_pointer_rtx);
9684 tmp = hard_frame_pointer_rtx;
9686 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
9688 add_frame_related_expr = true;
9690 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
9693 insn = emit_insn (insn);
9695 ix86_add_queued_cfa_restore_notes (insn);
9701 gcc_assert (m->fs.cfa_reg == src);
9702 m->fs.cfa_offset += INTVAL (offset);
9703 m->fs.cfa_reg = dest;
9705 r = gen_rtx_PLUS (Pmode, src, offset);
9706 r = gen_rtx_SET (VOIDmode, dest, r);
9707 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
9708 RTX_FRAME_RELATED_P (insn) = 1;
9712 RTX_FRAME_RELATED_P (insn) = 1;
9713 if (add_frame_related_expr)
9715 rtx r = gen_rtx_PLUS (Pmode, src, offset);
9716 r = gen_rtx_SET (VOIDmode, dest, r);
9717 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
9721 if (dest == stack_pointer_rtx)
9723 HOST_WIDE_INT ooffset = m->fs.sp_offset;
9724 bool valid = m->fs.sp_valid;
9726 if (src == hard_frame_pointer_rtx)
9728 valid = m->fs.fp_valid;
9729 ooffset = m->fs.fp_offset;
9731 else if (src == crtl->drap_reg)
9733 valid = m->fs.drap_valid;
9738 /* Else there are two possibilities: SP itself, which we set
9739 up as the default above. Or EH_RETURN_STACKADJ_RTX, which is
9740 taken care of by hand along the eh_return path. */
9741 gcc_checking_assert (src == stack_pointer_rtx
9742 || offset == const0_rtx);
9745 m->fs.sp_offset = ooffset - INTVAL (offset);
9746 m->fs.sp_valid = valid;
9750 /* Find an available register to be used as dynamic realign argument
9751 pointer register. Such a register will be written in the prologue and
9752 used at the beginning of the body, so it must not be
9753 1. a parameter passing register.
9755 We reuse static-chain register if it is available. Otherwise, we
9756 use DI for i386 and R13 for x86-64. We chose R13 since it has a longer encoding.
9759 Return: the regno of the chosen register. */
9762 find_drap_reg (void)
9764 tree decl = cfun->decl;
9768 /* Use R13 for a nested function or a function that needs a static chain.
9769 Since a function with a tail call may use any caller-saved
9770 register in the epilogue, DRAP must not use a caller-saved
9771 register in that case. */
9772 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
9779 /* Use DI for a nested function or a function that needs a static chain.
9780 Since a function with a tail call may use any caller-saved
9781 register in the epilogue, DRAP must not use a caller-saved
9782 register in that case. */
9783 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
9786 /* Reuse the static chain register if it isn't used for parameter passing. */
9788 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2
9789 && !lookup_attribute ("fastcall",
9790 TYPE_ATTRIBUTES (TREE_TYPE (decl)))
9791 && !lookup_attribute ("thiscall",
9792 TYPE_ATTRIBUTES (TREE_TYPE (decl))))
9799 /* Return minimum incoming stack alignment. */
9802 ix86_minimum_incoming_stack_boundary (bool sibcall)
9804 unsigned int incoming_stack_boundary;
9806 /* Prefer the one specified at command line. */
9807 if (ix86_user_incoming_stack_boundary)
9808 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
9809 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
9810 if -mstackrealign is used, this isn't a sibcall check, and the
9811 estimated stack alignment is 128 bits. */
9814 && ix86_force_align_arg_pointer
9815 && crtl->stack_alignment_estimated == 128)
9816 incoming_stack_boundary = MIN_STACK_BOUNDARY;
9818 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
9820 /* Incoming stack alignment can be changed on individual functions
9821 via force_align_arg_pointer attribute. We use the smallest
9822 incoming stack boundary. */
9823 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
9824 && lookup_attribute (ix86_force_align_arg_pointer_string,
9825 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
9826 incoming_stack_boundary = MIN_STACK_BOUNDARY;
9828 /* The incoming stack frame has to be aligned at least at
9829 parm_stack_boundary. */
9830 if (incoming_stack_boundary < crtl->parm_stack_boundary)
9831 incoming_stack_boundary = crtl->parm_stack_boundary;
9833 /* The stack at the entrance of main is aligned by the runtime. We use the
9834 smallest incoming stack boundary. */
9835 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
9836 && DECL_NAME (current_function_decl)
9837 && MAIN_NAME_P (DECL_NAME (current_function_decl))
9838 && DECL_FILE_SCOPE_P (current_function_decl))
9839 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
9841 return incoming_stack_boundary;
9844 /* Update incoming stack boundary and estimated stack alignment. */
9847 ix86_update_stack_boundary (void)
9849 ix86_incoming_stack_boundary
9850 = ix86_minimum_incoming_stack_boundary (false);
9852 /* x86_64 varargs needs 16-byte stack alignment for the register save
9856 && crtl->stack_alignment_estimated < 128)
9857 crtl->stack_alignment_estimated = 128;
9860 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
9861 needed or an rtx for DRAP otherwise. */
9864 ix86_get_drap_rtx (void)
9866 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
9867 crtl->need_drap = true;
9869 if (stack_realign_drap)
9871 /* Assign DRAP to vDRAP and return vDRAP. */
9872 unsigned int regno = find_drap_reg ();
9877 arg_ptr = gen_rtx_REG (Pmode, regno);
9878 crtl->drap_reg = arg_ptr;
9881 drap_vreg = copy_to_reg (arg_ptr);
9885 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
9888 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
9889 RTX_FRAME_RELATED_P (insn) = 1;
9897 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
9900 ix86_internal_arg_pointer (void)
9902 return virtual_incoming_args_rtx;
9905 struct scratch_reg {
9910 /* Return a short-lived scratch register for use on function entry.
9911 In 32-bit mode, it is valid only after the registers are saved
9912 in the prologue. This register must be released by means of
9913 release_scratch_register_on_entry once it is dead. */
9916 get_scratch_register_on_entry (struct scratch_reg *sr)
9924 /* We always use R11 in 64-bit mode. */
9929 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
9931 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
9932 bool static_chain_p = DECL_STATIC_CHAIN (decl);
9933 int regparm = ix86_function_regparm (fntype, decl);
9935 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
9937 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
9938 for the static chain register. */
9939 if ((regparm < 1 || (fastcall_p && !static_chain_p))
9940 && drap_regno != AX_REG)
9942 else if (regparm < 2 && drap_regno != DX_REG)
9944 /* ecx is the static chain register. */
9945 else if (regparm < 3 && !fastcall_p && !static_chain_p
9946 && drap_regno != CX_REG)
9948 else if (ix86_save_reg (BX_REG, true))
9950 /* esi is the static chain register. */
9951 else if (!(regparm == 3 && static_chain_p)
9952 && ix86_save_reg (SI_REG, true))
9954 else if (ix86_save_reg (DI_REG, true))
9958 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
9963 sr->reg = gen_rtx_REG (Pmode, regno);
9966 rtx insn = emit_insn (gen_push (sr->reg));
9967 RTX_FRAME_RELATED_P (insn) = 1;
9971 /* Release a scratch register obtained from the preceding function. */
9974 release_scratch_register_on_entry (struct scratch_reg *sr)
9978 rtx x, insn = emit_insn (gen_pop (sr->reg));
9980 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
9981 RTX_FRAME_RELATED_P (insn) = 1;
9982 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
9983 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
9984 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
9988 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
9990 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
9993 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
9995 /* We skip the probe for the first interval + a small dope of 4 words and
9996 probe that many bytes past the specified size to maintain a protection
9997 area at the bottom of the stack. */
9998 const int dope = 4 * UNITS_PER_WORD;
9999 rtx size_rtx = GEN_INT (size);
10001 /* See if we have a constant small number of probes to generate. If so,
10002 that's the easy case. The run-time loop is made up of 11 insns in the
10003 generic case while the compile-time loop is made up of 3+2*(n-1) insns
10004 for n # of intervals. */
10005 if (size <= 5 * PROBE_INTERVAL)
10007 HOST_WIDE_INT i, adjust;
10008 bool first_probe = true;
10010 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
10011 values of N from 1 until it exceeds SIZE. If only one probe is
10012 needed, this will not generate any code. Then adjust and probe
10013 to PROBE_INTERVAL + SIZE. */
10014 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10018 adjust = 2 * PROBE_INTERVAL + dope;
10019 first_probe = false;
10022 adjust = PROBE_INTERVAL;
10024 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10025 plus_constant (stack_pointer_rtx, -adjust)));
10026 emit_stack_probe (stack_pointer_rtx);
10030 adjust = size + PROBE_INTERVAL + dope;
10032 adjust = size + PROBE_INTERVAL - i;
10034 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10035 plus_constant (stack_pointer_rtx, -adjust)));
10036 emit_stack_probe (stack_pointer_rtx);
10038 /* Adjust back to account for the additional first interval. */
10039 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10040 plus_constant (stack_pointer_rtx,
10041 PROBE_INTERVAL + dope)));
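/* Worked example (editorial note, assuming PROBE_INTERVAL == 4096 and
   size == 10000): the branch above emits

     SP -= 2*4096 + dope ; probe      (i = 4096, first probe)
     SP -= 4096          ; probe      (i = 8192)
     SP -= 1808          ; probe      (tail: 10000 + 4096 - 12288)
     SP += 4096 + dope                (return the extra first interval)

   for a net adjustment of exactly -10000 bytes, with every
   PROBE_INTERVAL-sized step of the new area touched.  */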
10044 /* Otherwise, do the same as above, but in a loop. Note that we must be
10045 extra careful with variables wrapping around because we might be at
10046 the very top (or the very bottom) of the address space and we have
10047 to be able to handle this case properly; in particular, we use an
10048 equality test for the loop condition. */
10051 HOST_WIDE_INT rounded_size;
10052 struct scratch_reg sr;
10054 get_scratch_register_on_entry (&sr);
10057 /* Step 1: round SIZE to the previous multiple of the interval. */
10059 rounded_size = size & -PROBE_INTERVAL;
10062 /* Step 2: compute initial and final value of the loop counter. */
10064 /* SP = SP_0 + PROBE_INTERVAL. */
10065 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10066 plus_constant (stack_pointer_rtx,
10067 - (PROBE_INTERVAL + dope))));
10069 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
10070 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
10071 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
10072 gen_rtx_PLUS (Pmode, sr.reg,
10073 stack_pointer_rtx)));
10076 /* Step 3: the loop
10078 while (SP != LAST_ADDR)
10080 SP = SP + PROBE_INTERVAL
10084 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
10085 values of N from 1 until it is equal to ROUNDED_SIZE. */
10087 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
10090 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
10091 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
10093 if (size != rounded_size)
10095 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10096 plus_constant (stack_pointer_rtx,
10097 rounded_size - size)));
10098 emit_stack_probe (stack_pointer_rtx);
10101 /* Adjust back to account for the additional first interval. */
10102 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10103 plus_constant (stack_pointer_rtx,
10104 PROBE_INTERVAL + dope)));
10106 release_scratch_register_on_entry (&sr);
10109 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
10110 cfun->machine->fs.sp_offset += size;
10112 /* Make sure nothing is scheduled before we are done. */
10113 emit_insn (gen_blockage ());
10116 /* Adjust the stack pointer up to REG while probing it. */
10119 output_adjust_stack_and_probe (rtx reg)
10121 static int labelno = 0;
10122 char loop_lab[32], end_lab[32];
10125 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10126 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10128 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10130 /* Jump to END_LAB if SP == LAST_ADDR. */
10131 xops[0] = stack_pointer_rtx;
10133 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10134 fputs ("\tje\t", asm_out_file);
10135 assemble_name_raw (asm_out_file, end_lab);
10136 fputc ('\n', asm_out_file);
10138 /* SP = SP + PROBE_INTERVAL. */
10139 xops[1] = GEN_INT (PROBE_INTERVAL);
10140 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10143 xops[1] = const0_rtx;
10144 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
10146 fprintf (asm_out_file, "\tjmp\t");
10147 assemble_name_raw (asm_out_file, loop_lab);
10148 fputc ('\n', asm_out_file);
10150 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
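/* The emitted loop therefore looks like (editorial sketch, AT&T
   syntax, 32-bit, assuming PROBE_INTERVAL == 4096, with %eax standing
   in for the scratch register holding LAST_ADDR):

   .LPSRL0:
       cmpl %eax, %esp
       je .LPSRE0
       subl $4096, %esp
       orl $0, (%esp)
       jmp .LPSRL0
   .LPSRE0:                                                            */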
10155 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
10156 inclusive. These are offsets from the current stack pointer. */
10159 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
10161 /* See if we have a constant small number of probes to generate. If so,
10162 that's the easy case. The run-time loop is made up of 7 insns in the
10163 generic case while the compile-time loop is made up of n insns for n # of intervals. */
10165 if (size <= 7 * PROBE_INTERVAL)
10169 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
10170 it exceeds SIZE. If only one probe is needed, this will not
10171 generate any code. Then probe at FIRST + SIZE. */
10172 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10173 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + i)));
10175 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + size)));
10178 /* Otherwise, do the same as above, but in a loop. Note that we must be
10179 extra careful with variables wrapping around because we might be at
10180 the very top (or the very bottom) of the address space and we have
10181 to be able to handle this case properly; in particular, we use an
10182 equality test for the loop condition. */
10185 HOST_WIDE_INT rounded_size, last;
10186 struct scratch_reg sr;
10188 get_scratch_register_on_entry (&sr);
10191 /* Step 1: round SIZE to the previous multiple of the interval. */
10193 rounded_size = size & -PROBE_INTERVAL;
10196 /* Step 2: compute initial and final value of the loop counter. */
10198 /* TEST_OFFSET = FIRST. */
10199 emit_move_insn (sr.reg, GEN_INT (-first));
10201 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
10202 last = first + rounded_size;
10205 /* Step 3: the loop
10207 while (TEST_ADDR != LAST_ADDR)
10209 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
10213 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
10214 until it is equal to ROUNDED_SIZE. */
10216 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
10219 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
10220 that SIZE is equal to ROUNDED_SIZE. */
10222 if (size != rounded_size)
10223 emit_stack_probe (plus_constant (gen_rtx_PLUS (Pmode,
10226 rounded_size - size));
10228 release_scratch_register_on_entry (&sr);
10231 /* Make sure nothing is scheduled before we are done. */
10232 emit_insn (gen_blockage ());
10235 /* Probe a range of stack addresses from REG to END, inclusive. These are
10236 offsets from the current stack pointer. */
10239 output_probe_stack_range (rtx reg, rtx end)
10241 static int labelno = 0;
10242 char loop_lab[32], end_lab[32];
10245 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10246 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10248 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10250 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
10253 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10254 fputs ("\tje\t", asm_out_file);
10255 assemble_name_raw (asm_out_file, end_lab);
10256 fputc ('\n', asm_out_file);
10258 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
10259 xops[1] = GEN_INT (PROBE_INTERVAL);
10260 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10262 /* Probe at TEST_ADDR. */
10263 xops[0] = stack_pointer_rtx;
10265 xops[2] = const0_rtx;
10266 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
10268 fprintf (asm_out_file, "\tjmp\t");
10269 assemble_name_raw (asm_out_file, loop_lab);
10270 fputc ('\n', asm_out_file);
10272 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10277 /* Finalize the stack_realign_needed flag, which guides the prologue/epilogue
10278 so that they are generated in the correct form. */
10280 ix86_finalize_stack_realign_flags (void)
10282 /* Check if stack realignment is really needed after reload, and
10283 store the result in cfun. */
10284 unsigned int incoming_stack_boundary
10285 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
10286 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
10287 unsigned int stack_realign = (incoming_stack_boundary
10288 < (current_function_is_leaf
10289 ? crtl->max_used_stack_slot_alignment
10290 : crtl->stack_alignment_needed));
10292 if (crtl->stack_realign_finalized)
10294 /* After stack_realign_needed is finalized, we can no longer update it. */
10296 gcc_assert (crtl->stack_realign_needed == stack_realign);
10300 crtl->stack_realign_needed = stack_realign;
10301 crtl->stack_realign_finalized = true;
10305 /* Expand the prologue into a bunch of separate insns. */
10308 ix86_expand_prologue (void)
10310 struct machine_function *m = cfun->machine;
10313 struct ix86_frame frame;
10314 HOST_WIDE_INT allocate;
10315 bool int_registers_saved;
10317 ix86_finalize_stack_realign_flags ();
10319 /* DRAP should not coexist with stack_realign_fp */
10320 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
10322 memset (&m->fs, 0, sizeof (m->fs));
10324 /* Initialize CFA state for before the prologue. */
10325 m->fs.cfa_reg = stack_pointer_rtx;
10326 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
10328 /* Track SP offset to the CFA. We continue tracking this after we've
10329 swapped the CFA register away from SP. In the case of re-alignment
10330 this is fudged; we're interested in offsets within the local frame. */
10331 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10332 m->fs.sp_valid = true;
10334 ix86_compute_frame_layout (&frame);
10336 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
10338 /* We should have already generated an error for any use of
10339 ms_hook on a nested function. */
10340 gcc_checking_assert (!ix86_static_chain_on_stack);
10342 /* Check whether profiling is active and we shall use the
10343 profiling-before-prologue variant. If so, issue a sorry. */
10344 if (crtl->profile && flag_fentry != 0)
10345 sorry ("ms_hook_prologue attribute isn%'t compatible "
10346 "with -mfentry for 32-bit");
10348 /* In ix86_asm_output_function_label we emitted:
10349 8b ff movl.s %edi,%edi
10351 8b ec movl.s %esp,%ebp
10353 This matches the hookable function prologue in Win32 API
10354 functions in Microsoft Windows XP Service Pack 2 and newer.
10355 Wine uses this to enable Windows apps to hook the Win32 API
10356 functions provided by Wine.
10358 What that means is that we've already set up the frame pointer. */
10360 if (frame_pointer_needed
10361 && !(crtl->drap_reg && crtl->stack_realign_needed))
10365 /* We've decided to use the frame pointer already set up.
10366 Describe this to the unwinder by pretending that both
10367 push and mov insns happen right here.
10369 Putting the unwind info here at the end of the ms_hook
10370 is done so that we can make absolutely certain we get
10371 the required byte sequence at the start of the function,
10372 rather than relying on an assembler that can produce
10373 the exact encoding required.
10375 However, it does mean (in the unpatched case) that we have
10376 a one-insn window where the asynchronous unwind info is
10377 incorrect. On the other hand, if we placed the unwind info at
10378 its correct location we would have incorrect unwind info
10379 in the patched case. This is probably all moot, since
10380 I don't expect Wine to generate dwarf2 unwind info for the
10381 system libraries that use this feature. */
10383 insn = emit_insn (gen_blockage ());
10385 push = gen_push (hard_frame_pointer_rtx);
10386 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
10387 stack_pointer_rtx);
10388 RTX_FRAME_RELATED_P (push) = 1;
10389 RTX_FRAME_RELATED_P (mov) = 1;
10391 RTX_FRAME_RELATED_P (insn) = 1;
10392 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10393 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
10395 /* Note that gen_push incremented m->fs.cfa_offset, even
10396 though we didn't emit the push insn here. */
10397 m->fs.cfa_reg = hard_frame_pointer_rtx;
10398 m->fs.fp_offset = m->fs.cfa_offset;
10399 m->fs.fp_valid = true;
10403 /* The frame pointer is not needed so pop %ebp again.
10404 This leaves us with a pristine state. */
10405 emit_insn (gen_pop (hard_frame_pointer_rtx));
10409 /* The first insn of a function that accepts its static chain on the
10410 stack is to push the register that would be filled in by a direct
10411 call. This insn will be skipped by the trampoline. */
10412 else if (ix86_static_chain_on_stack)
10414 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
10415 emit_insn (gen_blockage ());
10417 /* We don't want to interpret this push insn as a register save,
10418 only as a stack adjustment. The real copy of the register as
10419 a save will be done later, if needed. */
10420 t = plus_constant (stack_pointer_rtx, -UNITS_PER_WORD);
10421 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
10422 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
10423 RTX_FRAME_RELATED_P (insn) = 1;
10426 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
10427 DRAP is needed and stack realignment is really needed after reload. */
10428 if (stack_realign_drap)
10430 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10432 /* Only need to push the parameter pointer reg if it is not call-used (i.e. callee-saved), since we are about to clobber it. */
10433 if (!call_used_regs[REGNO (crtl->drap_reg)])
10435 /* Push arg pointer reg */
10436 insn = emit_insn (gen_push (crtl->drap_reg));
10437 RTX_FRAME_RELATED_P (insn) = 1;
10440 /* Grab the argument pointer. */
10441 t = plus_constant (stack_pointer_rtx, m->fs.sp_offset);
10442 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10443 RTX_FRAME_RELATED_P (insn) = 1;
10444 m->fs.cfa_reg = crtl->drap_reg;
10445 m->fs.cfa_offset = 0;
10447 /* Align the stack. */
10448 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10450 GEN_INT (-align_bytes)));
10451 RTX_FRAME_RELATED_P (insn) = 1;
10453 /* Replicate the return address on the stack so that the return
10454 address can be reached via the (argp - 1) slot. This is needed
10455 to implement the macro RETURN_ADDR_RTX and the intrinsic function
10456 expand_builtin_return_addr, etc. */
10457 t = plus_constant (crtl->drap_reg, -UNITS_PER_WORD);
10458 t = gen_frame_mem (Pmode, t);
10459 insn = emit_insn (gen_push (t));
10460 RTX_FRAME_RELATED_P (insn) = 1;
10462 /* For the purposes of frame and register save area addressing,
10463 we've started over with a new frame. */
10464 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10465 m->fs.realigned = true;
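/* A sketch (added commentary) of the frame right after the DRAP setup
   above, for 32-bit code with a 16-byte alignment requirement;
   addresses grow downward and the exact padding depends on the runtime
   stack pointer:

	incoming arguments	<- DRAP
	return address		<- DRAP - 4
	[saved DRAP register]	(only if it is not call-used)
	...alignment padding...
	copy of return address	<- the new (argp - 1) slot  */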
10468 if (frame_pointer_needed && !m->fs.fp_valid)
10470 /* Note: AT&T enter does NOT have reversed args. Enter is probably
10471 slower on all targets. Also sdb doesn't like it. */
10472 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
10473 RTX_FRAME_RELATED_P (insn) = 1;
10475 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
10477 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
10478 RTX_FRAME_RELATED_P (insn) = 1;
10480 if (m->fs.cfa_reg == stack_pointer_rtx)
10481 m->fs.cfa_reg = hard_frame_pointer_rtx;
10482 m->fs.fp_offset = m->fs.sp_offset;
10483 m->fs.fp_valid = true;
10487 int_registers_saved = (frame.nregs == 0);
10489 if (!int_registers_saved)
10491 /* If saving registers via PUSH, do so now. */
10492 if (!frame.save_regs_using_mov)
10494 ix86_emit_save_regs ();
10495 int_registers_saved = true;
10496 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
10499 /* When using the red zone we may start register saving before allocating
10500 the stack frame, saving one cycle of the prologue. However, avoid
10501 doing this if we have to probe the stack; at least on x86_64 the
10502 stack probe can turn into a call that clobbers a red zone location. */
10503 else if (ix86_using_red_zone ()
10504 && (! TARGET_STACK_PROBE
10505 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
10507 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10508 int_registers_saved = true;
10512 if (stack_realign_fp)
10514 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10515 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
10517 /* The computation of the size of the re-aligned stack frame means
10518 that we must allocate the size of the register save area before
10519 performing the actual alignment. Otherwise we cannot guarantee
10520 that there's enough storage above the realignment point. */
10521 if (m->fs.sp_offset != frame.sse_reg_save_offset)
10522 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10523 GEN_INT (m->fs.sp_offset
10524 - frame.sse_reg_save_offset),
10527 /* Align the stack. */
10528 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10530 GEN_INT (-align_bytes)));
10532 /* For the purposes of register save area addressing, the stack
10533 pointer is no longer valid. As for the value of sp_offset,
10534 see ix86_compute_frame_layout, which we need to match in order
10535 to pass verification of stack_pointer_offset at the end. */
10536 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
10537 m->fs.sp_valid = false;
10540 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
10542 if (flag_stack_usage)
10544 /* We start to count from ARG_POINTER. */
10545 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
10547 /* If it was realigned, take into account the fake frame. */
10548 if (stack_realign_drap)
10550 if (ix86_static_chain_on_stack)
10551 stack_size += UNITS_PER_WORD;
10553 if (!call_used_regs[REGNO (crtl->drap_reg)])
10554 stack_size += UNITS_PER_WORD;
10556 /* This over-estimates by 1 minimal-stack-alignment-unit but
10557 mitigates that by counting in the new return address slot. */
10558 current_function_dynamic_stack_size
10559 += crtl->stack_alignment_needed / BITS_PER_UNIT;
10562 current_function_static_stack_size = stack_size;
10565 /* The stack has already been decremented by the instruction calling us
10566 so we need to probe unconditionally to preserve the protection area. */
10567 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
10569 /* We expect the registers to be saved when probes are used. */
10570 gcc_assert (int_registers_saved);
10572 if (STACK_CHECK_MOVING_SP)
10574 ix86_adjust_stack_and_probe (allocate);
10579 HOST_WIDE_INT size = allocate;
10581 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
10582 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
10584 if (TARGET_STACK_PROBE)
10585 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
10587 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
10593 else if (!ix86_target_stack_probe ()
10594 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
10596 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10597 GEN_INT (-allocate), -1,
10598 m->fs.cfa_reg == stack_pointer_rtx);
10602 rtx eax = gen_rtx_REG (Pmode, AX_REG);
10604 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
10606 bool eax_live = false;
10607 bool r10_live = false;
10610 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
10611 if (!TARGET_64BIT_MS_ABI)
10612 eax_live = ix86_eax_live_at_start_p ();
10616 emit_insn (gen_push (eax));
10617 allocate -= UNITS_PER_WORD;
10621 r10 = gen_rtx_REG (Pmode, R10_REG);
10622 emit_insn (gen_push (r10));
10623 allocate -= UNITS_PER_WORD;
10626 emit_move_insn (eax, GEN_INT (allocate));
10627 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
10629 /* Use the fact that AX still contains ALLOCATE. */
10630 adjust_stack_insn = (TARGET_64BIT
10631 ? gen_pro_epilogue_adjust_stack_di_sub
10632 : gen_pro_epilogue_adjust_stack_si_sub);
10634 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
10635 stack_pointer_rtx, eax));
10637 /* Note that SEH directives need to continue tracking the stack
10638 pointer even after the frame pointer has been set up. */
10639 if (m->fs.cfa_reg == stack_pointer_rtx || TARGET_SEH)
10641 if (m->fs.cfa_reg == stack_pointer_rtx)
10642 m->fs.cfa_offset += allocate;
10644 RTX_FRAME_RELATED_P (insn) = 1;
10645 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10646 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10647 plus_constant (stack_pointer_rtx,
10650 m->fs.sp_offset += allocate;
10652 if (r10_live && eax_live)
10654 t = choose_baseaddr (m->fs.sp_offset - allocate);
10655 emit_move_insn (r10, gen_frame_mem (Pmode, t));
10656 t = choose_baseaddr (m->fs.sp_offset - allocate - UNITS_PER_WORD);
10657 emit_move_insn (eax, gen_frame_mem (Pmode, t));
10659 else if (eax_live || r10_live)
10661 t = choose_baseaddr (m->fs.sp_offset - allocate);
10662 emit_move_insn ((eax_live ? eax : r10), gen_frame_mem (Pmode, t));
10665 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
10667 /* If we haven't already set up the frame pointer, do so now. */
10668 if (frame_pointer_needed && !m->fs.fp_valid)
10670 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
10671 GEN_INT (frame.stack_pointer_offset
10672 - frame.hard_frame_pointer_offset));
10673 insn = emit_insn (insn);
10674 RTX_FRAME_RELATED_P (insn) = 1;
10675 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
10677 if (m->fs.cfa_reg == stack_pointer_rtx)
10678 m->fs.cfa_reg = hard_frame_pointer_rtx;
10679 m->fs.fp_offset = frame.hard_frame_pointer_offset;
10680 m->fs.fp_valid = true;
10683 if (!int_registers_saved)
10684 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10685 if (frame.nsseregs)
10686 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
10688 pic_reg_used = false;
10689 if (pic_offset_table_rtx
10690 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
10693 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
10695 if (alt_pic_reg_used != INVALID_REGNUM)
10696 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
10698 pic_reg_used = true;
10705 if (ix86_cmodel == CM_LARGE_PIC)
10707 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
10708 rtx label = gen_label_rtx ();
10709 emit_label (label);
10710 LABEL_PRESERVE_P (label) = 1;
10711 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
10712 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
10713 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
10714 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
10715 pic_offset_table_rtx, tmp_reg));
10718 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
10721 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
10724 /* In the pic_reg_used case, make sure that the got load isn't deleted
10725 when mcount needs it. Blockage to avoid call movement across mcount
10726 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
10728 if (crtl->profile && !flag_fentry && pic_reg_used)
10729 emit_insn (gen_prologue_use (pic_offset_table_rtx));
10731 if (crtl->drap_reg && !crtl->stack_realign_needed)
10733 /* vDRAP is set up, but after reload it turns out stack realignment
10734 isn't necessary; here we emit the prologue to set up DRAP
10735 without the stack realignment adjustment. */
10736 t = choose_baseaddr (0);
10737 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10740 /* Prevent instructions from being scheduled into the register save push
10741 sequence when access to the redzone area is done through the frame
10742 pointer. The offset between the frame pointer and the stack pointer is
10743 calculated relative to the value of the stack pointer at the end of the
10744 function prologue, and moving instructions that access the redzone area
10745 via the frame pointer inside the push sequence violates this assumption. */
10746 if (frame_pointer_needed && frame.red_zone_size)
10747 emit_insn (gen_memory_blockage ());
10749 /* Emit cld instruction if stringops are used in the function. */
10750 if (TARGET_CLD && ix86_current_function_needs_cld)
10751 emit_insn (gen_cld ());
10753 /* SEH requires that the prologue end within 256 bytes of the start of
10754 the function. Prevent instruction schedules that would extend that. */
10756 emit_insn (gen_blockage ());
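/* For orientation only (added commentary, a sketch under assumptions,
   not emitted verbatim): a plain 32-bit function with a frame pointer,
   one callee-saved register and 24 bytes of locals would typically get

	pushl	%ebp
	movl	%esp, %ebp
	pushl	%ebx
	subl	$24, %esp

   from the code above; the probing, DRAP and ms_hook paths only fire
   when their conditions hold.  */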
10759 /* Emit code to restore REG using a POP insn. */
10762 ix86_emit_restore_reg_using_pop (rtx reg)
10764 struct machine_function *m = cfun->machine;
10765 rtx insn = emit_insn (gen_pop (reg));
10767 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
10768 m->fs.sp_offset -= UNITS_PER_WORD;
10770 if (m->fs.cfa_reg == crtl->drap_reg
10771 && REGNO (reg) == REGNO (crtl->drap_reg))
10773 /* Previously we'd represented the CFA as an expression
10774 like *(%ebp - 8). We've just popped that value from
10775 the stack, which means we need to reset the CFA to
10776 the drap register. This will remain until we restore
10777 the stack pointer. */
10778 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
10779 RTX_FRAME_RELATED_P (insn) = 1;
10781 /* This means that the DRAP register is valid for addressing too. */
10782 m->fs.drap_valid = true;
10786 if (m->fs.cfa_reg == stack_pointer_rtx)
10788 rtx x = plus_constant (stack_pointer_rtx, UNITS_PER_WORD);
10789 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10790 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
10791 RTX_FRAME_RELATED_P (insn) = 1;
10793 m->fs.cfa_offset -= UNITS_PER_WORD;
10796 /* When the frame pointer is the CFA, and we pop it, we are
10797 swapping back to the stack pointer as the CFA. This happens
10798 for stack frames that don't allocate other data, so we assume
10799 the stack pointer is now pointing at the return address, i.e.
10800 the function entry state, which makes the offset 1 word. */
10801 if (reg == hard_frame_pointer_rtx)
10803 m->fs.fp_valid = false;
10804 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
10806 m->fs.cfa_reg = stack_pointer_rtx;
10807 m->fs.cfa_offset -= UNITS_PER_WORD;
10809 add_reg_note (insn, REG_CFA_DEF_CFA,
10810 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10811 GEN_INT (m->fs.cfa_offset)));
10812 RTX_FRAME_RELATED_P (insn) = 1;
10817 /* Emit code to restore saved registers using POP insns. */
10820 ix86_emit_restore_regs_using_pop (void)
10822 unsigned int regno;
10824 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10825 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
10826 ix86_emit_restore_reg_using_pop (gen_rtx_REG (Pmode, regno));
10829 /* Emit code and notes for the LEAVE instruction. */
10832 ix86_emit_leave (void)
10834 struct machine_function *m = cfun->machine;
10835 rtx insn = emit_insn (ix86_gen_leave ());
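  /* Added note: "leave" is architecturally equivalent to
     "movl %ebp, %esp; popl %ebp", which is why the bookkeeping below
     revalidates the stack pointer at fp_offset minus one word and
     invalidates the frame pointer.  */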
10837 ix86_add_queued_cfa_restore_notes (insn);
10839 gcc_assert (m->fs.fp_valid);
10840 m->fs.sp_valid = true;
10841 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
10842 m->fs.fp_valid = false;
10844 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
10846 m->fs.cfa_reg = stack_pointer_rtx;
10847 m->fs.cfa_offset = m->fs.sp_offset;
10849 add_reg_note (insn, REG_CFA_DEF_CFA,
10850 plus_constant (stack_pointer_rtx, m->fs.sp_offset));
10851 RTX_FRAME_RELATED_P (insn) = 1;
10852 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
10857 /* Emit code to restore saved registers using MOV insns.
10858 First register is restored from CFA - CFA_OFFSET. */
10860 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
10861 int maybe_eh_return)
10863 struct machine_function *m = cfun->machine;
10864 unsigned int regno;
10866 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10867 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
10869 rtx reg = gen_rtx_REG (Pmode, regno);
10872 mem = choose_baseaddr (cfa_offset);
10873 mem = gen_frame_mem (Pmode, mem);
10874 insn = emit_move_insn (reg, mem);
10876 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
10878 /* Previously we'd represented the CFA as an expression
10879 like *(%ebp - 8). We've just reloaded that value from
10880 the stack, which means we need to reset the CFA to
10881 the drap register. This will remain until we restore
10882 the stack pointer. */
10883 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
10884 RTX_FRAME_RELATED_P (insn) = 1;
10886 /* This means that the DRAP register is valid for addressing. */
10887 m->fs.drap_valid = true;
10890 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
10892 cfa_offset -= UNITS_PER_WORD;
10896 /* Emit code to restore saved registers using MOV insns.
10897 First register is restored from CFA - CFA_OFFSET. */
10899 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
10900 int maybe_eh_return)
10902 unsigned int regno;
10904 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10905 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
10907 rtx reg = gen_rtx_REG (V4SFmode, regno);
10910 mem = choose_baseaddr (cfa_offset);
10911 mem = gen_rtx_MEM (V4SFmode, mem);
10912 set_mem_align (mem, 128);
10913 emit_move_insn (reg, mem);
10915 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
10921 /* Restore function stack, frame, and registers. */
10924 ix86_expand_epilogue (int style)
10926 struct machine_function *m = cfun->machine;
10927 struct machine_frame_state frame_state_save = m->fs;
10928 struct ix86_frame frame;
10929 bool restore_regs_via_mov;
10932 ix86_finalize_stack_realign_flags ();
10933 ix86_compute_frame_layout (&frame);
10935 m->fs.sp_valid = (!frame_pointer_needed
10936 || (current_function_sp_is_unchanging
10937 && !stack_realign_fp));
10938 gcc_assert (!m->fs.sp_valid
10939 || m->fs.sp_offset == frame.stack_pointer_offset);
10941 /* The FP must be valid if the frame pointer is present. */
10942 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
10943 gcc_assert (!m->fs.fp_valid
10944 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
10946 /* We must have *some* valid pointer to the stack frame. */
10947 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
10949 /* The DRAP is never valid at this point. */
10950 gcc_assert (!m->fs.drap_valid);
10952 /* See the comment about red zone and frame
10953 pointer usage in ix86_expand_prologue. */
10954 if (frame_pointer_needed && frame.red_zone_size)
10955 emit_insn (gen_memory_blockage ());
10957 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
10958 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
10960 /* Determine the CFA offset of the end of the red-zone. */
10961 m->fs.red_zone_offset = 0;
10962 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
10964 /* The red-zone begins below the return address. */
10965 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
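      /* Added note: on x86_64 this is 128 + 8 = 136, i.e. the
	 return-address slot plus the 128-byte red zone defined by
	 the psABI.  */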
10967 /* When the register save area is in the aligned portion of
10968 the stack, determine the maximum runtime displacement that
10969 matches up with the aligned frame. */
10970 if (stack_realign_drap)
10971 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
10975 /* Special care must be taken for the normal return case of a function
10976 using eh_return: the eax and edx registers are marked as saved, but
10977 not restored along this path. Adjust the save location to match. */
10978 if (crtl->calls_eh_return && style != 2)
10979 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
10981 /* EH_RETURN requires the use of moves to function properly. */
10982 if (crtl->calls_eh_return)
10983 restore_regs_via_mov = true;
10984 /* SEH requires the use of pops to identify the epilogue. */
10985 else if (TARGET_SEH)
10986 restore_regs_via_mov = false;
10987 /* If we're only restoring one register and sp is not valid, then
10988 use a move instruction to restore the register, since it's
10989 less work than reloading sp and popping the register. */
10990 else if (!m->fs.sp_valid && frame.nregs <= 1)
10991 restore_regs_via_mov = true;
10992 else if (TARGET_EPILOGUE_USING_MOVE
10993 && cfun->machine->use_fast_prologue_epilogue
10994 && (frame.nregs > 1
10995 || m->fs.sp_offset != frame.reg_save_offset))
10996 restore_regs_via_mov = true;
10997 else if (frame_pointer_needed
10999 && m->fs.sp_offset != frame.reg_save_offset)
11000 restore_regs_via_mov = true;
11001 else if (frame_pointer_needed
11002 && TARGET_USE_LEAVE
11003 && cfun->machine->use_fast_prologue_epilogue
11004 && frame.nregs == 1)
11005 restore_regs_via_mov = true;
11007 restore_regs_via_mov = false;
11009 if (restore_regs_via_mov || frame.nsseregs)
11011 /* Ensure that the entire register save area is addressable via
11012 the stack pointer, if we will restore via sp. */
11014 && m->fs.sp_offset > 0x7fffffff
11015 && !(m->fs.fp_valid || m->fs.drap_valid)
11016 && (frame.nsseregs + frame.nregs) != 0)
11018 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11019 GEN_INT (m->fs.sp_offset
11020 - frame.sse_reg_save_offset),
11022 m->fs.cfa_reg == stack_pointer_rtx);
11026 /* If there are any SSE registers to restore, then we have to do it
11027 via moves, since there's obviously no pop for SSE regs. */
11028 if (frame.nsseregs)
11029 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
11032 if (restore_regs_via_mov)
11037 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
11039 /* eh_return epilogues need %ecx added to the stack pointer. */
11042 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
11044 /* Stack align doesn't work with eh_return. */
11045 gcc_assert (!stack_realign_drap);
11046 /* Neither do regparm nested functions. */
11047 gcc_assert (!ix86_static_chain_on_stack);
11049 if (frame_pointer_needed)
11051 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
11052 t = plus_constant (t, m->fs.fp_offset - UNITS_PER_WORD);
11053 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
11055 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
11056 insn = emit_move_insn (hard_frame_pointer_rtx, t);
11058 /* Note that we use SA as a temporary CFA, as the return
11059 address is at the proper place relative to it. We
11060 pretend this happens at the FP restore insn because
11061 prior to this insn the FP would be stored at the wrong
11062 offset relative to SA, and after this insn we have no
11063 other reasonable register to use for the CFA. We don't
11064 bother resetting the CFA to the SP for the duration of
11065 the return insn. */
11066 add_reg_note (insn, REG_CFA_DEF_CFA,
11067 plus_constant (sa, UNITS_PER_WORD));
11068 ix86_add_queued_cfa_restore_notes (insn);
11069 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
11070 RTX_FRAME_RELATED_P (insn) = 1;
11072 m->fs.cfa_reg = sa;
11073 m->fs.cfa_offset = UNITS_PER_WORD;
11074 m->fs.fp_valid = false;
11076 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
11077 const0_rtx, style, false);
11081 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
11082 t = plus_constant (t, m->fs.sp_offset - UNITS_PER_WORD);
11083 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
11084 ix86_add_queued_cfa_restore_notes (insn);
11086 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
11087 if (m->fs.cfa_offset != UNITS_PER_WORD)
11089 m->fs.cfa_offset = UNITS_PER_WORD;
11090 add_reg_note (insn, REG_CFA_DEF_CFA,
11091 plus_constant (stack_pointer_rtx,
11093 RTX_FRAME_RELATED_P (insn) = 1;
11096 m->fs.sp_offset = UNITS_PER_WORD;
11097 m->fs.sp_valid = true;
11102 /* SEH requires that the function end with (1) a stack adjustment
11103 if necessary, (2) a sequence of pops, and (3) a return or
11104 jump instruction. Prevent insns from the function body from
11105 being scheduled into this sequence. */
11108 /* Prevent a catch region from being adjacent to the standard
11109 epilogue sequence. Unfortunately, neither crtl->uses_eh_lsda nor
11110 several other flags that would be interesting to test are
11112 if (flag_non_call_exceptions)
11113 emit_insn (gen_nops (const1_rtx));
11115 emit_insn (gen_blockage ());
11118 /* First step is to deallocate the stack frame so that we can
11119 pop the registers. */
11120 if (!m->fs.sp_valid)
11122 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
11123 GEN_INT (m->fs.fp_offset
11124 - frame.reg_save_offset),
11127 else if (m->fs.sp_offset != frame.reg_save_offset)
11129 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11130 GEN_INT (m->fs.sp_offset
11131 - frame.reg_save_offset),
11133 m->fs.cfa_reg == stack_pointer_rtx);
11136 ix86_emit_restore_regs_using_pop ();
11139 /* If we used a frame pointer and haven't already got rid of it,
11141 if (m->fs.fp_valid)
11143 /* If the stack pointer is valid and pointing at the frame
11144 pointer store address, then we only need a pop. */
11145 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
11146 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
11147 /* Leave results in shorter dependency chains on CPUs that are
11148 able to grok it fast. */
11149 else if (TARGET_USE_LEAVE
11150 || optimize_function_for_size_p (cfun)
11151 || !cfun->machine->use_fast_prologue_epilogue)
11152 ix86_emit_leave ();
11155 pro_epilogue_adjust_stack (stack_pointer_rtx,
11156 hard_frame_pointer_rtx,
11157 const0_rtx, style, !using_drap);
11158 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
11164 int param_ptr_offset = UNITS_PER_WORD;
11167 gcc_assert (stack_realign_drap);
11169 if (ix86_static_chain_on_stack)
11170 param_ptr_offset += UNITS_PER_WORD;
11171 if (!call_used_regs[REGNO (crtl->drap_reg)])
11172 param_ptr_offset += UNITS_PER_WORD;
11174 insn = emit_insn (gen_rtx_SET
11175 (VOIDmode, stack_pointer_rtx,
11176 gen_rtx_PLUS (Pmode,
11178 GEN_INT (-param_ptr_offset))));
11179 m->fs.cfa_reg = stack_pointer_rtx;
11180 m->fs.cfa_offset = param_ptr_offset;
11181 m->fs.sp_offset = param_ptr_offset;
11182 m->fs.realigned = false;
11184 add_reg_note (insn, REG_CFA_DEF_CFA,
11185 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11186 GEN_INT (param_ptr_offset)));
11187 RTX_FRAME_RELATED_P (insn) = 1;
11189 if (!call_used_regs[REGNO (crtl->drap_reg)])
11190 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
11193 /* At this point the stack pointer must be valid, and we must have
11194 restored all of the registers. We may not have deallocated the
11195 entire stack frame. We've delayed this until now because it may
11196 be possible to merge the local stack deallocation with the
11197 deallocation forced by ix86_static_chain_on_stack. */
11198 gcc_assert (m->fs.sp_valid);
11199 gcc_assert (!m->fs.fp_valid);
11200 gcc_assert (!m->fs.realigned);
11201 if (m->fs.sp_offset != UNITS_PER_WORD)
11203 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11204 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
11208 /* Sibcall epilogues don't want a return instruction. */
11211 m->fs = frame_state_save;
11215 /* Emit vzeroupper if needed. */
11216 if (TARGET_VZEROUPPER
11217 && !TREE_THIS_VOLATILE (cfun->decl)
11218 && !cfun->machine->caller_return_avx256_p)
11219 emit_insn (gen_avx_vzeroupper (GEN_INT (call_no_avx256)));
11221 if (crtl->args.pops_args && crtl->args.size)
11223 rtx popc = GEN_INT (crtl->args.pops_args);
11225 /* i386 can only pop 64K bytes. If asked to pop more, pop return
11226 address, do explicit add, and jump indirectly to the caller. */
11228 if (crtl->args.pops_args >= 65536)
11230 rtx ecx = gen_rtx_REG (SImode, CX_REG);
11233 /* There is no "pascal" calling convention in any 64bit ABI. */
11234 gcc_assert (!TARGET_64BIT);
11236 insn = emit_insn (gen_pop (ecx));
11237 m->fs.cfa_offset -= UNITS_PER_WORD;
11238 m->fs.sp_offset -= UNITS_PER_WORD;
11240 add_reg_note (insn, REG_CFA_ADJUST_CFA,
11241 copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
11242 add_reg_note (insn, REG_CFA_REGISTER,
11243 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
11244 RTX_FRAME_RELATED_P (insn) = 1;
11246 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11248 emit_jump_insn (gen_return_indirect_internal (ecx));
11251 emit_jump_insn (gen_return_pop_internal (popc));
11254 emit_jump_insn (gen_return_internal ());
11256 /* Restore the state back to the state from the prologue,
11257 so that it's correct for the next epilogue. */
11258 m->fs = frame_state_save;
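/* For orientation only (added commentary, a sketch): the common
   frame-pointer epilogue produced above is simply

	leave
	ret

   while a stdcall-style function popping N < 64K bytes of arguments
   ends in "ret $N", and the N >= 64K case uses the pop/add/indirect
   jump sequence through %ecx emitted above.  */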
11261 /* Reset from the function's potential modifications. */
11264 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
11265 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
11267 if (pic_offset_table_rtx)
11268 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
11270 /* Mach-O doesn't support labels at the end of objects, so if
11271 it looks like we might want one, insert a NOP. */
11273 rtx insn = get_last_insn ();
11276 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
11277 insn = PREV_INSN (insn);
11281 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
11282 fputs ("\tnop\n", file);
11288 /* Return a scratch register to use in the split stack prologue. The
11289 split stack prologue is used for -fsplit-stack. It consists of the first
11290 instructions in the function, even before the regular prologue.
11291 The scratch register can be any caller-saved register which is not
11292 used for parameters or for the static chain. */
11294 static unsigned int
11295 split_stack_prologue_scratch_regno (void)
11304 is_fastcall = (lookup_attribute ("fastcall",
11305 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
11307 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
11311 if (DECL_STATIC_CHAIN (cfun->decl))
11313 sorry ("-fsplit-stack does not support fastcall with "
11314 "nested function");
11315 return INVALID_REGNUM;
11319 else if (regparm < 3)
11321 if (!DECL_STATIC_CHAIN (cfun->decl))
11327 sorry ("-fsplit-stack does not support 2 register "
11328 "parameters for a nested function");
11329 return INVALID_REGNUM;
11336 /* FIXME: We could make this work by pushing a register
11337 around the addition and comparison. */
11338 sorry ("-fsplit-stack does not support 3 register parameters");
11339 return INVALID_REGNUM;
11344 /* A SYMBOL_REF for the function which allocates new stack space for
11347 static GTY(()) rtx split_stack_fn;
11349 /* A SYMBOL_REF for the more stack function when using the large
11352 static GTY(()) rtx split_stack_fn_large;
11354 /* Handle -fsplit-stack. These are the first instructions in the
11355 function, even before the regular prologue. */
11358 ix86_expand_split_stack_prologue (void)
11360 struct ix86_frame frame;
11361 HOST_WIDE_INT allocate;
11362 unsigned HOST_WIDE_INT args_size;
11363 rtx label, limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
11364 rtx scratch_reg = NULL_RTX;
11365 rtx varargs_label = NULL_RTX;
11368 gcc_assert (flag_split_stack && reload_completed);
11370 ix86_finalize_stack_realign_flags ();
11371 ix86_compute_frame_layout (&frame);
11372 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
11374 /* This is the label we will branch to if we have enough stack
11375 space. We expect the basic block reordering pass to reverse this
11376 branch if optimizing, so that we branch in the unlikely case. */
11377 label = gen_label_rtx ();
11379 /* We need to compare the stack pointer minus the frame size with
11380 the stack boundary in the TCB. The stack boundary always gives
11381 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
11382 can compare directly. Otherwise we need to do an addition. */
11384 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
11385 UNSPEC_STACK_CHECK);
11386 limit = gen_rtx_CONST (Pmode, limit);
11387 limit = gen_rtx_MEM (Pmode, limit);
11388 if (allocate < SPLIT_STACK_AVAILABLE)
11389 current = stack_pointer_rtx;
11392 unsigned int scratch_regno;
11395 /* We need a scratch register to hold the stack pointer minus
11396 the required frame size. Since this is the very start of the
11397 function, the scratch register can be any caller-saved
11398 register which is not used for parameters. */
11399 offset = GEN_INT (- allocate);
11400 scratch_regno = split_stack_prologue_scratch_regno ();
11401 if (scratch_regno == INVALID_REGNUM)
11403 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11404 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
11406 /* We don't use ix86_gen_add3 in this case because it will
11407 want to split to lea, but when not optimizing the insn
11408 will not be split after this point. */
11409 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11410 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11415 emit_move_insn (scratch_reg, offset);
11416 emit_insn (gen_adddi3 (scratch_reg, scratch_reg,
11417 stack_pointer_rtx));
11419 current = scratch_reg;
11422 ix86_expand_branch (GEU, current, limit, label);
11423 jump_insn = get_last_insn ();
11424 JUMP_LABEL (jump_insn) = label;
11426 /* Mark the jump as very likely to be taken. */
11427 add_reg_note (jump_insn, REG_BR_PROB,
11428 GEN_INT (REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100));
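  /* Added note: REG_BR_PROB_BASE is 10000, so the value recorded above
     is 10000 - 100 = 9900, i.e. a 99% probability that the existing
     stack segment is large enough.  */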
11430 if (split_stack_fn == NULL_RTX)
11431 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
11432 fn = split_stack_fn;
11434 /* Get more stack space. We pass in the desired stack space and the
11435 size of the arguments to copy to the new stack. In 32-bit mode
11436 we push the parameters; __morestack will return on a new stack
11437 anyhow. In 64-bit mode we pass the parameters in r10 and
11439 allocate_rtx = GEN_INT (allocate);
11440 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
11441 call_fusage = NULL_RTX;
11446 reg10 = gen_rtx_REG (Pmode, R10_REG);
11447 reg11 = gen_rtx_REG (Pmode, R11_REG);
11449 /* If this function uses a static chain, it will be in %r10.
11450 Preserve it across the call to __morestack. */
11451 if (DECL_STATIC_CHAIN (cfun->decl))
11455 rax = gen_rtx_REG (Pmode, AX_REG);
11456 emit_move_insn (rax, reg10);
11457 use_reg (&call_fusage, rax);
11460 if (ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
11462 HOST_WIDE_INT argval;
11464 /* When using the large model we need to load the address
11465 into a register, and we've run out of registers. So we
11466 switch to a different calling convention, and we call a
11467 different function: __morestack_large_model. We pass the
11468 argument size in the upper 32 bits of r10 and pass the
11469 frame size in the lower 32 bits. */
11470 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
11471 gcc_assert ((args_size & 0xffffffff) == args_size);
11473 if (split_stack_fn_large == NULL_RTX)
11474 split_stack_fn_large =
11475 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
11477 if (ix86_cmodel == CM_LARGE_PIC)
11481 label = gen_label_rtx ();
11482 emit_label (label);
11483 LABEL_PRESERVE_P (label) = 1;
11484 emit_insn (gen_set_rip_rex64 (reg10, label));
11485 emit_insn (gen_set_got_offset_rex64 (reg11, label));
11486 emit_insn (gen_adddi3 (reg10, reg10, reg11));
11487 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
11489 x = gen_rtx_CONST (Pmode, x);
11490 emit_move_insn (reg11, x);
11491 x = gen_rtx_PLUS (Pmode, reg10, reg11);
11492 x = gen_const_mem (Pmode, x);
11493 emit_move_insn (reg11, x);
11496 emit_move_insn (reg11, split_stack_fn_large);
11500 argval = ((args_size << 16) << 16) + allocate;
11501 emit_move_insn (reg10, GEN_INT (argval));
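	  /* Worked example (added commentary): with args_size = 0x20 and
	     allocate = 0x1000, the double shift above yields
	     argval = 0x0000002000001000, i.e. the argument size in the
	     high 32 bits and the frame size in the low 32 bits of r10.  */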
11505 emit_move_insn (reg10, allocate_rtx);
11506 emit_move_insn (reg11, GEN_INT (args_size));
11507 use_reg (&call_fusage, reg11);
11510 use_reg (&call_fusage, reg10);
11514 emit_insn (gen_push (GEN_INT (args_size)));
11515 emit_insn (gen_push (allocate_rtx));
11517 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
11518 GEN_INT (UNITS_PER_WORD), constm1_rtx,
11520 add_function_usage_to (call_insn, call_fusage);
11522 /* In order to make call/return prediction work right, we now need
11523 to execute a return instruction. See
11524 libgcc/config/i386/morestack.S for the details on how this works.
11526 For flow purposes gcc must not see this as a return
11527 instruction--we need control flow to continue at the subsequent
11528 label. Therefore, we use an unspec. */
11529 gcc_assert (crtl->args.pops_args < 65536);
11530 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
11532 /* If we are in 64-bit mode and this function uses a static chain,
11533 we saved %r10 in %rax before calling __morestack. */
11534 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
11535 emit_move_insn (gen_rtx_REG (Pmode, R10_REG),
11536 gen_rtx_REG (Pmode, AX_REG));
11538 /* If this function calls va_start, we need to store a pointer to
11539 the arguments on the old stack, because they may not have been
11540 all copied to the new stack. At this point the old stack can be
11541 found at the frame pointer value used by __morestack, because
11542 __morestack has set that up before calling back to us. Here we
11543 store that pointer in a scratch register, and in
11544 ix86_expand_prologue we store the scratch register in a stack
11546 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11548 unsigned int scratch_regno;
11552 scratch_regno = split_stack_prologue_scratch_regno ();
11553 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11554 frame_reg = gen_rtx_REG (Pmode, BP_REG);
11558 return address within this function
11559 return address of caller of this function
11561 So we add three words to get to the stack arguments.
11565 return address within this function
11566 first argument to __morestack
11567 second argument to __morestack
11568 return address of caller of this function
11570 So we add five words to get to the stack arguments.
11572 words = TARGET_64BIT ? 3 : 5;
11573 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11574 gen_rtx_PLUS (Pmode, frame_reg,
11575 GEN_INT (words * UNITS_PER_WORD))));
11577 varargs_label = gen_label_rtx ();
11578 emit_jump_insn (gen_jump (varargs_label));
11579 JUMP_LABEL (get_last_insn ()) = varargs_label;
11584 emit_label (label);
11585 LABEL_NUSES (label) = 1;
11587 /* If this function calls va_start, we now have to set the scratch
11588 register for the case where we do not call __morestack. In this
11589 case we need to set it based on the stack pointer. */
11590 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11592 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11593 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11594 GEN_INT (UNITS_PER_WORD))));
11596 emit_label (varargs_label);
11597 LABEL_NUSES (varargs_label) = 1;
11601 /* We may have to tell the dataflow pass that the split stack prologue
11602 is initializing a scratch register. */
11605 ix86_live_on_entry (bitmap regs)
11607 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11609 gcc_assert (flag_split_stack);
11610 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
11614 /* Extract the parts of an RTL expression that is a valid memory address
11615 for an instruction. Return 0 if the structure of the address is
11616 grossly off. Return -1 if the address contains ASHIFT, so it is not
11617 strictly valid but is still used for computing the length of the lea instruction. */
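/* Illustrative example (added commentary, hypothetical RTL): the address
   12(%ebx,%eax,4), i.e.
	(plus (plus (mult (reg %eax) (const_int 4)) (reg %ebx))
	      (const_int 12))
   decomposes into base = %ebx, index = %eax, scale = 4, disp = 12 and
   seg = SEG_DEFAULT.  */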
11620 ix86_decompose_address (rtx addr, struct ix86_address *out)
11622 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
11623 rtx base_reg, index_reg;
11624 HOST_WIDE_INT scale = 1;
11625 rtx scale_rtx = NULL_RTX;
11628 enum ix86_address_seg seg = SEG_DEFAULT;
11630 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
11632 else if (GET_CODE (addr) == PLUS)
11634 rtx addends[4], op;
11642 addends[n++] = XEXP (op, 1);
11645 while (GET_CODE (op) == PLUS);
11650 for (i = n; i >= 0; --i)
11653 switch (GET_CODE (op))
11658 index = XEXP (op, 0);
11659 scale_rtx = XEXP (op, 1);
11665 index = XEXP (op, 0);
11666 tmp = XEXP (op, 1);
11667 if (!CONST_INT_P (tmp))
11669 scale = INTVAL (tmp);
11670 if ((unsigned HOST_WIDE_INT) scale > 3)
11672 scale = 1 << scale;
11676 if (XINT (op, 1) == UNSPEC_TP
11677 && TARGET_TLS_DIRECT_SEG_REFS
11678 && seg == SEG_DEFAULT)
11679 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
11708 else if (GET_CODE (addr) == MULT)
11710 index = XEXP (addr, 0); /* index*scale */
11711 scale_rtx = XEXP (addr, 1);
11713 else if (GET_CODE (addr) == ASHIFT)
11715 /* We're called for lea too, which implements ashift on occasion. */
11716 index = XEXP (addr, 0);
11717 tmp = XEXP (addr, 1);
11718 if (!CONST_INT_P (tmp))
11720 scale = INTVAL (tmp);
11721 if ((unsigned HOST_WIDE_INT) scale > 3)
11723 scale = 1 << scale;
11727 disp = addr; /* displacement */
11729 /* Extract the integral value of scale. */
11732 if (!CONST_INT_P (scale_rtx))
11734 scale = INTVAL (scale_rtx);
11737 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
11738 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
11740 /* Avoid useless 0 displacement. */
11741 if (disp == const0_rtx && (base || index))
11744 /* Allow the arg pointer and stack pointer as index if there is no scaling. */
11745 if (base_reg && index_reg && scale == 1
11746 && (index_reg == arg_pointer_rtx
11747 || index_reg == frame_pointer_rtx
11748 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
11751 tmp = base, base = index, index = tmp;
11752 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
11755 /* Special case: %ebp cannot be encoded as a base without a displacement.
11759 && (base_reg == hard_frame_pointer_rtx
11760 || base_reg == frame_pointer_rtx
11761 || base_reg == arg_pointer_rtx
11762 || (REG_P (base_reg)
11763 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
11764 || REGNO (base_reg) == R13_REG))))
11767 /* Special case: on K6, [%esi] makes the instruction vector decoded.
11768 Avoid this by transforming to [%esi+0].
11769 Reload calls address legitimization without cfun defined, so we need
11770 to test cfun for being non-NULL. */
11771 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
11772 && base_reg && !index_reg && !disp
11773 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
11776 /* Special case: encode reg+reg instead of reg*2. */
11777 if (!base && index && scale == 2)
11778 base = index, base_reg = index_reg, scale = 1;
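  /* Added note: reg+reg uses a plain SIB byte, whereas a scaled index
     without a base forces a 32-bit zero displacement, so [%eax+%eax]
     encodes four bytes shorter than [%eax*2].  */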
11780 /* Special case: scaling cannot be encoded without base or displacement. */
11781 if (!base && !disp && index && scale != 1)
11785 out->index = index;
11787 out->scale = scale;
11793 /* Return cost of the memory address x.
11794 For i386, it is better to use a complex address than let gcc copy
11795 the address into a reg and make a new pseudo. But not if the address
11796 requires two regs - that would mean more pseudos with longer
11799 ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
11801 struct ix86_address parts;
11803 int ok = ix86_decompose_address (x, &parts);
11807 if (parts.base && GET_CODE (parts.base) == SUBREG)
11808 parts.base = SUBREG_REG (parts.base);
11809 if (parts.index && GET_CODE (parts.index) == SUBREG)
11810 parts.index = SUBREG_REG (parts.index);
11812 /* Attempt to minimize number of registers in the address. */
11814 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
11816 && (!REG_P (parts.index)
11817 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
11821 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
11823 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
11824 && parts.base != parts.index)
11827 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
11828 since its predecode logic can't detect the length of instructions
11829 and it degenerates to vector decoding. Increase the cost of such
11830 addresses here. The penalty is at least 2 cycles. It may be worthwhile
11831 to split such addresses or even refuse such addresses at all.
11833 The following addressing modes are affected:
11838 The first and last cases may be avoidable by explicitly coding the zero
11839 into the memory address, but I don't have an AMD-K6 machine handy to check this
11843 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
11844 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
11845 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
11851 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
11852 this is used to form addresses of local data when -fPIC is in
11856 darwin_local_data_pic (rtx disp)
11858 return (GET_CODE (disp) == UNSPEC
11859 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
11862 /* Determine if a given RTX is a valid constant. We already know this
11863 satisfies CONSTANT_P. */
11866 legitimate_constant_p (rtx x)
11868 switch (GET_CODE (x))
11873 if (GET_CODE (x) == PLUS)
11875 if (!CONST_INT_P (XEXP (x, 1)))
11880 if (TARGET_MACHO && darwin_local_data_pic (x))
11883 /* Only some unspecs are valid as "constants". */
11884 if (GET_CODE (x) == UNSPEC)
11885 switch (XINT (x, 1))
11888 case UNSPEC_GOTOFF:
11889 case UNSPEC_PLTOFF:
11890 return TARGET_64BIT;
11892 case UNSPEC_NTPOFF:
11893 x = XVECEXP (x, 0, 0);
11894 return (GET_CODE (x) == SYMBOL_REF
11895 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11896 case UNSPEC_DTPOFF:
11897 x = XVECEXP (x, 0, 0);
11898 return (GET_CODE (x) == SYMBOL_REF
11899 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
11904 /* We must have drilled down to a symbol. */
11905 if (GET_CODE (x) == LABEL_REF)
11907 if (GET_CODE (x) != SYMBOL_REF)
11912 /* TLS symbols are never valid. */
11913 if (SYMBOL_REF_TLS_MODEL (x))
11916 /* DLLIMPORT symbols are never valid. */
11917 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
11918 && SYMBOL_REF_DLLIMPORT_P (x))
11922 /* mdynamic-no-pic */
11923 if (MACHO_DYNAMIC_NO_PIC_P)
11924 return machopic_symbol_defined_p (x);
11929 if (GET_MODE (x) == TImode
11930 && x != CONST0_RTX (TImode)
11936 if (!standard_sse_constant_p (x))
11943 /* Otherwise we handle everything else in the move patterns. */
11947 /* Determine if it's legal to put X into the constant pool. This
11948 is not possible for the address of thread-local symbols, which
11949 is checked above. */
11952 ix86_cannot_force_const_mem (rtx x)
11954 /* We can always put integral constants and vectors in memory. */
11955 switch (GET_CODE (x))
11965 return !legitimate_constant_p (x);
11969 /* Nonzero if the constant value X is a legitimate general operand
11970 when generating PIC code. It is given that flag_pic is on and
11971 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
11974 legitimate_pic_operand_p (rtx x)
11978 switch (GET_CODE (x))
11981 inner = XEXP (x, 0);
11982 if (GET_CODE (inner) == PLUS
11983 && CONST_INT_P (XEXP (inner, 1)))
11984 inner = XEXP (inner, 0);
11986 /* Only some unspecs are valid as "constants". */
11987 if (GET_CODE (inner) == UNSPEC)
11988 switch (XINT (inner, 1))
11991 case UNSPEC_GOTOFF:
11992 case UNSPEC_PLTOFF:
11993 return TARGET_64BIT;
11995 x = XVECEXP (inner, 0, 0);
11996 return (GET_CODE (x) == SYMBOL_REF
11997 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11998 case UNSPEC_MACHOPIC_OFFSET:
11999 return legitimate_pic_address_disp_p (x);
12007 return legitimate_pic_address_disp_p (x);
12014 /* Determine if a given CONST RTX is a valid memory displacement
12018 legitimate_pic_address_disp_p (rtx disp)
12022 /* In 64bit mode we can allow direct addresses of symbols and labels
12023 when they are not dynamic symbols. */
12026 rtx op0 = disp, op1;
12028 switch (GET_CODE (disp))
12034 if (GET_CODE (XEXP (disp, 0)) != PLUS)
12036 op0 = XEXP (XEXP (disp, 0), 0);
12037 op1 = XEXP (XEXP (disp, 0), 1);
12038 if (!CONST_INT_P (op1)
12039 || INTVAL (op1) >= 16*1024*1024
12040 || INTVAL (op1) < -16*1024*1024)
12042 if (GET_CODE (op0) == LABEL_REF)
12044 if (GET_CODE (op0) != SYMBOL_REF)
12049 /* TLS references should always be enclosed in UNSPEC. */
12050 if (SYMBOL_REF_TLS_MODEL (op0))
12052 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
12053 && ix86_cmodel != CM_LARGE_PIC)
12061 if (GET_CODE (disp) != CONST)
12063 disp = XEXP (disp, 0);
12067 /* It is unsafe to allow PLUS expressions; this limits the allowed
12068 distance of GOT table entries. We should not need these anyway. */
12069 if (GET_CODE (disp) != UNSPEC
12070 || (XINT (disp, 1) != UNSPEC_GOTPCREL
12071 && XINT (disp, 1) != UNSPEC_GOTOFF
12072 && XINT (disp, 1) != UNSPEC_PCREL
12073 && XINT (disp, 1) != UNSPEC_PLTOFF))
12076 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
12077 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
12083 if (GET_CODE (disp) == PLUS)
12085 if (!CONST_INT_P (XEXP (disp, 1)))
12087 disp = XEXP (disp, 0);
12091 if (TARGET_MACHO && darwin_local_data_pic (disp))
12094 if (GET_CODE (disp) != UNSPEC)
12097 switch (XINT (disp, 1))
12102 /* We need to check for both symbols and labels because VxWorks loads
12103 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
12105 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
12106 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
12107 case UNSPEC_GOTOFF:
12108 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
12109 While the ABI also specifies a 32bit relocation, we don't produce
12110 it in the small PIC model at all. */
12111 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
12112 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
12114 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
12116 case UNSPEC_GOTTPOFF:
12117 case UNSPEC_GOTNTPOFF:
12118 case UNSPEC_INDNTPOFF:
12121 disp = XVECEXP (disp, 0, 0);
12122 return (GET_CODE (disp) == SYMBOL_REF
12123 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
12124 case UNSPEC_NTPOFF:
12125 disp = XVECEXP (disp, 0, 0);
12126 return (GET_CODE (disp) == SYMBOL_REF
12127 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
12128 case UNSPEC_DTPOFF:
12129 disp = XVECEXP (disp, 0, 0);
12130 return (GET_CODE (disp) == SYMBOL_REF
12131 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
12137 /* Recognizes RTL expressions that are valid memory addresses for an
12138 instruction. The MODE argument is the machine mode for the MEM
12139 expression that wants to use this address.
12141 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
12142 convert common non-canonical forms to canonical form so that they will
12146 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
12147 rtx addr, bool strict)
12149 struct ix86_address parts;
12150 rtx base, index, disp;
12151 HOST_WIDE_INT scale;
12153 if (ix86_decompose_address (addr, &parts) <= 0)
12154 /* Decomposition failed. */
12158 index = parts.index;
12160 scale = parts.scale;
12162 /* Validate base register.
12164 Don't allow SUBREG's that span more than a word here. It can lead to spill
12165 failures when the base is one word out of a two word structure, which is
12166 represented internally as a DImode int. */
12174 else if (GET_CODE (base) == SUBREG
12175 && REG_P (SUBREG_REG (base))
12176 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
12178 reg = SUBREG_REG (base);
12180 /* Base is not a register. */
12183 if (GET_MODE (base) != Pmode)
12184 /* Base is not in Pmode. */
12187 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
12188 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
12189 /* Base is not valid. */
12193 /* Validate index register.
12195 Don't allow SUBREG's that span more than a word here -- same as above. */
12203 else if (GET_CODE (index) == SUBREG
12204 && REG_P (SUBREG_REG (index))
12205 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
12207 reg = SUBREG_REG (index);
12209 /* Index is not a register. */
12212 if (GET_MODE (index) != Pmode)
12213 /* Index is not in Pmode. */
12216 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
12217 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
12218 /* Index is not valid. */
12222 /* Validate scale factor. */
12226 /* Scale without index. */
12229 if (scale != 2 && scale != 4 && scale != 8)
12230 /* Scale is not a valid multiplier. */
12234 /* Validate displacement. */
12237 if (GET_CODE (disp) == CONST
12238 && GET_CODE (XEXP (disp, 0)) == UNSPEC
12239 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
12240 switch (XINT (XEXP (disp, 0), 1))
12242 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
12243 used. While the ABI also specifies 32bit relocations, we don't produce
12244 them at all and use IP-relative addressing instead. */
12246 case UNSPEC_GOTOFF:
12247 gcc_assert (flag_pic);
12249 goto is_legitimate_pic;
12251 /* 64bit address unspec. */
12254 case UNSPEC_GOTPCREL:
12256 gcc_assert (flag_pic);
12257 goto is_legitimate_pic;
12259 case UNSPEC_GOTTPOFF:
12260 case UNSPEC_GOTNTPOFF:
12261 case UNSPEC_INDNTPOFF:
12262 case UNSPEC_NTPOFF:
12263 case UNSPEC_DTPOFF:
12266 case UNSPEC_STACK_CHECK:
12267 gcc_assert (flag_split_stack);
12271 /* Invalid address unspec. */
12275 else if (SYMBOLIC_CONST (disp)
12279 && MACHOPIC_INDIRECT
12280 && !machopic_operand_p (disp)
12286 if (TARGET_64BIT && (index || base))
12288 /* foo@dtpoff(%rX) is ok. */
12289 if (GET_CODE (disp) != CONST
12290 || GET_CODE (XEXP (disp, 0)) != PLUS
12291 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
12292 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
12293 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
12294 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
12295 /* Non-constant pic memory reference. */
12298 else if ((!TARGET_MACHO || flag_pic)
12299 && ! legitimate_pic_address_disp_p (disp))
12300 /* Displacement is an invalid pic construct. */
12303 else if (MACHO_DYNAMIC_NO_PIC_P && !legitimate_constant_p (disp))
12304 /* Displacement must be referenced via non_lazy_pointer. */
12308 /* This code used to verify that a symbolic pic displacement
12309 includes the pic_offset_table_rtx register.
12311 While this is a good idea, unfortunately these constructs may
12312 be created by the "adds using lea" optimization for incorrect
12321 This code is nonsensical, but results in addressing the
12322 GOT table with a pic_offset_table_rtx base. We can't
12323 just refuse it easily, since it gets matched by the
12324 "addsi3" pattern, which later gets split to lea when the
12325 output register differs from the input. While this
12326 could be handled by a separate addsi pattern for this case
12327 that never results in lea, disabling this test seems to be
12328 the easier and correct fix for the crash. */
12330 else if (GET_CODE (disp) != LABEL_REF
12331 && !CONST_INT_P (disp)
12332 && (GET_CODE (disp) != CONST
12333 || !legitimate_constant_p (disp))
12334 && (GET_CODE (disp) != SYMBOL_REF
12335 || !legitimate_constant_p (disp)))
12336 /* Displacement is not constant. */
12338 else if (TARGET_64BIT
12339 && !x86_64_immediate_operand (disp, VOIDmode))
12340 /* Displacement is out of range. */
12344 /* Everything looks valid. */
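/* To summarize the checks above (illustrative recap, not part of the
   original source): the accepted shape is the usual hardware
   base + index*scale + disp form, e.g.
   (plus (plus (reg) (mult (reg) (const_int 4))) (const_int 8))
   corresponding to the AT&T operand 8(%base,%index,4). */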
12348 /* Determine if a given RTX is a valid constant address. */
12350 bool
12351 constant_address_p (rtx x)
12353 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
12356 /* Return a unique alias set for the GOT. */
12358 static alias_set_type
12359 ix86_GOT_alias_set (void)
12361 static alias_set_type set = -1;
12362 if (set == -1)
12363 set = new_alias_set ();
12365 return set;
12367 /* Return a legitimate reference for ORIG (an address) using the
12368 register REG. If REG is 0, a new pseudo is generated.
12370 There are two types of references that must be handled:
12372 1. Global data references must load the address from the GOT, via
12373 the PIC reg. An insn is emitted to do this load, and the reg is
12376 2. Static data references, constant pool addresses, and code labels
12377 compute the address as an offset from the GOT, whose base is in
12378 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
12379 differentiate them from global data objects. The returned
12380 address is the PIC reg + an unspec constant.
12382 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
12383 reg also appears in the address. */
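/* For example (the exact sequences depend on the target and code
   model; shown only as an illustration): a 32-bit global data
   reference becomes a load through the GOT,
   movl foo@GOT(%ebx), %eax
   while a static/local reference is computed as an offset from the
   GOT base,
   leal bar@GOTOFF(%ebx), %eax  */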
12385 rtx
12386 legitimize_pic_address (rtx orig, rtx reg)
12389 rtx new_rtx = orig;
12393 if (TARGET_MACHO && !TARGET_64BIT)
12396 reg = gen_reg_rtx (Pmode);
12397 /* Use the generic Mach-O PIC machinery. */
12398 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
12402 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
12404 else if (TARGET_64BIT
12405 && ix86_cmodel != CM_SMALL_PIC
12406 && gotoff_operand (addr, Pmode))
12409 /* This symbol may be referenced via a displacement from the PIC
12410 base address (@GOTOFF). */
12412 if (reload_in_progress)
12413 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12414 if (GET_CODE (addr) == CONST)
12415 addr = XEXP (addr, 0);
12416 if (GET_CODE (addr) == PLUS)
12418 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
12420 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
12423 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
12424 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12426 tmpreg = gen_reg_rtx (Pmode);
12429 emit_move_insn (tmpreg, new_rtx);
12433 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
12434 tmpreg, 1, OPTAB_DIRECT);
12437 else
12438 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
12439 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
12441 /* This symbol may be referenced via a displacement from the PIC
12442 base address (@GOTOFF). */
12444 if (reload_in_progress)
12445 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12446 if (GET_CODE (addr) == CONST)
12447 addr = XEXP (addr, 0);
12448 if (GET_CODE (addr) == PLUS)
12450 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
12452 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
12455 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
12456 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12457 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12461 emit_move_insn (reg, new_rtx);
12465 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
12466 /* We can't use @GOTOFF for text labels on VxWorks;
12467 see gotoff_operand. */
12468 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
12470 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
12472 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
12473 return legitimize_dllimport_symbol (addr, true);
12474 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
12475 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
12476 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
12478 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
12479 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
12483 /* For x64 PE-COFF there is no GOT table, so we use the address
12484 directly. */
12485 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
12487 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
12488 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12491 reg = gen_reg_rtx (Pmode);
12492 emit_move_insn (reg, new_rtx);
12495 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
12497 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
12498 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12499 new_rtx = gen_const_mem (Pmode, new_rtx);
12500 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
12503 reg = gen_reg_rtx (Pmode);
12504 /* Use gen_movsi directly, otherwise the address is loaded
12505 into a register for CSE. We don't want to CSE these addresses;
12506 instead we CSE addresses from the GOT table, so skip this. */
12507 emit_insn (gen_movsi (reg, new_rtx));
12512 /* This symbol must be referenced via a load from the
12513 Global Offset Table (@GOT). */
12515 if (reload_in_progress)
12516 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12517 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
12518 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12520 new_rtx = force_reg (Pmode, new_rtx);
12521 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12522 new_rtx = gen_const_mem (Pmode, new_rtx);
12523 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
12526 reg = gen_reg_rtx (Pmode);
12527 emit_move_insn (reg, new_rtx);
12533 if (CONST_INT_P (addr)
12534 && !x86_64_immediate_operand (addr, VOIDmode))
12538 emit_move_insn (reg, addr);
12542 new_rtx = force_reg (Pmode, addr);
12544 else if (GET_CODE (addr) == CONST)
12546 addr = XEXP (addr, 0);
12548 /* We must match stuff we generate before. Assume the only
12549 unspecs that can get here are ours. Not that we could do
12550 anything with them anyway.... */
12551 if (GET_CODE (addr) == UNSPEC
12552 || (GET_CODE (addr) == PLUS
12553 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
12555 gcc_assert (GET_CODE (addr) == PLUS);
12557 if (GET_CODE (addr) == PLUS)
12559 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
12561 /* Check first to see if this is a constant offset from a @GOTOFF
12562 symbol reference. */
12563 if (gotoff_operand (op0, Pmode)
12564 && CONST_INT_P (op1))
12568 if (reload_in_progress)
12569 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12570 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
12572 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
12573 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12574 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12578 emit_move_insn (reg, new_rtx);
12584 if (INTVAL (op1) < -16*1024*1024
12585 || INTVAL (op1) >= 16*1024*1024)
12587 if (!x86_64_immediate_operand (op1, Pmode))
12588 op1 = force_reg (Pmode, op1);
12589 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
12595 base = legitimize_pic_address (XEXP (addr, 0), reg);
12596 new_rtx = legitimize_pic_address (XEXP (addr, 1),
12597 base == reg ? NULL_RTX : reg);
12599 if (CONST_INT_P (new_rtx))
12600 new_rtx = plus_constant (base, INTVAL (new_rtx));
12603 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
12605 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
12606 new_rtx = XEXP (new_rtx, 1);
12608 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
12616 /* Load the thread pointer. If TO_REG is true, force it into a register. */
12618 static rtx
12619 get_thread_pointer (int to_reg)
12621 rtx tp, reg, insn;
12623 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
12624 if (!to_reg)
12625 return tp;
12627 reg = gen_reg_rtx (Pmode);
12628 insn = gen_rtx_SET (VOIDmode, reg, tp);
12629 insn = emit_insn (insn);
12631 return reg;
12634 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
12635 false if we expect this to be used for a memory address and true if
12636 we expect to load the address into a register. */
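/* As a rough sketch of what the cases below expand to under the GNU
   TLS dialects (details vary with -m32/-m64 and TARGET_GNU2_TLS):
   global-dynamic: call __tls_get_addr on the symbol's @tlsgd slot;
   local-dynamic: one __tls_get_addr call for the module base, then
   a per-symbol @dtpoff addend;
   initial-exec: load the offset from a @gottpoff GOT slot and add
   the thread pointer;
   local-exec: add a link-time @tpoff/@ntpoff constant to the
   thread pointer. */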
12638 static rtx
12639 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
12641 rtx dest, base, off, pic, tp;
12646 case TLS_MODEL_GLOBAL_DYNAMIC:
12647 dest = gen_reg_rtx (Pmode);
12648 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
12650 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
12652 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
12655 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
12656 insns = get_insns ();
12659 RTL_CONST_CALL_P (insns) = 1;
12660 emit_libcall_block (insns, dest, rax, x);
12662 else if (TARGET_64BIT && TARGET_GNU2_TLS)
12663 emit_insn (gen_tls_global_dynamic_64 (dest, x));
12665 emit_insn (gen_tls_global_dynamic_32 (dest, x));
12667 if (TARGET_GNU2_TLS)
12669 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
12671 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
12675 case TLS_MODEL_LOCAL_DYNAMIC:
12676 base = gen_reg_rtx (Pmode);
12677 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
12679 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
12681 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
12684 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
12685 insns = get_insns ();
12688 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
12689 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
12690 RTL_CONST_CALL_P (insns) = 1;
12691 emit_libcall_block (insns, base, rax, note);
12693 else if (TARGET_64BIT && TARGET_GNU2_TLS)
12694 emit_insn (gen_tls_local_dynamic_base_64 (base));
12696 emit_insn (gen_tls_local_dynamic_base_32 (base));
12698 if (TARGET_GNU2_TLS)
12700 rtx x = ix86_tls_module_base ();
12702 set_unique_reg_note (get_last_insn (), REG_EQUIV,
12703 gen_rtx_MINUS (Pmode, x, tp));
12706 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
12707 off = gen_rtx_CONST (Pmode, off);
12709 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
12711 if (TARGET_GNU2_TLS)
12713 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
12715 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
12720 case TLS_MODEL_INITIAL_EXEC:
12723 if (TARGET_SUN_TLS)
12725 /* The Sun linker took the AMD64 TLS spec literally
12726 and can only handle %rax as the destination of the
12727 initial-exec code sequence. */
12729 dest = gen_reg_rtx (Pmode);
12730 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
12735 type = UNSPEC_GOTNTPOFF;
12739 if (reload_in_progress)
12740 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12741 pic = pic_offset_table_rtx;
12742 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
12744 else if (!TARGET_ANY_GNU_TLS)
12746 pic = gen_reg_rtx (Pmode);
12747 emit_insn (gen_set_got (pic));
12748 type = UNSPEC_GOTTPOFF;
12753 type = UNSPEC_INDNTPOFF;
12756 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
12757 off = gen_rtx_CONST (Pmode, off);
12759 off = gen_rtx_PLUS (Pmode, pic, off);
12760 off = gen_const_mem (Pmode, off);
12761 set_mem_alias_set (off, ix86_GOT_alias_set ());
12763 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12765 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
12766 off = force_reg (Pmode, off);
12767 return gen_rtx_PLUS (Pmode, base, off);
12771 base = get_thread_pointer (true);
12772 dest = gen_reg_rtx (Pmode);
12773 emit_insn (gen_subsi3 (dest, base, off));
12777 case TLS_MODEL_LOCAL_EXEC:
12778 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
12779 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12780 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
12781 off = gen_rtx_CONST (Pmode, off);
12783 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12785 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
12786 return gen_rtx_PLUS (Pmode, base, off);
12790 base = get_thread_pointer (true);
12791 dest = gen_reg_rtx (Pmode);
12792 emit_insn (gen_subsi3 (dest, base, off));
12797 gcc_unreachable ();
12803 /* Create or return the unique __imp_DECL dllimport symbol corresponding
12804 to DECL. */
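/* For instance, a reference to a dllimported `foo' is redirected
   through the import-table pointer `__imp_foo' (or `__imp__foo' on
   ABIs with a leading underscore), which the code below materializes
   as an artificial read-only VAR_DECL. */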
12806 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
12807 htab_t dllimport_map;
12809 static tree
12810 get_dllimport_decl (tree decl)
12812 struct tree_map *h, in;
12815 const char *prefix;
12816 size_t namelen, prefixlen;
12821 if (!dllimport_map)
12822 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
12824 in.hash = htab_hash_pointer (decl);
12825 in.base.from = decl;
12826 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
12827 h = (struct tree_map *) *loc;
12831 *loc = h = ggc_alloc_tree_map ();
12833 h->base.from = decl;
12834 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
12835 VAR_DECL, NULL, ptr_type_node);
12836 DECL_ARTIFICIAL (to) = 1;
12837 DECL_IGNORED_P (to) = 1;
12838 DECL_EXTERNAL (to) = 1;
12839 TREE_READONLY (to) = 1;
12841 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
12842 name = targetm.strip_name_encoding (name);
12843 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
12844 ? "*__imp_" : "*__imp__";
12845 namelen = strlen (name);
12846 prefixlen = strlen (prefix);
12847 imp_name = (char *) alloca (namelen + prefixlen + 1);
12848 memcpy (imp_name, prefix, prefixlen);
12849 memcpy (imp_name + prefixlen, name, namelen + 1);
12851 name = ggc_alloc_string (imp_name, namelen + prefixlen);
12852 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
12853 SET_SYMBOL_REF_DECL (rtl, to);
12854 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
12856 rtl = gen_const_mem (Pmode, rtl);
12857 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
12859 SET_DECL_RTL (to, rtl);
12860 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
12865 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
12866 true if we require the result be a register. */
12868 static rtx
12869 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
12874 gcc_assert (SYMBOL_REF_DECL (symbol));
12875 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
12877 x = DECL_RTL (imp_decl);
12879 x = force_reg (Pmode, x);
12883 /* Try machine-dependent ways of modifying an illegitimate address
12884 to be legitimate. If we find one, return the new, valid address.
12885 This macro is used in only one place: `memory_address' in explow.c.
12887 OLDX is the address as it was before break_out_memory_refs was called.
12888 In some cases it is useful to look at this to decide what needs to be done.
12890 It is always safe for this macro to do nothing. It exists to recognize
12891 opportunities to optimize the output.
12893 For the 80386, we handle X+REG by loading X into a register R and
12894 using R+REG. R will go in a general reg and indexing will be used.
12895 However, if REG is a broken-out memory address or multiplication,
12896 nothing needs to be done because REG can certainly go in a general reg.
12898 When -fpic is used, special handling is needed for symbolic references.
12899 See comments by legitimize_pic_address in i386.c for details. */
12901 static rtx
12902 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
12903 enum machine_mode mode)
12908 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
12909 if (log)
12910 return legitimize_tls_address (x, (enum tls_model) log, false);
12911 if (GET_CODE (x) == CONST
12912 && GET_CODE (XEXP (x, 0)) == PLUS
12913 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
12914 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
12916 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
12917 (enum tls_model) log, false);
12918 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
12921 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
12923 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
12924 return legitimize_dllimport_symbol (x, true);
12925 if (GET_CODE (x) == CONST
12926 && GET_CODE (XEXP (x, 0)) == PLUS
12927 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
12928 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
12930 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
12931 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
12935 if (flag_pic && SYMBOLIC_CONST (x))
12936 return legitimize_pic_address (x, 0);
12939 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
12940 return machopic_indirect_data_reference (x, 0);
12943 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
12944 if (GET_CODE (x) == ASHIFT
12945 && CONST_INT_P (XEXP (x, 1))
12946 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
12949 log = INTVAL (XEXP (x, 1));
12950 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
12951 GEN_INT (1 << log));
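/* E.g. (ashift (reg) (const_int 2)) becomes (mult (reg) (const_int 4)),
   which matches the scaled-index part of an address and can later be
   emitted through the SIB byte, as in leal (%eax,%ebx,4), %ecx. */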
12954 if (GET_CODE (x) == PLUS)
12956 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
12958 if (GET_CODE (XEXP (x, 0)) == ASHIFT
12959 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
12960 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
12963 log = INTVAL (XEXP (XEXP (x, 0), 1));
12964 XEXP (x, 0) = gen_rtx_MULT (Pmode,
12965 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
12966 GEN_INT (1 << log));
12969 if (GET_CODE (XEXP (x, 1)) == ASHIFT
12970 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
12971 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
12974 log = INTVAL (XEXP (XEXP (x, 1), 1));
12975 XEXP (x, 1) = gen_rtx_MULT (Pmode,
12976 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
12977 GEN_INT (1 << log));
12980 /* Put multiply first if it isn't already. */
12981 if (GET_CODE (XEXP (x, 1)) == MULT)
12983 rtx tmp = XEXP (x, 0);
12984 XEXP (x, 0) = XEXP (x, 1);
12989 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
12990 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
12991 created by virtual register instantiation, register elimination, and
12992 similar optimizations. */
12993 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
12996 x = gen_rtx_PLUS (Pmode,
12997 gen_rtx_PLUS (Pmode, XEXP (x, 0),
12998 XEXP (XEXP (x, 1), 0)),
12999 XEXP (XEXP (x, 1), 1));
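/* A concrete instance (illustrative): after eliminating the frame
   pointer we may see
   (plus (mult (reg i) (const_int 4)) (plus (reg sp) (const_int 16)))
   which the rewrite above turns into
   (plus (plus (mult (reg i) (const_int 4)) (reg sp)) (const_int 16)),
   i.e. a directly encodable base + index*scale + disp address. */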
13003 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
13004 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
13005 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
13006 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
13007 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
13008 && CONSTANT_P (XEXP (x, 1)))
13011 rtx other = NULL_RTX;
13013 if (CONST_INT_P (XEXP (x, 1)))
13015 constant = XEXP (x, 1);
13016 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
13018 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
13020 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
13021 other = XEXP (x, 1);
13029 x = gen_rtx_PLUS (Pmode,
13030 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
13031 XEXP (XEXP (XEXP (x, 0), 1), 0)),
13032 plus_constant (other, INTVAL (constant)));
13036 if (changed && ix86_legitimate_address_p (mode, x, false))
13039 if (GET_CODE (XEXP (x, 0)) == MULT)
13042 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
13045 if (GET_CODE (XEXP (x, 1)) == MULT)
13048 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
13052 && REG_P (XEXP (x, 1))
13053 && REG_P (XEXP (x, 0)))
13056 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
13059 x = legitimize_pic_address (x, 0);
13062 if (changed && ix86_legitimate_address_p (mode, x, false))
13065 if (REG_P (XEXP (x, 0)))
13067 rtx temp = gen_reg_rtx (Pmode);
13068 rtx val = force_operand (XEXP (x, 1), temp);
13070 emit_move_insn (temp, val);
13072 XEXP (x, 1) = temp;
13076 else if (REG_P (XEXP (x, 1)))
13078 rtx temp = gen_reg_rtx (Pmode);
13079 rtx val = force_operand (XEXP (x, 0), temp);
13081 emit_move_insn (temp, val);
13083 XEXP (x, 0) = temp;
13091 /* Print an integer constant expression in assembler syntax. Addition
13092 and subtraction are the only arithmetic that may appear in these
13093 expressions. FILE is the stdio stream to write to, X is the rtx, and
13094 CODE is the operand print code from the output string. */
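/* For example, a displacement wrapped in (const (unspec [foo]
   UNSPEC_GOTOFF)) is printed as `foo@GOTOFF', and with code 'P' a
   non-local symbol gets an `@PLT' suffix. */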
13097 output_pic_addr_const (FILE *file, rtx x, int code)
13101 switch (GET_CODE (x))
13104 gcc_assert (flag_pic);
13109 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
13110 output_addr_const (file, x);
13113 const char *name = XSTR (x, 0);
13115 /* Mark the decl as referenced so that cgraph will
13116 output the function. */
13117 if (SYMBOL_REF_DECL (x))
13118 mark_decl_referenced (SYMBOL_REF_DECL (x));
13121 if (MACHOPIC_INDIRECT
13122 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
13123 name = machopic_indirection_name (x, /*stub_p=*/true);
13125 assemble_name (file, name);
13127 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
13128 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
13129 fputs ("@PLT", file);
13136 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
13137 assemble_name (asm_out_file, buf);
13141 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
13145 /* This used to output parentheses around the expression,
13146 but that does not work on the 386 (either ATT or BSD assembler). */
13147 output_pic_addr_const (file, XEXP (x, 0), code);
13151 if (GET_MODE (x) == VOIDmode)
13153 /* We can use %d if the number is <32 bits and positive. */
13154 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
13155 fprintf (file, "0x%lx%08lx",
13156 (unsigned long) CONST_DOUBLE_HIGH (x),
13157 (unsigned long) CONST_DOUBLE_LOW (x));
13159 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
13162 /* We can't handle floating point constants;
13163 TARGET_PRINT_OPERAND must handle them. */
13164 output_operand_lossage ("floating constant misused");
13168 /* Some assemblers need integer constants to appear first. */
13169 if (CONST_INT_P (XEXP (x, 0)))
13171 output_pic_addr_const (file, XEXP (x, 0), code);
13173 output_pic_addr_const (file, XEXP (x, 1), code);
13177 gcc_assert (CONST_INT_P (XEXP (x, 1)));
13178 output_pic_addr_const (file, XEXP (x, 1), code);
13180 output_pic_addr_const (file, XEXP (x, 0), code);
13186 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
13187 output_pic_addr_const (file, XEXP (x, 0), code);
13189 output_pic_addr_const (file, XEXP (x, 1), code);
13191 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
13195 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
13197 bool f = i386_asm_output_addr_const_extra (file, x);
13202 gcc_assert (XVECLEN (x, 0) == 1);
13203 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
13204 switch (XINT (x, 1))
13207 fputs ("@GOT", file);
13209 case UNSPEC_GOTOFF:
13210 fputs ("@GOTOFF", file);
13212 case UNSPEC_PLTOFF:
13213 fputs ("@PLTOFF", file);
13216 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13217 "(%rip)" : "[rip]", file);
13219 case UNSPEC_GOTPCREL:
13220 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13221 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
13223 case UNSPEC_GOTTPOFF:
13224 /* FIXME: This might be @TPOFF in Sun ld too. */
13225 fputs ("@gottpoff", file);
13228 fputs ("@tpoff", file);
13230 case UNSPEC_NTPOFF:
13232 fputs ("@tpoff", file);
13234 fputs ("@ntpoff", file);
13236 case UNSPEC_DTPOFF:
13237 fputs ("@dtpoff", file);
13239 case UNSPEC_GOTNTPOFF:
13241 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13242 "@gottpoff(%rip)": "@gottpoff[rip]", file);
13244 fputs ("@gotntpoff", file);
13246 case UNSPEC_INDNTPOFF:
13247 fputs ("@indntpoff", file);
13250 case UNSPEC_MACHOPIC_OFFSET:
13252 machopic_output_function_base_name (file);
13256 output_operand_lossage ("invalid UNSPEC as operand");
13262 output_operand_lossage ("invalid expression as operand");
13266 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
13267 We need to emit DTP-relative relocations. */
13269 static void ATTRIBUTE_UNUSED
13270 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
13272 fputs (ASM_LONG, file);
13273 output_addr_const (file, x);
13274 fputs ("@dtpoff", file);
13280 fputs (", 0", file);
13283 gcc_unreachable ();
13287 /* Return true if X is a representation of the PIC register. This copes
13288 with calls from ix86_find_base_term, where the register might have
13289 been replaced by a cselib value. */
13291 static bool
13292 ix86_pic_register_p (rtx x)
13294 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
13295 return (pic_offset_table_rtx
13296 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
13298 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
13301 /* Helper function for ix86_delegitimize_address.
13302 Attempt to delegitimize TLS local-exec accesses. */
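/* E.g. a local-exec access like %gs:foo@ntpoff on ia32 (or the %fs
   equivalent on x86-64) decomposes into a segment-based address whose
   displacement wraps UNSPEC_NTPOFF; the code below recovers the bare
   `foo' from it for debug output. */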
13304 static rtx
13305 ix86_delegitimize_tls_address (rtx orig_x)
13307 rtx x = orig_x, unspec;
13308 struct ix86_address addr;
13310 if (!TARGET_TLS_DIRECT_SEG_REFS)
13314 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
13316 if (ix86_decompose_address (x, &addr) == 0
13317 || addr.seg != (TARGET_64BIT ? SEG_FS : SEG_GS)
13318 || addr.disp == NULL_RTX
13319 || GET_CODE (addr.disp) != CONST)
13321 unspec = XEXP (addr.disp, 0);
13322 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
13323 unspec = XEXP (unspec, 0);
13324 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
13326 x = XVECEXP (unspec, 0, 0);
13327 gcc_assert (GET_CODE (x) == SYMBOL_REF);
13328 if (unspec != XEXP (addr.disp, 0))
13329 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
13332 rtx idx = addr.index;
13333 if (addr.scale != 1)
13334 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
13335 x = gen_rtx_PLUS (Pmode, idx, x);
13338 x = gen_rtx_PLUS (Pmode, addr.base, x);
13339 if (MEM_P (orig_x))
13340 x = replace_equiv_address_nv (orig_x, x);
13344 /* In the name of slightly smaller debug output, and to cater to
13345 general assembler lossage, recognize PIC+GOTOFF and turn it back
13346 into a direct symbol reference.
13348 On Darwin, this is necessary to avoid a crash, because Darwin
13349 has a different PIC label for each routine but the DWARF debugging
13350 information is not associated with any particular routine, so it's
13351 necessary to remove references to the PIC label from RTL stored by
13352 the DWARF output code. */
13354 static rtx
13355 ix86_delegitimize_address (rtx x)
13357 rtx orig_x = delegitimize_mem_from_attrs (x);
13358 /* addend is NULL or some rtx if x is something+GOTOFF where
13359 something doesn't include the PIC register. */
13360 rtx addend = NULL_RTX;
13361 /* reg_addend is NULL or a multiple of some register. */
13362 rtx reg_addend = NULL_RTX;
13363 /* const_addend is NULL or a const_int. */
13364 rtx const_addend = NULL_RTX;
13365 /* This is the result, or NULL. */
13366 rtx result = NULL_RTX;
13375 if (GET_CODE (x) != CONST
13376 || GET_CODE (XEXP (x, 0)) != UNSPEC
13377 || (XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
13378 && XINT (XEXP (x, 0), 1) != UNSPEC_PCREL)
13379 || !MEM_P (orig_x))
13380 return ix86_delegitimize_tls_address (orig_x);
13381 x = XVECEXP (XEXP (x, 0), 0, 0);
13382 if (GET_MODE (orig_x) != Pmode)
13384 x = simplify_gen_subreg (GET_MODE (orig_x), x, Pmode, 0);
13391 if (GET_CODE (x) != PLUS
13392 || GET_CODE (XEXP (x, 1)) != CONST)
13393 return ix86_delegitimize_tls_address (orig_x);
13395 if (ix86_pic_register_p (XEXP (x, 0)))
13396 /* %ebx + GOT/GOTOFF */
13398 else if (GET_CODE (XEXP (x, 0)) == PLUS)
13400 /* %ebx + %reg * scale + GOT/GOTOFF */
13401 reg_addend = XEXP (x, 0);
13402 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
13403 reg_addend = XEXP (reg_addend, 1);
13404 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
13405 reg_addend = XEXP (reg_addend, 0);
13408 reg_addend = NULL_RTX;
13409 addend = XEXP (x, 0);
13413 addend = XEXP (x, 0);
13415 x = XEXP (XEXP (x, 1), 0);
13416 if (GET_CODE (x) == PLUS
13417 && CONST_INT_P (XEXP (x, 1)))
13419 const_addend = XEXP (x, 1);
13423 if (GET_CODE (x) == UNSPEC
13424 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
13425 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
13426 result = XVECEXP (x, 0, 0);
13428 if (TARGET_MACHO && darwin_local_data_pic (x)
13429 && !MEM_P (orig_x))
13430 result = XVECEXP (x, 0, 0);
13433 return ix86_delegitimize_tls_address (orig_x);
13436 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
13438 result = gen_rtx_PLUS (Pmode, reg_addend, result);
13441 /* If the rest of original X doesn't involve the PIC register, add
13442 addend and subtract pic_offset_table_rtx. This can happen e.g.
13443 for code like:
13444 leal (%ebx, %ecx, 4), %ecx
13445 ...
13446 movl foo@GOTOFF(%ecx), %edx
13447 in which case we return (%ecx - %ebx) + foo. */
13448 if (pic_offset_table_rtx)
13449 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
13450 pic_offset_table_rtx),
13455 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
13457 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
13458 if (result == NULL_RTX)
13464 /* If X is a machine specific address (i.e. a symbol or label being
13465 referenced as a displacement from the GOT implemented using an
13466 UNSPEC), then return the base term. Otherwise return X. */
13468 static rtx
13469 ix86_find_base_term (rtx x)
13475 if (GET_CODE (x) != CONST)
13477 term = XEXP (x, 0);
13478 if (GET_CODE (term) == PLUS
13479 && (CONST_INT_P (XEXP (term, 1))
13480 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
13481 term = XEXP (term, 0);
13482 if (GET_CODE (term) != UNSPEC
13483 || (XINT (term, 1) != UNSPEC_GOTPCREL
13484 && XINT (term, 1) != UNSPEC_PCREL))
13487 return XVECEXP (term, 0, 0);
13490 return ix86_delegitimize_address (x);
13493 static void
13494 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
13495 int fp, FILE *file)
13497 const char *suffix;
13499 if (mode == CCFPmode || mode == CCFPUmode)
13501 code = ix86_fp_compare_code_to_integer (code);
13505 code = reverse_condition (code);
13556 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
13560 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
13561 Those same assemblers have the same but opposite lossage on cmov. */
13562 if (mode == CCmode)
13563 suffix = fp ? "nbe" : "a";
13564 else if (mode == CCCmode)
13567 gcc_unreachable ();
13583 gcc_unreachable ();
13587 gcc_assert (mode == CCmode || mode == CCCmode);
13604 gcc_unreachable ();
13608 /* ??? As above. */
13609 gcc_assert (mode == CCmode || mode == CCCmode);
13610 suffix = fp ? "nb" : "ae";
13613 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
13617 /* ??? As above. */
13618 if (mode == CCmode)
13620 else if (mode == CCCmode)
13621 suffix = fp ? "nb" : "ae";
13623 gcc_unreachable ();
13626 suffix = fp ? "u" : "p";
13629 suffix = fp ? "nu" : "np";
13632 gcc_unreachable ();
13634 fputs (suffix, file);
13637 /* Print the name of register X to FILE based on its machine mode and number.
13638 If CODE is 'w', pretend the mode is HImode.
13639 If CODE is 'b', pretend the mode is QImode.
13640 If CODE is 'k', pretend the mode is SImode.
13641 If CODE is 'q', pretend the mode is DImode.
13642 If CODE is 'x', pretend the mode is V4SFmode.
13643 If CODE is 't', pretend the mode is V8SFmode.
13644 If CODE is 'h', pretend the reg is the 'high' byte register.
13645 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
13646 If CODE is 'd', duplicate the operand for AVX instruction.
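/* For example, if X is hard register 0, then (sizes being selected
   by the code letter) 'b' prints %al, 'w' %ax, 'k' %eax and 'q' %rax
   in AT&T syntax. */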
13650 print_reg (rtx x, int code, FILE *file)
13653 bool duplicated = code == 'd' && TARGET_AVX;
13655 gcc_assert (x == pc_rtx
13656 || (REGNO (x) != ARG_POINTER_REGNUM
13657 && REGNO (x) != FRAME_POINTER_REGNUM
13658 && REGNO (x) != FLAGS_REG
13659 && REGNO (x) != FPSR_REG
13660 && REGNO (x) != FPCR_REG));
13662 if (ASSEMBLER_DIALECT == ASM_ATT)
13667 gcc_assert (TARGET_64BIT);
13668 fputs ("rip", file);
13672 if (code == 'w' || MMX_REG_P (x))
13674 else if (code == 'b')
13676 else if (code == 'k')
13678 else if (code == 'q')
13680 else if (code == 'y')
13682 else if (code == 'h')
13684 else if (code == 'x')
13686 else if (code == 't')
13689 code = GET_MODE_SIZE (GET_MODE (x));
13691 /* Irritatingly, AMD extended registers use a different naming convention
13692 from the normal registers. */
13693 if (REX_INT_REG_P (x))
13695 gcc_assert (TARGET_64BIT);
13699 error ("extended registers have no high halves");
13702 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
13705 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
13708 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
13711 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
13714 error ("unsupported operand size for extended register");
13724 if (STACK_TOP_P (x))
13733 if (! ANY_FP_REG_P (x))
13734 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
13739 reg = hi_reg_name[REGNO (x)];
13742 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
13744 reg = qi_reg_name[REGNO (x)];
13747 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
13749 reg = qi_high_reg_name[REGNO (x)];
13754 gcc_assert (!duplicated);
13756 fputs (hi_reg_name[REGNO (x)] + 1, file);
13761 gcc_unreachable ();
13767 if (ASSEMBLER_DIALECT == ASM_ATT)
13768 fprintf (file, ", %%%s", reg);
13770 fprintf (file, ", %s", reg);
13774 /* Locate some local-dynamic symbol still in use by this function
13775 so that we can print its name in some tls_local_dynamic_base
13776 pattern. */
13778 static int
13779 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
13783 if (GET_CODE (x) == SYMBOL_REF
13784 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
13786 cfun->machine->some_ld_name = XSTR (x, 0);
13793 static const char *
13794 get_some_local_dynamic_name (void)
13798 if (cfun->machine->some_ld_name)
13799 return cfun->machine->some_ld_name;
13801 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
13802 if (NONDEBUG_INSN_P (insn)
13803 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
13804 return cfun->machine->some_ld_name;
13809 /* Meaning of CODE:
13810 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
13811 C -- print opcode suffix for set/cmov insn.
13812 c -- like C, but print reversed condition
13813 F,f -- likewise, but for floating-point.
13814 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
13816 R -- print the prefix for register names.
13817 z -- print the opcode suffix for the size of the current operand.
13818 Z -- likewise, with special suffixes for x87 instructions.
13819 * -- print a star (in certain assembler syntax)
13820 A -- print an absolute memory reference.
13821 w -- print the operand as if it's a "word" (HImode) even if it isn't.
13822 s -- print a shift double count, followed by the assembler's argument
13823 delimiter.
13824 b -- print the QImode name of the register for the indicated operand.
13825 %b0 would print %al if operands[0] is reg 0.
13826 w -- likewise, print the HImode name of the register.
13827 k -- likewise, print the SImode name of the register.
13828 q -- likewise, print the DImode name of the register.
13829 x -- likewise, print the V4SFmode name of the register.
13830 t -- likewise, print the V8SFmode name of the register.
13831 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
13832 y -- print "st(0)" instead of "st" as a register.
13833 d -- print duplicated register operand for AVX instruction.
13834 D -- print condition for SSE cmp instruction.
13835 P -- if PIC, print an @PLT suffix.
13836 X -- don't print any sort of PIC '@' suffix for a symbol.
13837 & -- print some in-use local-dynamic symbol name.
13838 H -- print a memory address offset by 8; used for sse high-parts
13839 Y -- print condition for XOP pcom* instruction.
13840 + -- print a branch hint as 'cs' or 'ds' prefix
13841 ; -- print a semicolon (after prefixes due to bug in older gas).
13842 @ -- print a segment register of thread base pointer load
13845 static void
13846 ix86_print_operand (FILE *file, rtx x, int code)
13853 if (ASSEMBLER_DIALECT == ASM_ATT)
13859 const char *name = get_some_local_dynamic_name ();
13861 output_operand_lossage ("'%%&' used without any "
13862 "local dynamic TLS references");
13864 assemble_name (file, name);
13869 switch (ASSEMBLER_DIALECT)
13876 /* Intel syntax. For absolute addresses, registers should not
13877 be surrounded by braces. */
13881 ix86_print_operand (file, x, 0);
13888 gcc_unreachable ();
13891 ix86_print_operand (file, x, 0);
13896 if (ASSEMBLER_DIALECT == ASM_ATT)
13901 if (ASSEMBLER_DIALECT == ASM_ATT)
13906 if (ASSEMBLER_DIALECT == ASM_ATT)
13911 if (ASSEMBLER_DIALECT == ASM_ATT)
13916 if (ASSEMBLER_DIALECT == ASM_ATT)
13921 if (ASSEMBLER_DIALECT == ASM_ATT)
13926 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
13928 /* Opcodes don't get size suffixes if using Intel opcodes. */
13929 if (ASSEMBLER_DIALECT == ASM_INTEL)
13932 switch (GET_MODE_SIZE (GET_MODE (x)))
13951 output_operand_lossage
13952 ("invalid operand size for operand code '%c'", code);
13957 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
13959 (0, "non-integer operand used with operand code '%c'", code);
13963 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
13964 if (ASSEMBLER_DIALECT == ASM_INTEL)
13967 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
13969 switch (GET_MODE_SIZE (GET_MODE (x)))
13972 #ifdef HAVE_AS_IX86_FILDS
13982 #ifdef HAVE_AS_IX86_FILDQ
13985 fputs ("ll", file);
13993 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
13995 /* 387 opcodes don't get size suffixes
13996 if the operands are registers. */
13997 if (STACK_REG_P (x))
14000 switch (GET_MODE_SIZE (GET_MODE (x)))
14021 output_operand_lossage
14022 ("invalid operand type used with operand code '%c'", code);
14026 output_operand_lossage
14027 ("invalid operand size for operand code '%c'", code);
14044 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
14046 ix86_print_operand (file, x, 0);
14047 fputs (", ", file);
14052 /* A little bit of braindamage here. The SSE compare instructions
14053 use completely different names for the comparisons than the
14054 fp conditional moves do. */
14057 switch (GET_CODE (x))
14060 fputs ("eq", file);
14063 fputs ("eq_us", file);
14066 fputs ("lt", file);
14069 fputs ("nge", file);
14072 fputs ("le", file);
14075 fputs ("ngt", file);
14078 fputs ("unord", file);
14081 fputs ("neq", file);
14084 fputs ("neq_oq", file);
14087 fputs ("ge", file);
14090 fputs ("nlt", file);
14093 fputs ("gt", file);
14096 fputs ("nle", file);
14099 fputs ("ord", file);
14102 output_operand_lossage ("operand is not a condition code, "
14103 "invalid operand code 'D'");
14109 switch (GET_CODE (x))
14113 fputs ("eq", file);
14117 fputs ("lt", file);
14121 fputs ("le", file);
14124 fputs ("unord", file);
14128 fputs ("neq", file);
14132 fputs ("nlt", file);
14136 fputs ("nle", file);
14139 fputs ("ord", file);
14142 output_operand_lossage ("operand is not a condition code, "
14143 "invalid operand code 'D'");
14149 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14150 if (ASSEMBLER_DIALECT == ASM_ATT)
14152 switch (GET_MODE (x))
14154 case HImode: putc ('w', file); break;
14156 case SFmode: putc ('l', file); break;
14158 case DFmode: putc ('q', file); break;
14159 default: gcc_unreachable ();
14166 if (!COMPARISON_P (x))
14168 output_operand_lossage ("operand is neither a constant nor a "
14169 "condition code, invalid operand code "
14173 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
14176 if (!COMPARISON_P (x))
14178 output_operand_lossage ("operand is neither a constant nor a "
14179 "condition code, invalid operand code "
14183 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14184 if (ASSEMBLER_DIALECT == ASM_ATT)
14187 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
14190 /* Like above, but reverse condition */
14192 /* Check to see if argument to %c is really a constant
14193 and not a condition code which needs to be reversed. */
14194 if (!COMPARISON_P (x))
14196 output_operand_lossage ("operand is neither a constant nor a "
14197 "condition code, invalid operand "
14201 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
14204 if (!COMPARISON_P (x))
14206 output_operand_lossage ("operand is neither a constant nor a "
14207 "condition code, invalid operand "
14211 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14212 if (ASSEMBLER_DIALECT == ASM_ATT)
14215 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
14219 /* It doesn't actually matter what mode we use here, as we're
14220 only going to use this for printing. */
14221 x = adjust_address_nv (x, DImode, 8);
14229 || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS)
14232 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
14235 int pred_val = INTVAL (XEXP (x, 0));
14237 if (pred_val < REG_BR_PROB_BASE * 45 / 100
14238 || pred_val > REG_BR_PROB_BASE * 55 / 100)
14240 int taken = pred_val > REG_BR_PROB_BASE / 2;
14241 int cputaken = final_forward_branch_p (current_output_insn) == 0;
14243 /* Emit hints only in cases where the default branch prediction
14244 heuristics would fail. */
14245 if (taken != cputaken)
14247 /* We use 3e (DS) prefix for taken branches and
14248 2e (CS) prefix for not taken branches. */
14250 fputs ("ds ; ", file);
14252 fputs ("cs ; ", file);
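/* So the output looks like, e.g., `ds ; jne .L2' to hint the branch
   as taken, or `cs ; jne .L2' to hint it as not taken. */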
14260 switch (GET_CODE (x))
14263 fputs ("neq", file);
14266 fputs ("eq", file);
14270 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
14274 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
14278 fputs ("le", file);
14282 fputs ("lt", file);
14285 fputs ("unord", file);
14288 fputs ("ord", file);
14291 fputs ("ueq", file);
14294 fputs ("nlt", file);
14297 fputs ("nle", file);
14300 fputs ("ule", file);
14303 fputs ("ult", file);
14306 fputs ("une", file);
14309 output_operand_lossage ("operand is not a condition code, "
14310 "invalid operand code 'Y'");
14316 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
14322 if (ASSEMBLER_DIALECT == ASM_ATT)
14325 /* The kernel uses a different segment register for performance
14326 reasons; a system call would not have to trash the userspace
14327 segment register, which would be expensive. */
14328 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
14329 fputs ("fs", file);
14331 fputs ("gs", file);
14335 output_operand_lossage ("invalid operand code '%c'", code);
14340 print_reg (x, code, file);
14342 else if (MEM_P (x))
14344 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
14345 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
14346 && GET_MODE (x) != BLKmode)
14349 switch (GET_MODE_SIZE (GET_MODE (x)))
14351 case 1: size = "BYTE"; break;
14352 case 2: size = "WORD"; break;
14353 case 4: size = "DWORD"; break;
14354 case 8: size = "QWORD"; break;
14355 case 12: size = "TBYTE"; break;
14357 if (GET_MODE (x) == XFmode)
14362 case 32: size = "YMMWORD"; break;
14364 gcc_unreachable ();
14367 /* Check for explicit size override (codes 'b', 'w' and 'k') */
14370 else if (code == 'w')
14372 else if (code == 'k')
14375 fputs (size, file);
14376 fputs (" PTR ", file);
14380 /* Avoid (%rip) for call operands. */
14381 if (CONSTANT_ADDRESS_P (x) && code == 'P'
14382 && !CONST_INT_P (x))
14383 output_addr_const (file, x);
14384 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
14385 output_operand_lossage ("invalid constraints for operand");
14387 output_address (x);
14390 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
14395 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
14396 REAL_VALUE_TO_TARGET_SINGLE (r, l);
14398 if (ASSEMBLER_DIALECT == ASM_ATT)
14400 /* Sign extend 32bit SFmode immediate to 8 bytes. */
14402 fprintf (file, "0x%08llx", (unsigned long long) (int) l);
14404 fprintf (file, "0x%08x", (unsigned int) l);
14407 /* These float cases don't actually occur as immediate operands. */
14408 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
14412 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
14413 fputs (dstr, file);
14416 else if (GET_CODE (x) == CONST_DOUBLE
14417 && GET_MODE (x) == XFmode)
14421 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
14422 fputs (dstr, file);
14427 /* We have patterns that allow zero sets of memory, for instance.
14428 In 64-bit mode, we should probably support all 8-byte vectors,
14429 since we can in fact encode that into an immediate. */
14430 if (GET_CODE (x) == CONST_VECTOR)
14432 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
14438 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
14440 if (ASSEMBLER_DIALECT == ASM_ATT)
14443 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
14444 || GET_CODE (x) == LABEL_REF)
14446 if (ASSEMBLER_DIALECT == ASM_ATT)
14449 fputs ("OFFSET FLAT:", file);
14452 if (CONST_INT_P (x))
14453 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14454 else if (flag_pic || MACHOPIC_INDIRECT)
14455 output_pic_addr_const (file, x, code);
14457 output_addr_const (file, x);
14461 static bool
14462 ix86_print_operand_punct_valid_p (unsigned char code)
14464 return (code == '@' || code == '*' || code == '+'
14465 || code == '&' || code == ';');
14468 /* Print a memory operand whose address is ADDR. */
14470 static void
14471 ix86_print_operand_address (FILE *file, rtx addr)
14473 struct ix86_address parts;
14474 rtx base, index, disp;
14476 int ok = ix86_decompose_address (addr, &parts);
14481 index = parts.index;
14483 scale = parts.scale;
14491 if (ASSEMBLER_DIALECT == ASM_ATT)
14493 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
14496 gcc_unreachable ();
14499 /* Use one byte shorter RIP relative addressing for 64bit mode. */
14500 if (TARGET_64BIT && !base && !index)
14504 if (GET_CODE (disp) == CONST
14505 && GET_CODE (XEXP (disp, 0)) == PLUS
14506 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
14507 symbol = XEXP (XEXP (disp, 0), 0);
14509 if (GET_CODE (symbol) == LABEL_REF
14510 || (GET_CODE (symbol) == SYMBOL_REF
14511 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
14514 if (!base && !index)
14516 /* A displacement-only address requires special attention. */
14518 if (CONST_INT_P (disp))
14520 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
14521 fputs ("ds:", file);
14522 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
14525 output_pic_addr_const (file, disp, 0);
14527 output_addr_const (file, disp);
14531 if (ASSEMBLER_DIALECT == ASM_ATT)
14536 output_pic_addr_const (file, disp, 0);
14537 else if (GET_CODE (disp) == LABEL_REF)
14538 output_asm_label (disp);
14540 output_addr_const (file, disp);
14545 print_reg (base, 0, file);
14549 print_reg (index, 0, file);
14551 fprintf (file, ",%d", scale);
14557 rtx offset = NULL_RTX;
14561 /* Pull out the offset of a symbol; print any symbol itself. */
14562 if (GET_CODE (disp) == CONST
14563 && GET_CODE (XEXP (disp, 0)) == PLUS
14564 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
14566 offset = XEXP (XEXP (disp, 0), 1);
14567 disp = gen_rtx_CONST (VOIDmode,
14568 XEXP (XEXP (disp, 0), 0));
14572 output_pic_addr_const (file, disp, 0);
14573 else if (GET_CODE (disp) == LABEL_REF)
14574 output_asm_label (disp);
14575 else if (CONST_INT_P (disp))
14578 output_addr_const (file, disp);
14584 print_reg (base, 0, file);
14587 if (INTVAL (offset) >= 0)
14589 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
14593 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
14600 print_reg (index, 0, file);
14602 fprintf (file, "*%d", scale);
14609 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
14611 static bool
14612 i386_asm_output_addr_const_extra (FILE *file, rtx x)
14616 if (GET_CODE (x) != UNSPEC)
14619 op = XVECEXP (x, 0, 0);
14620 switch (XINT (x, 1))
14622 case UNSPEC_GOTTPOFF:
14623 output_addr_const (file, op);
14624 /* FIXME: This might be @TPOFF in Sun ld. */
14625 fputs ("@gottpoff", file);
14628 output_addr_const (file, op);
14629 fputs ("@tpoff", file);
14631 case UNSPEC_NTPOFF:
14632 output_addr_const (file, op);
14634 fputs ("@tpoff", file);
14636 fputs ("@ntpoff", file);
14638 case UNSPEC_DTPOFF:
14639 output_addr_const (file, op);
14640 fputs ("@dtpoff", file);
14642 case UNSPEC_GOTNTPOFF:
14643 output_addr_const (file, op);
14645 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14646 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
14648 fputs ("@gotntpoff", file);
14650 case UNSPEC_INDNTPOFF:
14651 output_addr_const (file, op);
14652 fputs ("@indntpoff", file);
14655 case UNSPEC_MACHOPIC_OFFSET:
14656 output_addr_const (file, op);
14658 machopic_output_function_base_name (file);
14662 case UNSPEC_STACK_CHECK:
14666 gcc_assert (flag_split_stack);
14668 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
14669 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
14671 gcc_unreachable ();
14674 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
14685 /* Split one or more double-mode RTL references into pairs of half-mode
14686 references. The RTL can be REG, offsettable MEM, integer constant, or
14687 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
14688 split and "num" is its length. lo_half and hi_half are output arrays
14689 that parallel "operands". */
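/* For example, splitting a DImode pseudo on ia32 yields the SImode
   low word as the subreg at byte offset 0 and the high word at byte
   offset 4, while splitting (const_int -1) simply yields
   (const_int -1) for both halves. */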
14691 void
14692 split_double_mode (enum machine_mode mode, rtx operands[],
14693 int num, rtx lo_half[], rtx hi_half[])
14695 enum machine_mode half_mode;
14701 half_mode = DImode;
14704 half_mode = SImode;
14707 gcc_unreachable ();
14710 byte = GET_MODE_SIZE (half_mode);
14714 rtx op = operands[num];
14716 /* simplify_subreg refuses to split volatile memory addresses,
14717 but we still have to handle them. */
14720 lo_half[num] = adjust_address (op, half_mode, 0);
14721 hi_half[num] = adjust_address (op, half_mode, byte);
14725 lo_half[num] = simplify_gen_subreg (half_mode, op,
14726 GET_MODE (op) == VOIDmode
14727 ? mode : GET_MODE (op), 0);
14728 hi_half[num] = simplify_gen_subreg (half_mode, op,
14729 GET_MODE (op) == VOIDmode
14730 ? mode : GET_MODE (op), byte);
14735 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
14736 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
14737 is the expression of the binary operation. The output may either be
14738 emitted here, or returned to the caller, like all output_* functions.
14740 There is no guarantee that the operands are the same mode, as they
14741 might be within FLOAT or FLOAT_EXTEND expressions. */
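/* For instance (illustrative of the code below), an SFmode PLUS on
   SSE yields "addss\t{%2, %0|%0, %2}" (or the three-operand AVX
   form), while the x87 path picks an fadd variant based on which
   operand is st(0) and whether it dies. */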
14743 #ifndef SYSV386_COMPAT
14744 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
14745 wants to fix the assemblers because that causes incompatibility
14746 with gcc. No-one wants to fix gcc because that causes
14747 incompatibility with assemblers... You can use the option of
14748 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
14749 #define SYSV386_COMPAT 1
14752 const char *
14753 output_387_binary_op (rtx insn, rtx *operands)
14755 static char buf[40];
14758 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
14760 #ifdef ENABLE_CHECKING
14761 /* Even if we do not want to check the inputs, this documents the input
14762 constraints, which helps in understanding the following code. */
14763 if (STACK_REG_P (operands[0])
14764 && ((REG_P (operands[1])
14765 && REGNO (operands[0]) == REGNO (operands[1])
14766 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
14767 || (REG_P (operands[2])
14768 && REGNO (operands[0]) == REGNO (operands[2])
14769 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
14770 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
14773 gcc_assert (is_sse);
14776 switch (GET_CODE (operands[3]))
14779 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14780 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14788 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14789 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14797 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14798 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14806 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14807 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14815 gcc_unreachable ();
14822 strcpy (buf, ssep);
14823 if (GET_MODE (operands[0]) == SFmode)
14824 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
14826 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
14830 strcpy (buf, ssep + 1);
14831 if (GET_MODE (operands[0]) == SFmode)
14832 strcat (buf, "ss\t{%2, %0|%0, %2}");
14834 strcat (buf, "sd\t{%2, %0|%0, %2}");
14840 switch (GET_CODE (operands[3]))
14844 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
14846 rtx temp = operands[2];
14847 operands[2] = operands[1];
14848 operands[1] = temp;
14851 /* We know operands[0] == operands[1]. */
14853 if (MEM_P (operands[2]))
14859 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
14861 if (STACK_TOP_P (operands[0]))
14862 /* How is it that we are storing to a dead operand[2]?
14863 Well, presumably operands[1] is dead too. We can't
14864 store the result to st(0) as st(0) gets popped on this
14865 instruction. Instead store to operands[2] (which I
14866 think has to be st(1)). st(1) will be popped later.
14867 gcc <= 2.8.1 didn't have this check and generated
14868 assembly code that the Unixware assembler rejected. */
14869 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
14871 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
14875 if (STACK_TOP_P (operands[0]))
14876 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
14878 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
14883 if (MEM_P (operands[1]))
14889 if (MEM_P (operands[2]))
14895 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
14898 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
14899 derived assemblers, confusingly reverse the direction of
14900 the operation for fsub{r} and fdiv{r} when the
14901 destination register is not st(0). The Intel assembler
14902 doesn't have this brain damage. Read !SYSV386_COMPAT to
14903 figure out what the hardware really does. */
14904 if (STACK_TOP_P (operands[0]))
14905 p = "{p\t%0, %2|rp\t%2, %0}";
14907 p = "{rp\t%2, %0|p\t%0, %2}";
14909 if (STACK_TOP_P (operands[0]))
14910 /* As above for fmul/fadd, we can't store to st(0). */
14911 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
14913 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
14918 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
14921 if (STACK_TOP_P (operands[0]))
14922 p = "{rp\t%0, %1|p\t%1, %0}";
14924 p = "{p\t%1, %0|rp\t%0, %1}";
14926 if (STACK_TOP_P (operands[0]))
14927 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
14929 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
14934 if (STACK_TOP_P (operands[0]))
14936 if (STACK_TOP_P (operands[1]))
14937 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
14939 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
14942 else if (STACK_TOP_P (operands[1]))
14945 p = "{\t%1, %0|r\t%0, %1}";
14947 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
14953 p = "{r\t%2, %0|\t%0, %2}";
14955 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
14961 gcc_unreachable ();
14968 /* Return needed mode for entity in optimize_mode_switching pass. */
14970 int
14971 ix86_mode_needed (int entity, rtx insn)
14973 enum attr_i387_cw mode;
14975 /* The mode UNINITIALIZED is used to store the control word after a
14976 function call or ASM pattern. The mode ANY specifies that the function
14977 has no requirements on the control word and makes no changes to the
14978 bits we are interested in. */
14981 || (NONJUMP_INSN_P (insn)
14982 && (asm_noperands (PATTERN (insn)) >= 0
14983 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
14984 return I387_CW_UNINITIALIZED;
14986 if (recog_memoized (insn) < 0)
14987 return I387_CW_ANY;
14989 mode = get_attr_i387_cw (insn);
14994 if (mode == I387_CW_TRUNC)
14999 if (mode == I387_CW_FLOOR)
15004 if (mode == I387_CW_CEIL)
15009 if (mode == I387_CW_MASK_PM)
15014 gcc_unreachable ();
15017 return I387_CW_ANY;
15020 /* Output code to initialize control word copies used by trunc?f?i and
15021 rounding patterns. CURRENT_MODE is set to the current control word,
15022 while NEW_MODE is set to the new control word. */
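/* For reference: bits 10-11 of the x87 control word form the rounding
   control field (00 = to nearest, 01 = down, 10 = up, 11 = toward
   zero), so or-ing in 0x0400 selects floor, 0x0800 ceil and 0x0c00
   truncation; bit 5 (0x0020) masks the precision exception. */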
15024 void
15025 emit_i387_cw_initialization (int mode)
15027 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
15030 enum ix86_stack_slot slot;
15032 rtx reg = gen_reg_rtx (HImode);
15034 emit_insn (gen_x86_fnstcw_1 (stored_mode));
15035 emit_move_insn (reg, copy_rtx (stored_mode));
15037 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
15038 || optimize_function_for_size_p (cfun))
15042 case I387_CW_TRUNC:
15043 /* round toward zero (truncate) */
15044 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
15045 slot = SLOT_CW_TRUNC;
15048 case I387_CW_FLOOR:
15049 /* round down toward -oo */
15050 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
15051 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
15052 slot = SLOT_CW_FLOOR;
15056 /* round up toward +oo */
15057 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
15058 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
15059 slot = SLOT_CW_CEIL;
15062 case I387_CW_MASK_PM:
15063 /* mask precision exception for nearbyint() */
15064 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
15065 slot = SLOT_CW_MASK_PM;
15069 gcc_unreachable ();
15076 case I387_CW_TRUNC:
15077 /* round toward zero (truncate) */
15078 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
15079 slot = SLOT_CW_TRUNC;
15082 case I387_CW_FLOOR:
15083 /* round down toward -oo */
15084 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
15085 slot = SLOT_CW_FLOOR;
15089 /* round up toward +oo */
15090 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
15091 slot = SLOT_CW_CEIL;
15094 case I387_CW_MASK_PM:
15095 /* mask precision exception for nearbyint() */
15096 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
15097 slot = SLOT_CW_MASK_PM;
15101 gcc_unreachable ();
15105 gcc_assert (slot < MAX_386_STACK_LOCALS);
15107 new_mode = assign_386_stack_local (HImode, slot);
15108 emit_move_insn (new_mode, reg);
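/* Illustrative sketch of the HImode path above (register names and slot
   names are arbitrary): for I387_CW_TRUNC the emitted RTL corresponds
   roughly to

	fnstcw	stored_cw		; save current control word
	mov	stored_cw, %ax
	or	$0x0c00, %ax		; RC bits (11:10) = 11b, round toward zero
	mov	%ax, slot_cw_trunc	; mode switching later does fldcw of this slot

   The x87 rounding-control field occupies bits 10-11 of the control
   word: 00 = to nearest, 01 = toward -oo, 10 = toward +oo, 11 = toward
   zero; bit 5 (0x0020) masks the precision exception.  */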
15111 /* Output code for INSN to convert a float to a signed int. OPERANDS
15112 are the insn operands. The output may be [HSD]Imode and the input
15113 operand may be [SDX]Fmode. */
15116 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
15118 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
15119 int dimode_p = GET_MODE (operands[0]) == DImode;
15120 int round_mode = get_attr_i387_cw (insn);
15122 /* Jump through a hoop or two for DImode, since the hardware has no
15123 non-popping instruction. We used to do this a different way, but
15124 that was somewhat fragile and broke with post-reload splitters. */
15125 if ((dimode_p || fisttp) && !stack_top_dies)
15126 output_asm_insn ("fld\t%y1", operands);
15128 gcc_assert (STACK_TOP_P (operands[1]));
15129 gcc_assert (MEM_P (operands[0]));
15130 gcc_assert (GET_MODE (operands[1]) != TFmode);
15133 output_asm_insn ("fisttp%Z0\t%0", operands);
15136 if (round_mode != I387_CW_ANY)
15137 output_asm_insn ("fldcw\t%3", operands);
15138 if (stack_top_dies || dimode_p)
15139 output_asm_insn ("fistp%Z0\t%0", operands);
15141 output_asm_insn ("fist%Z0\t%0", operands);
15142 if (round_mode != I387_CW_ANY)
15143 output_asm_insn ("fldcw\t%2", operands);
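/* Illustrative sketch (operand numbers as in the code above): for a
   DImode fix with a stack top that stays live and a non-default
   rounding mode, the output is roughly

	fld	%st(0)		; keep the value, since fistp pops
	fldcw	%3		; switch to the truncating control word
	fistpll	%0		; store the 64-bit integer and pop
	fldcw	%2		; restore the previous control word  */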
15149 /* Output code for x87 ffreep insn. The OPNO argument, which may only
15150 have the values zero or one, indicates the ffreep insn's operand
15151 from the OPERANDS array. */
15153 static const char *
15154 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
15156 if (TARGET_USE_FFREEP)
15157 #ifdef HAVE_AS_IX86_FFREEP
15158 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
15161 static char retval[32];
15162 int regno = REGNO (operands[opno]);
15164 gcc_assert (FP_REGNO_P (regno));
15166 regno -= FIRST_STACK_REG;
15168 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
15173 return opno ? "fstp\t%y1" : "fstp\t%y0";
15177 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
15178 should be used. UNORDERED_P is true when fucom should be used. */
15181 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
15183 int stack_top_dies;
15184 rtx cmp_op0, cmp_op1;
15185 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
15189 cmp_op0 = operands[0];
15190 cmp_op1 = operands[1];
15194 cmp_op0 = operands[1];
15195 cmp_op1 = operands[2];
15200 static const char ucomiss[] = "vucomiss\t{%1, %0|%0, %1}";
15201 static const char ucomisd[] = "vucomisd\t{%1, %0|%0, %1}";
15202 static const char comiss[] = "vcomiss\t{%1, %0|%0, %1}";
15203 static const char comisd[] = "vcomisd\t{%1, %0|%0, %1}";
15205 if (GET_MODE (operands[0]) == SFmode)
15207 return &ucomiss[TARGET_AVX ? 0 : 1];
15209 return &comiss[TARGET_AVX ? 0 : 1];
15212 return &ucomisd[TARGET_AVX ? 0 : 1];
15214 return &comisd[TARGET_AVX ? 0 : 1];
15217 gcc_assert (STACK_TOP_P (cmp_op0));
15219 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
15221 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
15223 if (stack_top_dies)
15225 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
15226 return output_387_ffreep (operands, 1);
15229 return "ftst\n\tfnstsw\t%0";
15232 if (STACK_REG_P (cmp_op1)
15234 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
15235 && REGNO (cmp_op1) != FIRST_STACK_REG)
/* If the top of the 387 stack dies, and the other operand is also a
   stack register that dies, then this must be a `fcompp' float
   compare.  */
15243 /* There is no double popping fcomi variant. Fortunately,
15244 eflags is immune from the fstp's cc clobbering. */
15246 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
15248 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
15249 return output_387_ffreep (operands, 0);
15254 return "fucompp\n\tfnstsw\t%0";
15256 return "fcompp\n\tfnstsw\t%0";
15261 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
15263 static const char * const alt[16] =
15265 "fcom%Z2\t%y2\n\tfnstsw\t%0",
15266 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
15267 "fucom%Z2\t%y2\n\tfnstsw\t%0",
15268 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
15270 "ficom%Z2\t%y2\n\tfnstsw\t%0",
15271 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
15275 "fcomi\t{%y1, %0|%0, %y1}",
15276 "fcomip\t{%y1, %0|%0, %y1}",
15277 "fucomi\t{%y1, %0|%0, %y1}",
15278 "fucomip\t{%y1, %0|%0, %y1}",
15289 mask = eflags_p << 3;
15290 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
15291 mask |= unordered_p << 1;
15292 mask |= stack_top_dies;
15294 gcc_assert (mask < 16);
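/* For example, "fcomip\t{%y1, %0|%0, %y1}" is selected by mask 9:
   eflags_p (1) << 3 | integer operand (0) << 2 | unordered_p (0) << 1
   | stack_top_dies (1) == 0b1001.  */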
15303 ix86_output_addr_vec_elt (FILE *file, int value)
15305 const char *directive = ASM_LONG;
15309 directive = ASM_QUAD;
15311 gcc_assert (!TARGET_64BIT);
15314 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
15318 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
15320 const char *directive = ASM_LONG;
15323 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
15324 directive = ASM_QUAD;
15326 gcc_assert (!TARGET_64BIT);
15328 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
15329 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
15330 fprintf (file, "%s%s%d-%s%d\n",
15331 directive, LPREFIX, value, LPREFIX, rel);
15332 else if (HAVE_AS_GOTOFF_IN_DATA)
15333 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
15335 else if (TARGET_MACHO)
15337 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
15338 machopic_output_function_base_name (file);
15343 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
15344 GOT_SYMBOL_NAME, LPREFIX, value);
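/* For instance (label numbers invented for illustration), the
   HAVE_AS_GOTOFF_IN_DATA branch emits a 32-bit PIC jump table entry
   such as

	.long	.L5@GOTOFF

   while the 64-bit/VxWorks branch emits a label difference such as

	.long	.L5-.L2		(or .quad when CASE_VECTOR_MODE is DImode)  */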
/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate.  */
15351 ix86_expand_clear (rtx dest)
15355 /* We play register width games, which are only valid after reload. */
15356 gcc_assert (reload_completed);
15358 /* Avoid HImode and its attendant prefix byte. */
15359 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
15360 dest = gen_rtx_REG (SImode, REGNO (dest));
15361 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
15363 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
15364 if (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ())
15366 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15367 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
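/* E.g. clearing %eax expands to "xorl %eax, %eax" plus the explicit
   (clobber (reg:CC FLAGS_REG)) in the PARALLEL above, so later passes
   know the flags are destroyed; on TARGET_USE_MOV0 targets when
   optimizing for size, the plain "movl $0, %eax" form without the
   clobber is kept instead.  */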
15373 /* X is an unchanging MEM. If it is a constant pool reference, return
15374 the constant pool rtx, else NULL. */
15377 maybe_get_pool_constant (rtx x)
15379 x = ix86_delegitimize_address (XEXP (x, 0));
15381 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
15382 return get_pool_constant (x);
15388 ix86_expand_move (enum machine_mode mode, rtx operands[])
15391 enum tls_model model;
15396 if (GET_CODE (op1) == SYMBOL_REF)
15398 model = SYMBOL_REF_TLS_MODEL (op1);
15401 op1 = legitimize_tls_address (op1, model, true);
15402 op1 = force_operand (op1, op0);
15406 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15407 && SYMBOL_REF_DLLIMPORT_P (op1))
15408 op1 = legitimize_dllimport_symbol (op1, false);
15410 else if (GET_CODE (op1) == CONST
15411 && GET_CODE (XEXP (op1, 0)) == PLUS
15412 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
15414 rtx addend = XEXP (XEXP (op1, 0), 1);
15415 rtx symbol = XEXP (XEXP (op1, 0), 0);
15418 model = SYMBOL_REF_TLS_MODEL (symbol);
15420 tmp = legitimize_tls_address (symbol, model, true);
15421 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15422 && SYMBOL_REF_DLLIMPORT_P (symbol))
15423 tmp = legitimize_dllimport_symbol (symbol, true);
15427 tmp = force_operand (tmp, NULL);
15428 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
15429 op0, 1, OPTAB_DIRECT);
15435 if ((flag_pic || MACHOPIC_INDIRECT)
15436 && mode == Pmode && symbolic_operand (op1, Pmode))
15438 if (TARGET_MACHO && !TARGET_64BIT)
15441 /* dynamic-no-pic */
15442 if (MACHOPIC_INDIRECT)
15444 rtx temp = ((reload_in_progress
15445 || ((op0 && REG_P (op0))
15447 ? op0 : gen_reg_rtx (Pmode));
15448 op1 = machopic_indirect_data_reference (op1, temp);
15450 op1 = machopic_legitimize_pic_address (op1, mode,
15451 temp == op1 ? 0 : temp);
15453 if (op0 != op1 && GET_CODE (op0) != MEM)
15455 rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
15459 if (GET_CODE (op0) == MEM)
15460 op1 = force_reg (Pmode, op1);
15464 if (GET_CODE (temp) != REG)
15465 temp = gen_reg_rtx (Pmode);
15466 temp = legitimize_pic_address (op1, temp);
15471 /* dynamic-no-pic */
15477 op1 = force_reg (Pmode, op1);
15478 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
15480 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
15481 op1 = legitimize_pic_address (op1, reg);
15490 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
15491 || !push_operand (op0, mode))
15493 op1 = force_reg (mode, op1);
15495 if (push_operand (op0, mode)
15496 && ! general_no_elim_operand (op1, mode))
15497 op1 = copy_to_mode_reg (mode, op1);
/* Force large constants in 64bit compilation into a register
   to get them CSEed.  */
15501 if (can_create_pseudo_p ()
15502 && (mode == DImode) && TARGET_64BIT
15503 && immediate_operand (op1, mode)
15504 && !x86_64_zext_immediate_operand (op1, VOIDmode)
15505 && !register_operand (op0, mode)
15507 op1 = copy_to_mode_reg (mode, op1);
15509 if (can_create_pseudo_p ()
15510 && FLOAT_MODE_P (mode)
15511 && GET_CODE (op1) == CONST_DOUBLE)
/* If we are loading a floating point constant to a register,
   force the value to memory now, since we'll get better code
   out of the back end.  */
15517 op1 = validize_mem (force_const_mem (mode, op1));
15518 if (!register_operand (op0, mode))
15520 rtx temp = gen_reg_rtx (mode);
15521 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
15522 emit_move_insn (op0, temp);
15528 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
15532 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
15534 rtx op0 = operands[0], op1 = operands[1];
15535 unsigned int align = GET_MODE_ALIGNMENT (mode);
/* Force constants other than zero into memory.  We do not know how
   the instructions used to build constants modify the upper 64 bits
   of the register; once we have that information, we may be able
   to handle some of them more efficiently.  */
15541 if (can_create_pseudo_p ()
15542 && register_operand (op0, mode)
15543 && (CONSTANT_P (op1)
15544 || (GET_CODE (op1) == SUBREG
15545 && CONSTANT_P (SUBREG_REG (op1))))
15546 && !standard_sse_constant_p (op1))
15547 op1 = validize_mem (force_const_mem (mode, op1));
/* We need to check memory alignment for SSE mode since the attribute
   can make operands unaligned.  */
15551 if (can_create_pseudo_p ()
15552 && SSE_REG_MODE_P (mode)
15553 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
15554 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
15558 /* ix86_expand_vector_move_misalign() does not like constants ... */
15559 if (CONSTANT_P (op1)
15560 || (GET_CODE (op1) == SUBREG
15561 && CONSTANT_P (SUBREG_REG (op1))))
15562 op1 = validize_mem (force_const_mem (mode, op1));
15564 /* ... nor both arguments in memory. */
15565 if (!register_operand (op0, mode)
15566 && !register_operand (op1, mode))
15567 op1 = force_reg (mode, op1);
15569 tmp[0] = op0; tmp[1] = op1;
15570 ix86_expand_vector_move_misalign (mode, tmp);
15574 /* Make operand1 a register if it isn't already. */
15575 if (can_create_pseudo_p ()
15576 && !register_operand (op0, mode)
15577 && !register_operand (op1, mode))
15579 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
15583 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
15586 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
15587 straight to ix86_expand_vector_move. */
/* Code generation for scalar reg-reg moves of single and double precision data:
     if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
       movaps reg, reg
     else
       movss reg, reg
     if (x86_sse_partial_reg_dependency == true)
       movapd reg, reg
     else
       movsd reg, reg

   Code generation for scalar loads of double precision data:
     if (x86_sse_split_regs == true)
       movlpd mem, reg      (gas syntax)
     else
       movsd mem, reg

   Code generation for unaligned packed loads of single precision data
   (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
     if (x86_sse_unaligned_move_optimal)
       movups mem, reg
     if (x86_sse_partial_reg_dependency == true)
       xorps reg, reg; movlps mem, reg; movhps mem+8, reg
     else
       movlps mem, reg; movhps mem+8, reg

   Code generation for unaligned packed loads of double precision data
   (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
     if (x86_sse_unaligned_move_optimal)
       movupd mem, reg
     if (x86_sse_split_regs == true)
       movlpd mem, reg; movhpd mem+8, reg
     else
       movsd mem, reg; unpcklpd reg, reg  */
15639 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
15648 switch (GET_MODE_CLASS (mode))
15650 case MODE_VECTOR_INT:
15652 switch (GET_MODE_SIZE (mode))
15655 /* If we're optimizing for size, movups is the smallest. */
15656 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15658 op0 = gen_lowpart (V4SFmode, op0);
15659 op1 = gen_lowpart (V4SFmode, op1);
15660 emit_insn (gen_avx_movups (op0, op1));
15663 op0 = gen_lowpart (V16QImode, op0);
15664 op1 = gen_lowpart (V16QImode, op1);
15665 emit_insn (gen_avx_movdqu (op0, op1));
15668 op0 = gen_lowpart (V32QImode, op0);
15669 op1 = gen_lowpart (V32QImode, op1);
15670 emit_insn (gen_avx_movdqu256 (op0, op1));
15673 gcc_unreachable ();
15676 case MODE_VECTOR_FLOAT:
15677 op0 = gen_lowpart (mode, op0);
15678 op1 = gen_lowpart (mode, op1);
15683 emit_insn (gen_avx_movups (op0, op1));
15686 emit_insn (gen_avx_movups256 (op0, op1));
15689 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15691 op0 = gen_lowpart (V4SFmode, op0);
15692 op1 = gen_lowpart (V4SFmode, op1);
15693 emit_insn (gen_avx_movups (op0, op1));
15696 emit_insn (gen_avx_movupd (op0, op1));
15699 emit_insn (gen_avx_movupd256 (op0, op1));
15702 gcc_unreachable ();
15707 gcc_unreachable ();
15715 /* If we're optimizing for size, movups is the smallest. */
15716 if (optimize_insn_for_size_p ()
15717 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15719 op0 = gen_lowpart (V4SFmode, op0);
15720 op1 = gen_lowpart (V4SFmode, op1);
15721 emit_insn (gen_sse_movups (op0, op1));
/* ??? If we have typed data, then it would appear that using
   movdqu is the only way to get unaligned data loaded with
   integer type.  */
15728 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
15730 op0 = gen_lowpart (V16QImode, op0);
15731 op1 = gen_lowpart (V16QImode, op1);
15732 emit_insn (gen_sse2_movdqu (op0, op1));
15736 if (TARGET_SSE2 && mode == V2DFmode)
15740 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
15742 op0 = gen_lowpart (V2DFmode, op0);
15743 op1 = gen_lowpart (V2DFmode, op1);
15744 emit_insn (gen_sse2_movupd (op0, op1));
15748 /* When SSE registers are split into halves, we can avoid
15749 writing to the top half twice. */
15750 if (TARGET_SSE_SPLIT_REGS)
15752 emit_clobber (op0);
15757 /* ??? Not sure about the best option for the Intel chips.
15758 The following would seem to satisfy; the register is
15759 entirely cleared, breaking the dependency chain. We
15760 then store to the upper half, with a dependency depth
15761 of one. A rumor has it that Intel recommends two movsd
15762 followed by an unpacklpd, but this is unconfirmed. And
15763 given that the dependency depth of the unpacklpd would
15764 still be one, I'm not sure why this would be better. */
15765 zero = CONST0_RTX (V2DFmode);
15768 m = adjust_address (op1, DFmode, 0);
15769 emit_insn (gen_sse2_loadlpd (op0, zero, m));
15770 m = adjust_address (op1, DFmode, 8);
15771 emit_insn (gen_sse2_loadhpd (op0, op0, m));
15775 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
15777 op0 = gen_lowpart (V4SFmode, op0);
15778 op1 = gen_lowpart (V4SFmode, op1);
15779 emit_insn (gen_sse_movups (op0, op1));
15783 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
15784 emit_move_insn (op0, CONST0_RTX (mode));
15786 emit_clobber (op0);
15788 if (mode != V4SFmode)
15789 op0 = gen_lowpart (V4SFmode, op0);
15790 m = adjust_address (op1, V2SFmode, 0);
15791 emit_insn (gen_sse_loadlps (op0, op0, m));
15792 m = adjust_address (op1, V2SFmode, 8);
15793 emit_insn (gen_sse_loadhps (op0, op0, m));
15796 else if (MEM_P (op0))
15798 /* If we're optimizing for size, movups is the smallest. */
15799 if (optimize_insn_for_size_p ()
15800 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15802 op0 = gen_lowpart (V4SFmode, op0);
15803 op1 = gen_lowpart (V4SFmode, op1);
15804 emit_insn (gen_sse_movups (op0, op1));
15808 /* ??? Similar to above, only less clear because of quote
15809 typeless stores unquote. */
15810 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
15811 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
15813 op0 = gen_lowpart (V16QImode, op0);
15814 op1 = gen_lowpart (V16QImode, op1);
15815 emit_insn (gen_sse2_movdqu (op0, op1));
15819 if (TARGET_SSE2 && mode == V2DFmode)
15821 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
15823 op0 = gen_lowpart (V2DFmode, op0);
15824 op1 = gen_lowpart (V2DFmode, op1);
15825 emit_insn (gen_sse2_movupd (op0, op1));
15829 m = adjust_address (op0, DFmode, 0);
15830 emit_insn (gen_sse2_storelpd (m, op1));
15831 m = adjust_address (op0, DFmode, 8);
15832 emit_insn (gen_sse2_storehpd (m, op1));
15837 if (mode != V4SFmode)
15838 op1 = gen_lowpart (V4SFmode, op1);
15840 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
15842 op0 = gen_lowpart (V4SFmode, op0);
15843 emit_insn (gen_sse_movups (op0, op1));
15847 m = adjust_address (op0, V2SFmode, 0);
15848 emit_insn (gen_sse_storelps (m, op1));
15849 m = adjust_address (op0, V2SFmode, 8);
15850 emit_insn (gen_sse_storehps (m, op1));
15855 gcc_unreachable ();
15858 /* Expand a push in MODE. This is some mode for which we do not support
15859 proper push instructions, at least from the registers that we expect
15860 the value to live in. */
15863 ix86_expand_push (enum machine_mode mode, rtx x)
15867 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
15868 GEN_INT (-GET_MODE_SIZE (mode)),
15869 stack_pointer_rtx, 1, OPTAB_DIRECT);
15870 if (tmp != stack_pointer_rtx)
15871 emit_move_insn (stack_pointer_rtx, tmp);
15873 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
/* When we push an operand onto the stack, it has to be aligned at least
   at the function argument boundary.  However since we don't have
   the argument type, we can't determine the actual argument
   boundary.  */
15879 emit_move_insn (tmp, x);
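/* Illustrative sketch: pushing a 16-byte value on a 32-bit target
   lowers to roughly

	subl	$16, %esp		; the expand_simple_binop above
	mov*	%xmm0, (%esp)		; the emit_move_insn above

   (register and exact move instruction are arbitrary here; the move
   is chosen later by the normal move expanders).  */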
15882 /* Helper function of ix86_fixup_binary_operands to canonicalize
15883 operand order. Returns true if the operands should be swapped. */
15886 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
15889 rtx dst = operands[0];
15890 rtx src1 = operands[1];
15891 rtx src2 = operands[2];
15893 /* If the operation is not commutative, we can't do anything. */
15894 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
15897 /* Highest priority is that src1 should match dst. */
15898 if (rtx_equal_p (dst, src1))
15900 if (rtx_equal_p (dst, src2))
15903 /* Next highest priority is that immediate constants come second. */
15904 if (immediate_operand (src2, mode))
15906 if (immediate_operand (src1, mode))
15909 /* Lowest priority is that memory references should come second. */
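/* For example, (plus:SI (mem:SI X) (reg:SI 1)) is swapped to put the
   register first, and (plus:SI (const_int 3) (reg:SI 1)) is swapped so
   the immediate ends up second, matching the insn constraints.  */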
15919 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
15920 destination to use for the operation. If different from the true
15921 destination in operands[0], a copy operation will be required. */
15924 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
15927 rtx dst = operands[0];
15928 rtx src1 = operands[1];
15929 rtx src2 = operands[2];
15931 /* Canonicalize operand order. */
15932 if (ix86_swap_binary_operands_p (code, mode, operands))
15936 /* It is invalid to swap operands of different modes. */
15937 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
15944 /* Both source operands cannot be in memory. */
15945 if (MEM_P (src1) && MEM_P (src2))
15947 /* Optimization: Only read from memory once. */
15948 if (rtx_equal_p (src1, src2))
15950 src2 = force_reg (mode, src2);
15954 src2 = force_reg (mode, src2);
15957 /* If the destination is memory, and we do not have matching source
15958 operands, do things in registers. */
15959 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
15960 dst = gen_reg_rtx (mode);
15962 /* Source 1 cannot be a constant. */
15963 if (CONSTANT_P (src1))
15964 src1 = force_reg (mode, src1);
15966 /* Source 1 cannot be a non-matching memory. */
15967 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
15968 src1 = force_reg (mode, src1);
15970 operands[1] = src1;
15971 operands[2] = src2;
15975 /* Similarly, but assume that the destination has already been
15976 set up properly. */
15979 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
15980 enum machine_mode mode, rtx operands[])
15982 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
15983 gcc_assert (dst == operands[0]);
/* Attempt to expand a binary operator.  Make the expansion closer to the
   actual machine than just general_operand, which will allow 3 separate
   memory references (one output, two input) in a single insn.  */
15991 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
15994 rtx src1, src2, dst, op, clob;
15996 dst = ix86_fixup_binary_operands (code, mode, operands);
15997 src1 = operands[1];
15998 src2 = operands[2];
16000 /* Emit the instruction. */
16002 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
16003 if (reload_in_progress)
16005 /* Reload doesn't know about the flags register, and doesn't know that
16006 it doesn't want to clobber it. We can only do this with PLUS. */
16007 gcc_assert (code == PLUS);
16010 else if (reload_completed
16012 && !rtx_equal_p (dst, src1))
16014 /* This is going to be an LEA; avoid splitting it later. */
16019 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16020 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
16023 /* Fix up the destination if needed. */
16024 if (dst != operands[0])
16025 emit_move_insn (operands[0], dst);
16028 /* Return TRUE or FALSE depending on whether the binary operator meets the
16029 appropriate constraints. */
16032 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
16035 rtx dst = operands[0];
16036 rtx src1 = operands[1];
16037 rtx src2 = operands[2];
16039 /* Both source operands cannot be in memory. */
16040 if (MEM_P (src1) && MEM_P (src2))
16043 /* Canonicalize operand order for commutative operators. */
16044 if (ix86_swap_binary_operands_p (code, mode, operands))
16051 /* If the destination is memory, we must have a matching source operand. */
16052 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
16055 /* Source 1 cannot be a constant. */
16056 if (CONSTANT_P (src1))
16059 /* Source 1 cannot be a non-matching memory. */
16060 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
16062 /* Support "andhi/andsi/anddi" as a zero-extending move. */
16063 return (code == AND
16066 || (TARGET_64BIT && mode == DImode))
16067 && CONST_INT_P (src2)
16068 && (INTVAL (src2) == 0xff
16069 || INTVAL (src2) == 0xffff));
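/* E.g. (set (reg:SI 0) (and:SI (mem:SI X) (const_int 0xff))) is
   allowed despite the non-matching memory source, since it can be
   emitted as the zero-extending load "movzbl X, %eax".  */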
/* Attempt to expand a unary operator.  Make the expansion closer to the
   actual machine than just general_operand, which will allow 2 separate
   memory references (one output, one input) in a single insn.  */
16080 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
16083 int matching_memory;
16084 rtx src, dst, op, clob;
16089 /* If the destination is memory, and we do not have matching source
16090 operands, do things in registers. */
16091 matching_memory = 0;
16094 if (rtx_equal_p (dst, src))
16095 matching_memory = 1;
16097 dst = gen_reg_rtx (mode);
16100 /* When source operand is memory, destination must match. */
16101 if (MEM_P (src) && !matching_memory)
16102 src = force_reg (mode, src);
16104 /* Emit the instruction. */
16106 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
16107 if (reload_in_progress || code == NOT)
16109 /* Reload doesn't know about the flags register, and doesn't know that
16110 it doesn't want to clobber it. */
16111 gcc_assert (code == NOT);
16116 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16117 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
16120 /* Fix up the destination if needed. */
16121 if (dst != operands[0])
16122 emit_move_insn (operands[0], dst);
/* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
   divisor are within the range [0-255].  */
16129 ix86_split_idivmod (enum machine_mode mode, rtx operands[],
16132 rtx end_label, qimode_label;
16133 rtx insn, div, mod;
16134 rtx scratch, tmp0, tmp1, tmp2;
16135 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
16136 rtx (*gen_zero_extend) (rtx, rtx);
16137 rtx (*gen_test_ccno_1) (rtx, rtx);
16142 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
16143 gen_test_ccno_1 = gen_testsi_ccno_1;
16144 gen_zero_extend = gen_zero_extendqisi2;
16147 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
16148 gen_test_ccno_1 = gen_testdi_ccno_1;
16149 gen_zero_extend = gen_zero_extendqidi2;
16152 gcc_unreachable ();
16155 end_label = gen_label_rtx ();
16156 qimode_label = gen_label_rtx ();
16158 scratch = gen_reg_rtx (mode);
/* Use 8bit unsigned divmod if dividend and divisor are within
   the range [0-255].  */
16162 emit_move_insn (scratch, operands[2]);
16163 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
16164 scratch, 1, OPTAB_DIRECT);
16165 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
16166 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
16167 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
16168 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
16169 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
16171 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
16172 predict_jump (REG_BR_PROB_BASE * 50 / 100);
16173 JUMP_LABEL (insn) = qimode_label;
/* Generate original signed/unsigned divmod.  */
16176 div = gen_divmod4_1 (operands[0], operands[1],
16177 operands[2], operands[3]);
16180 /* Branch to the end. */
16181 emit_jump_insn (gen_jump (end_label));
16184 /* Generate 8bit unsigned divide. */
16185 emit_label (qimode_label);
/* Don't use operands[0] for the result of the 8bit divide since not
   all registers support QImode ZERO_EXTRACT.  */
16188 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
16189 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
16190 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
16191 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
16195 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
16196 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
16200 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
16201 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
16204 /* Extract remainder from AH. */
16205 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
16206 if (REG_P (operands[1]))
16207 insn = emit_move_insn (operands[1], tmp1);
/* Need a new scratch register since the old one has the result
   of the 8bit divide.  */
16212 scratch = gen_reg_rtx (mode);
16213 emit_move_insn (scratch, tmp1);
16214 insn = emit_move_insn (operands[1], scratch);
16216 set_unique_reg_note (insn, REG_EQUAL, mod);
16218 /* Zero extend quotient from AL. */
16219 tmp1 = gen_lowpart (QImode, tmp0);
16220 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
16221 set_unique_reg_note (insn, REG_EQUAL, div);
16223 emit_label (end_label);
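/* The emitted control flow is roughly (illustrative operands/labels):

	mov	dividend, scratch
	or	divisor, scratch
	test	$-0x100, scratch
	je	.Lqimode	; both values fit in 8 bits
	div	divisor		; full-width signed/unsigned divmod
	jmp	.Lend
   .Lqimode:
	divb	divisor		; 8-bit unsigned divide: AL = quotient, AH = remainder
   .Lend:  */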
16226 #define LEA_SEARCH_THRESHOLD 12
/* Search backward for a non-AGU definition of register number REGNO1
   or register number REGNO2 in INSN's basic block until
   1. LEA_SEARCH_THRESHOLD instructions have been passed, or
   2. the BB boundary is reached, or
   3. an AGU definition is reached.
   Returns the distance between the non-AGU definition point and INSN.
   If there is no definition point, returns -1.  */
16237 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
16240 basic_block bb = BLOCK_FOR_INSN (insn);
16243 enum attr_type insn_type;
16245 if (insn != BB_HEAD (bb))
16247 rtx prev = PREV_INSN (insn);
16248 while (prev && distance < LEA_SEARCH_THRESHOLD)
16250 if (NONDEBUG_INSN_P (prev))
16253 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
16254 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
16255 && !DF_REF_IS_ARTIFICIAL (*def_rec)
16256 && (regno1 == DF_REF_REGNO (*def_rec)
16257 || regno2 == DF_REF_REGNO (*def_rec)))
16259 insn_type = get_attr_type (prev);
16260 if (insn_type != TYPE_LEA)
16264 if (prev == BB_HEAD (bb))
16266 prev = PREV_INSN (prev);
16270 if (distance < LEA_SEARCH_THRESHOLD)
16274 bool simple_loop = false;
16276 FOR_EACH_EDGE (e, ei, bb->preds)
16279 simple_loop = true;
16285 rtx prev = BB_END (bb);
16288 && distance < LEA_SEARCH_THRESHOLD)
16290 if (NONDEBUG_INSN_P (prev))
16293 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
16294 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
16295 && !DF_REF_IS_ARTIFICIAL (*def_rec)
16296 && (regno1 == DF_REF_REGNO (*def_rec)
16297 || regno2 == DF_REF_REGNO (*def_rec)))
16299 insn_type = get_attr_type (prev);
16300 if (insn_type != TYPE_LEA)
16304 prev = PREV_INSN (prev);
16312 /* get_attr_type may modify recog data. We want to make sure
16313 that recog data is valid for instruction INSN, on which
16314 distance_non_agu_define is called. INSN is unchanged here. */
16315 extract_insn_cached (insn);
/* Return the distance between INSN and the next insn that uses
   register number REGNO0 in a memory address.  Return -1 if no such
   use is found within LEA_SEARCH_THRESHOLD or if REGNO0 is set.  */
16324 distance_agu_use (unsigned int regno0, rtx insn)
16326 basic_block bb = BLOCK_FOR_INSN (insn);
16331 if (insn != BB_END (bb))
16333 rtx next = NEXT_INSN (insn);
16334 while (next && distance < LEA_SEARCH_THRESHOLD)
16336 if (NONDEBUG_INSN_P (next))
16340 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
16341 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
16342 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
16343 && regno0 == DF_REF_REGNO (*use_rec))
16345 /* Return DISTANCE if OP0 is used in memory
16346 address in NEXT. */
16350 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
16351 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
16352 && !DF_REF_IS_ARTIFICIAL (*def_rec)
16353 && regno0 == DF_REF_REGNO (*def_rec))
16355 /* Return -1 if OP0 is set in NEXT. */
16359 if (next == BB_END (bb))
16361 next = NEXT_INSN (next);
16365 if (distance < LEA_SEARCH_THRESHOLD)
16369 bool simple_loop = false;
16371 FOR_EACH_EDGE (e, ei, bb->succs)
16374 simple_loop = true;
16380 rtx next = BB_HEAD (bb);
16383 && distance < LEA_SEARCH_THRESHOLD)
16385 if (NONDEBUG_INSN_P (next))
16389 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
16390 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
16391 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
16392 && regno0 == DF_REF_REGNO (*use_rec))
16394 /* Return DISTANCE if OP0 is used in memory
16395 address in NEXT. */
16399 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
16400 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
16401 && !DF_REF_IS_ARTIFICIAL (*def_rec)
16402 && regno0 == DF_REF_REGNO (*def_rec))
16404 /* Return -1 if OP0 is set in NEXT. */
16409 next = NEXT_INSN (next);
/* Define this macro to tune LEA priority vs ADD; it takes effect when
   there is a dilemma of choosing LEA or ADD.
   Negative value: ADD is preferred over LEA.
   Positive value: LEA is preferred over ADD.  */
16422 #define IX86_LEA_PRIORITY 2
/* Return true if it is ok to optimize an ADD operation to an LEA
   operation to avoid flag register consumption.  For most processors,
   ADD is faster than LEA.  For processors like ATOM, if the
   destination register of the LEA holds an actual address which will
   be used soon, LEA is better; otherwise ADD is better.  */
16431 ix86_lea_for_add_ok (rtx insn, rtx operands[])
16433 unsigned int regno0 = true_regnum (operands[0]);
16434 unsigned int regno1 = true_regnum (operands[1]);
16435 unsigned int regno2 = true_regnum (operands[2]);
16437 /* If a = b + c, (a!=b && a!=c), must use lea form. */
16438 if (regno0 != regno1 && regno0 != regno2)
16441 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
16445 int dist_define, dist_use;
16447 /* Return false if REGNO0 isn't used in memory address. */
16448 dist_use = distance_agu_use (regno0, insn);
16452 dist_define = distance_non_agu_define (regno1, regno2, insn);
16453 if (dist_define <= 0)
/* If this insn has both a backward non-AGU dependence and a forward
   AGU dependence, the one with the shorter distance takes effect.  */
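/* For example, with IX86_LEA_PRIORITY == 2: if the operands were set
   by a non-AGU insn 1 instruction back (dist_define == 1) and the
   result feeds an address 5 instructions ahead (dist_use == 5), then
   1 + 2 < 5 and ADD is chosen; were dist_use 3, LEA would win.  */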
16458 if ((dist_define + IX86_LEA_PRIORITY) < dist_use)
/* Return true if the destination reg of SET_BODY is the shift count
   of USE_BODY.  */
16469 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
16475 /* Retrieve destination of SET_BODY. */
16476 switch (GET_CODE (set_body))
16479 set_dest = SET_DEST (set_body);
16480 if (!set_dest || !REG_P (set_dest))
16484 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
16485 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
16493 /* Retrieve shift count of USE_BODY. */
16494 switch (GET_CODE (use_body))
16497 shift_rtx = XEXP (use_body, 1);
16500 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
16501 if (ix86_dep_by_shift_count_body (set_body,
16502 XVECEXP (use_body, 0, i)))
16510 && (GET_CODE (shift_rtx) == ASHIFT
16511 || GET_CODE (shift_rtx) == LSHIFTRT
16512 || GET_CODE (shift_rtx) == ASHIFTRT
16513 || GET_CODE (shift_rtx) == ROTATE
16514 || GET_CODE (shift_rtx) == ROTATERT))
16516 rtx shift_count = XEXP (shift_rtx, 1);
16518 /* Return true if shift count is dest of SET_BODY. */
16519 if (REG_P (shift_count)
16520 && true_regnum (set_dest) == true_regnum (shift_count))
/* Return true if the destination reg of SET_INSN is the shift count
   of USE_INSN.  */
16531 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
16533 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
16534 PATTERN (use_insn));
16537 /* Return TRUE or FALSE depending on whether the unary operator meets the
16538 appropriate constraints. */
16541 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
16542 enum machine_mode mode ATTRIBUTE_UNUSED,
16543 rtx operands[2] ATTRIBUTE_UNUSED)
16545 /* If one of operands is memory, source and destination must match. */
16546 if ((MEM_P (operands[0])
16547 || MEM_P (operands[1]))
16548 && ! rtx_equal_p (operands[0], operands[1]))
16553 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
16554 are ok, keeping in mind the possible movddup alternative. */
16557 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
16559 if (MEM_P (operands[0]))
16560 return rtx_equal_p (operands[0], operands[1 + high]);
16561 if (MEM_P (operands[1]) && MEM_P (operands[2]))
16562 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
/* Post-reload splitter for converting an SF or DFmode value in an
   SSE register into an unsigned SImode value.  */
16570 ix86_split_convert_uns_si_sse (rtx operands[])
16572 enum machine_mode vecmode;
16573 rtx value, large, zero_or_two31, input, two31, x;
16575 large = operands[1];
16576 zero_or_two31 = operands[2];
16577 input = operands[3];
16578 two31 = operands[4];
16579 vecmode = GET_MODE (large);
16580 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
16582 /* Load up the value into the low element. We must ensure that the other
16583 elements are valid floats -- zero is the easiest such value. */
16586 if (vecmode == V4SFmode)
16587 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
16589 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
16593 input = gen_rtx_REG (vecmode, REGNO (input));
16594 emit_move_insn (value, CONST0_RTX (vecmode));
16595 if (vecmode == V4SFmode)
16596 emit_insn (gen_sse_movss (value, value, input));
16598 emit_insn (gen_sse2_movsd (value, value, input));
16601 emit_move_insn (large, two31);
16602 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
16604 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
16605 emit_insn (gen_rtx_SET (VOIDmode, large, x));
16607 x = gen_rtx_AND (vecmode, zero_or_two31, large);
16608 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
16610 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
16611 emit_insn (gen_rtx_SET (VOIDmode, value, x));
16613 large = gen_rtx_REG (V4SImode, REGNO (large));
16614 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
16616 x = gen_rtx_REG (V4SImode, REGNO (value));
16617 if (vecmode == V4SFmode)
16618 emit_insn (gen_sse2_cvttps2dq (x, value));
16620 emit_insn (gen_sse2_cvttpd2dq (x, value));
16623 emit_insn (gen_xorv4si3 (value, value, large));
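/* In effect this computes, elementwise (pseudo-C sketch):

     mask   = (value >= 0x1p31) ? ~0 : 0;          // compare mask
     value -= mask ? 0x1p31 : 0.0;                 // bring into signed range
     result = (int32) value ^ (mask & 0x80000000); // restore the top bit  */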
16626 /* Convert an unsigned DImode value into a DFmode, using only SSE.
16627 Expects the 64-bit DImode to be supplied in a pair of integral
16628 registers. Requires SSE2; will use SSE3 if available. For x86_32,
16629 -mfpmath=sse, !optimize_size only. */
16632 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
16634 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
16635 rtx int_xmm, fp_xmm;
16636 rtx biases, exponents;
16639 int_xmm = gen_reg_rtx (V4SImode);
16640 if (TARGET_INTER_UNIT_MOVES)
16641 emit_insn (gen_movdi_to_sse (int_xmm, input));
16642 else if (TARGET_SSE_SPLIT_REGS)
16644 emit_clobber (int_xmm);
16645 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
16649 x = gen_reg_rtx (V2DImode);
16650 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
16651 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
16654 x = gen_rtx_CONST_VECTOR (V4SImode,
16655 gen_rtvec (4, GEN_INT (0x43300000UL),
16656 GEN_INT (0x45300000UL),
16657 const0_rtx, const0_rtx));
16658 exponents = validize_mem (force_const_mem (V4SImode, x));
16660 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
16661 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
/* Concatenating (juxtaposing) (0x43300000UL ## fp_value_lo_xmm)
   yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
   Similarly (0x45300000UL ## fp_value_hi_xmm) yields
   (0x1.0p84 + double(fp_value_hi_xmm)).
   Note these exponents differ by 32.  */
16669 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
16671 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
16672 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
16673 real_ldexp (&bias_lo_rvt, &dconst1, 52);
16674 real_ldexp (&bias_hi_rvt, &dconst1, 84);
16675 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
16676 x = const_double_from_real_value (bias_hi_rvt, DFmode);
16677 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
16678 biases = validize_mem (force_const_mem (V2DFmode, biases));
16679 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
16681 /* Add the upper and lower DFmode values together. */
16683 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
16686 x = copy_to_mode_reg (V2DFmode, fp_xmm);
16687 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
16688 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
16691 ix86_expand_vector_extract (false, target, fp_xmm, 0);
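/* Worked example of the bias arithmetic: with input 0x0000000200000003,
   lo = 3 and hi = 2.  The interleave forms the doubles 0x1.0p52 + 3 and
   0x1.0p84 + 2*0x1.0p32 exactly (each 32-bit half fits in the 52-bit
   mantissa); subtracting the biases leaves 3.0 and 2*2^32, and the
   final add yields 8589934595.0 == double(0x0000000200000003).  */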
16694 /* Not used, but eases macroization of patterns. */
16696 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
16697 rtx input ATTRIBUTE_UNUSED)
16699 gcc_unreachable ();
16702 /* Convert an unsigned SImode value into a DFmode. Only currently used
16703 for SSE, but applicable anywhere. */
16706 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
16708 REAL_VALUE_TYPE TWO31r;
16711 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
16712 NULL, 1, OPTAB_DIRECT);
16714 fp = gen_reg_rtx (DFmode);
16715 emit_insn (gen_floatsidf2 (fp, x));
16717 real_ldexp (&TWO31r, &dconst1, 31);
16718 x = const_double_from_real_value (TWO31r, DFmode);
16720 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
16722 emit_move_insn (target, x);
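/* I.e. the result is double((int32) (x - 2^31)) + 2^31, which is exact
   for all 32-bit x.  For example x = 0xffffffff: the wrapped sum is
   0x7fffffff = 2147483647, and 2147483647.0 + 2^31 = 4294967295.0.  */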
16725 /* Convert a signed DImode value into a DFmode. Only used for SSE in
16726 32-bit mode; otherwise we have a direct convert instruction. */
16729 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
16731 REAL_VALUE_TYPE TWO32r;
16732 rtx fp_lo, fp_hi, x;
16734 fp_lo = gen_reg_rtx (DFmode);
16735 fp_hi = gen_reg_rtx (DFmode);
16737 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
16739 real_ldexp (&TWO32r, &dconst1, 32);
16740 x = const_double_from_real_value (TWO32r, DFmode);
16741 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
16743 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
16745 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
16748 emit_move_insn (target, x);
16751 /* Convert an unsigned SImode value into a SFmode, using only SSE.
16752 For x86_32, -mfpmath=sse, !optimize_size only. */
16754 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
16756 REAL_VALUE_TYPE ONE16r;
16757 rtx fp_hi, fp_lo, int_hi, int_lo, x;
16759 real_ldexp (&ONE16r, &dconst1, 16);
16760 x = const_double_from_real_value (ONE16r, SFmode);
16761 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
16762 NULL, 0, OPTAB_DIRECT);
16763 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
16764 NULL, 0, OPTAB_DIRECT);
16765 fp_hi = gen_reg_rtx (SFmode);
16766 fp_lo = gen_reg_rtx (SFmode);
16767 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
16768 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
16769 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
16771 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
16773 if (!rtx_equal_p (target, fp_hi))
16774 emit_move_insn (target, fp_hi);
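/* I.e. the value is computed as float(x >> 16) * 2^16 + float(x & 0xffff);
   each 16-bit half is exact in SFmode's 24-bit significand, so the only
   rounding happens in the final addition.  */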
/* A subroutine of ix86_build_signbit_mask.  If VECT is true,
   then replicate the value for all elements of the vector
   register.  */
16782 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
16789 v = gen_rtvec (4, value, value, value, value);
16790 return gen_rtx_CONST_VECTOR (V4SImode, v);
16794 v = gen_rtvec (2, value, value);
16795 return gen_rtx_CONST_VECTOR (V2DImode, v);
16799 v = gen_rtvec (8, value, value, value, value,
16800 value, value, value, value);
16802 v = gen_rtvec (8, value, CONST0_RTX (SFmode),
16803 CONST0_RTX (SFmode), CONST0_RTX (SFmode),
16804 CONST0_RTX (SFmode), CONST0_RTX (SFmode),
16805 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
16806 return gen_rtx_CONST_VECTOR (V8SFmode, v);
16810 v = gen_rtvec (4, value, value, value, value);
16812 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
16813 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
16814 return gen_rtx_CONST_VECTOR (V4SFmode, v);
16818 v = gen_rtvec (4, value, value, value, value);
16820 v = gen_rtvec (4, value, CONST0_RTX (DFmode),
16821 CONST0_RTX (DFmode), CONST0_RTX (DFmode));
16822 return gen_rtx_CONST_VECTOR (V4DFmode, v);
16826 v = gen_rtvec (2, value, value);
16828 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
16829 return gen_rtx_CONST_VECTOR (V2DFmode, v);
16832 gcc_unreachable ();
16836 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
16837 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
16838 for an SSE register. If VECT is true, then replicate the mask for
16839 all elements of the vector register. If INVERT is true, then create
16840 a mask excluding the sign bit. */
16843 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
16845 enum machine_mode vec_mode, imode;
16846 HOST_WIDE_INT hi, lo;
16851 /* Find the sign bit, sign extended to 2*HWI. */
16858 mode = GET_MODE_INNER (mode);
16860 lo = 0x80000000, hi = lo < 0;
16867 mode = GET_MODE_INNER (mode);
16869 if (HOST_BITS_PER_WIDE_INT >= 64)
16870 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
16872 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
16877 vec_mode = VOIDmode;
16878 if (HOST_BITS_PER_WIDE_INT >= 64)
16881 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
16888 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
16892 lo = ~lo, hi = ~hi;
16898 mask = immed_double_const (lo, hi, imode);
16900 vec = gen_rtvec (2, v, mask);
16901 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
16902 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
16909 gcc_unreachable ();
16913 lo = ~lo, hi = ~hi;
16915 /* Force this value into the low part of a fp vector constant. */
16916 mask = immed_double_const (lo, hi, imode);
16917 mask = gen_lowpart (mode, mask);
16919 if (vec_mode == VOIDmode)
16920 return force_reg (mode, mask);
16922 v = ix86_build_const_vector (vec_mode, vect, mask);
16923 return force_reg (vec_mode, v);
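/* E.g. for DFmode the mask is 0x8000000000000000 (just the sign bit),
   or 0x7fffffffffffffff when INVERT; with VECT set it is broadcast to
   every element of the vector mode.  */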
16926 /* Generate code for floating point ABS or NEG. */
16929 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
16932 rtx mask, set, dst, src;
16933 bool use_sse = false;
16934 bool vector_mode = VECTOR_MODE_P (mode);
16935 enum machine_mode vmode = mode;
16939 else if (mode == TFmode)
16941 else if (TARGET_SSE_MATH)
16943 use_sse = SSE_FLOAT_MODE_P (mode);
16944 if (mode == SFmode)
16946 else if (mode == DFmode)
16950 /* NEG and ABS performed with SSE use bitwise mask operations.
16951 Create the appropriate mask now. */
16953 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
16960 set = gen_rtx_fmt_e (code, mode, src);
16961 set = gen_rtx_SET (VOIDmode, dst, set);
16968 use = gen_rtx_USE (VOIDmode, mask);
16970 par = gen_rtvec (2, set, use);
16973 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16974 par = gen_rtvec (3, set, use, clob);
16976 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
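/* I.e. after splitting, NEG becomes an XOR with the sign-bit mask and
   ABS becomes an AND with the inverted mask -- e.g. "xorps MASK, %xmm0"
   or "andps MASK, %xmm0" -- with no flags involved in the SSE case.  */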
16982 /* Expand a copysign operation. Special case operand 0 being a constant. */
16985 ix86_expand_copysign (rtx operands[])
16987 enum machine_mode mode, vmode;
16988 rtx dest, op0, op1, mask, nmask;
16990 dest = operands[0];
16994 mode = GET_MODE (dest);
16996 if (mode == SFmode)
16998 else if (mode == DFmode)
17003 if (GET_CODE (op0) == CONST_DOUBLE)
17005 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
17007 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
17008 op0 = simplify_unary_operation (ABS, mode, op0, mode);
17010 if (mode == SFmode || mode == DFmode)
17012 if (op0 == CONST0_RTX (mode))
17013 op0 = CONST0_RTX (vmode);
17016 rtx v = ix86_build_const_vector (vmode, false, op0);
17018 op0 = force_reg (vmode, v);
17021 else if (op0 != CONST0_RTX (mode))
17022 op0 = force_reg (mode, op0);
17024 mask = ix86_build_signbit_mask (vmode, 0, 0);
17026 if (mode == SFmode)
17027 copysign_insn = gen_copysignsf3_const;
17028 else if (mode == DFmode)
17029 copysign_insn = gen_copysigndf3_const;
17031 copysign_insn = gen_copysigntf3_const;
17033 emit_insn (copysign_insn (dest, op0, op1, mask));
17037 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
17039 nmask = ix86_build_signbit_mask (vmode, 0, 1);
17040 mask = ix86_build_signbit_mask (vmode, 0, 0);
17042 if (mode == SFmode)
17043 copysign_insn = gen_copysignsf3_var;
17044 else if (mode == DFmode)
17045 copysign_insn = gen_copysigndf3_var;
17047 copysign_insn = gen_copysigntf3_var;
17049 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
17053 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
17054 be a constant, and so has already been expanded into a vector constant. */
17057 ix86_split_copysign_const (rtx operands[])
17059 enum machine_mode mode, vmode;
17060 rtx dest, op0, mask, x;
17062 dest = operands[0];
17064 mask = operands[3];
17066 mode = GET_MODE (dest);
17067 vmode = GET_MODE (mask);
17069 dest = simplify_gen_subreg (vmode, dest, mode, 0);
17070 x = gen_rtx_AND (vmode, dest, mask);
17071 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17073 if (op0 != CONST0_RTX (vmode))
17075 x = gen_rtx_IOR (vmode, dest, op0);
17076 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
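/* This is the usual mask identity copysign(x, y) =
   |x| | (y & SIGNMASK): operand 0 is a constant already reduced to
   its absolute value, so the split only needs the AND to extract the
   sign of y and, when |x| is nonzero, the IOR to merge it in.  */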
17080 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
17081 so we have to do two masks. */
17084 ix86_split_copysign_var (rtx operands[])
17086 enum machine_mode mode, vmode;
17087 rtx dest, scratch, op0, op1, mask, nmask, x;
17089 dest = operands[0];
17090 scratch = operands[1];
17093 nmask = operands[4];
17094 mask = operands[5];
17096 mode = GET_MODE (dest);
17097 vmode = GET_MODE (mask);
17099 if (rtx_equal_p (op0, op1))
17101 /* Shouldn't happen often (it's useless, obviously), but when it does
17102 we'd generate incorrect code if we continue below. */
17103 emit_move_insn (dest, op0);
17107 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
17109 gcc_assert (REGNO (op1) == REGNO (scratch));
17111 x = gen_rtx_AND (vmode, scratch, mask);
17112 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
17115 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
17116 x = gen_rtx_NOT (vmode, dest);
17117 x = gen_rtx_AND (vmode, x, op0);
17118 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17122 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
17124 x = gen_rtx_AND (vmode, scratch, mask);
17126 else /* alternative 2,4 */
17128 gcc_assert (REGNO (mask) == REGNO (scratch));
17129 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
17130 x = gen_rtx_AND (vmode, scratch, op1);
17132 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
17134 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
17136 dest = simplify_gen_subreg (vmode, op0, mode, 0);
17137 x = gen_rtx_AND (vmode, dest, nmask);
17139 else /* alternative 3,4 */
17141 gcc_assert (REGNO (nmask) == REGNO (dest));
17143 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
17144 x = gen_rtx_AND (vmode, dest, op0);
17146 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17149 x = gen_rtx_IOR (vmode, dest, scratch);
17150 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* Return TRUE or FALSE depending on whether the first SET in INSN
   has source and destination with matching CC modes, and whether the
   CC mode is at least as constrained as REQ_MODE.  */
17158 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
17161 enum machine_mode set_mode;
17163 set = PATTERN (insn);
17164 if (GET_CODE (set) == PARALLEL)
17165 set = XVECEXP (set, 0, 0);
17166 gcc_assert (GET_CODE (set) == SET);
17167 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
17169 set_mode = GET_MODE (SET_DEST (set));
17173 if (req_mode != CCNOmode
17174 && (req_mode != CCmode
17175 || XEXP (SET_SRC (set), 1) != const0_rtx))
17179 if (req_mode == CCGCmode)
17183 if (req_mode == CCGOCmode || req_mode == CCNOmode)
17187 if (req_mode == CCZmode)
17198 gcc_unreachable ();
17201 return GET_MODE (SET_SRC (set)) == set_mode;
17204 /* Generate insn patterns to do an integer compare of OPERANDS. */
17207 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
17209 enum machine_mode cmpmode;
17212 cmpmode = SELECT_CC_MODE (code, op0, op1);
17213 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
17215 /* This is very simple, but making the interface the same as in the
17216 FP case makes the rest of the code easier. */
17217 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
17218 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
17220 /* Return the test that should be put into the flags user, i.e.
17221 the bcc, scc, or cmov instruction. */
17222 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
17225 /* Figure out whether to use ordered or unordered fp comparisons.
17226 Return the appropriate mode to use. */
17229 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
/* ??? In order to make all comparisons reversible, we do all comparisons
   non-trapping when compiling for IEEE.  Once gcc is able to distinguish
   all forms of trapping and nontrapping comparisons, we can make inequality
   comparisons trapping again, since it results in better code when using
   FCOM based compares.  */
17236 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
17240 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
17242 enum machine_mode mode = GET_MODE (op0);
17244 if (SCALAR_FLOAT_MODE_P (mode))
17246 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
17247 return ix86_fp_compare_mode (code);
17252 /* Only zero flag is needed. */
17253 case EQ: /* ZF=0 */
17254 case NE: /* ZF!=0 */
17256 /* Codes needing carry flag. */
17257 case GEU: /* CF=0 */
17258 case LTU: /* CF=1 */
17259 /* Detect overflow checks. They need just the carry flag. */
17260 if (GET_CODE (op0) == PLUS
17261 && rtx_equal_p (op1, XEXP (op0, 0)))
17265 case GTU: /* CF=0 & ZF=0 */
17266 case LEU: /* CF=1 | ZF=1 */
17267 /* Detect overflow checks. They need just the carry flag. */
17268 if (GET_CODE (op0) == MINUS
17269 && rtx_equal_p (op1, XEXP (op0, 0)))
17273 /* Codes possibly doable only with sign flag when
17274 comparing against zero. */
17275 case GE: /* SF=OF or SF=0 */
17276 case LT: /* SF<>OF or SF=1 */
17277 if (op1 == const0_rtx)
17280 /* For other cases Carry flag is not required. */
/* Codes doable only with the sign flag when comparing
   against zero, but we miss the jump instruction for it,
   so we need to use relational tests against overflow,
   which thus needs to be zero.  */
17286 case GT: /* ZF=0 & SF=OF */
17287 case LE: /* ZF=1 | SF<>OF */
17288 if (op1 == const0_rtx)
/* The strcmp pattern does (use flags) and combine may ask us
   for a proper mode.  */
17297 gcc_unreachable ();
17301 /* Return the fixed registers used for condition codes. */
17304 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
17311 /* If two condition code modes are compatible, return a condition code
17312 mode which is compatible with both. Otherwise, return
17315 static enum machine_mode
17316 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
17321 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
17324 if ((m1 == CCGCmode && m2 == CCGOCmode)
17325 || (m1 == CCGOCmode && m2 == CCGCmode))
17331 gcc_unreachable ();
/* These are only compatible with themselves, which we already
   checked above.  */
/* Return a comparison we can do that is equivalent to
   swap_condition (code), apart possibly from orderedness.
   But, never change orderedness if TARGET_IEEE_FP, returning
   UNKNOWN in that case if necessary.  */
17373 static enum rtx_code
17374 ix86_fp_swap_condition (enum rtx_code code)
17378 case GT: /* GTU - CF=0 & ZF=0 */
17379 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
17380 case GE: /* GEU - CF=0 */
17381 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
17382 case UNLT: /* LTU - CF=1 */
17383 return TARGET_IEEE_FP ? UNKNOWN : GT;
17384 case UNLE: /* LEU - CF=1 | ZF=1 */
17385 return TARGET_IEEE_FP ? UNKNOWN : GE;
17387 return swap_condition (code);
/* Return the cost of comparison CODE using the best strategy for
   performance.  All following functions use the number of instructions
   as a cost metric.  In the future this should be tweaked to compute
   bytes for optimize_size and take into account the performance of
   various instructions on various CPUs.  */
17397 ix86_fp_comparison_cost (enum rtx_code code)
17401 /* The cost of code using bit-twiddling on %ah. */
17418 arith_cost = TARGET_IEEE_FP ? 5 : 4;
17422 arith_cost = TARGET_IEEE_FP ? 6 : 4;
17425 gcc_unreachable ();
17428 switch (ix86_fp_comparison_strategy (code))
17430 case IX86_FPCMP_COMI:
17431 return arith_cost > 4 ? 3 : 2;
17432 case IX86_FPCMP_SAHF:
17433 return arith_cost > 4 ? 4 : 3;
/* Return the strategy to use for floating-point comparisons.  We assume
   that fcomi is always preferable where available, since that is also
   true when looking at size (2 bytes, vs. 3 for fnstsw+sahf and at
   least 5 for fnstsw+test).  */
17443 enum ix86_fpcmp_strategy
17444 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
17446 /* Do fcomi/sahf based test when profitable. */
17449 return IX86_FPCMP_COMI;
17451 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_function_for_size_p (cfun)))
17452 return IX86_FPCMP_SAHF;
17454 return IX86_FPCMP_ARITH;
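/* The three strategies correspond roughly to (illustrative operand
   forms):
     IX86_FPCMP_COMI:  fcomi %st(1), %st         ; sets ZF/PF/CF directly
     IX86_FPCMP_SAHF:  fcom; fnstsw %ax; sahf    ; copy C-bits into flags
     IX86_FPCMP_ARITH: fcom; fnstsw %ax; test/and on %ah  */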
17457 /* Swap, force into registers, or otherwise massage the two operands
17458 to a fp comparison. The operands are updated in place; the new
17459 comparison code is returned. */
17461 static enum rtx_code
17462 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
17464 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
17465 rtx op0 = *pop0, op1 = *pop1;
17466 enum machine_mode op_mode = GET_MODE (op0);
17467 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
/* All of the unordered compare instructions only work on registers.
   The same is true of the fcomi compare instructions.  The XFmode
   compare instructions require registers except when comparing
   against zero or when converting operand 1 from fixed point to
   floating point.  */
17476 && (fpcmp_mode == CCFPUmode
17477 || (op_mode == XFmode
17478 && ! (standard_80387_constant_p (op0) == 1
17479 || standard_80387_constant_p (op1) == 1)
17480 && GET_CODE (op1) != FLOAT)
17481 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
17483 op0 = force_reg (op_mode, op0);
17484 op1 = force_reg (op_mode, op1);
17488 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
17489 things around if they appear profitable, otherwise force op0
17490 into a register. */
17492 if (standard_80387_constant_p (op0) == 0
17494 && ! (standard_80387_constant_p (op1) == 0
17497 enum rtx_code new_code = ix86_fp_swap_condition (code);
17498 if (new_code != UNKNOWN)
17501 tmp = op0, op0 = op1, op1 = tmp;
17507 op0 = force_reg (op_mode, op0);
17509 if (CONSTANT_P (op1))
17511 int tmp = standard_80387_constant_p (op1);
17513 op1 = validize_mem (force_const_mem (op_mode, op1));
17517 op1 = force_reg (op_mode, op1);
17520 op1 = force_reg (op_mode, op1);
17524 /* Try to rearrange the comparison to make it cheaper. */
17525 if (ix86_fp_comparison_cost (code)
17526 > ix86_fp_comparison_cost (swap_condition (code))
17527 && (REG_P (op1) || can_create_pseudo_p ()))
17530 tmp = op0, op0 = op1, op1 = tmp;
17531 code = swap_condition (code);
17533 op0 = force_reg (op_mode, op0);
17541 /* Convert comparison codes we use to represent FP comparison to integer
17542 code that will result in a proper branch.  Return UNKNOWN if no such code is available. */
17546 ix86_fp_compare_code_to_integer (enum rtx_code code)
17575 /* Generate insn patterns to do a floating point compare of OPERANDS. */
17578 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
17580 enum machine_mode fpcmp_mode, intcmp_mode;
17583 fpcmp_mode = ix86_fp_compare_mode (code);
17584 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
17586 /* Do fcomi/sahf based test when profitable. */
17587 switch (ix86_fp_comparison_strategy (code))
17589 case IX86_FPCMP_COMI:
17590 intcmp_mode = fpcmp_mode;
17591 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
17592 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
17597 case IX86_FPCMP_SAHF:
17598 intcmp_mode = fpcmp_mode;
17599 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
17600 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
17604 scratch = gen_reg_rtx (HImode);
17605 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
17606 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
17609 case IX86_FPCMP_ARITH:
17610 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
17611 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
17612 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
17614 scratch = gen_reg_rtx (HImode);
17615 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
17617 /* In the unordered case, we have to check C2 for NaNs, which
17618 doesn't happen to work out to anything nice combination-wise.
17619 So do some bit twiddling on the value we've got in AH to come
17620 up with an appropriate set of condition codes. */
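      /* After fnstsw, %ah holds the FPU condition bits: C0 in 0x01, C2 in
         0x04 and C3 in 0x40; hence the 0x45/0x44/0x40/0x05/0x04/0x01 masks
         used below.  */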
17622 intcmp_mode = CCNOmode;
17627 if (code == GT || !TARGET_IEEE_FP)
17629 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
17634 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17635 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
17636 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
17637 intcmp_mode = CCmode;
17643 if (code == LT && TARGET_IEEE_FP)
17645 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17646 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
17647 intcmp_mode = CCmode;
17652 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
17658 if (code == GE || !TARGET_IEEE_FP)
17660 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
17665 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17666 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
17672 if (code == LE && TARGET_IEEE_FP)
17674 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17675 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
17676 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
17677 intcmp_mode = CCmode;
17682 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
17688 if (code == EQ && TARGET_IEEE_FP)
17690 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17691 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
17692 intcmp_mode = CCmode;
17697 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
17703 if (code == NE && TARGET_IEEE_FP)
17705 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17706 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
17712 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
17718 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
17722 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
17727 gcc_unreachable ();
17735 /* Return the test that should be put into the flags user, i.e.
17736 the bcc, scc, or cmov instruction. */
17737 return gen_rtx_fmt_ee (code, VOIDmode,
17738 gen_rtx_REG (intcmp_mode, FLAGS_REG),
17743 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
17747 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
17748 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
17750 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
17752 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
17753 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
17756 ret = ix86_expand_int_compare (code, op0, op1);
17762 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
17764 enum machine_mode mode = GET_MODE (op0);
17776 tmp = ix86_expand_compare (code, op0, op1);
17777 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
17778 gen_rtx_LABEL_REF (VOIDmode, label),
17780 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
17787 /* Expand DImode branch into multiple compare+branch. */
17789 rtx lo[2], hi[2], label2;
17790 enum rtx_code code1, code2, code3;
17791 enum machine_mode submode;
17793 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
17795 tmp = op0, op0 = op1, op1 = tmp;
17796 code = swap_condition (code);
17799 split_double_mode (mode, &op0, 1, lo+0, hi+0);
17800 split_double_mode (mode, &op1, 1, lo+1, hi+1);
17802 submode = mode == DImode ? SImode : DImode;
17804 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
17805 avoid two branches. This costs one extra insn, so disable when
17806 optimizing for size. */
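      /* E.g. a DImode "a == b" on ia32 comes out roughly as:
           xorl  hi(b), hi(a)
           xorl  lo(b), lo(a)
           orl   hi(a), lo(a)
           j(n)e label  */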
17808 if ((code == EQ || code == NE)
17809 && (!optimize_insn_for_size_p ()
17810 || hi[1] == const0_rtx || lo[1] == const0_rtx))
17815 if (hi[1] != const0_rtx)
17816 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
17817 NULL_RTX, 0, OPTAB_WIDEN);
17820 if (lo[1] != const0_rtx)
17821 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
17822 NULL_RTX, 0, OPTAB_WIDEN);
17824 tmp = expand_binop (submode, ior_optab, xor1, xor0,
17825 NULL_RTX, 0, OPTAB_WIDEN);
17827 ix86_expand_branch (code, tmp, const0_rtx, label);
17831 /* Otherwise, if we are doing a less-than or greater-than-or-equal
17832 comparison, op1 is a constant and the low word is zero, then we can
17833 just examine the high word.  Similarly for a low word of -1 and
17834 less-than-or-equal or greater-than. */
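      /* E.g. for DImode "a < 0x500000000LL" the low word of the constant is
         zero, so it is enough to test hi(a) < 5 on the high words alone.  */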
17836 if (CONST_INT_P (hi[1]))
17839 case LT: case LTU: case GE: case GEU:
17840 if (lo[1] == const0_rtx)
17842 ix86_expand_branch (code, hi[0], hi[1], label);
17846 case LE: case LEU: case GT: case GTU:
17847 if (lo[1] == constm1_rtx)
17849 ix86_expand_branch (code, hi[0], hi[1], label);
17857 /* Otherwise, we need two or three jumps. */
17859 label2 = gen_label_rtx ();
17862 code2 = swap_condition (code);
17863 code3 = unsigned_condition (code);
17867 case LT: case GT: case LTU: case GTU:
17870 case LE: code1 = LT; code2 = GT; break;
17871 case GE: code1 = GT; code2 = LT; break;
17872 case LEU: code1 = LTU; code2 = GTU; break;
17873 case GEU: code1 = GTU; code2 = LTU; break;
17875 case EQ: code1 = UNKNOWN; code2 = NE; break;
17876 case NE: code2 = UNKNOWN; break;
17879 gcc_unreachable ();
17884 * if (hi(a) < hi(b)) goto true;
17885 * if (hi(a) > hi(b)) goto false;
17886 * if (lo(a) < lo(b)) goto true;
17890 if (code1 != UNKNOWN)
17891 ix86_expand_branch (code1, hi[0], hi[1], label);
17892 if (code2 != UNKNOWN)
17893 ix86_expand_branch (code2, hi[0], hi[1], label2);
17895 ix86_expand_branch (code3, lo[0], lo[1], label);
17897 if (code2 != UNKNOWN)
17898 emit_label (label2);
17903 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
17908 /* Split a branch based on a floating point condition. */
17910 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
17911 rtx target1, rtx target2, rtx tmp, rtx pushed)
17916 if (target2 != pc_rtx)
17919 code = reverse_condition_maybe_unordered (code);
17924 condition = ix86_expand_fp_compare (code, op1, op2,
17927 /* Remove pushed operand from stack. */
17929 ix86_free_from_memory (GET_MODE (pushed));
17931 i = emit_jump_insn (gen_rtx_SET
17933 gen_rtx_IF_THEN_ELSE (VOIDmode,
17934 condition, target1, target2)));
17935 if (split_branch_probability >= 0)
17936 add_reg_note (i, REG_BR_PROB, GEN_INT (split_branch_probability));
17940 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
17944 gcc_assert (GET_MODE (dest) == QImode);
17946 ret = ix86_expand_compare (code, op0, op1);
17947 PUT_MODE (ret, QImode);
17948 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
17951 /* Expand a comparison setting or clearing the carry flag.  Return true
17952 when successful, and set *POP to the comparison for the operation. */
17954 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
17956 enum machine_mode mode =
17957 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
17959 /* Do not handle double-mode compares that go through the special path. */
17960 if (mode == (TARGET_64BIT ? TImode : DImode))
17963 if (SCALAR_FLOAT_MODE_P (mode))
17965 rtx compare_op, compare_seq;
17967 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
17969 /* Shortcut: the following common codes never translate
17970 into carry flag compares. */
17971 if (code == EQ || code == NE || code == UNEQ || code == LTGT
17972 || code == ORDERED || code == UNORDERED)
17975 /* These comparisons require the zero flag; swap operands so they won't need it. */
17976 if ((code == GT || code == UNLE || code == LE || code == UNGT)
17977 && !TARGET_IEEE_FP)
17982 code = swap_condition (code);
17985 /* Try to expand the comparison and verify that we end up with
17986 a carry-flag-based comparison.  This fails only when we decide
17987 to expand the comparison using arithmetic, which is not a
17988 common scenario. */
17990 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
17991 compare_seq = get_insns ();
17994 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
17995 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
17996 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
17998 code = GET_CODE (compare_op);
18000 if (code != LTU && code != GEU)
18003 emit_insn (compare_seq);
18008 if (!INTEGRAL_MODE_P (mode))
18017 /* Convert a==0 into (unsigned)a<1. */
18020 if (op1 != const0_rtx)
18023 code = (code == EQ ? LTU : GEU);
18026 /* Convert a>b into b<a or a>=b+1. */
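      /* E.g. "a >u 5" becomes "a >=u 6", which is a plain carry-flag test.  */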
18029 if (CONST_INT_P (op1))
18031 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
18032 /* Bail out on overflow.  We could still swap the operands, but that
18033 would force loading of the constant into a register. */
18034 if (op1 == const0_rtx
18035 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
18037 code = (code == GTU ? GEU : LTU);
18044 code = (code == GTU ? LTU : GEU);
18048 /* Convert a>=0 into (unsigned)a<0x80000000. */
18051 if (mode == DImode || op1 != const0_rtx)
18053 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
18054 code = (code == LT ? GEU : LTU);
18058 if (mode == DImode || op1 != constm1_rtx)
18060 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
18061 code = (code == LE ? GEU : LTU);
18067 /* Swapping operands may cause a constant to appear as the first operand. */
18068 if (!nonimmediate_operand (op0, VOIDmode))
18070 if (!can_create_pseudo_p ())
18072 op0 = force_reg (mode, op0);
18074 *pop = ix86_expand_compare (code, op0, op1);
18075 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
18080 ix86_expand_int_movcc (rtx operands[])
18082 enum rtx_code code = GET_CODE (operands[1]), compare_code;
18083 rtx compare_seq, compare_op;
18084 enum machine_mode mode = GET_MODE (operands[0]);
18085 bool sign_bit_compare_p = false;
18086 rtx op0 = XEXP (operands[1], 0);
18087 rtx op1 = XEXP (operands[1], 1);
18090 compare_op = ix86_expand_compare (code, op0, op1);
18091 compare_seq = get_insns ();
18094 compare_code = GET_CODE (compare_op);
18096 if ((op1 == const0_rtx && (code == GE || code == LT))
18097 || (op1 == constm1_rtx && (code == GT || code == LE)))
18098 sign_bit_compare_p = true;
18100 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
18101 HImode insns, we'd be swallowed in word prefix ops. */
18103 if ((mode != HImode || TARGET_FAST_PREFIX)
18104 && (mode != (TARGET_64BIT ? TImode : DImode))
18105 && CONST_INT_P (operands[2])
18106 && CONST_INT_P (operands[3]))
18108 rtx out = operands[0];
18109 HOST_WIDE_INT ct = INTVAL (operands[2]);
18110 HOST_WIDE_INT cf = INTVAL (operands[3]);
18111 HOST_WIDE_INT diff;
18114 /* Sign bit compares are better done using shifts than using sbb. */
18116 if (sign_bit_compare_p
18117 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
18119 /* Detect overlap between destination and compare sources. */
18122 if (!sign_bit_compare_p)
18125 bool fpcmp = false;
18127 compare_code = GET_CODE (compare_op);
18129 flags = XEXP (compare_op, 0);
18131 if (GET_MODE (flags) == CCFPmode
18132 || GET_MODE (flags) == CCFPUmode)
18136 = ix86_fp_compare_code_to_integer (compare_code);
18139 /* To simplify the rest of the code, restrict to the GEU case. */
18140 if (compare_code == LTU)
18142 HOST_WIDE_INT tmp = ct;
18145 compare_code = reverse_condition (compare_code);
18146 code = reverse_condition (code);
18151 PUT_CODE (compare_op,
18152 reverse_condition_maybe_unordered
18153 (GET_CODE (compare_op)));
18155 PUT_CODE (compare_op,
18156 reverse_condition (GET_CODE (compare_op)));
18160 if (reg_overlap_mentioned_p (out, op0)
18161 || reg_overlap_mentioned_p (out, op1))
18162 tmp = gen_reg_rtx (mode);
18164 if (mode == DImode)
18165 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
18167 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
18168 flags, compare_op));
18172 if (code == GT || code == GE)
18173 code = reverse_condition (code);
18176 HOST_WIDE_INT tmp = ct;
18181 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
18194 tmp = expand_simple_binop (mode, PLUS,
18196 copy_rtx (tmp), 1, OPTAB_DIRECT);
18207 tmp = expand_simple_binop (mode, IOR,
18209 copy_rtx (tmp), 1, OPTAB_DIRECT);
18211 else if (diff == -1 && ct)
18221 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
18223 tmp = expand_simple_binop (mode, PLUS,
18224 copy_rtx (tmp), GEN_INT (cf),
18225 copy_rtx (tmp), 1, OPTAB_DIRECT);
18233 * andl cf - ct, dest
18243 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
18246 tmp = expand_simple_binop (mode, AND,
18248 gen_int_mode (cf - ct, mode),
18249 copy_rtx (tmp), 1, OPTAB_DIRECT);
18251 tmp = expand_simple_binop (mode, PLUS,
18252 copy_rtx (tmp), GEN_INT (ct),
18253 copy_rtx (tmp), 1, OPTAB_DIRECT);
18256 if (!rtx_equal_p (tmp, out))
18257 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
18264 enum machine_mode cmp_mode = GET_MODE (op0);
18267 tmp = ct, ct = cf, cf = tmp;
18270 if (SCALAR_FLOAT_MODE_P (cmp_mode))
18272 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
18274 /* We may be reversing an unordered compare to a normal compare, which
18275 is not valid in general (we may convert a non-trapping condition
18276 into a trapping one); however, on i386 we currently emit all
18277 comparisons unordered. */
18278 compare_code = reverse_condition_maybe_unordered (compare_code);
18279 code = reverse_condition_maybe_unordered (code);
18283 compare_code = reverse_condition (compare_code);
18284 code = reverse_condition (code);
18288 compare_code = UNKNOWN;
18289 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
18290 && CONST_INT_P (op1))
18292 if (op1 == const0_rtx
18293 && (code == LT || code == GE))
18294 compare_code = code;
18295 else if (op1 == constm1_rtx)
18299 else if (code == GT)
18304 /* Optimize dest = (op0 < 0) ? -1 : cf. */
18305 if (compare_code != UNKNOWN
18306 && GET_MODE (op0) == GET_MODE (out)
18307 && (cf == -1 || ct == -1))
18309 /* If the lea code below could be used, only optimize
18310 if it results in a 2-insn sequence. */
18312 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
18313 || diff == 3 || diff == 5 || diff == 9)
18314 || (compare_code == LT && ct == -1)
18315 || (compare_code == GE && cf == -1))
18318 * notl op1 (if necessary)
18326 code = reverse_condition (code);
18329 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
18331 out = expand_simple_binop (mode, IOR,
18333 out, 1, OPTAB_DIRECT);
18334 if (out != operands[0])
18335 emit_move_insn (operands[0], out);
18342 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
18343 || diff == 3 || diff == 5 || diff == 9)
18344 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
18346 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
18352 * lea cf(dest*(ct-cf)),dest
18356 * This also catches the degenerate setcc-only case.
18362 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
18365 /* On x86_64 the lea instruction operates on Pmode, so we need
18366 the arithmetic done in the proper mode to match. */
18368 tmp = copy_rtx (out);
18372 out1 = copy_rtx (out);
18373 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
18377 tmp = gen_rtx_PLUS (mode, tmp, out1);
18383 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
18386 if (!rtx_equal_p (tmp, out))
18389 out = force_operand (tmp, copy_rtx (out));
18391 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
18393 if (!rtx_equal_p (out, operands[0]))
18394 emit_move_insn (operands[0], copy_rtx (out));
18400 * General case: Jumpful:
18401 * xorl dest,dest cmpl op1, op2
18402 * cmpl op1, op2 movl ct, dest
18403 * setcc dest jcc 1f
18404 * decl dest movl cf, dest
18405 * andl (cf-ct),dest 1:
18408 * Size 20. Size 14.
18410 * This is reasonably steep, but branch mispredict costs are
18411 * high on modern cpus, so consider failing only if optimizing
18415 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
18416 && BRANCH_COST (optimize_insn_for_speed_p (),
18421 enum machine_mode cmp_mode = GET_MODE (op0);
18426 if (SCALAR_FLOAT_MODE_P (cmp_mode))
18428 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
18430 /* We may be reversing an unordered compare to a normal compare,
18431 which is not valid in general (we may convert a non-trapping
18432 condition into a trapping one); however, on i386 we currently
18433 emit all comparisons unordered. */
18434 code = reverse_condition_maybe_unordered (code);
18438 code = reverse_condition (code);
18439 if (compare_code != UNKNOWN)
18440 compare_code = reverse_condition (compare_code);
18444 if (compare_code != UNKNOWN)
18446 /* notl op1 (if needed)
18451 For x < 0 (resp. x <= -1) there will be no notl,
18452 so if possible swap the constants to get rid of the complement.
18454 True/false will be -1/0 while code below (store flag
18455 followed by decrement) is 0/-1, so the constants need
18456 to be exchanged once more. */
18458 if (compare_code == GE || !cf)
18460 code = reverse_condition (code);
18465 HOST_WIDE_INT tmp = cf;
18470 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
18474 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
18476 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
18478 copy_rtx (out), 1, OPTAB_DIRECT);
18481 out = expand_simple_binop (mode, AND, copy_rtx (out),
18482 gen_int_mode (cf - ct, mode),
18483 copy_rtx (out), 1, OPTAB_DIRECT);
18485 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
18486 copy_rtx (out), 1, OPTAB_DIRECT);
18487 if (!rtx_equal_p (out, operands[0]))
18488 emit_move_insn (operands[0], copy_rtx (out));
18494 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
18496 /* Try a few more things with specific constants and a variable. */
18499 rtx var, orig_out, out, tmp;
18501 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
18504 /* If one of the two operands is an interesting constant, load a
18505 constant with the above and mask it in with a logical operation. */
18507 if (CONST_INT_P (operands[2]))
18510 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
18511 operands[3] = constm1_rtx, op = and_optab;
18512 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
18513 operands[3] = const0_rtx, op = ior_optab;
18517 else if (CONST_INT_P (operands[3]))
18520 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
18521 operands[2] = constm1_rtx, op = and_optab;
18522 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
18523 operands[2] = const0_rtx, op = ior_optab;
18530 orig_out = operands[0];
18531 tmp = gen_reg_rtx (mode);
18534 /* Recurse to get the constant loaded. */
18535 if (ix86_expand_int_movcc (operands) == 0)
18538 /* Mask in the interesting variable. */
18539 out = expand_binop (mode, op, var, tmp, orig_out, 0,
18541 if (!rtx_equal_p (out, orig_out))
18542 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
18548 * For comparison with above,
18558 if (! nonimmediate_operand (operands[2], mode))
18559 operands[2] = force_reg (mode, operands[2]);
18560 if (! nonimmediate_operand (operands[3], mode))
18561 operands[3] = force_reg (mode, operands[3]);
18563 if (! register_operand (operands[2], VOIDmode)
18565 || ! register_operand (operands[3], VOIDmode)))
18566 operands[2] = force_reg (mode, operands[2]);
18569 && ! register_operand (operands[3], VOIDmode))
18570 operands[3] = force_reg (mode, operands[3]);
18572 emit_insn (compare_seq);
18573 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
18574 gen_rtx_IF_THEN_ELSE (mode,
18575 compare_op, operands[2],
18580 /* Swap, force into registers, or otherwise massage the two operands
18581 to an sse comparison with a mask result. Thus we differ a bit from
18582 ix86_prepare_fp_compare_args which expects to produce a flags result.
18584 The DEST operand exists to help determine whether to commute commutative
18585 operators. The POP0/POP1 operands are updated in place. The new
18586 comparison code is returned, or UNKNOWN if not implementable. */
18588 static enum rtx_code
18589 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
18590 rtx *pop0, rtx *pop1)
18598 /* We have no LTGT as an operator. We could implement it with
18599 NE & ORDERED, but this requires an extra temporary. It's
18600 not clear that it's worth it. */
18607 /* These are supported directly. */
18614 /* For commutative operators, try to canonicalize the destination
18615 operand to be first in the comparison; this helps reload
18616 avoid extra moves. */
18617 if (!dest || !rtx_equal_p (dest, *pop1))
18625 /* These are not supported directly. Swap the comparison operands
18626 to transform into something that is supported. */
18630 code = swap_condition (code);
18634 gcc_unreachable ();
18640 /* Detect conditional moves that exactly match min/max operational
18641 semantics. Note that this is IEEE safe, as long as we don't
18642 interchange the operands.
18644 Returns FALSE if this conditional move doesn't match a MIN/MAX,
18645 and TRUE if the operation is successful and instructions are emitted. */
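/* Note: the SSE min/max instructions return the second source operand when
   the compare is unordered, so they are not commutative in the IEEE sense;
   hence the care below about not interchanging the operands.  */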
18648 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
18649 rtx cmp_op1, rtx if_true, rtx if_false)
18651 enum machine_mode mode;
18657 else if (code == UNGE)
18660 if_true = if_false;
18666 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
18668 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
18673 mode = GET_MODE (dest);
18675 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
18676 but MODE may be a vector mode and thus not appropriate. */
18677 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
18679 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
18682 if_true = force_reg (mode, if_true);
18683 v = gen_rtvec (2, if_true, if_false);
18684 tmp = gen_rtx_UNSPEC (mode, v, u);
18688 code = is_min ? SMIN : SMAX;
18689 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
18692 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
18696 /* Expand an sse vector comparison. Return the register with the result. */
18699 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
18700 rtx op_true, rtx op_false)
18702 enum machine_mode mode = GET_MODE (dest);
18705 cmp_op0 = force_reg (mode, cmp_op0);
18706 if (!nonimmediate_operand (cmp_op1, mode))
18707 cmp_op1 = force_reg (mode, cmp_op1);
18710 || reg_overlap_mentioned_p (dest, op_true)
18711 || reg_overlap_mentioned_p (dest, op_false))
18712 dest = gen_reg_rtx (mode);
18714 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
18715 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18720 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
18721 operations. This is used for both scalar and vector conditional moves. */
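/* I.e. compute dest = (op_true & cmp) | (op_false & ~cmp); when one of
   the arms is the zero constant, a single AND or ANDN is enough.  */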
18724 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
18726 enum machine_mode mode = GET_MODE (dest);
18729 if (op_false == CONST0_RTX (mode))
18731 op_true = force_reg (mode, op_true);
18732 x = gen_rtx_AND (mode, cmp, op_true);
18733 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18735 else if (op_true == CONST0_RTX (mode))
18737 op_false = force_reg (mode, op_false);
18738 x = gen_rtx_NOT (mode, cmp);
18739 x = gen_rtx_AND (mode, x, op_false);
18740 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18742 else if (TARGET_XOP)
18744 rtx pcmov = gen_rtx_SET (mode, dest,
18745 gen_rtx_IF_THEN_ELSE (mode, cmp,
18752 op_true = force_reg (mode, op_true);
18753 op_false = force_reg (mode, op_false);
18755 t2 = gen_reg_rtx (mode);
18757 t3 = gen_reg_rtx (mode);
18761 x = gen_rtx_AND (mode, op_true, cmp);
18762 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
18764 x = gen_rtx_NOT (mode, cmp);
18765 x = gen_rtx_AND (mode, x, op_false);
18766 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
18768 x = gen_rtx_IOR (mode, t3, t2);
18769 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18773 /* Expand a floating-point conditional move. Return true if successful. */
18776 ix86_expand_fp_movcc (rtx operands[])
18778 enum machine_mode mode = GET_MODE (operands[0]);
18779 enum rtx_code code = GET_CODE (operands[1]);
18780 rtx tmp, compare_op;
18781 rtx op0 = XEXP (operands[1], 0);
18782 rtx op1 = XEXP (operands[1], 1);
18784 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
18786 enum machine_mode cmode;
18788 /* Since we have no cmove for sse registers, don't force bad register
18789 allocation just to gain access to it. Deny movcc when the
18790 comparison mode doesn't match the move mode. */
18791 cmode = GET_MODE (op0);
18792 if (cmode == VOIDmode)
18793 cmode = GET_MODE (op1);
18797 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
18798 if (code == UNKNOWN)
18801 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
18802 operands[2], operands[3]))
18805 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
18806 operands[2], operands[3]);
18807 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
18811 /* The floating point conditional move instructions don't directly
18812 support conditions resulting from a signed integer comparison. */
18814 compare_op = ix86_expand_compare (code, op0, op1);
18815 if (!fcmov_comparison_operator (compare_op, VOIDmode))
18817 tmp = gen_reg_rtx (QImode);
18818 ix86_expand_setcc (tmp, code, op0, op1);
18820 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
18823 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
18824 gen_rtx_IF_THEN_ELSE (mode, compare_op,
18825 operands[2], operands[3])));
18830 /* Expand a floating-point vector conditional move; a vcond operation
18831 rather than a movcc operation. */
18834 ix86_expand_fp_vcond (rtx operands[])
18836 enum rtx_code code = GET_CODE (operands[3]);
18839 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
18840 &operands[4], &operands[5]);
18841 if (code == UNKNOWN)
18844 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
18845 operands[5], operands[1], operands[2]))
18848 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
18849 operands[1], operands[2]);
18850 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
18854 /* Expand a signed/unsigned integral vector conditional move. */
18857 ix86_expand_int_vcond (rtx operands[])
18859 enum machine_mode mode = GET_MODE (operands[0]);
18860 enum rtx_code code = GET_CODE (operands[3]);
18861 bool negate = false;
18864 cop0 = operands[4];
18865 cop1 = operands[5];
18867 /* XOP supports all of the comparisons on all vector int types. */
18870 /* Canonicalize the comparison to EQ, GT, GTU. */
18881 code = reverse_condition (code);
18887 code = reverse_condition (code);
18893 code = swap_condition (code);
18894 x = cop0, cop0 = cop1, cop1 = x;
18898 gcc_unreachable ();
18901 /* Only SSE4.1/SSE4.2 supports V2DImode. */
18902 if (mode == V2DImode)
18907 /* SSE4.1 supports EQ. */
18908 if (!TARGET_SSE4_1)
18914 /* SSE4.2 supports GT/GTU. */
18915 if (!TARGET_SSE4_2)
18920 gcc_unreachable ();
18924 /* Unsigned parallel compare is not supported by the hardware.
18925 Play some tricks to turn this into a signed comparison against 0. */
18929 cop0 = force_reg (mode, cop0);
18937 rtx (*gen_sub3) (rtx, rtx, rtx);
18939 /* Subtract (-(INT MAX) - 1) from both operands to make them signed. */
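          /* E.g. for V4SImode, x <u y iff (x - 0x80000000) <s (y - 0x80000000);
             the bias subtraction (equivalently an XOR of the sign bit) turns
             the unsigned compare into the signed compare the hardware has.  */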
18941 mask = ix86_build_signbit_mask (mode, true, false);
18942 gen_sub3 = (mode == V4SImode
18943 ? gen_subv4si3 : gen_subv2di3);
18944 t1 = gen_reg_rtx (mode);
18945 emit_insn (gen_sub3 (t1, cop0, mask));
18947 t2 = gen_reg_rtx (mode);
18948 emit_insn (gen_sub3 (t2, cop1, mask));
18958 /* Perform a parallel unsigned saturating subtraction. */
18959 x = gen_reg_rtx (mode);
18960 emit_insn (gen_rtx_SET (VOIDmode, x,
18961 gen_rtx_US_MINUS (mode, cop0, cop1)));
18964 cop1 = CONST0_RTX (mode);
18970 gcc_unreachable ();
18975 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
18976 operands[1+negate], operands[2-negate]);
18978 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
18979 operands[2-negate]);
18983 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
18984 true if we should do zero extension, else sign extension. HIGH_P is
18985 true if we want the N/2 high elements, else the low elements. */
18988 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
18990 enum machine_mode imode = GET_MODE (operands[1]);
18991 rtx (*unpack)(rtx, rtx, rtx);
18998 unpack = gen_vec_interleave_highv16qi;
19000 unpack = gen_vec_interleave_lowv16qi;
19004 unpack = gen_vec_interleave_highv8hi;
19006 unpack = gen_vec_interleave_lowv8hi;
19010 unpack = gen_vec_interleave_highv4si;
19012 unpack = gen_vec_interleave_lowv4si;
19015 gcc_unreachable ();
19018 dest = gen_lowpart (imode, operands[0]);
19021 se = force_reg (imode, CONST0_RTX (imode));
19023 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
19024 operands[1], pc_rtx, pc_rtx);
19026 emit_insn (unpack (dest, operands[1], se));
19029 /* This function performs the same task as ix86_expand_sse_unpack,
19030 but with SSE4.1 instructions. */
19033 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
19035 enum machine_mode imode = GET_MODE (operands[1]);
19036 rtx (*unpack)(rtx, rtx);
19043 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
19045 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
19049 unpack = gen_sse4_1_zero_extendv4hiv4si2;
19051 unpack = gen_sse4_1_sign_extendv4hiv4si2;
19055 unpack = gen_sse4_1_zero_extendv2siv2di2;
19057 unpack = gen_sse4_1_sign_extendv2siv2di2;
19060 gcc_unreachable ();
19063 dest = operands[0];
19066 /* Shift the higher 8 bytes into the lower 8 bytes. */
19067 src = gen_reg_rtx (imode);
19068 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, src),
19069 gen_lowpart (V1TImode, operands[1]),
19075 emit_insn (unpack (dest, src));
19078 /* Expand conditional increment or decrement using adc/sbb instructions.
19079 The default case using setcc followed by a conditional move can be
19080 done by generic code. */
19082 ix86_expand_int_addcc (rtx operands[])
19084 enum rtx_code code = GET_CODE (operands[1]);
19086 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
19088 rtx val = const0_rtx;
19089 bool fpcmp = false;
19090 enum machine_mode mode;
19091 rtx op0 = XEXP (operands[1], 0);
19092 rtx op1 = XEXP (operands[1], 1);
19094 if (operands[3] != const1_rtx
19095 && operands[3] != constm1_rtx)
19097 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
19099 code = GET_CODE (compare_op);
19101 flags = XEXP (compare_op, 0);
19103 if (GET_MODE (flags) == CCFPmode
19104 || GET_MODE (flags) == CCFPUmode)
19107 code = ix86_fp_compare_code_to_integer (code);
19114 PUT_CODE (compare_op,
19115 reverse_condition_maybe_unordered
19116 (GET_CODE (compare_op)));
19118 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
19121 mode = GET_MODE (operands[0]);
19123 /* Construct either adc or sbb insn. */
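  /* E.g. "x += (a < b)" with unsigned operands becomes
       cmpl  %ebx, %eax
       adcl  $0, %ecx
     letting the carry from the compare feed the add directly.  */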
19124 if ((code == LTU) == (operands[3] == constm1_rtx))
19129 insn = gen_subqi3_carry;
19132 insn = gen_subhi3_carry;
19135 insn = gen_subsi3_carry;
19138 insn = gen_subdi3_carry;
19141 gcc_unreachable ();
19149 insn = gen_addqi3_carry;
19152 insn = gen_addhi3_carry;
19155 insn = gen_addsi3_carry;
19158 insn = gen_adddi3_carry;
19161 gcc_unreachable ();
19164 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
19170 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
19171 but works for floating point parameters and nonoffsettable memories.
19172 For pushes, it returns just stack offsets; the values will be saved
19173 in the right order.  At most four parts are generated. */
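/* E.g. on ia32 a DImode register becomes two consecutive SImode hard
   registers, and an XFmode constant pool value is returned as three
   SImode immediates.  */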
19176 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
19181 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
19183 size = (GET_MODE_SIZE (mode) + 4) / 8;
19185 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
19186 gcc_assert (size >= 2 && size <= 4);
19188 /* Optimize constant pool reference to immediates. This is used by fp
19189 moves, which force all constants to memory to allow combining. */
19190 if (MEM_P (operand) && MEM_READONLY_P (operand))
19192 rtx tmp = maybe_get_pool_constant (operand);
19197 if (MEM_P (operand) && !offsettable_memref_p (operand))
19199 /* The only non-offsettable memories we handle are pushes. */
19200 int ok = push_operand (operand, VOIDmode);
19204 operand = copy_rtx (operand);
19205 PUT_MODE (operand, Pmode);
19206 parts[0] = parts[1] = parts[2] = parts[3] = operand;
19210 if (GET_CODE (operand) == CONST_VECTOR)
19212 enum machine_mode imode = int_mode_for_mode (mode);
19213 /* Caution: if we looked through a constant pool memory above,
19214 the operand may actually have a different mode now. That's
19215 ok, since we want to pun this all the way back to an integer. */
19216 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
19217 gcc_assert (operand != NULL);
19223 if (mode == DImode)
19224 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
19229 if (REG_P (operand))
19231 gcc_assert (reload_completed);
19232 for (i = 0; i < size; i++)
19233 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
19235 else if (offsettable_memref_p (operand))
19237 operand = adjust_address (operand, SImode, 0);
19238 parts[0] = operand;
19239 for (i = 1; i < size; i++)
19240 parts[i] = adjust_address (operand, SImode, 4 * i);
19242 else if (GET_CODE (operand) == CONST_DOUBLE)
19247 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
19251 real_to_target (l, &r, mode);
19252 parts[3] = gen_int_mode (l[3], SImode);
19253 parts[2] = gen_int_mode (l[2], SImode);
19256 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
19257 parts[2] = gen_int_mode (l[2], SImode);
19260 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
19263 gcc_unreachable ();
19265 parts[1] = gen_int_mode (l[1], SImode);
19266 parts[0] = gen_int_mode (l[0], SImode);
19269 gcc_unreachable ();
19274 if (mode == TImode)
19275 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
19276 if (mode == XFmode || mode == TFmode)
19278 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
19279 if (REG_P (operand))
19281 gcc_assert (reload_completed);
19282 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
19283 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
19285 else if (offsettable_memref_p (operand))
19287 operand = adjust_address (operand, DImode, 0);
19288 parts[0] = operand;
19289 parts[1] = adjust_address (operand, upper_mode, 8);
19291 else if (GET_CODE (operand) == CONST_DOUBLE)
19296 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
19297 real_to_target (l, &r, mode);
19299 /* Do not use a shift by 32, to avoid a warning on 32-bit systems. */
19300 if (HOST_BITS_PER_WIDE_INT >= 64)
19303 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
19304 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
19307 parts[0] = immed_double_const (l[0], l[1], DImode);
19309 if (upper_mode == SImode)
19310 parts[1] = gen_int_mode (l[2], SImode);
19311 else if (HOST_BITS_PER_WIDE_INT >= 64)
19314 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
19315 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
19318 parts[1] = immed_double_const (l[2], l[3], DImode);
19321 gcc_unreachable ();
19328 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
19329 Return false when normal moves are needed; true when all required
19330 insns have been emitted.  Operands 2-5 receive the destination parts
19331 in the correct order; operands 6-9 receive the source parts. */
19334 ix86_split_long_move (rtx operands[])
19339 int collisions = 0;
19340 enum machine_mode mode = GET_MODE (operands[0]);
19341 bool collisionparts[4];
19343 /* The DFmode expanders may ask us to move double.
19344 For a 64-bit target this is a single move.  By hiding the fact
19345 here we simplify the i386.md splitters. */
19346 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
19348 /* Optimize constant pool reference to immediates. This is used by
19349 fp moves, which force all constants to memory to allow combining. */
19351 if (MEM_P (operands[1])
19352 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
19353 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
19354 operands[1] = get_pool_constant (XEXP (operands[1], 0));
19355 if (push_operand (operands[0], VOIDmode))
19357 operands[0] = copy_rtx (operands[0]);
19358 PUT_MODE (operands[0], Pmode);
19361 operands[0] = gen_lowpart (DImode, operands[0]);
19362 operands[1] = gen_lowpart (DImode, operands[1]);
19363 emit_move_insn (operands[0], operands[1]);
19367 /* The only non-offsettable memory we handle is a push. */
19368 if (push_operand (operands[0], VOIDmode))
19371 gcc_assert (!MEM_P (operands[0])
19372 || offsettable_memref_p (operands[0]));
19374 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
19375 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
19377 /* When emitting a push, take care of source operands on the stack. */
19378 if (push && MEM_P (operands[1])
19379 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
19381 rtx src_base = XEXP (part[1][nparts - 1], 0);
19383 /* Compensate for the stack decrement by 4. */
19384 if (!TARGET_64BIT && nparts == 3
19385 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
19386 src_base = plus_constant (src_base, 4);
19388 /* src_base refers to the stack pointer and is
19389 automatically decreased by the emitted pushes. */
19390 for (i = 0; i < nparts; i++)
19391 part[1][i] = change_address (part[1][i],
19392 GET_MODE (part[1][i]), src_base);
19395 /* We need to do the copy in the right order in case an address register
19396 of the source overlaps the destination. */
19397 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
19401 for (i = 0; i < nparts; i++)
19404 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
19405 if (collisionparts[i])
19409 /* A collision in the middle part can be handled by reordering. */
19410 if (collisions == 1 && nparts == 3 && collisionparts [1])
19412 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
19413 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
19415 else if (collisions == 1
19417 && (collisionparts [1] || collisionparts [2]))
19419 if (collisionparts [1])
19421 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
19422 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
19426 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
19427 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
19431 /* If there are more collisions, we can't handle them by reordering.
19432 Do an lea to the last part and use only one colliding move. */
19433 else if (collisions > 1)
19439 base = part[0][nparts - 1];
19441 /* Handle the case when the last part isn't valid for lea.
19442 This happens in 64-bit mode when storing the 12-byte XFmode. */
19443 if (GET_MODE (base) != Pmode)
19444 base = gen_rtx_REG (Pmode, REGNO (base));
19446 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
19447 part[1][0] = replace_equiv_address (part[1][0], base);
19448 for (i = 1; i < nparts; i++)
19450 tmp = plus_constant (base, UNITS_PER_WORD * i);
19451 part[1][i] = replace_equiv_address (part[1][i], tmp);
19462 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
19463 emit_insn (gen_addsi3 (stack_pointer_rtx,
19464 stack_pointer_rtx, GEN_INT (-4)));
19465 emit_move_insn (part[0][2], part[1][2]);
19467 else if (nparts == 4)
19469 emit_move_insn (part[0][3], part[1][3]);
19470 emit_move_insn (part[0][2], part[1][2]);
19475 /* In 64-bit mode we don't have a 32-bit push available.  If this is a
19476 register, it is OK: we will just use the larger counterpart.  We also
19477 retype the memory, which comes from an attempt to avoid the REX prefix
19478 on moving the second half of a TFmode value. */
19479 if (GET_MODE (part[1][1]) == SImode)
19481 switch (GET_CODE (part[1][1]))
19484 part[1][1] = adjust_address (part[1][1], DImode, 0);
19488 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
19492 gcc_unreachable ();
19495 if (GET_MODE (part[1][0]) == SImode)
19496 part[1][0] = part[1][1];
19499 emit_move_insn (part[0][1], part[1][1]);
19500 emit_move_insn (part[0][0], part[1][0]);
19504 /* Choose the correct order so as not to overwrite the source before it is copied. */
19505 if ((REG_P (part[0][0])
19506 && REG_P (part[1][1])
19507 && (REGNO (part[0][0]) == REGNO (part[1][1])
19509 && REGNO (part[0][0]) == REGNO (part[1][2]))
19511 && REGNO (part[0][0]) == REGNO (part[1][3]))))
19513 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
19515 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
19517 operands[2 + i] = part[0][j];
19518 operands[6 + i] = part[1][j];
19523 for (i = 0; i < nparts; i++)
19525 operands[2 + i] = part[0][i];
19526 operands[6 + i] = part[1][i];
19530 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
19531 if (optimize_insn_for_size_p ())
19533 for (j = 0; j < nparts - 1; j++)
19534 if (CONST_INT_P (operands[6 + j])
19535 && operands[6 + j] != const0_rtx
19536 && REG_P (operands[2 + j]))
19537 for (i = j; i < nparts - 1; i++)
19538 if (CONST_INT_P (operands[7 + i])
19539 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
19540 operands[7 + i] = operands[2 + j];
19543 for (i = 0; i < nparts; i++)
19544 emit_move_insn (operands[2 + i], operands[6 + i]);
19549 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
19550 left shift by a constant, either using a single shift or
19551 a sequence of add instructions. */
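/* E.g. "x <<= 1" becomes "addl %eax, %eax" when the add sequence is
   cheaper than a shift by a constant; larger counts just repeat the add.  */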
19554 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
19556 rtx (*insn)(rtx, rtx, rtx);
19559 || (count * ix86_cost->add <= ix86_cost->shift_const
19560 && !optimize_insn_for_size_p ()))
19562 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
19563 while (count-- > 0)
19564 emit_insn (insn (operand, operand, operand));
19568 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
19569 emit_insn (insn (operand, operand, GEN_INT (count)));
19574 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
19576 rtx (*gen_ashl3)(rtx, rtx, rtx);
19577 rtx (*gen_shld)(rtx, rtx, rtx);
19578 int half_width = GET_MODE_BITSIZE (mode) >> 1;
19580 rtx low[2], high[2];
19583 if (CONST_INT_P (operands[2]))
19585 split_double_mode (mode, operands, 2, low, high);
19586 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
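      /* E.g. a DImode shift left by 40 on ia32 becomes
         high = low << 8; low = 0.  */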
19588 if (count >= half_width)
19590 emit_move_insn (high[0], low[1]);
19591 emit_move_insn (low[0], const0_rtx);
19593 if (count > half_width)
19594 ix86_expand_ashl_const (high[0], count - half_width, mode);
19598 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
19600 if (!rtx_equal_p (operands[0], operands[1]))
19601 emit_move_insn (operands[0], operands[1]);
19603 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
19604 ix86_expand_ashl_const (low[0], count, mode);
19609 split_double_mode (mode, operands, 1, low, high);
19611 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
19613 if (operands[1] == const1_rtx)
19615 /* Assuming we've chosen QImode-capable registers, 1 << N
19616 can be done with two 32/64-bit shifts, no branches, no cmoves. */
19617 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
19619 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
19621 ix86_expand_clear (low[0]);
19622 ix86_expand_clear (high[0]);
19623 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
19625 d = gen_lowpart (QImode, low[0]);
19626 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
19627 s = gen_rtx_EQ (QImode, flags, const0_rtx);
19628 emit_insn (gen_rtx_SET (VOIDmode, d, s));
19630 d = gen_lowpart (QImode, high[0]);
19631 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
19632 s = gen_rtx_NE (QImode, flags, const0_rtx);
19633 emit_insn (gen_rtx_SET (VOIDmode, d, s));
19636 /* Otherwise, we can get the same results by manually performing
19637 a bit extract operation on bit 5/6, and then performing the two
19638 shifts. The two methods of getting 0/1 into low/high are exactly
19639 the same size. Avoiding the shift in the bit extract case helps
19640 pentium4 a bit; no one else seems to care much either way. */
19643 enum machine_mode half_mode;
19644 rtx (*gen_lshr3)(rtx, rtx, rtx);
19645 rtx (*gen_and3)(rtx, rtx, rtx);
19646 rtx (*gen_xor3)(rtx, rtx, rtx);
19647 HOST_WIDE_INT bits;
19650 if (mode == DImode)
19652 half_mode = SImode;
19653 gen_lshr3 = gen_lshrsi3;
19654 gen_and3 = gen_andsi3;
19655 gen_xor3 = gen_xorsi3;
19660 half_mode = DImode;
19661 gen_lshr3 = gen_lshrdi3;
19662 gen_and3 = gen_anddi3;
19663 gen_xor3 = gen_xordi3;
19667 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
19668 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
19670 x = gen_lowpart (half_mode, operands[2]);
19671 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
19673 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
19674 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
19675 emit_move_insn (low[0], high[0]);
19676 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
19679 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
19680 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
19684 if (operands[1] == constm1_rtx)
19686 /* For -1 << N, we can avoid the shld instruction, because we
19687 know that we're shifting 0...31/63 ones into a -1. */
19688 emit_move_insn (low[0], constm1_rtx);
19689 if (optimize_insn_for_size_p ())
19690 emit_move_insn (high[0], low[0]);
19692 emit_move_insn (high[0], constm1_rtx);
19696 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
19698 if (!rtx_equal_p (operands[0], operands[1]))
19699 emit_move_insn (operands[0], operands[1]);
19701 split_double_mode (mode, operands, 1, low, high);
19702 emit_insn (gen_shld (high[0], low[0], operands[2]));
19705 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
19707 if (TARGET_CMOVE && scratch)
19709 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
19710 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
19712 ix86_expand_clear (scratch);
19713 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
19717 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
19718 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
19720 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
19725 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
19727 rtx (*gen_ashr3)(rtx, rtx, rtx)
19728 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
19729 rtx (*gen_shrd)(rtx, rtx, rtx);
19730 int half_width = GET_MODE_BITSIZE (mode) >> 1;
19732 rtx low[2], high[2];
19735 if (CONST_INT_P (operands[2]))
19737 split_double_mode (mode, operands, 2, low, high);
19738 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
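      /* E.g. a DImode arithmetic shift right by 63 just smears the sign
         bit: high = high >> 31; low = high.  */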
19740 if (count == GET_MODE_BITSIZE (mode) - 1)
19742 emit_move_insn (high[0], high[1]);
19743 emit_insn (gen_ashr3 (high[0], high[0],
19744 GEN_INT (half_width - 1)));
19745 emit_move_insn (low[0], high[0]);
19748 else if (count >= half_width)
19750 emit_move_insn (low[0], high[1]);
19751 emit_move_insn (high[0], low[0]);
19752 emit_insn (gen_ashr3 (high[0], high[0],
19753 GEN_INT (half_width - 1)));
19755 if (count > half_width)
19756 emit_insn (gen_ashr3 (low[0], low[0],
19757 GEN_INT (count - half_width)));
19761 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19763 if (!rtx_equal_p (operands[0], operands[1]))
19764 emit_move_insn (operands[0], operands[1]);
19766 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
19767 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
19772 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19774 if (!rtx_equal_p (operands[0], operands[1]))
19775 emit_move_insn (operands[0], operands[1]);
19777 split_double_mode (mode, operands, 1, low, high);
19779 emit_insn (gen_shrd (low[0], high[0], operands[2]));
19780 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
19782 if (TARGET_CMOVE && scratch)
19784 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
19785 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
19787 emit_move_insn (scratch, high[0]);
19788 emit_insn (gen_ashr3 (scratch, scratch,
19789 GEN_INT (half_width - 1)));
19790 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
19795 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
19796 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
19798 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
19804 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
19806 rtx (*gen_lshr3)(rtx, rtx, rtx)
19807 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
19808 rtx (*gen_shrd)(rtx, rtx, rtx);
19809 int half_width = GET_MODE_BITSIZE (mode) >> 1;
19811 rtx low[2], high[2];
19814 if (CONST_INT_P (operands[2]))
19816 split_double_mode (mode, operands, 2, low, high);
19817 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
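      /* E.g. a DImode logical shift right by 40 becomes
         low = high >> 8; high = 0.  */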
19819 if (count >= half_width)
19821 emit_move_insn (low[0], high[1]);
19822 ix86_expand_clear (high[0]);
19824 if (count > half_width)
19825 emit_insn (gen_lshr3 (low[0], low[0],
19826 GEN_INT (count - half_width)));
19830 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19832 if (!rtx_equal_p (operands[0], operands[1]))
19833 emit_move_insn (operands[0], operands[1]);
19835 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
19836 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
19841 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19843 if (!rtx_equal_p (operands[0], operands[1]))
19844 emit_move_insn (operands[0], operands[1]);
19846 split_double_mode (mode, operands, 1, low, high);
19848 emit_insn (gen_shrd (low[0], high[0], operands[2]));
19849 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
19851 if (TARGET_CMOVE && scratch)
19853 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
19854 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
19856 ix86_expand_clear (scratch);
19857 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
19862 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
19863 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
19865 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
19870 /* Predict the just-emitted jump instruction to be taken with probability PROB. */
19872 predict_jump (int prob)
19874 rtx insn = get_last_insn ();
19875 gcc_assert (JUMP_P (insn));
19876 add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
19879 /* Helper function for the string operations below.  Test VARIABLE for
19880 whether it is aligned to VALUE bytes.  If so, jump to the label. */
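/* E.g. with VALUE == 1 this emits roughly "testl $1, var; je label":
   the jump to the label is taken when VARIABLE is already aligned, so a
   caller can place the unaligned-prefix fixup code before the label.  */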
19882 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
19884 rtx label = gen_label_rtx ();
19885 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
19886 if (GET_MODE (variable) == DImode)
19887 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
19889 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
19890 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
19893 predict_jump (REG_BR_PROB_BASE * 50 / 100);
19895 predict_jump (REG_BR_PROB_BASE * 90 / 100);
19899 /* Adjust COUNTREG by VALUE. */
19901 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
19903 rtx (*gen_add)(rtx, rtx, rtx)
19904 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
19906 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
19909 /* Zero-extend the possibly SImode EXP to a Pmode register. */
19911 ix86_zero_extend_to_Pmode (rtx exp)
19914 if (GET_MODE (exp) == VOIDmode)
19915 return force_reg (Pmode, exp);
19916 if (GET_MODE (exp) == Pmode)
19917 return copy_to_mode_reg (Pmode, exp);
19918 r = gen_reg_rtx (Pmode);
19919 emit_insn (gen_zero_extendsidi2 (r, exp));
19923 /* Divide COUNTREG by SCALE. */
19925 scale_counter (rtx countreg, int scale)
19931 if (CONST_INT_P (countreg))
19932 return GEN_INT (INTVAL (countreg) / scale);
19933 gcc_assert (REG_P (countreg));
19935 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
19936 GEN_INT (exact_log2 (scale)),
19937 NULL, 1, OPTAB_DIRECT);
19941 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
19942 DImode for constant loop counts. */
19944 static enum machine_mode
19945 counter_mode (rtx count_exp)
19947 if (GET_MODE (count_exp) != VOIDmode)
19948 return GET_MODE (count_exp);
19949 if (!CONST_INT_P (count_exp))
19951 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
19956 /* When SRCPTR is non-NULL, output a simple loop to move memory from
19957 SRCPTR to DESTPTR via chunks of MODE, unrolled UNROLL times; the
19958 overall size is COUNT, specified in bytes.  When SRCPTR is NULL, output the
19959 equivalent loop to set memory by VALUE (supposed to be in MODE).
19961 The size is rounded down to a whole number of chunks moved at once.
19962 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
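/* The emitted loop has roughly this shape (move case; the size == 0
   early exit is only emitted for byte-sized chunks):

     size = count & ~(CHUNK * UNROLL - 1);
     if (size == 0) goto out;
     iter = 0;
   top:
     copy UNROLL chunks of CHUNK bytes from srcptr + iter to destptr + iter;
     iter += CHUNK * UNROLL;
     if (iter < size) goto top;
     destptr += iter;  srcptr += iter;
   out:  */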
19966 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
19967 rtx destptr, rtx srcptr, rtx value,
19968 rtx count, enum machine_mode mode, int unroll,
19971 rtx out_label, top_label, iter, tmp;
19972 enum machine_mode iter_mode = counter_mode (count);
19973 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
19974 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
19980 top_label = gen_label_rtx ();
19981 out_label = gen_label_rtx ();
19982 iter = gen_reg_rtx (iter_mode);
19984 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
19985 NULL, 1, OPTAB_DIRECT);
19986 /* Those two should combine. */
19987 if (piece_size == const1_rtx)
19989 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
19991 predict_jump (REG_BR_PROB_BASE * 10 / 100);
19993 emit_move_insn (iter, const0_rtx);
19995 emit_label (top_label);
19997 tmp = convert_modes (Pmode, iter_mode, iter, true);
19998 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
19999 destmem = change_address (destmem, mode, x_addr);
20003 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
20004 srcmem = change_address (srcmem, mode, y_addr);
20006 /* When unrolling for chips that reorder memory reads and writes,
20007 we can save registers by using a single temporary.
20008 Also, using 4 temporaries is overkill in 32-bit mode. */
20009 if (!TARGET_64BIT && 0)
20011 for (i = 0; i < unroll; i++)
20016 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
20018 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
20020 emit_move_insn (destmem, srcmem);
20026 gcc_assert (unroll <= 4);
20027 for (i = 0; i < unroll; i++)
20029 tmpreg[i] = gen_reg_rtx (mode);
20033 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
20035 emit_move_insn (tmpreg[i], srcmem);
20037 for (i = 0; i < unroll; i++)
20042 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
20044 emit_move_insn (destmem, tmpreg[i]);
20049 for (i = 0; i < unroll; i++)
20053 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
20054 emit_move_insn (destmem, value);
20057 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
20058 true, OPTAB_LIB_WIDEN);
20060 emit_move_insn (iter, tmp);
20062 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
20064 if (expected_size != -1)
20066 expected_size /= GET_MODE_SIZE (mode) * unroll;
20067 if (expected_size == 0)
20069 else if (expected_size > REG_BR_PROB_BASE)
20070 predict_jump (REG_BR_PROB_BASE - 1);
20072 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
20075 predict_jump (REG_BR_PROB_BASE * 80 / 100);
20076 iter = ix86_zero_extend_to_Pmode (iter);
20077 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
20078 true, OPTAB_LIB_WIDEN);
20079 if (tmp != destptr)
20080 emit_move_insn (destptr, tmp);
20083 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
20084 true, OPTAB_LIB_WIDEN);
20086 emit_move_insn (srcptr, tmp);
20088 emit_label (out_label);
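/* Editorial sketch (illustrative only, not compiled into GCC; CHUNK
   stands for GET_MODE_SIZE (mode) and UNROLL for the unroll factor):
   the control flow generated above, rendered as plain C.  The tail of
   count % (chunk * unroll) bytes is left for the caller's epilogue.  */
#if 0
#include <stddef.h>
#include <string.h>

static void
copy_loop_model (char *dst, const char *src, size_t count,
                 size_t chunk, size_t unroll)
{
  size_t piece = chunk * unroll;         /* assumed to be a power of two */
  size_t size = count & ~(piece - 1);    /* round down to whole pieces */
  size_t iter, i;

  for (iter = 0; iter < size; iter += piece)
    for (i = 0; i < unroll; i++)         /* the unrolled chunk moves */
      memcpy (dst + iter + i * chunk, src + iter + i * chunk, chunk);
  /* destptr and srcptr are then advanced by ITER.  */
}
#endif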
20091 /* Output a "rep; mov" instruction.
20092 Arguments have the same meaning as for the previous function. */
20094 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
20095 rtx destptr, rtx srcptr,
20097 enum machine_mode mode)
20103 /* If the size is known, it is shorter to use rep movs. */
20104 if (mode == QImode && CONST_INT_P (count)
20105 && !(INTVAL (count) & 3))
20108 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
20109 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
20110 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
20111 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
20112 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
20113 if (mode != QImode)
20115 destexp = gen_rtx_ASHIFT (Pmode, countreg,
20116 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
20117 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
20118 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
20119 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
20120 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
20124 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
20125 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
20127 if (CONST_INT_P (count))
20129 count = GEN_INT (INTVAL (count)
20130 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
20131 destmem = shallow_copy_rtx (destmem);
20132 srcmem = shallow_copy_rtx (srcmem);
20133 set_mem_size (destmem, count);
20134 set_mem_size (srcmem, count);
20138 if (MEM_SIZE (destmem))
20139 set_mem_size (destmem, NULL_RTX);
20140 if (MEM_SIZE (srcmem))
20141 set_mem_size (srcmem, NULL_RTX);
20143 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
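/* Editorial sketch (illustrative only, not compiled into GCC; names are
   invented): what the emitted "rep movs" performs.  COUNTREG already
   holds count / GET_MODE_SIZE (mode), so no partial element moves here.  */
#if 0
#include <stddef.h>
#include <string.h>

static void
rep_movs_model (unsigned char **dst, const unsigned char **src,
                size_t countreg, size_t elem_size)
{
  while (countreg--)
    {
      memcpy (*dst, *src, elem_size);   /* one element per iteration */
      *dst += elem_size;                /* edi advances */
      *src += elem_size;                /* esi advances */
    }
}
#endif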
20147 /* Output a "rep; stos" instruction.
20148 Arguments have the same meaning as for the previous function. */
20150 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
20151 rtx count, enum machine_mode mode,
20157 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
20158 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
20159 value = force_reg (mode, gen_lowpart (mode, value));
20160 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
20161 if (mode != QImode)
20163 destexp = gen_rtx_ASHIFT (Pmode, countreg,
20164 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
20165 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
20168 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
20169 if (orig_value == const0_rtx && CONST_INT_P (count))
20171 count = GEN_INT (INTVAL (count)
20172 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
20173 destmem = shallow_copy_rtx (destmem);
20174 set_mem_size (destmem, count);
20176 else if (MEM_SIZE (destmem))
20177 set_mem_size (destmem, NULL_RTX);
20178 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
20182 emit_strmov (rtx destmem, rtx srcmem,
20183 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
20185 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
20186 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
20187 emit_insn (gen_strmov (destptr, dest, srcptr, src));
20190 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
20192 expand_movmem_epilogue (rtx destmem, rtx srcmem,
20193 rtx destptr, rtx srcptr, rtx count, int max_size)
20196 if (CONST_INT_P (count))
20198 HOST_WIDE_INT countval = INTVAL (count);
20201 if ((countval & 0x10) && max_size > 16)
20205 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
20206 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
20209 gcc_unreachable ();
20212 if ((countval & 0x08) && max_size > 8)
20215 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
20218 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
20219 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
20223 if ((countval & 0x04) && max_size > 4)
20225 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
20228 if ((countval & 0x02) && max_size > 2)
20230 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
20233 if ((countval & 0x01) && max_size > 1)
20235 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
20242 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
20243 count, 1, OPTAB_DIRECT);
20244 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
20245 count, QImode, 1, 4);
20249 /* When single-insn stringops are available, we can cheaply advance the
20250 dest and src pointers. Otherwise we save code size by maintaining an offset
20251 (zero is readily available from the preceding rep operation) and using x86 addressing modes. */
20253 if (TARGET_SINGLE_STRINGOP)
20257 rtx label = ix86_expand_aligntest (count, 4, true);
20258 src = change_address (srcmem, SImode, srcptr);
20259 dest = change_address (destmem, SImode, destptr);
20260 emit_insn (gen_strmov (destptr, dest, srcptr, src));
20261 emit_label (label);
20262 LABEL_NUSES (label) = 1;
20266 rtx label = ix86_expand_aligntest (count, 2, true);
20267 src = change_address (srcmem, HImode, srcptr);
20268 dest = change_address (destmem, HImode, destptr);
20269 emit_insn (gen_strmov (destptr, dest, srcptr, src));
20270 emit_label (label);
20271 LABEL_NUSES (label) = 1;
20275 rtx label = ix86_expand_aligntest (count, 1, true);
20276 src = change_address (srcmem, QImode, srcptr);
20277 dest = change_address (destmem, QImode, destptr);
20278 emit_insn (gen_strmov (destptr, dest, srcptr, src));
20279 emit_label (label);
20280 LABEL_NUSES (label) = 1;
20285 rtx offset = force_reg (Pmode, const0_rtx);
20290 rtx label = ix86_expand_aligntest (count, 4, true);
20291 src = change_address (srcmem, SImode, srcptr);
20292 dest = change_address (destmem, SImode, destptr);
20293 emit_move_insn (dest, src);
20294 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
20295 true, OPTAB_LIB_WIDEN);
20297 emit_move_insn (offset, tmp);
20298 emit_label (label);
20299 LABEL_NUSES (label) = 1;
20303 rtx label = ix86_expand_aligntest (count, 2, true);
20304 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
20305 src = change_address (srcmem, HImode, tmp);
20306 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
20307 dest = change_address (destmem, HImode, tmp);
20308 emit_move_insn (dest, src);
20309 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
20310 true, OPTAB_LIB_WIDEN);
20312 emit_move_insn (offset, tmp);
20313 emit_label (label);
20314 LABEL_NUSES (label) = 1;
20318 rtx label = ix86_expand_aligntest (count, 1, true);
20319 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
20320 src = change_address (srcmem, QImode, tmp);
20321 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
20322 dest = change_address (destmem, QImode, tmp);
20323 emit_move_insn (dest, src);
20324 emit_label (label);
20325 LABEL_NUSES (label) = 1;
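/* Editorial sketch (illustrative only, not compiled into GCC; names are
   invented): the constant-count branch above copies the tail by testing
   one bit of the residual count per chunk size (16 is handled as two
   8-byte moves on 64-bit targets), so at most one move per power of two
   is emitted.  */
#if 0
#include <stddef.h>
#include <string.h>

static void
epilogue_model (char *dst, const char *src, size_t residual)
{
  size_t off = 0;
  if (residual & 8) { memcpy (dst + off, src + off, 8); off += 8; }
  if (residual & 4) { memcpy (dst + off, src + off, 4); off += 4; }
  if (residual & 2) { memcpy (dst + off, src + off, 2); off += 2; }
  if (residual & 1) memcpy (dst + off, src + off, 1);
}
#endif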
20330 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
20332 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
20333 rtx count, int max_size)
20336 expand_simple_binop (counter_mode (count), AND, count,
20337 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
20338 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
20339 gen_lowpart (QImode, value), count, QImode,
20343 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
20345 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
20349 if (CONST_INT_P (count))
20351 HOST_WIDE_INT countval = INTVAL (count);
20354 if ((countval & 0x10) && max_size > 16)
20358 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
20359 emit_insn (gen_strset (destptr, dest, value));
20360 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
20361 emit_insn (gen_strset (destptr, dest, value));
20364 gcc_unreachable ();
20367 if ((countval & 0x08) && max_size > 8)
20371 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
20372 emit_insn (gen_strset (destptr, dest, value));
20376 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
20377 emit_insn (gen_strset (destptr, dest, value));
20378 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
20379 emit_insn (gen_strset (destptr, dest, value));
20383 if ((countval & 0x04) && max_size > 4)
20385 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
20386 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
20389 if ((countval & 0x02) && max_size > 2)
20391 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
20392 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
20395 if ((countval & 0x01) && max_size > 1)
20397 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
20398 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
20405 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
20410 rtx label = ix86_expand_aligntest (count, 16, true);
20413 dest = change_address (destmem, DImode, destptr);
20414 emit_insn (gen_strset (destptr, dest, value));
20415 emit_insn (gen_strset (destptr, dest, value));
20419 dest = change_address (destmem, SImode, destptr);
20420 emit_insn (gen_strset (destptr, dest, value));
20421 emit_insn (gen_strset (destptr, dest, value));
20422 emit_insn (gen_strset (destptr, dest, value));
20423 emit_insn (gen_strset (destptr, dest, value));
20425 emit_label (label);
20426 LABEL_NUSES (label) = 1;
20430 rtx label = ix86_expand_aligntest (count, 8, true);
20433 dest = change_address (destmem, DImode, destptr);
20434 emit_insn (gen_strset (destptr, dest, value));
20438 dest = change_address (destmem, SImode, destptr);
20439 emit_insn (gen_strset (destptr, dest, value));
20440 emit_insn (gen_strset (destptr, dest, value));
20442 emit_label (label);
20443 LABEL_NUSES (label) = 1;
20447 rtx label = ix86_expand_aligntest (count, 4, true);
20448 dest = change_address (destmem, SImode, destptr);
20449 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
20450 emit_label (label);
20451 LABEL_NUSES (label) = 1;
20455 rtx label = ix86_expand_aligntest (count, 2, true);
20456 dest = change_address (destmem, HImode, destptr);
20457 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
20458 emit_label (label);
20459 LABEL_NUSES (label) = 1;
20463 rtx label = ix86_expand_aligntest (count, 1, true);
20464 dest = change_address (destmem, QImode, destptr);
20465 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
20466 emit_label (label);
20467 LABEL_NUSES (label) = 1;
20471 /* Copy enough bytes from SRC to DEST to raise the alignment of DEST, known
20472 to be aligned to ALIGN, up to DESIRED_ALIGNMENT. */
20474 expand_movmem_prologue (rtx destmem, rtx srcmem,
20475 rtx destptr, rtx srcptr, rtx count,
20476 int align, int desired_alignment)
20478 if (align <= 1 && desired_alignment > 1)
20480 rtx label = ix86_expand_aligntest (destptr, 1, false);
20481 srcmem = change_address (srcmem, QImode, srcptr);
20482 destmem = change_address (destmem, QImode, destptr);
20483 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
20484 ix86_adjust_counter (count, 1);
20485 emit_label (label);
20486 LABEL_NUSES (label) = 1;
20488 if (align <= 2 && desired_alignment > 2)
20490 rtx label = ix86_expand_aligntest (destptr, 2, false);
20491 srcmem = change_address (srcmem, HImode, srcptr);
20492 destmem = change_address (destmem, HImode, destptr);
20493 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
20494 ix86_adjust_counter (count, 2);
20495 emit_label (label);
20496 LABEL_NUSES (label) = 1;
20498 if (align <= 4 && desired_alignment > 4)
20500 rtx label = ix86_expand_aligntest (destptr, 4, false);
20501 srcmem = change_address (srcmem, SImode, srcptr);
20502 destmem = change_address (destmem, SImode, destptr);
20503 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
20504 ix86_adjust_counter (count, 4);
20505 emit_label (label);
20506 LABEL_NUSES (label) = 1;
20508 gcc_assert (desired_alignment <= 8);
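/* Editorial sketch (illustrative only, not compiled into GCC; names are
   invented): a C model of the alignment prologue above.  One chunk is
   copied for each low destination-address bit that is set, raising the
   alignment one power of two at a time.  */
#if 0
#include <stddef.h>
#include <stdint.h>
#include <string.h>

static void
align_prologue_model (char **dst, const char **src, size_t *count,
                      int desired_align)
{
  if (desired_align > 1 && ((uintptr_t) *dst & 1))
    { memcpy (*dst, *src, 1); *dst += 1; *src += 1; *count -= 1; }
  if (desired_align > 2 && ((uintptr_t) *dst & 2))
    { memcpy (*dst, *src, 2); *dst += 2; *src += 2; *count -= 2; }
  if (desired_align > 4 && ((uintptr_t) *dst & 4))
    { memcpy (*dst, *src, 4); *dst += 4; *src += 4; *count -= 4; }
}
#endif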
20511 /* Copy enough bytes from SRC to DST to align DST to DESIRED_ALIGN.
20512 ALIGN_BYTES is how many bytes need to be copied. */
20514 expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
20515 int desired_align, int align_bytes)
20518 rtx src_size, dst_size;
20520 int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
20521 if (src_align_bytes >= 0)
20522 src_align_bytes = desired_align - src_align_bytes;
20523 src_size = MEM_SIZE (src);
20524 dst_size = MEM_SIZE (dst);
20525 if (align_bytes & 1)
20527 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
20528 src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
20530 emit_insn (gen_strmov (destreg, dst, srcreg, src));
20532 if (align_bytes & 2)
20534 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
20535 src = adjust_automodify_address_nv (src, HImode, srcreg, off);
20536 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
20537 set_mem_align (dst, 2 * BITS_PER_UNIT);
20538 if (src_align_bytes >= 0
20539 && (src_align_bytes & 1) == (align_bytes & 1)
20540 && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
20541 set_mem_align (src, 2 * BITS_PER_UNIT);
20543 emit_insn (gen_strmov (destreg, dst, srcreg, src));
20545 if (align_bytes & 4)
20547 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
20548 src = adjust_automodify_address_nv (src, SImode, srcreg, off);
20549 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
20550 set_mem_align (dst, 4 * BITS_PER_UNIT);
20551 if (src_align_bytes >= 0)
20553 unsigned int src_align = 0;
20554 if ((src_align_bytes & 3) == (align_bytes & 3))
20556 else if ((src_align_bytes & 1) == (align_bytes & 1))
20558 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
20559 set_mem_align (src, src_align * BITS_PER_UNIT);
20562 emit_insn (gen_strmov (destreg, dst, srcreg, src));
20564 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
20565 src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
20566 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
20567 set_mem_align (dst, desired_align * BITS_PER_UNIT);
20568 if (src_align_bytes >= 0)
20570 unsigned int src_align = 0;
20571 if ((src_align_bytes & 7) == (align_bytes & 7))
20573 else if ((src_align_bytes & 3) == (align_bytes & 3))
20575 else if ((src_align_bytes & 1) == (align_bytes & 1))
20577 if (src_align > (unsigned int) desired_align)
20578 src_align = desired_align;
20579 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
20580 set_mem_align (src, src_align * BITS_PER_UNIT);
20583 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
20585 set_mem_size (src, GEN_INT (INTVAL (src_size) - align_bytes));
20590 /* Store enough bytes at DEST to raise its alignment, known to be ALIGN,
20591 up to DESIRED_ALIGNMENT. */
20593 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
20594 int align, int desired_alignment)
20596 if (align <= 1 && desired_alignment > 1)
20598 rtx label = ix86_expand_aligntest (destptr, 1, false);
20599 destmem = change_address (destmem, QImode, destptr);
20600 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
20601 ix86_adjust_counter (count, 1);
20602 emit_label (label);
20603 LABEL_NUSES (label) = 1;
20605 if (align <= 2 && desired_alignment > 2)
20607 rtx label = ix86_expand_aligntest (destptr, 2, false);
20608 destmem = change_address (destmem, HImode, destptr);
20609 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
20610 ix86_adjust_counter (count, 2);
20611 emit_label (label);
20612 LABEL_NUSES (label) = 1;
20614 if (align <= 4 && desired_alignment > 4)
20616 rtx label = ix86_expand_aligntest (destptr, 4, false);
20617 destmem = change_address (destmem, SImode, destptr);
20618 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
20619 ix86_adjust_counter (count, 4);
20620 emit_label (label);
20621 LABEL_NUSES (label) = 1;
20623 gcc_assert (desired_alignment <= 8);
20626 /* Store enough bytes at DST to raise its alignment, known to be ALIGN, up to
20627 DESIRED_ALIGN. ALIGN_BYTES is how many bytes need to be stored. */
20629 expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
20630 int desired_align, int align_bytes)
20633 rtx dst_size = MEM_SIZE (dst);
20634 if (align_bytes & 1)
20636 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
20638 emit_insn (gen_strset (destreg, dst,
20639 gen_lowpart (QImode, value)));
20641 if (align_bytes & 2)
20643 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
20644 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
20645 set_mem_align (dst, 2 * BITS_PER_UNIT);
20647 emit_insn (gen_strset (destreg, dst,
20648 gen_lowpart (HImode, value)));
20650 if (align_bytes & 4)
20652 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
20653 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
20654 set_mem_align (dst, 4 * BITS_PER_UNIT);
20656 emit_insn (gen_strset (destreg, dst,
20657 gen_lowpart (SImode, value)));
20659 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
20660 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
20661 set_mem_align (dst, desired_align * BITS_PER_UNIT);
20663 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
20667 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
20668 static enum stringop_alg
20669 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
20670 int *dynamic_check)
20672 const struct stringop_algs * algs;
20673 bool optimize_for_speed;
20674 /* Algorithms using the rep prefix want at least edi and ecx;
20675 additionally, memset wants eax and memcpy wants esi. Don't
20676 consider such algorithms if the user has appropriated those
20677 registers for their own purposes. */
20678 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
20680 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
20682 #define ALG_USABLE_P(alg) (rep_prefix_usable \
20683 || (alg != rep_prefix_1_byte \
20684 && alg != rep_prefix_4_byte \
20685 && alg != rep_prefix_8_byte))
20686 const struct processor_costs *cost;
20688 /* Even if the string operation call is cold, we still might spend a lot
20689 of time processing large blocks. */
20690 if (optimize_function_for_size_p (cfun)
20691 || (optimize_insn_for_size_p ()
20692 && expected_size != -1 && expected_size < 256))
20693 optimize_for_speed = false;
20695 optimize_for_speed = true;
20697 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
20699 *dynamic_check = -1;
20701 algs = &cost->memset[TARGET_64BIT != 0];
20703 algs = &cost->memcpy[TARGET_64BIT != 0];
20704 if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
20705 return stringop_alg;
20706 /* rep; movq or rep; movl is the smallest variant. */
20707 else if (!optimize_for_speed)
20709 if (!count || (count & 3))
20710 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
20712 return rep_prefix_usable ? rep_prefix_4_byte : loop;
20714 /* Very tiny blocks are best handled via the loop; REP is expensive to set up. */
20716 else if (expected_size != -1 && expected_size < 4)
20717 return loop_1_byte;
20718 else if (expected_size != -1)
20721 enum stringop_alg alg = libcall;
20722 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
20724 /* We get here if the algorithms that were not libcall-based
20725 were rep-prefix based and we are unable to use rep prefixes
20726 based on global register usage. Break out of the loop and
20727 use the heuristic below. */
20728 if (algs->size[i].max == 0)
20730 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
20732 enum stringop_alg candidate = algs->size[i].alg;
20734 if (candidate != libcall && ALG_USABLE_P (candidate))
20736 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
20737 last non-libcall inline algorithm. */
20738 if (TARGET_INLINE_ALL_STRINGOPS)
20740 /* When the current size is best copied by a libcall, but we are
20741 still forced to inline, run the heuristic below that will pick
20742 the code for medium-sized blocks. */
20743 if (alg != libcall)
20747 else if (ALG_USABLE_P (candidate))
20751 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
20753 /* When asked to inline the call anyway, try to pick a meaningful choice.
20754 We look for the maximal block size that is faster to copy by hand,
20755 and take blocks of at most that size, guessing that the average block
20756 will be roughly half that size.
20758 If this turns out to be bad, we might simply specify the preferred
20759 choice in ix86_costs. */
20760 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
20761 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
20764 enum stringop_alg alg;
20766 bool any_alg_usable_p = true;
20768 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
20770 enum stringop_alg candidate = algs->size[i].alg;
20771 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
20773 if (candidate != libcall && candidate
20774 && ALG_USABLE_P (candidate))
20775 max = algs->size[i].max;
20777 /* If there aren't any usable algorithms, then recursing on
20778 smaller sizes isn't going to find anything. Just return the
20779 simple byte-at-a-time copy loop. */
20780 if (!any_alg_usable_p)
20782 /* Pick something reasonable. */
20783 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
20784 *dynamic_check = 128;
20785 return loop_1_byte;
20789 alg = decide_alg (count, max / 2, memset, dynamic_check);
20790 gcc_assert (*dynamic_check == -1);
20791 gcc_assert (alg != libcall);
20792 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
20793 *dynamic_check = max;
20796 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
20797 #undef ALG_USABLE_P
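/* Editorial sketch (illustrative only, not compiled into GCC; the
   struct and values are hypothetical stand-ins for stringop_algs):
   the size-table walk above, reduced to its core.  */
#if 0
struct size_entry { long max; int alg; };

static int
pick_alg_model (const struct size_entry *table, int n, long expected_size)
{
  int i;
  for (i = 0; i < n; i++)
    if (table[i].max == -1 || table[i].max >= expected_size)
      return table[i].alg;    /* first entry covering the size wins */
  return -1;                  /* no entry: fall back to a libcall */
}
#endif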
20800 /* Decide on alignment. We know that the operand is already aligned to ALIGN
20801 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
20803 decide_alignment (int align,
20804 enum stringop_alg alg,
20807 int desired_align = 0;
20811 gcc_unreachable ();
20813 case unrolled_loop:
20814 desired_align = GET_MODE_SIZE (Pmode);
20816 case rep_prefix_8_byte:
20819 case rep_prefix_4_byte:
20820 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
20821 copying a whole cache line at once. */
20822 if (TARGET_PENTIUMPRO)
20827 case rep_prefix_1_byte:
20828 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
20829 copying a whole cache line at once. */
20830 if (TARGET_PENTIUMPRO)
20844 if (desired_align < align)
20845 desired_align = align;
20846 if (expected_size != -1 && expected_size < 4)
20847 desired_align = align;
20848 return desired_align;
20851 /* Return the smallest power of 2 greater than VAL. */
20853 smallest_pow2_greater_than (int val)
20861 /* Expand string move (memcpy) operation. Use i386 string operations when
20862 profitable. expand_setmem contains similar code. The code depends upon
20863 architecture, block size and alignment, but always has the same overall structure:
20866 1) Prologue guard: conditional that jumps to the epilogue for small
20867 blocks that can be handled by the epilogue alone. This is faster, but
20868 also needed for correctness, since the prologue assumes the block is larger
20869 than the desired alignment.
20871 An optional dynamic check for size, with a libcall for large
20872 blocks, is emitted here too, with -minline-stringops-dynamically.
20874 2) Prologue: copy the first few bytes in order to get the destination aligned
20875 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
20876 DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
20877 We emit either a jump tree (for power-of-two-sized blocks) or a byte loop.
20879 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
20880 with the specified algorithm.
20882 4) Epilogue: code copying the tail of the block that is too small to be
20883 handled by the main body (or up to the size guarded by the prologue guard). */
20886 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
20887 rtx expected_align_exp, rtx expected_size_exp)
20893 rtx jump_around_label = NULL;
20894 HOST_WIDE_INT align = 1;
20895 unsigned HOST_WIDE_INT count = 0;
20896 HOST_WIDE_INT expected_size = -1;
20897 int size_needed = 0, epilogue_size_needed;
20898 int desired_align = 0, align_bytes = 0;
20899 enum stringop_alg alg;
20901 bool need_zero_guard = false;
20903 if (CONST_INT_P (align_exp))
20904 align = INTVAL (align_exp);
20905 /* i386 can do misaligned access at a reasonably small extra cost. */
20906 if (CONST_INT_P (expected_align_exp)
20907 && INTVAL (expected_align_exp) > align)
20908 align = INTVAL (expected_align_exp);
20909 /* ALIGN is the minimum of destination and source alignment, but here we
20910 care only about destination alignment. */
20911 else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
20912 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
20914 if (CONST_INT_P (count_exp))
20915 count = expected_size = INTVAL (count_exp);
20916 if (CONST_INT_P (expected_size_exp) && count == 0)
20917 expected_size = INTVAL (expected_size_exp);
20919 /* Make sure we don't need to care about overflow later on. */
20920 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
20923 /* Step 0: Decide on preferred algorithm, desired alignment and
20924 size of chunks to be copied by main loop. */
20926 alg = decide_alg (count, expected_size, false, &dynamic_check);
20927 desired_align = decide_alignment (align, alg, expected_size);
20929 if (!TARGET_ALIGN_STRINGOPS)
20930 align = desired_align;
20932 if (alg == libcall)
20934 gcc_assert (alg != no_stringop);
20936 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
20937 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
20938 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
20943 gcc_unreachable ();
20945 need_zero_guard = true;
20946 size_needed = GET_MODE_SIZE (Pmode);
20948 case unrolled_loop:
20949 need_zero_guard = true;
20950 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
20952 case rep_prefix_8_byte:
20955 case rep_prefix_4_byte:
20958 case rep_prefix_1_byte:
20962 need_zero_guard = true;
20967 epilogue_size_needed = size_needed;
20969 /* Step 1: Prologue guard. */
20971 /* Alignment code needs count to be in a register. */
20972 if (CONST_INT_P (count_exp) && desired_align > align)
20974 if (INTVAL (count_exp) > desired_align
20975 && INTVAL (count_exp) > size_needed)
20978 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
20979 if (align_bytes <= 0)
20982 align_bytes = desired_align - align_bytes;
20984 if (align_bytes == 0)
20985 count_exp = force_reg (counter_mode (count_exp), count_exp);
20987 gcc_assert (desired_align >= 1 && align >= 1);
20989 /* Ensure that alignment prologue won't copy past end of block. */
20990 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
20992 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
20993 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
20994 Make sure it is power of 2. */
20995 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
20999 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
21001 /* If the main algorithm works on QImode, no epilogue is needed.
21002 For small sizes, just don't align anything. */
21003 if (size_needed == 1)
21004 desired_align = align;
21011 label = gen_label_rtx ();
21012 emit_cmp_and_jump_insns (count_exp,
21013 GEN_INT (epilogue_size_needed),
21014 LTU, 0, counter_mode (count_exp), 1, label);
21015 if (expected_size == -1 || expected_size < epilogue_size_needed)
21016 predict_jump (REG_BR_PROB_BASE * 60 / 100);
21018 predict_jump (REG_BR_PROB_BASE * 20 / 100);
21022 /* Emit code to decide at run time whether a library call or inline code should be used. */
21024 if (dynamic_check != -1)
21026 if (CONST_INT_P (count_exp))
21028 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
21030 emit_block_move_via_libcall (dst, src, count_exp, false);
21031 count_exp = const0_rtx;
21037 rtx hot_label = gen_label_rtx ();
21038 jump_around_label = gen_label_rtx ();
21039 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
21040 LEU, 0, GET_MODE (count_exp), 1, hot_label);
21041 predict_jump (REG_BR_PROB_BASE * 90 / 100);
21042 emit_block_move_via_libcall (dst, src, count_exp, false);
21043 emit_jump (jump_around_label);
21044 emit_label (hot_label);
21048 /* Step 2: Alignment prologue. */
21050 if (desired_align > align)
21052 if (align_bytes == 0)
21054 /* Except for the first move in the epilogue, we no longer know
21055 the constant offset in the aliasing info. It doesn't seem worth
21056 the pain to maintain it for the first move, so throw away the info early. */
21058 src = change_address (src, BLKmode, srcreg);
21059 dst = change_address (dst, BLKmode, destreg);
21060 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
21065 /* If we know how many bytes need to be stored before dst is
21066 sufficiently aligned, maintain aliasing info accurately. */
21067 dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
21068 desired_align, align_bytes);
21069 count_exp = plus_constant (count_exp, -align_bytes);
21070 count -= align_bytes;
21072 if (need_zero_guard
21073 && (count < (unsigned HOST_WIDE_INT) size_needed
21074 || (align_bytes == 0
21075 && count < ((unsigned HOST_WIDE_INT) size_needed
21076 + desired_align - align))))
21078 /* It is possible that we copied enough so the main loop will not run even once. */
21080 gcc_assert (size_needed > 1);
21081 if (label == NULL_RTX)
21082 label = gen_label_rtx ();
21083 emit_cmp_and_jump_insns (count_exp,
21084 GEN_INT (size_needed),
21085 LTU, 0, counter_mode (count_exp), 1, label);
21086 if (expected_size == -1
21087 || expected_size < (desired_align - align) / 2 + size_needed)
21088 predict_jump (REG_BR_PROB_BASE * 20 / 100);
21090 predict_jump (REG_BR_PROB_BASE * 60 / 100);
21093 if (label && size_needed == 1)
21095 emit_label (label);
21096 LABEL_NUSES (label) = 1;
21098 epilogue_size_needed = 1;
21100 else if (label == NULL_RTX)
21101 epilogue_size_needed = size_needed;
21103 /* Step 3: Main loop. */
21109 gcc_unreachable ();
21111 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
21112 count_exp, QImode, 1, expected_size);
21115 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
21116 count_exp, Pmode, 1, expected_size);
21118 case unrolled_loop:
21119 /* Unroll only by a factor of 2 in 32-bit mode, since we don't have enough
21120 registers for 4 temporaries anyway. */
21121 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
21122 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
21125 case rep_prefix_8_byte:
21126 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
21129 case rep_prefix_4_byte:
21130 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
21133 case rep_prefix_1_byte:
21134 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
21138 /* Properly adjust the offsets of the src and dest memory for aliasing. */
21139 if (CONST_INT_P (count_exp))
21141 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
21142 (count / size_needed) * size_needed);
21143 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
21144 (count / size_needed) * size_needed);
21148 src = change_address (src, BLKmode, srcreg);
21149 dst = change_address (dst, BLKmode, destreg);
21152 /* Step 4: Epilogue to copy the remaining bytes. */
21156 /* When the main loop is done, COUNT_EXP might hold the original count,
21157 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
21158 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
21159 bytes. Compensate if needed. */
21161 if (size_needed < epilogue_size_needed)
21164 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
21165 GEN_INT (size_needed - 1), count_exp, 1,
21167 if (tmp != count_exp)
21168 emit_move_insn (count_exp, tmp);
21170 emit_label (label);
21171 LABEL_NUSES (label) = 1;
21174 if (count_exp != const0_rtx && epilogue_size_needed > 1)
21175 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
21176 epilogue_size_needed);
21177 if (jump_around_label)
21178 emit_label (jump_around_label);
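/* Editorial sketch (illustrative only, not compiled into GCC; names are
   invented): the overall shape of the code emitted above, following the
   four steps described in the comment before ix86_expand_movmem.  */
#if 0
#include <stddef.h>
#include <string.h>

static void
movmem_shape_model (char *dst, const char *src, size_t count,
                    size_t size_needed, size_t epilogue_size_needed)
{
  if (count < epilogue_size_needed)     /* 1) prologue guard */
    goto epilogue;
  /* 2) the alignment prologue would copy a few bytes here */
  while (count >= size_needed)          /* 3) main body */
    {
      memcpy (dst, src, size_needed);
      dst += size_needed; src += size_needed; count -= size_needed;
    }
 epilogue:                              /* 4) epilogue for the tail */
  memcpy (dst, src, count & (epilogue_size_needed - 1));
}
#endif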
21182 /* Helper function for memset. For QImode value 0xXY produce
21183 0xXYXYXYXY of the width specified by MODE. This is essentially
21184 a * 0x01010101, but we can do slightly better than
21185 synth_mult by unwinding the sequence by hand on CPUs with a slow multiply. */
21188 promote_duplicated_reg (enum machine_mode mode, rtx val)
21190 enum machine_mode valmode = GET_MODE (val);
21192 int nops = mode == DImode ? 3 : 2;
21194 gcc_assert (mode == SImode || mode == DImode);
21195 if (val == const0_rtx)
21196 return copy_to_mode_reg (mode, const0_rtx);
21197 if (CONST_INT_P (val))
21199 HOST_WIDE_INT v = INTVAL (val) & 255;
21203 if (mode == DImode)
21204 v |= (v << 16) << 16;
21205 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
21208 if (valmode == VOIDmode)
21210 if (valmode != QImode)
21211 val = gen_lowpart (QImode, val);
21212 if (mode == QImode)
21214 if (!TARGET_PARTIAL_REG_STALL)
21216 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
21217 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
21218 <= (ix86_cost->shift_const + ix86_cost->add) * nops
21219 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
21221 rtx reg = convert_modes (mode, QImode, val, true);
21222 tmp = promote_duplicated_reg (mode, const1_rtx);
21223 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
21228 rtx reg = convert_modes (mode, QImode, val, true);
21230 if (!TARGET_PARTIAL_REG_STALL)
21231 if (mode == SImode)
21232 emit_insn (gen_movsi_insv_1 (reg, reg));
21234 emit_insn (gen_movdi_insv_1 (reg, reg));
21237 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
21238 NULL, 1, OPTAB_DIRECT);
21240 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
21242 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
21243 NULL, 1, OPTAB_DIRECT);
21244 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
21245 if (mode == SImode)
21247 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
21248 NULL, 1, OPTAB_DIRECT);
21249 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
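/* Editorial sketch (illustrative only, not compiled into GCC): the
   shift/IOR ladder above broadcasts a byte into every byte of the word,
   the same result 0xXY * 0x01010101 (or its 64-bit analogue) would
   give, without a multiply.  */
#if 0
#include <stdint.h>

static uint64_t
broadcast_byte_model (uint8_t byte)
{
  uint64_t v = byte;
  v |= v << 8;    /* 0x00XY -> 0xXYXY */
  v |= v << 16;   /* 0xXYXY -> 0xXYXYXYXY; SImode stops here */
  v |= v << 32;   /* DImode: fill all eight bytes */
  return v;       /* e.g. 0xAB -> 0xABABABABABABABAB */
}
#endif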
21254 /* Duplicate VAL, using promote_duplicated_reg, into the maximal size that will
21255 be needed by the main loop copying SIZE_NEEDED chunks and by the prologue
21256 raising the alignment from ALIGN to DESIRED_ALIGN. */
21258 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
21263 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
21264 promoted_val = promote_duplicated_reg (DImode, val);
21265 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
21266 promoted_val = promote_duplicated_reg (SImode, val);
21267 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
21268 promoted_val = promote_duplicated_reg (HImode, val);
21270 promoted_val = val;
21272 return promoted_val;
21275 /* Expand string set (memset) operation. Use i386 string operations when
21276 profitable. See expand_movmem comment for explanation of individual
21277 steps performed. */
21279 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
21280 rtx expected_align_exp, rtx expected_size_exp)
21285 rtx jump_around_label = NULL;
21286 HOST_WIDE_INT align = 1;
21287 unsigned HOST_WIDE_INT count = 0;
21288 HOST_WIDE_INT expected_size = -1;
21289 int size_needed = 0, epilogue_size_needed;
21290 int desired_align = 0, align_bytes = 0;
21291 enum stringop_alg alg;
21292 rtx promoted_val = NULL;
21293 bool force_loopy_epilogue = false;
21295 bool need_zero_guard = false;
21297 if (CONST_INT_P (align_exp))
21298 align = INTVAL (align_exp);
21299 /* i386 can do misaligned access at a reasonably small extra cost. */
21300 if (CONST_INT_P (expected_align_exp)
21301 && INTVAL (expected_align_exp) > align)
21302 align = INTVAL (expected_align_exp);
21303 if (CONST_INT_P (count_exp))
21304 count = expected_size = INTVAL (count_exp);
21305 if (CONST_INT_P (expected_size_exp) && count == 0)
21306 expected_size = INTVAL (expected_size_exp);
21308 /* Make sure we don't need to care about overflow later on. */
21309 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
21312 /* Step 0: Decide on preferred algorithm, desired alignment and
21313 size of chunks to be copied by main loop. */
21315 alg = decide_alg (count, expected_size, true, &dynamic_check);
21316 desired_align = decide_alignment (align, alg, expected_size);
21318 if (!TARGET_ALIGN_STRINGOPS)
21319 align = desired_align;
21321 if (alg == libcall)
21323 gcc_assert (alg != no_stringop);
21325 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
21326 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
21331 gcc_unreachable ();
21333 need_zero_guard = true;
21334 size_needed = GET_MODE_SIZE (Pmode);
21336 case unrolled_loop:
21337 need_zero_guard = true;
21338 size_needed = GET_MODE_SIZE (Pmode) * 4;
21340 case rep_prefix_8_byte:
21343 case rep_prefix_4_byte:
21346 case rep_prefix_1_byte:
21350 need_zero_guard = true;
21354 epilogue_size_needed = size_needed;
21356 /* Step 1: Prologue guard. */
21358 /* Alignment code needs count to be in a register. */
21359 if (CONST_INT_P (count_exp) && desired_align > align)
21361 if (INTVAL (count_exp) > desired_align
21362 && INTVAL (count_exp) > size_needed)
21365 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
21366 if (align_bytes <= 0)
21369 align_bytes = desired_align - align_bytes;
21371 if (align_bytes == 0)
21373 enum machine_mode mode = SImode;
21374 if (TARGET_64BIT && (count & ~0xffffffff))
21376 count_exp = force_reg (mode, count_exp);
21379 /* Do the cheap promotion to allow better CSE across the
21380 main loop and epilogue (i.e., one load of the big constant in
21381 front of all the code). */
21382 if (CONST_INT_P (val_exp))
21383 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
21384 desired_align, align);
21385 /* Ensure that alignment prologue won't copy past end of block. */
21386 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
21388 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
21389 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
21390 Make sure it is power of 2. */
21391 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
21393 /* To improve performance on small blocks, we jump around the promotion
21394 of VAL. This means that if the promoted VAL is not constant, we might
21395 not use it in the epilogue and have to fall back to a byte loop. */
21397 if (epilogue_size_needed > 2 && !promoted_val)
21398 force_loopy_epilogue = true;
21401 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
21403 /* If the main algorithm works on QImode, no epilogue is needed.
21404 For small sizes, just don't align anything. */
21405 if (size_needed == 1)
21406 desired_align = align;
21413 label = gen_label_rtx ();
21414 emit_cmp_and_jump_insns (count_exp,
21415 GEN_INT (epilogue_size_needed),
21416 LTU, 0, counter_mode (count_exp), 1, label);
21417 if (expected_size == -1 || expected_size <= epilogue_size_needed)
21418 predict_jump (REG_BR_PROB_BASE * 60 / 100);
21420 predict_jump (REG_BR_PROB_BASE * 20 / 100);
21423 if (dynamic_check != -1)
21425 rtx hot_label = gen_label_rtx ();
21426 jump_around_label = gen_label_rtx ();
21427 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
21428 LEU, 0, counter_mode (count_exp), 1, hot_label);
21429 predict_jump (REG_BR_PROB_BASE * 90 / 100);
21430 set_storage_via_libcall (dst, count_exp, val_exp, false);
21431 emit_jump (jump_around_label);
21432 emit_label (hot_label);
21435 /* Step 2: Alignment prologue. */
21437 /* Do the expensive promotion once we have branched off the small blocks. */
21439 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
21440 desired_align, align);
21441 gcc_assert (desired_align >= 1 && align >= 1);
21443 if (desired_align > align)
21445 if (align_bytes == 0)
21447 /* Except for the first move in the epilogue, we no longer know
21448 the constant offset in the aliasing info. It doesn't seem worth
21449 the pain to maintain it for the first move, so throw away the info early. */
21451 dst = change_address (dst, BLKmode, destreg);
21452 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
21457 /* If we know how many bytes need to be stored before dst is
21458 sufficiently aligned, maintain aliasing info accurately. */
21459 dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
21460 desired_align, align_bytes);
21461 count_exp = plus_constant (count_exp, -align_bytes);
21462 count -= align_bytes;
21464 if (need_zero_guard
21465 && (count < (unsigned HOST_WIDE_INT) size_needed
21466 || (align_bytes == 0
21467 && count < ((unsigned HOST_WIDE_INT) size_needed
21468 + desired_align - align))))
21470 /* It is possible that we copied enough so the main loop will not run even once. */
21472 gcc_assert (size_needed > 1);
21473 if (label == NULL_RTX)
21474 label = gen_label_rtx ();
21475 emit_cmp_and_jump_insns (count_exp,
21476 GEN_INT (size_needed),
21477 LTU, 0, counter_mode (count_exp), 1, label);
21478 if (expected_size == -1
21479 || expected_size < (desired_align - align) / 2 + size_needed)
21480 predict_jump (REG_BR_PROB_BASE * 20 / 100);
21482 predict_jump (REG_BR_PROB_BASE * 60 / 100);
21485 if (label && size_needed == 1)
21487 emit_label (label);
21488 LABEL_NUSES (label) = 1;
21490 promoted_val = val_exp;
21491 epilogue_size_needed = 1;
21493 else if (label == NULL_RTX)
21494 epilogue_size_needed = size_needed;
21496 /* Step 3: Main loop. */
21502 gcc_unreachable ();
21504 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
21505 count_exp, QImode, 1, expected_size);
21508 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
21509 count_exp, Pmode, 1, expected_size);
21511 case unrolled_loop:
21512 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
21513 count_exp, Pmode, 4, expected_size);
21515 case rep_prefix_8_byte:
21516 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
21519 case rep_prefix_4_byte:
21520 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
21523 case rep_prefix_1_byte:
21524 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
21528 /* Properly adjust the offset of the dest memory for aliasing. */
21529 if (CONST_INT_P (count_exp))
21530 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
21531 (count / size_needed) * size_needed);
21533 dst = change_address (dst, BLKmode, destreg);
21535 /* Step 4: Epilogue to copy the remaining bytes. */
21539 /* When the main loop is done, COUNT_EXP might hold the original count,
21540 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
21541 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
21542 bytes. Compensate if needed. */
21544 if (size_needed < epilogue_size_needed)
21547 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
21548 GEN_INT (size_needed - 1), count_exp, 1,
21550 if (tmp != count_exp)
21551 emit_move_insn (count_exp, tmp);
21553 emit_label (label);
21554 LABEL_NUSES (label) = 1;
21557 if (count_exp != const0_rtx && epilogue_size_needed > 1)
21559 if (force_loopy_epilogue)
21560 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
21561 epilogue_size_needed);
21563 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
21564 epilogue_size_needed);
21566 if (jump_around_label)
21567 emit_label (jump_around_label);
21571 /* Expand the appropriate insns for doing strlen if not just doing repnz; scasb.
21574 out = result, initialized with the start address
21575 align_rtx = alignment of the address.
21576 scratch = scratch register, initialized with the start address when
21577 not aligned, otherwise undefined
21579 This is just the body. It needs the initializations mentioned above and
21580 some address computing at the end. These things are done in i386.md. */
21583 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
21587 rtx align_2_label = NULL_RTX;
21588 rtx align_3_label = NULL_RTX;
21589 rtx align_4_label = gen_label_rtx ();
21590 rtx end_0_label = gen_label_rtx ();
21592 rtx tmpreg = gen_reg_rtx (SImode);
21593 rtx scratch = gen_reg_rtx (SImode);
21597 if (CONST_INT_P (align_rtx))
21598 align = INTVAL (align_rtx);
21600 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
21602 /* Is there a known alignment and is it less than 4? */
21605 rtx scratch1 = gen_reg_rtx (Pmode);
21606 emit_move_insn (scratch1, out);
21607 /* Is there a known alignment and is it not 2? */
21610 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
21611 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
21613 /* Leave just the 3 lower bits. */
21614 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
21615 NULL_RTX, 0, OPTAB_WIDEN);
21617 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
21618 Pmode, 1, align_4_label);
21619 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
21620 Pmode, 1, align_2_label);
21621 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
21622 Pmode, 1, align_3_label);
21626 /* Since the alignment is 2, we have to check 2 or 0 bytes;
21627 check whether the pointer is aligned to a 4-byte boundary. */
21629 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
21630 NULL_RTX, 0, OPTAB_WIDEN);
21632 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
21633 Pmode, 1, align_4_label);
21636 mem = change_address (src, QImode, out);
21638 /* Now compare the bytes. */
21640 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
21641 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
21642 QImode, 1, end_0_label);
21644 /* Increment the address. */
21645 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
21647 /* Not needed with an alignment of 2. */
21650 emit_label (align_2_label);
21652 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
21655 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
21657 emit_label (align_3_label);
21660 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
21663 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
21666 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
21667 align this loop; that only makes the program huge and does not speed it up. */
21669 emit_label (align_4_label);
21671 mem = change_address (src, SImode, out);
21672 emit_move_insn (scratch, mem);
21673 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
21675 /* This formula yields a nonzero result iff one of the bytes is zero.
21676 This saves three branches inside the loop and many cycles. */
21678 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
21679 emit_insn (gen_one_cmplsi2 (scratch, scratch));
21680 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
21681 emit_insn (gen_andsi3 (tmpreg, tmpreg,
21682 gen_int_mode (0x80808080, SImode)));
21683 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
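/* Editorial sketch (illustrative only, not compiled into GCC): the
   word-at-a-time zero-byte test emitted above, as a C predicate.
   Subtracting 0x01010101 borrows out of exactly the bytes that are
   zero; masking with ~v keeps only genuine borrows, and 0x80808080
   extracts their sign bits.  */
#if 0
#include <stdint.h>

static int
has_zero_byte_model (uint32_t v)
{
  return ((v - 0x01010101u) & ~v & 0x80808080u) != 0;
}
/* has_zero_byte_model (0x41004141) != 0 (one byte is zero), while
   has_zero_byte_model (0x41414141) == 0 (none are).  */
#endif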
21688 rtx reg = gen_reg_rtx (SImode);
21689 rtx reg2 = gen_reg_rtx (Pmode);
21690 emit_move_insn (reg, tmpreg);
21691 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
21693 /* If zero is not in the first two bytes, move two bytes forward. */
21694 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
21695 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
21696 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
21697 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
21698 gen_rtx_IF_THEN_ELSE (SImode, tmp,
21701 /* Emit the lea manually to avoid clobbering the flags. */
21702 emit_insn (gen_rtx_SET (SImode, reg2,
21703 gen_rtx_PLUS (Pmode, out, const2_rtx)));
21705 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
21706 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
21707 emit_insn (gen_rtx_SET (VOIDmode, out,
21708 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
21714 rtx end_2_label = gen_label_rtx ();
21715 /* Is zero in the first two bytes? */
21717 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
21718 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
21719 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
21720 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
21721 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
21723 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
21724 JUMP_LABEL (tmp) = end_2_label;
21726 /* Not in the first two. Move two bytes forward. */
21727 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
21728 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
21730 emit_label (end_2_label);
21734 /* Avoid a branch when fixing up the byte position. */
21735 tmpreg = gen_lowpart (QImode, tmpreg);
21736 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
21737 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
21738 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
21739 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
21741 emit_label (end_0_label);
21744 /* Expand strlen. */
21747 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
21749 rtx addr, scratch1, scratch2, scratch3, scratch4;
21751 /* The generic case of the strlen expander is long. Avoid expanding
21752 it unless TARGET_INLINE_ALL_STRINGOPS. */
21754 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
21755 && !TARGET_INLINE_ALL_STRINGOPS
21756 && !optimize_insn_for_size_p ()
21757 && (!CONST_INT_P (align) || INTVAL (align) < 4))
21760 addr = force_reg (Pmode, XEXP (src, 0));
21761 scratch1 = gen_reg_rtx (Pmode);
21763 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
21764 && !optimize_insn_for_size_p ())
21766 /* It seems that some optimizers do not combine a call like
21767 foo(strlen(bar), strlen(bar));
21768 when the move and the subtraction are done here. The length is
21769 calculated just once when these instructions are done inside
21770 output_strlen_unroll(). But since &bar[strlen(bar)] is
21771 often used, and this uses one fewer register for the lifetime of
21772 output_strlen_unroll(), this is better. */
21774 emit_move_insn (out, addr);
21776 ix86_expand_strlensi_unroll_1 (out, src, align);
21778 /* strlensi_unroll_1 returns the address of the zero at the end of
21779 the string, like memchr(), so compute the length by subtracting
21780 the start address. */
21781 emit_insn (ix86_gen_sub3 (out, out, addr));
21787 /* Can't use this if the user has appropriated eax, ecx, or edi. */
21788 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
21791 scratch2 = gen_reg_rtx (Pmode);
21792 scratch3 = gen_reg_rtx (Pmode);
21793 scratch4 = force_reg (Pmode, constm1_rtx);
21795 emit_move_insn (scratch3, addr);
21796 eoschar = force_reg (QImode, eoschar);
21798 src = replace_equiv_address_nv (src, scratch3);
21800 /* If .md starts supporting :P, this can be done in .md. */
21801 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
21802 scratch4), UNSPEC_SCAS);
21803 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
21804 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
21805 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
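/* Editorial sketch (illustrative only, not compiled into GCC; names are
   invented): how the emitted repnz scasb sequence recovers the length.
   ECX starts at -1 and is decremented once per scanned byte, including
   the terminator; the one_cmpl and add of -1 emitted above then yield
   the length.  */
#if 0
#include <stddef.h>

static size_t
strlen_scas_model (const char *s)
{
  size_t ecx = (size_t) -1;
  do
    ecx--;                   /* one decrement per byte, NUL included */
  while (*s++ != 0);
  return ~ecx - 1;           /* one_cmpl, then add constm1 */
}
#endif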
21810 /* For a given symbol (function), construct code to compute the address of its
21811 PLT entry in the large x86-64 PIC model. */
21813 construct_plt_address (rtx symbol)
21815 rtx tmp = gen_reg_rtx (Pmode);
21816 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
21818 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
21819 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
21821 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
21822 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
21827 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
21829 rtx pop, int sibcall)
21831 rtx use = NULL, call;
21833 if (pop == const0_rtx)
21835 gcc_assert (!TARGET_64BIT || !pop);
21837 if (TARGET_MACHO && !TARGET_64BIT)
21840 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
21841 fnaddr = machopic_indirect_call_target (fnaddr);
21846 /* Static functions and indirect calls don't need the pic register. */
21847 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
21848 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
21849 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
21850 use_reg (&use, pic_offset_table_rtx);
21853 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
21855 rtx al = gen_rtx_REG (QImode, AX_REG);
21856 emit_move_insn (al, callarg2);
21857 use_reg (&use, al);
21860 if (ix86_cmodel == CM_LARGE_PIC
21862 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
21863 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
21864 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
21866 ? !sibcall_insn_operand (XEXP (fnaddr, 0), Pmode)
21867 : !call_insn_operand (XEXP (fnaddr, 0), Pmode))
21869 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
21870 fnaddr = gen_rtx_MEM (QImode, fnaddr);
21873 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
21875 call = gen_rtx_SET (VOIDmode, retval, call);
21878 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
21879 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
21880 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
21883 && ix86_cfun_abi () == MS_ABI
21884 && (!callarg2 || INTVAL (callarg2) != -2))
21886 /* We need to represent that the SI and DI registers are clobbered by SYSV calls. */
21888 static int clobbered_registers[] = {
21889 XMM6_REG, XMM7_REG, XMM8_REG,
21890 XMM9_REG, XMM10_REG, XMM11_REG,
21891 XMM12_REG, XMM13_REG, XMM14_REG,
21892 XMM15_REG, SI_REG, DI_REG
21895 rtx vec[ARRAY_SIZE (clobbered_registers) + 2];
21896 rtx unspec = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
21897 UNSPEC_MS_TO_SYSV_CALL);
21901 for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
21902 vec[i + 2] = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
21905 (SSE_REGNO_P (clobbered_registers[i])
21907 clobbered_registers[i]));
21909 call = gen_rtx_PARALLEL (VOIDmode,
21910 gen_rtvec_v (ARRAY_SIZE (clobbered_registers)
21914 /* Add UNSPEC_CALL_NEEDS_VZEROUPPER decoration. */
21915 if (TARGET_VZEROUPPER)
21920 if (cfun->machine->callee_pass_avx256_p)
21922 if (cfun->machine->callee_return_avx256_p)
21923 avx256 = callee_return_pass_avx256;
21925 avx256 = callee_pass_avx256;
21927 else if (cfun->machine->callee_return_avx256_p)
21928 avx256 = callee_return_avx256;
21930 avx256 = call_no_avx256;
21932 if (reload_completed)
21933 emit_insn (gen_avx_vzeroupper (GEN_INT (avx256)));
21936 unspec = gen_rtx_UNSPEC (VOIDmode,
21937 gen_rtvec (1, GEN_INT (avx256)),
21938 UNSPEC_CALL_NEEDS_VZEROUPPER);
21939 call = gen_rtx_PARALLEL (VOIDmode,
21940 gen_rtvec (2, call, unspec));
21944 call = emit_call_insn (call);
21946 CALL_INSN_FUNCTION_USAGE (call) = use;
21952 ix86_split_call_vzeroupper (rtx insn, rtx vzeroupper)
21954 rtx call = XVECEXP (PATTERN (insn), 0, 0);
21955 emit_insn (gen_avx_vzeroupper (vzeroupper));
21956 emit_call_insn (call);
21959 /* Output the assembly for a call instruction. */
21962 ix86_output_call_insn (rtx insn, rtx call_op, int addr_op)
21964 bool direct_p = constant_call_address_operand (call_op, Pmode);
21965 bool seh_nop_p = false;
21967 gcc_assert (addr_op == 0 || addr_op == 1);
21969 if (SIBLING_CALL_P (insn))
21972 return addr_op ? "jmp\t%P1" : "jmp\t%P0";
21973 /* SEH epilogue detection requires the indirect branch case
21974 to include REX.W. */
21975 else if (TARGET_SEH)
21976 return addr_op ? "rex.W jmp %A1" : "rex.W jmp %A0";
21978 return addr_op ? "jmp\t%A1" : "jmp\t%A0";
21981 /* SEH unwinding can require an extra nop to be emitted in several
21982 circumstances. Determine if we have one of those. */
21987 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
21989 /* If we get to another real insn, we don't need the nop. */
21993 /* If we get to the epilogue note, prevent a catch region from
21994 being adjacent to the standard epilogue sequence. With non-call
21995 exceptions, we'll have done this during epilogue emission. */
21996 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
21997 && !flag_non_call_exceptions
21998 && !can_throw_internal (insn))
22005 /* If we didn't find a real insn following the call, prevent the
22006 unwinder from looking into the next function. */
22014 return addr_op ? "call\t%P1\n\tnop" : "call\t%P0\n\tnop";
22016 return addr_op ? "call\t%P1" : "call\t%P0";
22021 return addr_op ? "call\t%A1\n\tnop" : "call\t%A0\n\tnop";
22023 return addr_op ? "call\t%A1" : "call\t%A0";
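/* Examples of the templates above (illustrative): with ADDR_OP == 0, a
   direct call prints "call\tfoo" and an indirect one "call\t*%eax"; a
   direct sibcall prints "jmp\tfoo", and on SEH targets an indirect
   sibcall gets the REX.W prefix.  When SEH_NOP_P is set, the trailing
   nop keeps the unwinder from looking into the next function.  */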
22027 /* Clear stack slot assignments remembered from previous functions.
22028 This is called from INIT_EXPANDERS once before RTL is emitted for each
22029 function. */
22031 static struct machine_function *
22032 ix86_init_machine_status (void)
22034 struct machine_function *f;
22036 f = ggc_alloc_cleared_machine_function ();
22037 f->use_fast_prologue_epilogue_nregs = -1;
22038 f->tls_descriptor_call_expanded_p = 0;
22039 f->call_abi = ix86_abi;
22044 /* Return a MEM corresponding to a stack slot with mode MODE.
22045 Allocate a new slot if necessary.
22047 The RTL for a function can have several slots available: N is
22048 which slot to use. */
22051 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
22053 struct stack_local_entry *s;
22055 gcc_assert (n < MAX_386_STACK_LOCALS);
22057 /* Virtual slot is valid only before vregs are instantiated. */
22058 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
22060 for (s = ix86_stack_locals; s; s = s->next)
22061 if (s->mode == mode && s->n == n)
22062 return copy_rtx (s->rtl);
22064 s = ggc_alloc_stack_local_entry ();
22067 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
22069 s->next = ix86_stack_locals;
22070 ix86_stack_locals = s;
22074 /* Construct the SYMBOL_REF for the tls_get_addr function. */
22076 static GTY(()) rtx ix86_tls_symbol;
22078 ix86_tls_get_addr (void)
22081 if (!ix86_tls_symbol)
22083 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
22084 (TARGET_ANY_GNU_TLS
22086 ? "___tls_get_addr"
22087 : "__tls_get_addr");
22090 return ix86_tls_symbol;
22093 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
22095 static GTY(()) rtx ix86_tls_module_base_symbol;
22097 ix86_tls_module_base (void)
22100 if (!ix86_tls_module_base_symbol)
22102 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
22103 "_TLS_MODULE_BASE_");
22104 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
22105 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
22108 return ix86_tls_module_base_symbol;
22111 /* Calculate the length of the memory address in the instruction
22112 encoding. Does not include the one-byte modrm, opcode, or prefix. */
22115 memory_address_length (rtx addr)
22117 struct ix86_address parts;
22118 rtx base, index, disp;
22122 if (GET_CODE (addr) == PRE_DEC
22123 || GET_CODE (addr) == POST_INC
22124 || GET_CODE (addr) == PRE_MODIFY
22125 || GET_CODE (addr) == POST_MODIFY)
22128 ok = ix86_decompose_address (addr, &parts);
22131 if (parts.base && GET_CODE (parts.base) == SUBREG)
22132 parts.base = SUBREG_REG (parts.base);
22133 if (parts.index && GET_CODE (parts.index) == SUBREG)
22134 parts.index = SUBREG_REG (parts.index);
22137 index = parts.index;
22138 disp = parts.disp;
22141 /* Rule of thumb:
22142 - esp as the base always wants an index,
22143 - ebp as the base always wants a displacement,
22144 - r12 as the base always wants an index,
22145 - r13 as the base always wants a displacement. */
22147 /* Register Indirect. */
22148 if (base && !index && !disp)
22150 /* esp (for its index) and ebp (for its displacement) need
22151 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
22152 mode. */
22153 if (REG_P (addr)
22154 && (addr == arg_pointer_rtx
22155 || addr == frame_pointer_rtx
22156 || REGNO (addr) == SP_REG
22157 || REGNO (addr) == BP_REG
22158 || REGNO (addr) == R12_REG
22159 || REGNO (addr) == R13_REG))
22163 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
22164 is not disp32, but disp32(%rip), so for disp32
22165 SIB byte is needed, unless print_operand_address
22166 optimizes it into disp32(%rip) or (%rip) is implied
22167 by UNSPEC. */
22168 else if (disp && !base && !index)
22173 rtx symbol = disp;
22175 if (GET_CODE (disp) == CONST)
22176 symbol = XEXP (disp, 0);
22177 if (GET_CODE (symbol) == PLUS
22178 && CONST_INT_P (XEXP (symbol, 1)))
22179 symbol = XEXP (symbol, 0);
22181 if (GET_CODE (symbol) != LABEL_REF
22182 && (GET_CODE (symbol) != SYMBOL_REF
22183 || SYMBOL_REF_TLS_MODEL (symbol) != 0)
22184 && (GET_CODE (symbol) != UNSPEC
22185 || (XINT (symbol, 1) != UNSPEC_GOTPCREL
22186 && XINT (symbol, 1) != UNSPEC_PCREL
22187 && XINT (symbol, 1) != UNSPEC_GOTNTPOFF)))
22194 /* Find the length of the displacement constant. */
22197 if (base && satisfies_constraint_K (disp))
22202 /* ebp always wants a displacement. Similarly r13. */
22203 else if (base && REG_P (base)
22204 && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
22207 /* An index requires the two-byte modrm form.... */
22209 /* ...like esp (or r12), which always wants an index. */
22210 || base == arg_pointer_rtx
22211 || base == frame_pointer_rtx
22212 || (base && REG_P (base)
22213 && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
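/* Illustrative sketch, not part of GCC: the displacement-size rule the
   code above implements, reduced to a base register number and a known
   constant displacement.  Returns the number of displacement bytes the
   address encoding needs (0, 1, or 4).  */

static int
memory_disp_size_sketch (int base_regno, HOST_WIDE_INT disp)
{
  /* ebp and r13 as the base always want a displacement, even a zero
     one, because their modrm encodings are taken.  */
  if (disp == 0 && base_regno != BP_REG && base_regno != R13_REG)
    return 0;
  /* A signed 8bit displacement covers [-128, 127]; otherwise disp32.  */
  return IN_RANGE (disp, -128, 127) ? 1 : 4;
}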
22230 /* Compute default value for "length_immediate" attribute. When SHORTFORM
22231 is set, expect that the insn has an 8bit immediate alternative.
22233 ix86_attr_length_immediate_default (rtx insn, int shortform)
22237 extract_insn_cached (insn);
22238 for (i = recog_data.n_operands - 1; i >= 0; --i)
22239 if (CONSTANT_P (recog_data.operand[i]))
22241 enum attr_mode mode = get_attr_mode (insn);
22244 if (shortform && CONST_INT_P (recog_data.operand[i]))
22246 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
22253 ival = trunc_int_for_mode (ival, HImode);
22256 ival = trunc_int_for_mode (ival, SImode);
22261 if (IN_RANGE (ival, -128, 127))
22278 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
22283 fatal_insn ("unknown insn mode", insn);
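/* Illustrative sketch, not part of GCC: the immediate sizes computed
   above per operand mode when the short 8bit form does not apply.
   DImode immediates are encoded as 32bit sign-extended values, so they
   also take 4 bytes.  */

static int
imm_size_for_mode_sketch (enum machine_mode mode)
{
  switch (mode)
    {
    case QImode:
      return 1;
    case HImode:
      return 2;
    case SImode:
    case DImode:
      return 4;
    default:
      /* The code above reports unknown modes with fatal_insn.  */
      return 0;
    }
}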
22288 /* Compute default value for "length_address" attribute. */
22290 ix86_attr_length_address_default (rtx insn)
22294 if (get_attr_type (insn) == TYPE_LEA)
22296 rtx set = PATTERN (insn), addr;
22298 if (GET_CODE (set) == PARALLEL)
22299 set = XVECEXP (set, 0, 0);
22301 gcc_assert (GET_CODE (set) == SET);
22303 addr = SET_SRC (set);
22304 if (TARGET_64BIT && get_attr_mode (insn) == MODE_SI)
22306 if (GET_CODE (addr) == ZERO_EXTEND)
22307 addr = XEXP (addr, 0);
22308 if (GET_CODE (addr) == SUBREG)
22309 addr = SUBREG_REG (addr);
22312 return memory_address_length (addr);
22315 extract_insn_cached (insn);
22316 for (i = recog_data.n_operands - 1; i >= 0; --i)
22317 if (MEM_P (recog_data.operand[i]))
22319 constrain_operands_cached (reload_completed);
22320 if (which_alternative != -1)
22322 const char *constraints = recog_data.constraints[i];
22323 int alt = which_alternative;
22325 while (*constraints == '=' || *constraints == '+')
22326 constraints++;
22327 while (alt-- > 0)
22328 while (*constraints++ != ',')
22329 ;
22330 /* Skip ignored operands. */
22331 if (*constraints == 'X')
22334 return memory_address_length (XEXP (recog_data.operand[i], 0));
22339 /* Compute default value for "length_vex" attribute. It includes
22340 2 or 3 byte VEX prefix and 1 opcode byte. */
22343 ix86_attr_length_vex_default (rtx insn, int has_0f_opcode,
22348 /* Only the 0f opcode can use the 2 byte VEX prefix, and the VEX W bit
22349 requires the 3 byte VEX prefix. */
22350 if (!has_0f_opcode || has_vex_w)
22353 /* We can always use 2 byte VEX prefix in 32bit. */
22357 extract_insn_cached (insn);
22359 for (i = recog_data.n_operands - 1; i >= 0; --i)
22360 if (REG_P (recog_data.operand[i]))
22362 /* REX.W bit uses 3 byte VEX prefix. */
22363 if (GET_MODE (recog_data.operand[i]) == DImode
22364 && GENERAL_REG_P (recog_data.operand[i]))
22369 /* REX.X or REX.B bits use 3 byte VEX prefix. */
22370 if (MEM_P (recog_data.operand[i])
22371 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
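/* Illustrative sketch, not part of GCC: the decision made above.  The
   attribute counts the VEX prefix plus the single opcode byte; the
   short 2 byte prefix is available only for the 0f opcode map with
   VEX.W clear and no REX.W/REX.X/REX.B requirement.  */

static int
vex_prefix_and_opcode_len_sketch (bool has_0f_opcode, bool has_vex_w,
				  bool needs_rex_wxb)
{
  if (!has_0f_opcode || has_vex_w || needs_rex_wxb)
    return 3 + 1;	/* 3 byte VEX prefix + opcode byte.  */
  return 2 + 1;		/* 2 byte VEX prefix + opcode byte.  */
}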
22378 /* Return the maximum number of instructions a cpu can issue. */
22381 ix86_issue_rate (void)
22385 case PROCESSOR_PENTIUM:
22386 case PROCESSOR_ATOM:
22390 case PROCESSOR_PENTIUMPRO:
22391 case PROCESSOR_PENTIUM4:
22392 case PROCESSOR_CORE2_32:
22393 case PROCESSOR_CORE2_64:
22394 case PROCESSOR_COREI7_32:
22395 case PROCESSOR_COREI7_64:
22396 case PROCESSOR_ATHLON:
22398 case PROCESSOR_AMDFAM10:
22399 case PROCESSOR_NOCONA:
22400 case PROCESSOR_GENERIC32:
22401 case PROCESSOR_GENERIC64:
22402 case PROCESSOR_BDVER1:
22403 case PROCESSOR_BTVER1:
22411 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads the flags
22412 set by DEP_INSN and nothing else set by DEP_INSN. */
22415 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
22419 /* Simplify the test for uninteresting insns. */
22420 if (insn_type != TYPE_SETCC
22421 && insn_type != TYPE_ICMOV
22422 && insn_type != TYPE_FCMOV
22423 && insn_type != TYPE_IBR)
22426 if ((set = single_set (dep_insn)) != 0)
22428 set = SET_DEST (set);
22431 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
22432 && XVECLEN (PATTERN (dep_insn), 0) == 2
22433 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
22434 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
22436 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
22437 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
22442 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
22445 /* This test is true if the dependent insn reads the flags but
22446 not any other potentially set register. */
22447 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
22450 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
22456 /* Return true iff USE_INSN has a memory address with operands set by
22457 SET_INSN. */
22460 ix86_agi_dependent (rtx set_insn, rtx use_insn)
22463 extract_insn_cached (use_insn);
22464 for (i = recog_data.n_operands - 1; i >= 0; --i)
22465 if (MEM_P (recog_data.operand[i]))
22467 rtx addr = XEXP (recog_data.operand[i], 0);
22468 return modified_in_p (addr, set_insn) != 0;
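/* Example (illustrative only) of the address generation interlock this
   predicate detects, in AT&T syntax:

	movl	$buf, %eax	# SET_INSN modifies %eax
	movl	(%eax), %edx	# USE_INSN's memory address uses %eax

   On the original Pentium the second insn pays an extra cycle of latency
   waiting for the address to be generated.  */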
22474 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
22476 enum attr_type insn_type, dep_insn_type;
22477 enum attr_memory memory;
22479 int dep_insn_code_number;
22481 /* Anti and output dependencies have zero cost on all CPUs. */
22482 if (REG_NOTE_KIND (link) != 0)
22485 dep_insn_code_number = recog_memoized (dep_insn);
22487 /* If we can't recognize the insns, we can't really do anything. */
22488 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
22491 insn_type = get_attr_type (insn);
22492 dep_insn_type = get_attr_type (dep_insn);
22496 case PROCESSOR_PENTIUM:
22497 /* Address Generation Interlock adds a cycle of latency. */
22498 if (insn_type == TYPE_LEA)
22500 rtx addr = PATTERN (insn);
22502 if (GET_CODE (addr) == PARALLEL)
22503 addr = XVECEXP (addr, 0, 0);
22505 gcc_assert (GET_CODE (addr) == SET);
22507 addr = SET_SRC (addr);
22508 if (modified_in_p (addr, dep_insn))
22511 else if (ix86_agi_dependent (dep_insn, insn))
22514 /* ??? Compares pair with jump/setcc. */
22515 if (ix86_flags_dependent (insn, dep_insn, insn_type))
22518 /* Floating point stores require the value to be ready one cycle earlier. */
22519 if (insn_type == TYPE_FMOV
22520 && get_attr_memory (insn) == MEMORY_STORE
22521 && !ix86_agi_dependent (dep_insn, insn))
22525 case PROCESSOR_PENTIUMPRO:
22526 memory = get_attr_memory (insn);
22528 /* INT->FP conversion is expensive. */
22529 if (get_attr_fp_int_src (dep_insn))
22532 /* There is one cycle extra latency between an FP op and a store. */
22533 if (insn_type == TYPE_FMOV
22534 && (set = single_set (dep_insn)) != NULL_RTX
22535 && (set2 = single_set (insn)) != NULL_RTX
22536 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
22537 && MEM_P (SET_DEST (set2)))
22540 /* Show the ability of the reorder buffer to hide the latency of a load
22541 by executing it in parallel with the previous instruction, when the
22542 previous instruction is not needed to compute the address. */
22543 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
22544 && !ix86_agi_dependent (dep_insn, insn))
22546 /* Claim moves take one cycle, as the core can issue one load at a time
22547 and the next load can start a cycle later. */
22548 if (dep_insn_type == TYPE_IMOV
22549 || dep_insn_type == TYPE_FMOV)
22557 memory = get_attr_memory (insn);
22559 /* The esp dependency is resolved before the instruction is really
22560 finished. */
22561 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
22562 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
22565 /* INT->FP conversion is expensive. */
22566 if (get_attr_fp_int_src (dep_insn))
22569 /* Show the ability of the reorder buffer to hide the latency of a load
22570 by executing it in parallel with the previous instruction, when the
22571 previous instruction is not needed to compute the address. */
22572 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
22573 && !ix86_agi_dependent (dep_insn, insn))
22575 /* Claim moves take one cycle, as the core can issue one load at a time
22576 and the next load can start a cycle later. */
22577 if (dep_insn_type == TYPE_IMOV
22578 || dep_insn_type == TYPE_FMOV)
22587 case PROCESSOR_ATHLON:
22589 case PROCESSOR_AMDFAM10:
22590 case PROCESSOR_BDVER1:
22591 case PROCESSOR_BTVER1:
22592 case PROCESSOR_ATOM:
22593 case PROCESSOR_GENERIC32:
22594 case PROCESSOR_GENERIC64:
22595 memory = get_attr_memory (insn);
22597 /* Show the ability of the reorder buffer to hide the latency of a load
22598 by executing it in parallel with the previous instruction, when the
22599 previous instruction is not needed to compute the address. */
22600 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
22601 && !ix86_agi_dependent (dep_insn, insn))
22603 enum attr_unit unit = get_attr_unit (insn);
22606 /* Because of the difference between the length of integer and
22607 floating unit pipeline preparation stages, the memory operands
22608 for floating point are cheaper.
22610 ??? For Athlon, the difference is most probably 2. */
22611 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
22614 loadcost = TARGET_ATHLON ? 2 : 0;
22616 if (cost >= loadcost)
22629 /* How many alternative schedules to try. This should be as wide as the
22630 scheduling freedom in the DFA, but no wider. Making this value too
22631 large results in extra work for the scheduler. */
22634 ia32_multipass_dfa_lookahead (void)
22638 case PROCESSOR_PENTIUM:
22641 case PROCESSOR_PENTIUMPRO:
22645 case PROCESSOR_CORE2_32:
22646 case PROCESSOR_CORE2_64:
22647 case PROCESSOR_COREI7_32:
22648 case PROCESSOR_COREI7_64:
22649 /* Generally, we want haifa-sched:max_issue() to look ahead as far
22650 as the number of instructions that can be executed in one cycle, i.e.,
22651 issue_rate. I wonder why tuning for many CPUs does not do this. */
22652 return ix86_issue_rate ();
22661 /* Model decoder of Core 2/i7.
22662 Below hooks for multipass scheduling (see haifa-sched.c:max_issue)
22663 track the instruction fetch block boundaries and make sure that long
22664 (9+ bytes) instructions are assigned to D0. */
22666 /* Maximum length of an insn that can be handled by
22667 a secondary decoder unit. '8' for Core 2/i7. */
22668 static int core2i7_secondary_decoder_max_insn_size;
22670 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
22671 '16' for Core 2/i7. */
22672 static int core2i7_ifetch_block_size;
22674 /* Maximum number of instructions decoder can handle per cycle.
22675 '6' for Core 2/i7. */
22676 static int core2i7_ifetch_block_max_insns;
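/* Illustrative sketch, not part of GCC: the per-insn test that the hooks
   below implement using the three parameters just declared.  An insn can
   be issued this cycle only if it fits the remaining ifetch block and the
   decode-count limit, and only the first (D0) decoder accepts insns
   longer than the secondary decoders handle.  */

static bool
core2i7_fits_this_cycle_sketch (int block_len, int block_n_insns,
				int insn_size, bool first_cycle_insn_p)
{
  if (!first_cycle_insn_p
      && insn_size > core2i7_secondary_decoder_max_insn_size)
    return false;
  if (block_len + insn_size > core2i7_ifetch_block_size)
    return false;
  if (block_n_insns + 1 > core2i7_ifetch_block_max_insns)
    return false;
  return true;
}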
22678 typedef struct ix86_first_cycle_multipass_data_ *
22679 ix86_first_cycle_multipass_data_t;
22680 typedef const struct ix86_first_cycle_multipass_data_ *
22681 const_ix86_first_cycle_multipass_data_t;
22683 /* A variable to store target state across calls to max_issue within
22684 one cycle. */
22685 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
22686 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
22688 /* Initialize DATA. */
22690 core2i7_first_cycle_multipass_init (void *_data)
22692 ix86_first_cycle_multipass_data_t data
22693 = (ix86_first_cycle_multipass_data_t) _data;
22695 data->ifetch_block_len = 0;
22696 data->ifetch_block_n_insns = 0;
22697 data->ready_try_change = NULL;
22698 data->ready_try_change_size = 0;
22701 /* Advancing the cycle; reset ifetch block counts. */
22703 core2i7_dfa_post_advance_cycle (void)
22705 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
22707 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
22709 data->ifetch_block_len = 0;
22710 data->ifetch_block_n_insns = 0;
22713 static int min_insn_size (rtx);
22715 /* Filter out insns from ready_try that the core will not be able to issue
22716 on current cycle due to decoder. */
22718 core2i7_first_cycle_multipass_filter_ready_try
22719 (const_ix86_first_cycle_multipass_data_t data,
22720 char *ready_try, int n_ready, bool first_cycle_insn_p)
22727 if (ready_try[n_ready])
22730 insn = get_ready_element (n_ready);
22731 insn_size = min_insn_size (insn);
22733 if (/* If this insn is too long for a secondary decoder ... */
22734 (!first_cycle_insn_p
22735 && insn_size > core2i7_secondary_decoder_max_insn_size)
22736 /* ... or it would not fit into the ifetch block ... */
22737 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
22738 /* ... or the decoder is full already ... */
22739 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
22740 /* ... mask the insn out. */
22742 ready_try[n_ready] = 1;
22744 if (data->ready_try_change)
22745 SET_BIT (data->ready_try_change, n_ready);
22750 /* Prepare for a new round of multipass lookahead scheduling. */
22752 core2i7_first_cycle_multipass_begin (void *_data, char *ready_try, int n_ready,
22753 bool first_cycle_insn_p)
22755 ix86_first_cycle_multipass_data_t data
22756 = (ix86_first_cycle_multipass_data_t) _data;
22757 const_ix86_first_cycle_multipass_data_t prev_data
22758 = ix86_first_cycle_multipass_data;
22760 /* Restore the state from the end of the previous round. */
22761 data->ifetch_block_len = prev_data->ifetch_block_len;
22762 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
22764 /* Filter instructions that cannot be issued on current cycle due to
22765 decoder restrictions. */
22766 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
22767 first_cycle_insn_p);
22770 /* INSN is being issued in current solution. Account for its impact on
22771 the decoder model. */
22773 core2i7_first_cycle_multipass_issue (void *_data, char *ready_try, int n_ready,
22774 rtx insn, const void *_prev_data)
22776 ix86_first_cycle_multipass_data_t data
22777 = (ix86_first_cycle_multipass_data_t) _data;
22778 const_ix86_first_cycle_multipass_data_t prev_data
22779 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
22781 int insn_size = min_insn_size (insn);
22783 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
22784 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
22785 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
22786 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
22788 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
22789 if (!data->ready_try_change)
22791 data->ready_try_change = sbitmap_alloc (n_ready);
22792 data->ready_try_change_size = n_ready;
22794 else if (data->ready_try_change_size < n_ready)
22796 data->ready_try_change = sbitmap_resize (data->ready_try_change,
22798 data->ready_try_change_size = n_ready;
22800 sbitmap_zero (data->ready_try_change);
22802 /* Filter out insns from ready_try that the core will not be able to issue
22803 on current cycle due to decoder. */
22804 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
22808 /* Revert the effect on ready_try. */
22810 core2i7_first_cycle_multipass_backtrack (const void *_data,
22812 int n_ready ATTRIBUTE_UNUSED)
22814 const_ix86_first_cycle_multipass_data_t data
22815 = (const_ix86_first_cycle_multipass_data_t) _data;
22816 unsigned int i = 0;
22817 sbitmap_iterator sbi;
22819 gcc_assert (sbitmap_last_set_bit (data->ready_try_change) < n_ready);
22820 EXECUTE_IF_SET_IN_SBITMAP (data->ready_try_change, 0, i, sbi)
22826 /* Save the result of multipass lookahead scheduling for the next round. */
22828 core2i7_first_cycle_multipass_end (const void *_data)
22830 const_ix86_first_cycle_multipass_data_t data
22831 = (const_ix86_first_cycle_multipass_data_t) _data;
22832 ix86_first_cycle_multipass_data_t next_data
22833 = ix86_first_cycle_multipass_data;
22837 next_data->ifetch_block_len = data->ifetch_block_len;
22838 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
22842 /* Deallocate target data. */
22844 core2i7_first_cycle_multipass_fini (void *_data)
22846 ix86_first_cycle_multipass_data_t data
22847 = (ix86_first_cycle_multipass_data_t) _data;
22849 if (data->ready_try_change)
22851 sbitmap_free (data->ready_try_change);
22852 data->ready_try_change = NULL;
22853 data->ready_try_change_size = 0;
22857 /* Prepare for scheduling pass. */
22859 ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
22860 int verbose ATTRIBUTE_UNUSED,
22861 int max_uid ATTRIBUTE_UNUSED)
22863 /* Install scheduling hooks for current CPU. Some of these hooks are used
22864 in time-critical parts of the scheduler, so we only set them up when
22865 they are actually used. */
22868 case PROCESSOR_CORE2_32:
22869 case PROCESSOR_CORE2_64:
22870 case PROCESSOR_COREI7_32:
22871 case PROCESSOR_COREI7_64:
22872 targetm.sched.dfa_post_advance_cycle
22873 = core2i7_dfa_post_advance_cycle;
22874 targetm.sched.first_cycle_multipass_init
22875 = core2i7_first_cycle_multipass_init;
22876 targetm.sched.first_cycle_multipass_begin
22877 = core2i7_first_cycle_multipass_begin;
22878 targetm.sched.first_cycle_multipass_issue
22879 = core2i7_first_cycle_multipass_issue;
22880 targetm.sched.first_cycle_multipass_backtrack
22881 = core2i7_first_cycle_multipass_backtrack;
22882 targetm.sched.first_cycle_multipass_end
22883 = core2i7_first_cycle_multipass_end;
22884 targetm.sched.first_cycle_multipass_fini
22885 = core2i7_first_cycle_multipass_fini;
22887 /* Set decoder parameters. */
22888 core2i7_secondary_decoder_max_insn_size = 8;
22889 core2i7_ifetch_block_size = 16;
22890 core2i7_ifetch_block_max_insns = 6;
22894 targetm.sched.dfa_post_advance_cycle = NULL;
22895 targetm.sched.first_cycle_multipass_init = NULL;
22896 targetm.sched.first_cycle_multipass_begin = NULL;
22897 targetm.sched.first_cycle_multipass_issue = NULL;
22898 targetm.sched.first_cycle_multipass_backtrack = NULL;
22899 targetm.sched.first_cycle_multipass_end = NULL;
22900 targetm.sched.first_cycle_multipass_fini = NULL;
22906 /* Compute the alignment given to a constant that is being placed in memory.
22907 EXP is the constant and ALIGN is the alignment that the object would
22908 ordinarily have.
22909 The value of this function is used instead of that alignment to align
22910 the object. */
22913 ix86_constant_alignment (tree exp, int align)
22915 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
22916 || TREE_CODE (exp) == INTEGER_CST)
22918 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
22920 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
22923 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
22924 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
22925 return BITS_PER_WORD;
22930 /* Compute the alignment for a static variable.
22931 TYPE is the data type, and ALIGN is the alignment that
22932 the object would ordinarily have. The value of this function is used
22933 instead of that alignment to align the object. */
22936 ix86_data_alignment (tree type, int align)
22938 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
22940 if (AGGREGATE_TYPE_P (type)
22941 && TYPE_SIZE (type)
22942 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
22943 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
22944 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
22945 && align < max_align)
22948 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
22949 to 16byte boundary. */
22952 if (AGGREGATE_TYPE_P (type)
22953 && TYPE_SIZE (type)
22954 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
22955 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
22956 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
22960 if (TREE_CODE (type) == ARRAY_TYPE)
22962 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
22964 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
22967 else if (TREE_CODE (type) == COMPLEX_TYPE)
22970 if (TYPE_MODE (type) == DCmode && align < 64)
22972 if ((TYPE_MODE (type) == XCmode
22973 || TYPE_MODE (type) == TCmode) && align < 128)
22976 else if ((TREE_CODE (type) == RECORD_TYPE
22977 || TREE_CODE (type) == UNION_TYPE
22978 || TREE_CODE (type) == QUAL_UNION_TYPE)
22979 && TYPE_FIELDS (type))
22981 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
22983 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
22986 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
22987 || TREE_CODE (type) == INTEGER_TYPE)
22989 if (TYPE_MODE (type) == DFmode && align < 64)
22991 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
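/* A worked example of the rules above (illustrative): for

     static double d[2];

   TYPE_SIZE is 128 bits, so on x86-64 the array rule raises a requested
   32-bit alignment to 128 bits and aligned SSE accesses (movapd) become
   possible, while a lone 'double' would only be raised to 64 bits by the
   DFmode rule.  */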
22998 /* Compute the alignment for a local variable or a stack slot. EXP is
22999 the data type or decl itself, MODE is the widest mode available and
23000 ALIGN is the alignment that the object would ordinarily have. The
23001 value of this macro is used instead of that alignment to align the
23002 object. */
23005 ix86_local_alignment (tree exp, enum machine_mode mode,
23006 unsigned int align)
23010 if (exp && DECL_P (exp))
23012 type = TREE_TYPE (exp);
23021 /* Don't do dynamic stack realignment for long long objects with
23022 -mpreferred-stack-boundary=2. */
23025 && ix86_preferred_stack_boundary < 64
23026 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
23027 && (!type || !TYPE_USER_ALIGN (type))
23028 && (!decl || !DECL_USER_ALIGN (decl)))
23031 /* If TYPE is NULL, we are allocating a stack slot for caller-save
23032 register in MODE. We will return the largest alignment of XF
23033 and DF. */
23036 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
23037 align = GET_MODE_ALIGNMENT (DFmode);
23041 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
23042 to 16byte boundary. The exact wording is:
23044 An array uses the same alignment as its elements, except that a local or
23045 global array variable of length at least 16 bytes or
23046 a C99 variable-length array variable always has alignment of at least 16 bytes.
23048 This was added to allow use of aligned SSE instructions on arrays. The
23049 rule is meant for static storage (where the compiler cannot do the
23050 analysis by itself). We follow it for automatic variables only when
23051 convenient. We fully control everything in the function being compiled,
23052 and functions from other units cannot rely on the alignment.
23054 Exclude the va_list type. It is the common case of a local array where
23055 we cannot benefit from the alignment. */
23056 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
23059 if (AGGREGATE_TYPE_P (type)
23060 && (va_list_type_node == NULL_TREE
23061 || (TYPE_MAIN_VARIANT (type)
23062 != TYPE_MAIN_VARIANT (va_list_type_node)))
23063 && TYPE_SIZE (type)
23064 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
23065 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
23066 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
23069 if (TREE_CODE (type) == ARRAY_TYPE)
23071 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
23073 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
23076 else if (TREE_CODE (type) == COMPLEX_TYPE)
23078 if (TYPE_MODE (type) == DCmode && align < 64)
23080 if ((TYPE_MODE (type) == XCmode
23081 || TYPE_MODE (type) == TCmode) && align < 128)
23084 else if ((TREE_CODE (type) == RECORD_TYPE
23085 || TREE_CODE (type) == UNION_TYPE
23086 || TREE_CODE (type) == QUAL_UNION_TYPE)
23087 && TYPE_FIELDS (type))
23089 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
23091 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
23094 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
23095 || TREE_CODE (type) == INTEGER_TYPE)
23098 if (TYPE_MODE (type) == DFmode && align < 64)
23100 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
23106 /* Compute the minimum required alignment for dynamic stack realignment
23107 purposes for a local variable, parameter or a stack slot. EXP is
23108 the data type or decl itself, MODE is its mode and ALIGN is the
23109 alignment that the object would ordinarily have. */
23112 ix86_minimum_alignment (tree exp, enum machine_mode mode,
23113 unsigned int align)
23117 if (exp && DECL_P (exp))
23119 type = TREE_TYPE (exp);
23128 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
23131 /* Don't do dynamic stack realignment for long long objects with
23132 -mpreferred-stack-boundary=2. */
23133 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
23134 && (!type || !TYPE_USER_ALIGN (type))
23135 && (!decl || !DECL_USER_ALIGN (decl)))
23141 /* Find a location for the static chain incoming to a nested function.
23142 This is a register, unless all free registers are used by arguments. */
23145 ix86_static_chain (const_tree fndecl, bool incoming_p)
23149 if (!DECL_STATIC_CHAIN (fndecl))
23154 /* We always use R10 in 64-bit mode. */
23160 /* By default in 32-bit mode we use ECX to pass the static chain. */
23163 fntype = TREE_TYPE (fndecl);
23164 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
23166 /* Fastcall functions use ecx/edx for arguments, which leaves
23167 us with EAX for the static chain. */
23170 else if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)))
23172 /* Thiscall functions use ecx for arguments, which leaves
23173 us with EAX for the static chain. */
23176 else if (ix86_function_regparm (fntype, fndecl) == 3)
23178 /* For regparm 3, we have no free call-clobbered registers in
23179 which to store the static chain. In order to implement this,
23180 we have the trampoline push the static chain to the stack.
23181 However, we can't push a value below the return address when
23182 we call the nested function directly, so we have to use an
23183 alternate entry point. For this we use ESI, and have the
23184 alternate entry point push ESI, so that things appear the
23185 same once we're executing the nested function. */
23188 if (fndecl == current_function_decl)
23189 ix86_static_chain_on_stack = true;
23190 return gen_frame_mem (SImode,
23191 plus_constant (arg_pointer_rtx, -8));
23197 return gen_rtx_REG (Pmode, regno);
23200 /* Emit RTL insns to initialize the variable parts of a trampoline.
23201 FNDECL is the decl of the target address; M_TRAMP is a MEM for
23202 the trampoline, and CHAIN_VALUE is an RTX for the static chain
23203 to be passed to the target function. */
23206 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
23210 fnaddr = XEXP (DECL_RTL (fndecl), 0);
23217 /* Depending on the static chain location, either load a register
23218 with a constant, or push the constant to the stack. All of the
23219 instructions are the same size. */
23220 chain = ix86_static_chain (fndecl, true);
23223 if (REGNO (chain) == CX_REG)
23225 else if (REGNO (chain) == AX_REG)
23228 gcc_unreachable ();
23233 mem = adjust_address (m_tramp, QImode, 0);
23234 emit_move_insn (mem, gen_int_mode (opcode, QImode));
23236 mem = adjust_address (m_tramp, SImode, 1);
23237 emit_move_insn (mem, chain_value);
23239 /* Compute offset from the end of the jmp to the target function.
23240 In the case in which the trampoline stores the static chain on
23241 the stack, we need to skip the first insn which pushes the
23242 (call-saved) register static chain; this push is 1 byte. */
23243 disp = expand_binop (SImode, sub_optab, fnaddr,
23244 plus_constant (XEXP (m_tramp, 0),
23245 MEM_P (chain) ? 9 : 10),
23246 NULL_RTX, 1, OPTAB_DIRECT);
23248 mem = adjust_address (m_tramp, QImode, 5);
23249 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
23251 mem = adjust_address (m_tramp, SImode, 6);
23252 emit_move_insn (mem, disp);
23258 /* Load the function address to r11. Try to load address using
23259 the shorter movl instead of movabs. We may want to support
23260 movq for kernel mode, but the kernel does not use trampolines at
23261 the moment. */
23262 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
23264 fnaddr = copy_to_mode_reg (DImode, fnaddr);
23266 mem = adjust_address (m_tramp, HImode, offset);
23267 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
23269 mem = adjust_address (m_tramp, SImode, offset + 2);
23270 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
23275 mem = adjust_address (m_tramp, HImode, offset);
23276 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
23278 mem = adjust_address (m_tramp, DImode, offset + 2);
23279 emit_move_insn (mem, fnaddr);
23283 /* Load static chain using movabs to r10. */
23284 mem = adjust_address (m_tramp, HImode, offset);
23285 emit_move_insn (mem, gen_int_mode (0xba49, HImode));
23287 mem = adjust_address (m_tramp, DImode, offset + 2);
23288 emit_move_insn (mem, chain_value);
23291 /* Jump to r11; the last (unused) byte is a nop, only there to
23292 pad the write out to a single 32-bit store. */
23293 mem = adjust_address (m_tramp, SImode, offset);
23294 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
23297 gcc_assert (offset <= TRAMPOLINE_SIZE);
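/* Summary of the trampoline bytes written above (illustrative; the
   64-bit side shows the movabs path):

   32-bit, register static chain:
     0:  b9 or b8		movl $chain_value, %ecx or %eax
     1:  <chain_value>		4 bytes
     5:  e9			jmp rel32
     6:  <disp>			4 bytes

   64-bit:
     0:  49 bb			movabs $fnaddr, %r11
     2:  <fnaddr>		8 bytes
     10: 49 ba			movabs $chain_value, %r10
     12: <chain_value>		8 bytes
     20: 49 ff e3 90		jmp *%r11; nop (pads the final 32-bit store)  */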
23300 #ifdef ENABLE_EXECUTE_STACK
23301 #ifdef CHECK_EXECUTE_STACK_ENABLED
23302 if (CHECK_EXECUTE_STACK_ENABLED)
23304 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
23305 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
23309 /* The following file contains several enumerations and data structures
23310 built from the definitions in i386-builtin-types.def. */
23312 #include "i386-builtin-types.inc"
23314 /* Table for the ix86 builtin non-function types. */
23315 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
23317 /* Retrieve an element from the above table, building some of
23318 the types lazily. */
23321 ix86_get_builtin_type (enum ix86_builtin_type tcode)
23323 unsigned int index;
23326 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
23328 type = ix86_builtin_type_tab[(int) tcode];
23332 gcc_assert (tcode > IX86_BT_LAST_PRIM);
23333 if (tcode <= IX86_BT_LAST_VECT)
23335 enum machine_mode mode;
23337 index = tcode - IX86_BT_LAST_PRIM - 1;
23338 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
23339 mode = ix86_builtin_type_vect_mode[index];
23341 type = build_vector_type_for_mode (itype, mode);
23347 index = tcode - IX86_BT_LAST_VECT - 1;
23348 if (tcode <= IX86_BT_LAST_PTR)
23349 quals = TYPE_UNQUALIFIED;
23351 quals = TYPE_QUAL_CONST;
23353 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
23354 if (quals != TYPE_UNQUALIFIED)
23355 itype = build_qualified_type (itype, quals);
23357 type = build_pointer_type (itype);
23360 ix86_builtin_type_tab[(int) tcode] = type;
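/* Generic sketch of the lazy build-and-cache pattern used above, added
   for exposition and not part of GCC; a real cache would need GTY(())
   so the garbage collector keeps the node alive, as the table above
   does.  */

static tree lazy_node_sketch;

static tree
get_lazy_node_sketch (void)
{
  if (!lazy_node_sketch)
    lazy_node_sketch = build_pointer_type (float_type_node);
  return lazy_node_sketch;
}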
23364 /* Table for the ix86 builtin function types. */
23365 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
23367 /* Retrieve an element from the above table, building some of
23368 the types lazily. */
23371 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
23375 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
23377 type = ix86_builtin_func_type_tab[(int) tcode];
23381 if (tcode <= IX86_BT_LAST_FUNC)
23383 unsigned start = ix86_builtin_func_start[(int) tcode];
23384 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
23385 tree rtype, atype, args = void_list_node;
23388 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
23389 for (i = after - 1; i > start; --i)
23391 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
23392 args = tree_cons (NULL, atype, args);
23395 type = build_function_type (rtype, args);
23399 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
23400 enum ix86_builtin_func_type icode;
23402 icode = ix86_builtin_func_alias_base[index];
23403 type = ix86_get_builtin_func_type (icode);
23406 ix86_builtin_func_type_tab[(int) tcode] = type;
23411 /* Codes for all the SSE/MMX builtins. */
23412 enum ix86_builtins
23413 {
23414 IX86_BUILTIN_ADDPS,
23415 IX86_BUILTIN_ADDSS,
23416 IX86_BUILTIN_DIVPS,
23417 IX86_BUILTIN_DIVSS,
23418 IX86_BUILTIN_MULPS,
23419 IX86_BUILTIN_MULSS,
23420 IX86_BUILTIN_SUBPS,
23421 IX86_BUILTIN_SUBSS,
23423 IX86_BUILTIN_CMPEQPS,
23424 IX86_BUILTIN_CMPLTPS,
23425 IX86_BUILTIN_CMPLEPS,
23426 IX86_BUILTIN_CMPGTPS,
23427 IX86_BUILTIN_CMPGEPS,
23428 IX86_BUILTIN_CMPNEQPS,
23429 IX86_BUILTIN_CMPNLTPS,
23430 IX86_BUILTIN_CMPNLEPS,
23431 IX86_BUILTIN_CMPNGTPS,
23432 IX86_BUILTIN_CMPNGEPS,
23433 IX86_BUILTIN_CMPORDPS,
23434 IX86_BUILTIN_CMPUNORDPS,
23435 IX86_BUILTIN_CMPEQSS,
23436 IX86_BUILTIN_CMPLTSS,
23437 IX86_BUILTIN_CMPLESS,
23438 IX86_BUILTIN_CMPNEQSS,
23439 IX86_BUILTIN_CMPNLTSS,
23440 IX86_BUILTIN_CMPNLESS,
23441 IX86_BUILTIN_CMPNGTSS,
23442 IX86_BUILTIN_CMPNGESS,
23443 IX86_BUILTIN_CMPORDSS,
23444 IX86_BUILTIN_CMPUNORDSS,
23446 IX86_BUILTIN_COMIEQSS,
23447 IX86_BUILTIN_COMILTSS,
23448 IX86_BUILTIN_COMILESS,
23449 IX86_BUILTIN_COMIGTSS,
23450 IX86_BUILTIN_COMIGESS,
23451 IX86_BUILTIN_COMINEQSS,
23452 IX86_BUILTIN_UCOMIEQSS,
23453 IX86_BUILTIN_UCOMILTSS,
23454 IX86_BUILTIN_UCOMILESS,
23455 IX86_BUILTIN_UCOMIGTSS,
23456 IX86_BUILTIN_UCOMIGESS,
23457 IX86_BUILTIN_UCOMINEQSS,
23459 IX86_BUILTIN_CVTPI2PS,
23460 IX86_BUILTIN_CVTPS2PI,
23461 IX86_BUILTIN_CVTSI2SS,
23462 IX86_BUILTIN_CVTSI642SS,
23463 IX86_BUILTIN_CVTSS2SI,
23464 IX86_BUILTIN_CVTSS2SI64,
23465 IX86_BUILTIN_CVTTPS2PI,
23466 IX86_BUILTIN_CVTTSS2SI,
23467 IX86_BUILTIN_CVTTSS2SI64,
23469 IX86_BUILTIN_MAXPS,
23470 IX86_BUILTIN_MAXSS,
23471 IX86_BUILTIN_MINPS,
23472 IX86_BUILTIN_MINSS,
23474 IX86_BUILTIN_LOADUPS,
23475 IX86_BUILTIN_STOREUPS,
23476 IX86_BUILTIN_MOVSS,
23478 IX86_BUILTIN_MOVHLPS,
23479 IX86_BUILTIN_MOVLHPS,
23480 IX86_BUILTIN_LOADHPS,
23481 IX86_BUILTIN_LOADLPS,
23482 IX86_BUILTIN_STOREHPS,
23483 IX86_BUILTIN_STORELPS,
23485 IX86_BUILTIN_MASKMOVQ,
23486 IX86_BUILTIN_MOVMSKPS,
23487 IX86_BUILTIN_PMOVMSKB,
23489 IX86_BUILTIN_MOVNTPS,
23490 IX86_BUILTIN_MOVNTQ,
23492 IX86_BUILTIN_LOADDQU,
23493 IX86_BUILTIN_STOREDQU,
23495 IX86_BUILTIN_PACKSSWB,
23496 IX86_BUILTIN_PACKSSDW,
23497 IX86_BUILTIN_PACKUSWB,
23499 IX86_BUILTIN_PADDB,
23500 IX86_BUILTIN_PADDW,
23501 IX86_BUILTIN_PADDD,
23502 IX86_BUILTIN_PADDQ,
23503 IX86_BUILTIN_PADDSB,
23504 IX86_BUILTIN_PADDSW,
23505 IX86_BUILTIN_PADDUSB,
23506 IX86_BUILTIN_PADDUSW,
23507 IX86_BUILTIN_PSUBB,
23508 IX86_BUILTIN_PSUBW,
23509 IX86_BUILTIN_PSUBD,
23510 IX86_BUILTIN_PSUBQ,
23511 IX86_BUILTIN_PSUBSB,
23512 IX86_BUILTIN_PSUBSW,
23513 IX86_BUILTIN_PSUBUSB,
23514 IX86_BUILTIN_PSUBUSW,
23517 IX86_BUILTIN_PANDN,
23521 IX86_BUILTIN_PAVGB,
23522 IX86_BUILTIN_PAVGW,
23524 IX86_BUILTIN_PCMPEQB,
23525 IX86_BUILTIN_PCMPEQW,
23526 IX86_BUILTIN_PCMPEQD,
23527 IX86_BUILTIN_PCMPGTB,
23528 IX86_BUILTIN_PCMPGTW,
23529 IX86_BUILTIN_PCMPGTD,
23531 IX86_BUILTIN_PMADDWD,
23533 IX86_BUILTIN_PMAXSW,
23534 IX86_BUILTIN_PMAXUB,
23535 IX86_BUILTIN_PMINSW,
23536 IX86_BUILTIN_PMINUB,
23538 IX86_BUILTIN_PMULHUW,
23539 IX86_BUILTIN_PMULHW,
23540 IX86_BUILTIN_PMULLW,
23542 IX86_BUILTIN_PSADBW,
23543 IX86_BUILTIN_PSHUFW,
23545 IX86_BUILTIN_PSLLW,
23546 IX86_BUILTIN_PSLLD,
23547 IX86_BUILTIN_PSLLQ,
23548 IX86_BUILTIN_PSRAW,
23549 IX86_BUILTIN_PSRAD,
23550 IX86_BUILTIN_PSRLW,
23551 IX86_BUILTIN_PSRLD,
23552 IX86_BUILTIN_PSRLQ,
23553 IX86_BUILTIN_PSLLWI,
23554 IX86_BUILTIN_PSLLDI,
23555 IX86_BUILTIN_PSLLQI,
23556 IX86_BUILTIN_PSRAWI,
23557 IX86_BUILTIN_PSRADI,
23558 IX86_BUILTIN_PSRLWI,
23559 IX86_BUILTIN_PSRLDI,
23560 IX86_BUILTIN_PSRLQI,
23562 IX86_BUILTIN_PUNPCKHBW,
23563 IX86_BUILTIN_PUNPCKHWD,
23564 IX86_BUILTIN_PUNPCKHDQ,
23565 IX86_BUILTIN_PUNPCKLBW,
23566 IX86_BUILTIN_PUNPCKLWD,
23567 IX86_BUILTIN_PUNPCKLDQ,
23569 IX86_BUILTIN_SHUFPS,
23571 IX86_BUILTIN_RCPPS,
23572 IX86_BUILTIN_RCPSS,
23573 IX86_BUILTIN_RSQRTPS,
23574 IX86_BUILTIN_RSQRTPS_NR,
23575 IX86_BUILTIN_RSQRTSS,
23576 IX86_BUILTIN_RSQRTF,
23577 IX86_BUILTIN_SQRTPS,
23578 IX86_BUILTIN_SQRTPS_NR,
23579 IX86_BUILTIN_SQRTSS,
23581 IX86_BUILTIN_UNPCKHPS,
23582 IX86_BUILTIN_UNPCKLPS,
23584 IX86_BUILTIN_ANDPS,
23585 IX86_BUILTIN_ANDNPS,
23587 IX86_BUILTIN_XORPS,
23590 IX86_BUILTIN_LDMXCSR,
23591 IX86_BUILTIN_STMXCSR,
23592 IX86_BUILTIN_SFENCE,
23594 /* 3DNow! Original */
23595 IX86_BUILTIN_FEMMS,
23596 IX86_BUILTIN_PAVGUSB,
23597 IX86_BUILTIN_PF2ID,
23598 IX86_BUILTIN_PFACC,
23599 IX86_BUILTIN_PFADD,
23600 IX86_BUILTIN_PFCMPEQ,
23601 IX86_BUILTIN_PFCMPGE,
23602 IX86_BUILTIN_PFCMPGT,
23603 IX86_BUILTIN_PFMAX,
23604 IX86_BUILTIN_PFMIN,
23605 IX86_BUILTIN_PFMUL,
23606 IX86_BUILTIN_PFRCP,
23607 IX86_BUILTIN_PFRCPIT1,
23608 IX86_BUILTIN_PFRCPIT2,
23609 IX86_BUILTIN_PFRSQIT1,
23610 IX86_BUILTIN_PFRSQRT,
23611 IX86_BUILTIN_PFSUB,
23612 IX86_BUILTIN_PFSUBR,
23613 IX86_BUILTIN_PI2FD,
23614 IX86_BUILTIN_PMULHRW,
23616 /* 3DNow! Athlon Extensions */
23617 IX86_BUILTIN_PF2IW,
23618 IX86_BUILTIN_PFNACC,
23619 IX86_BUILTIN_PFPNACC,
23620 IX86_BUILTIN_PI2FW,
23621 IX86_BUILTIN_PSWAPDSI,
23622 IX86_BUILTIN_PSWAPDSF,
23625 IX86_BUILTIN_ADDPD,
23626 IX86_BUILTIN_ADDSD,
23627 IX86_BUILTIN_DIVPD,
23628 IX86_BUILTIN_DIVSD,
23629 IX86_BUILTIN_MULPD,
23630 IX86_BUILTIN_MULSD,
23631 IX86_BUILTIN_SUBPD,
23632 IX86_BUILTIN_SUBSD,
23634 IX86_BUILTIN_CMPEQPD,
23635 IX86_BUILTIN_CMPLTPD,
23636 IX86_BUILTIN_CMPLEPD,
23637 IX86_BUILTIN_CMPGTPD,
23638 IX86_BUILTIN_CMPGEPD,
23639 IX86_BUILTIN_CMPNEQPD,
23640 IX86_BUILTIN_CMPNLTPD,
23641 IX86_BUILTIN_CMPNLEPD,
23642 IX86_BUILTIN_CMPNGTPD,
23643 IX86_BUILTIN_CMPNGEPD,
23644 IX86_BUILTIN_CMPORDPD,
23645 IX86_BUILTIN_CMPUNORDPD,
23646 IX86_BUILTIN_CMPEQSD,
23647 IX86_BUILTIN_CMPLTSD,
23648 IX86_BUILTIN_CMPLESD,
23649 IX86_BUILTIN_CMPNEQSD,
23650 IX86_BUILTIN_CMPNLTSD,
23651 IX86_BUILTIN_CMPNLESD,
23652 IX86_BUILTIN_CMPORDSD,
23653 IX86_BUILTIN_CMPUNORDSD,
23655 IX86_BUILTIN_COMIEQSD,
23656 IX86_BUILTIN_COMILTSD,
23657 IX86_BUILTIN_COMILESD,
23658 IX86_BUILTIN_COMIGTSD,
23659 IX86_BUILTIN_COMIGESD,
23660 IX86_BUILTIN_COMINEQSD,
23661 IX86_BUILTIN_UCOMIEQSD,
23662 IX86_BUILTIN_UCOMILTSD,
23663 IX86_BUILTIN_UCOMILESD,
23664 IX86_BUILTIN_UCOMIGTSD,
23665 IX86_BUILTIN_UCOMIGESD,
23666 IX86_BUILTIN_UCOMINEQSD,
23668 IX86_BUILTIN_MAXPD,
23669 IX86_BUILTIN_MAXSD,
23670 IX86_BUILTIN_MINPD,
23671 IX86_BUILTIN_MINSD,
23673 IX86_BUILTIN_ANDPD,
23674 IX86_BUILTIN_ANDNPD,
23676 IX86_BUILTIN_XORPD,
23678 IX86_BUILTIN_SQRTPD,
23679 IX86_BUILTIN_SQRTSD,
23681 IX86_BUILTIN_UNPCKHPD,
23682 IX86_BUILTIN_UNPCKLPD,
23684 IX86_BUILTIN_SHUFPD,
23686 IX86_BUILTIN_LOADUPD,
23687 IX86_BUILTIN_STOREUPD,
23688 IX86_BUILTIN_MOVSD,
23690 IX86_BUILTIN_LOADHPD,
23691 IX86_BUILTIN_LOADLPD,
23693 IX86_BUILTIN_CVTDQ2PD,
23694 IX86_BUILTIN_CVTDQ2PS,
23696 IX86_BUILTIN_CVTPD2DQ,
23697 IX86_BUILTIN_CVTPD2PI,
23698 IX86_BUILTIN_CVTPD2PS,
23699 IX86_BUILTIN_CVTTPD2DQ,
23700 IX86_BUILTIN_CVTTPD2PI,
23702 IX86_BUILTIN_CVTPI2PD,
23703 IX86_BUILTIN_CVTSI2SD,
23704 IX86_BUILTIN_CVTSI642SD,
23706 IX86_BUILTIN_CVTSD2SI,
23707 IX86_BUILTIN_CVTSD2SI64,
23708 IX86_BUILTIN_CVTSD2SS,
23709 IX86_BUILTIN_CVTSS2SD,
23710 IX86_BUILTIN_CVTTSD2SI,
23711 IX86_BUILTIN_CVTTSD2SI64,
23713 IX86_BUILTIN_CVTPS2DQ,
23714 IX86_BUILTIN_CVTPS2PD,
23715 IX86_BUILTIN_CVTTPS2DQ,
23717 IX86_BUILTIN_MOVNTI,
23718 IX86_BUILTIN_MOVNTPD,
23719 IX86_BUILTIN_MOVNTDQ,
23721 IX86_BUILTIN_MOVQ128,
23724 IX86_BUILTIN_MASKMOVDQU,
23725 IX86_BUILTIN_MOVMSKPD,
23726 IX86_BUILTIN_PMOVMSKB128,
23728 IX86_BUILTIN_PACKSSWB128,
23729 IX86_BUILTIN_PACKSSDW128,
23730 IX86_BUILTIN_PACKUSWB128,
23732 IX86_BUILTIN_PADDB128,
23733 IX86_BUILTIN_PADDW128,
23734 IX86_BUILTIN_PADDD128,
23735 IX86_BUILTIN_PADDQ128,
23736 IX86_BUILTIN_PADDSB128,
23737 IX86_BUILTIN_PADDSW128,
23738 IX86_BUILTIN_PADDUSB128,
23739 IX86_BUILTIN_PADDUSW128,
23740 IX86_BUILTIN_PSUBB128,
23741 IX86_BUILTIN_PSUBW128,
23742 IX86_BUILTIN_PSUBD128,
23743 IX86_BUILTIN_PSUBQ128,
23744 IX86_BUILTIN_PSUBSB128,
23745 IX86_BUILTIN_PSUBSW128,
23746 IX86_BUILTIN_PSUBUSB128,
23747 IX86_BUILTIN_PSUBUSW128,
23749 IX86_BUILTIN_PAND128,
23750 IX86_BUILTIN_PANDN128,
23751 IX86_BUILTIN_POR128,
23752 IX86_BUILTIN_PXOR128,
23754 IX86_BUILTIN_PAVGB128,
23755 IX86_BUILTIN_PAVGW128,
23757 IX86_BUILTIN_PCMPEQB128,
23758 IX86_BUILTIN_PCMPEQW128,
23759 IX86_BUILTIN_PCMPEQD128,
23760 IX86_BUILTIN_PCMPGTB128,
23761 IX86_BUILTIN_PCMPGTW128,
23762 IX86_BUILTIN_PCMPGTD128,
23764 IX86_BUILTIN_PMADDWD128,
23766 IX86_BUILTIN_PMAXSW128,
23767 IX86_BUILTIN_PMAXUB128,
23768 IX86_BUILTIN_PMINSW128,
23769 IX86_BUILTIN_PMINUB128,
23771 IX86_BUILTIN_PMULUDQ,
23772 IX86_BUILTIN_PMULUDQ128,
23773 IX86_BUILTIN_PMULHUW128,
23774 IX86_BUILTIN_PMULHW128,
23775 IX86_BUILTIN_PMULLW128,
23777 IX86_BUILTIN_PSADBW128,
23778 IX86_BUILTIN_PSHUFHW,
23779 IX86_BUILTIN_PSHUFLW,
23780 IX86_BUILTIN_PSHUFD,
23782 IX86_BUILTIN_PSLLDQI128,
23783 IX86_BUILTIN_PSLLWI128,
23784 IX86_BUILTIN_PSLLDI128,
23785 IX86_BUILTIN_PSLLQI128,
23786 IX86_BUILTIN_PSRAWI128,
23787 IX86_BUILTIN_PSRADI128,
23788 IX86_BUILTIN_PSRLDQI128,
23789 IX86_BUILTIN_PSRLWI128,
23790 IX86_BUILTIN_PSRLDI128,
23791 IX86_BUILTIN_PSRLQI128,
23793 IX86_BUILTIN_PSLLDQ128,
23794 IX86_BUILTIN_PSLLW128,
23795 IX86_BUILTIN_PSLLD128,
23796 IX86_BUILTIN_PSLLQ128,
23797 IX86_BUILTIN_PSRAW128,
23798 IX86_BUILTIN_PSRAD128,
23799 IX86_BUILTIN_PSRLW128,
23800 IX86_BUILTIN_PSRLD128,
23801 IX86_BUILTIN_PSRLQ128,
23803 IX86_BUILTIN_PUNPCKHBW128,
23804 IX86_BUILTIN_PUNPCKHWD128,
23805 IX86_BUILTIN_PUNPCKHDQ128,
23806 IX86_BUILTIN_PUNPCKHQDQ128,
23807 IX86_BUILTIN_PUNPCKLBW128,
23808 IX86_BUILTIN_PUNPCKLWD128,
23809 IX86_BUILTIN_PUNPCKLDQ128,
23810 IX86_BUILTIN_PUNPCKLQDQ128,
23812 IX86_BUILTIN_CLFLUSH,
23813 IX86_BUILTIN_MFENCE,
23814 IX86_BUILTIN_LFENCE,
23816 IX86_BUILTIN_BSRSI,
23817 IX86_BUILTIN_BSRDI,
23818 IX86_BUILTIN_RDPMC,
23819 IX86_BUILTIN_RDTSC,
23820 IX86_BUILTIN_RDTSCP,
23821 IX86_BUILTIN_ROLQI,
23822 IX86_BUILTIN_ROLHI,
23823 IX86_BUILTIN_RORQI,
23824 IX86_BUILTIN_RORHI,
23827 IX86_BUILTIN_ADDSUBPS,
23828 IX86_BUILTIN_HADDPS,
23829 IX86_BUILTIN_HSUBPS,
23830 IX86_BUILTIN_MOVSHDUP,
23831 IX86_BUILTIN_MOVSLDUP,
23832 IX86_BUILTIN_ADDSUBPD,
23833 IX86_BUILTIN_HADDPD,
23834 IX86_BUILTIN_HSUBPD,
23835 IX86_BUILTIN_LDDQU,
23837 IX86_BUILTIN_MONITOR,
23838 IX86_BUILTIN_MWAIT,
23841 IX86_BUILTIN_PHADDW,
23842 IX86_BUILTIN_PHADDD,
23843 IX86_BUILTIN_PHADDSW,
23844 IX86_BUILTIN_PHSUBW,
23845 IX86_BUILTIN_PHSUBD,
23846 IX86_BUILTIN_PHSUBSW,
23847 IX86_BUILTIN_PMADDUBSW,
23848 IX86_BUILTIN_PMULHRSW,
23849 IX86_BUILTIN_PSHUFB,
23850 IX86_BUILTIN_PSIGNB,
23851 IX86_BUILTIN_PSIGNW,
23852 IX86_BUILTIN_PSIGND,
23853 IX86_BUILTIN_PALIGNR,
23854 IX86_BUILTIN_PABSB,
23855 IX86_BUILTIN_PABSW,
23856 IX86_BUILTIN_PABSD,
23858 IX86_BUILTIN_PHADDW128,
23859 IX86_BUILTIN_PHADDD128,
23860 IX86_BUILTIN_PHADDSW128,
23861 IX86_BUILTIN_PHSUBW128,
23862 IX86_BUILTIN_PHSUBD128,
23863 IX86_BUILTIN_PHSUBSW128,
23864 IX86_BUILTIN_PMADDUBSW128,
23865 IX86_BUILTIN_PMULHRSW128,
23866 IX86_BUILTIN_PSHUFB128,
23867 IX86_BUILTIN_PSIGNB128,
23868 IX86_BUILTIN_PSIGNW128,
23869 IX86_BUILTIN_PSIGND128,
23870 IX86_BUILTIN_PALIGNR128,
23871 IX86_BUILTIN_PABSB128,
23872 IX86_BUILTIN_PABSW128,
23873 IX86_BUILTIN_PABSD128,
23875 /* AMDFAM10 - SSE4A New Instructions. */
23876 IX86_BUILTIN_MOVNTSD,
23877 IX86_BUILTIN_MOVNTSS,
23878 IX86_BUILTIN_EXTRQI,
23879 IX86_BUILTIN_EXTRQ,
23880 IX86_BUILTIN_INSERTQI,
23881 IX86_BUILTIN_INSERTQ,
23884 IX86_BUILTIN_BLENDPD,
23885 IX86_BUILTIN_BLENDPS,
23886 IX86_BUILTIN_BLENDVPD,
23887 IX86_BUILTIN_BLENDVPS,
23888 IX86_BUILTIN_PBLENDVB128,
23889 IX86_BUILTIN_PBLENDW128,
23894 IX86_BUILTIN_INSERTPS128,
23896 IX86_BUILTIN_MOVNTDQA,
23897 IX86_BUILTIN_MPSADBW128,
23898 IX86_BUILTIN_PACKUSDW128,
23899 IX86_BUILTIN_PCMPEQQ,
23900 IX86_BUILTIN_PHMINPOSUW128,
23902 IX86_BUILTIN_PMAXSB128,
23903 IX86_BUILTIN_PMAXSD128,
23904 IX86_BUILTIN_PMAXUD128,
23905 IX86_BUILTIN_PMAXUW128,
23907 IX86_BUILTIN_PMINSB128,
23908 IX86_BUILTIN_PMINSD128,
23909 IX86_BUILTIN_PMINUD128,
23910 IX86_BUILTIN_PMINUW128,
23912 IX86_BUILTIN_PMOVSXBW128,
23913 IX86_BUILTIN_PMOVSXBD128,
23914 IX86_BUILTIN_PMOVSXBQ128,
23915 IX86_BUILTIN_PMOVSXWD128,
23916 IX86_BUILTIN_PMOVSXWQ128,
23917 IX86_BUILTIN_PMOVSXDQ128,
23919 IX86_BUILTIN_PMOVZXBW128,
23920 IX86_BUILTIN_PMOVZXBD128,
23921 IX86_BUILTIN_PMOVZXBQ128,
23922 IX86_BUILTIN_PMOVZXWD128,
23923 IX86_BUILTIN_PMOVZXWQ128,
23924 IX86_BUILTIN_PMOVZXDQ128,
23926 IX86_BUILTIN_PMULDQ128,
23927 IX86_BUILTIN_PMULLD128,
23929 IX86_BUILTIN_ROUNDPD,
23930 IX86_BUILTIN_ROUNDPS,
23931 IX86_BUILTIN_ROUNDSD,
23932 IX86_BUILTIN_ROUNDSS,
23934 IX86_BUILTIN_FLOORPD,
23935 IX86_BUILTIN_CEILPD,
23936 IX86_BUILTIN_TRUNCPD,
23937 IX86_BUILTIN_RINTPD,
23938 IX86_BUILTIN_FLOORPS,
23939 IX86_BUILTIN_CEILPS,
23940 IX86_BUILTIN_TRUNCPS,
23941 IX86_BUILTIN_RINTPS,
23943 IX86_BUILTIN_PTESTZ,
23944 IX86_BUILTIN_PTESTC,
23945 IX86_BUILTIN_PTESTNZC,
23947 IX86_BUILTIN_VEC_INIT_V2SI,
23948 IX86_BUILTIN_VEC_INIT_V4HI,
23949 IX86_BUILTIN_VEC_INIT_V8QI,
23950 IX86_BUILTIN_VEC_EXT_V2DF,
23951 IX86_BUILTIN_VEC_EXT_V2DI,
23952 IX86_BUILTIN_VEC_EXT_V4SF,
23953 IX86_BUILTIN_VEC_EXT_V4SI,
23954 IX86_BUILTIN_VEC_EXT_V8HI,
23955 IX86_BUILTIN_VEC_EXT_V2SI,
23956 IX86_BUILTIN_VEC_EXT_V4HI,
23957 IX86_BUILTIN_VEC_EXT_V16QI,
23958 IX86_BUILTIN_VEC_SET_V2DI,
23959 IX86_BUILTIN_VEC_SET_V4SF,
23960 IX86_BUILTIN_VEC_SET_V4SI,
23961 IX86_BUILTIN_VEC_SET_V8HI,
23962 IX86_BUILTIN_VEC_SET_V4HI,
23963 IX86_BUILTIN_VEC_SET_V16QI,
23965 IX86_BUILTIN_VEC_PACK_SFIX,
23968 IX86_BUILTIN_CRC32QI,
23969 IX86_BUILTIN_CRC32HI,
23970 IX86_BUILTIN_CRC32SI,
23971 IX86_BUILTIN_CRC32DI,
23973 IX86_BUILTIN_PCMPESTRI128,
23974 IX86_BUILTIN_PCMPESTRM128,
23975 IX86_BUILTIN_PCMPESTRA128,
23976 IX86_BUILTIN_PCMPESTRC128,
23977 IX86_BUILTIN_PCMPESTRO128,
23978 IX86_BUILTIN_PCMPESTRS128,
23979 IX86_BUILTIN_PCMPESTRZ128,
23980 IX86_BUILTIN_PCMPISTRI128,
23981 IX86_BUILTIN_PCMPISTRM128,
23982 IX86_BUILTIN_PCMPISTRA128,
23983 IX86_BUILTIN_PCMPISTRC128,
23984 IX86_BUILTIN_PCMPISTRO128,
23985 IX86_BUILTIN_PCMPISTRS128,
23986 IX86_BUILTIN_PCMPISTRZ128,
23988 IX86_BUILTIN_PCMPGTQ,
23990 /* AES instructions */
23991 IX86_BUILTIN_AESENC128,
23992 IX86_BUILTIN_AESENCLAST128,
23993 IX86_BUILTIN_AESDEC128,
23994 IX86_BUILTIN_AESDECLAST128,
23995 IX86_BUILTIN_AESIMC128,
23996 IX86_BUILTIN_AESKEYGENASSIST128,
23998 /* PCLMUL instruction */
23999 IX86_BUILTIN_PCLMULQDQ128,
24002 IX86_BUILTIN_ADDPD256,
24003 IX86_BUILTIN_ADDPS256,
24004 IX86_BUILTIN_ADDSUBPD256,
24005 IX86_BUILTIN_ADDSUBPS256,
24006 IX86_BUILTIN_ANDPD256,
24007 IX86_BUILTIN_ANDPS256,
24008 IX86_BUILTIN_ANDNPD256,
24009 IX86_BUILTIN_ANDNPS256,
24010 IX86_BUILTIN_BLENDPD256,
24011 IX86_BUILTIN_BLENDPS256,
24012 IX86_BUILTIN_BLENDVPD256,
24013 IX86_BUILTIN_BLENDVPS256,
24014 IX86_BUILTIN_DIVPD256,
24015 IX86_BUILTIN_DIVPS256,
24016 IX86_BUILTIN_DPPS256,
24017 IX86_BUILTIN_HADDPD256,
24018 IX86_BUILTIN_HADDPS256,
24019 IX86_BUILTIN_HSUBPD256,
24020 IX86_BUILTIN_HSUBPS256,
24021 IX86_BUILTIN_MAXPD256,
24022 IX86_BUILTIN_MAXPS256,
24023 IX86_BUILTIN_MINPD256,
24024 IX86_BUILTIN_MINPS256,
24025 IX86_BUILTIN_MULPD256,
24026 IX86_BUILTIN_MULPS256,
24027 IX86_BUILTIN_ORPD256,
24028 IX86_BUILTIN_ORPS256,
24029 IX86_BUILTIN_SHUFPD256,
24030 IX86_BUILTIN_SHUFPS256,
24031 IX86_BUILTIN_SUBPD256,
24032 IX86_BUILTIN_SUBPS256,
24033 IX86_BUILTIN_XORPD256,
24034 IX86_BUILTIN_XORPS256,
24035 IX86_BUILTIN_CMPSD,
24036 IX86_BUILTIN_CMPSS,
24037 IX86_BUILTIN_CMPPD,
24038 IX86_BUILTIN_CMPPS,
24039 IX86_BUILTIN_CMPPD256,
24040 IX86_BUILTIN_CMPPS256,
24041 IX86_BUILTIN_CVTDQ2PD256,
24042 IX86_BUILTIN_CVTDQ2PS256,
24043 IX86_BUILTIN_CVTPD2PS256,
24044 IX86_BUILTIN_CVTPS2DQ256,
24045 IX86_BUILTIN_CVTPS2PD256,
24046 IX86_BUILTIN_CVTTPD2DQ256,
24047 IX86_BUILTIN_CVTPD2DQ256,
24048 IX86_BUILTIN_CVTTPS2DQ256,
24049 IX86_BUILTIN_EXTRACTF128PD256,
24050 IX86_BUILTIN_EXTRACTF128PS256,
24051 IX86_BUILTIN_EXTRACTF128SI256,
24052 IX86_BUILTIN_VZEROALL,
24053 IX86_BUILTIN_VZEROUPPER,
24054 IX86_BUILTIN_VPERMILVARPD,
24055 IX86_BUILTIN_VPERMILVARPS,
24056 IX86_BUILTIN_VPERMILVARPD256,
24057 IX86_BUILTIN_VPERMILVARPS256,
24058 IX86_BUILTIN_VPERMILPD,
24059 IX86_BUILTIN_VPERMILPS,
24060 IX86_BUILTIN_VPERMILPD256,
24061 IX86_BUILTIN_VPERMILPS256,
24062 IX86_BUILTIN_VPERMIL2PD,
24063 IX86_BUILTIN_VPERMIL2PS,
24064 IX86_BUILTIN_VPERMIL2PD256,
24065 IX86_BUILTIN_VPERMIL2PS256,
24066 IX86_BUILTIN_VPERM2F128PD256,
24067 IX86_BUILTIN_VPERM2F128PS256,
24068 IX86_BUILTIN_VPERM2F128SI256,
24069 IX86_BUILTIN_VBROADCASTSS,
24070 IX86_BUILTIN_VBROADCASTSD256,
24071 IX86_BUILTIN_VBROADCASTSS256,
24072 IX86_BUILTIN_VBROADCASTPD256,
24073 IX86_BUILTIN_VBROADCASTPS256,
24074 IX86_BUILTIN_VINSERTF128PD256,
24075 IX86_BUILTIN_VINSERTF128PS256,
24076 IX86_BUILTIN_VINSERTF128SI256,
24077 IX86_BUILTIN_LOADUPD256,
24078 IX86_BUILTIN_LOADUPS256,
24079 IX86_BUILTIN_STOREUPD256,
24080 IX86_BUILTIN_STOREUPS256,
24081 IX86_BUILTIN_LDDQU256,
24082 IX86_BUILTIN_MOVNTDQ256,
24083 IX86_BUILTIN_MOVNTPD256,
24084 IX86_BUILTIN_MOVNTPS256,
24085 IX86_BUILTIN_LOADDQU256,
24086 IX86_BUILTIN_STOREDQU256,
24087 IX86_BUILTIN_MASKLOADPD,
24088 IX86_BUILTIN_MASKLOADPS,
24089 IX86_BUILTIN_MASKSTOREPD,
24090 IX86_BUILTIN_MASKSTOREPS,
24091 IX86_BUILTIN_MASKLOADPD256,
24092 IX86_BUILTIN_MASKLOADPS256,
24093 IX86_BUILTIN_MASKSTOREPD256,
24094 IX86_BUILTIN_MASKSTOREPS256,
  IX86_BUILTIN_MOVSHDUP256,
  IX86_BUILTIN_MOVSLDUP256,
  IX86_BUILTIN_MOVDDUP256,

  IX86_BUILTIN_SQRTPD256,
  IX86_BUILTIN_SQRTPS256,
  IX86_BUILTIN_SQRTPS_NR256,
  IX86_BUILTIN_RSQRTPS256,
  IX86_BUILTIN_RSQRTPS_NR256,

  IX86_BUILTIN_RCPPS256,

  IX86_BUILTIN_ROUNDPD256,
  IX86_BUILTIN_ROUNDPS256,

  IX86_BUILTIN_FLOORPD256,
  IX86_BUILTIN_CEILPD256,
  IX86_BUILTIN_TRUNCPD256,
  IX86_BUILTIN_RINTPD256,
  IX86_BUILTIN_FLOORPS256,
  IX86_BUILTIN_CEILPS256,
  IX86_BUILTIN_TRUNCPS256,
  IX86_BUILTIN_RINTPS256,

  IX86_BUILTIN_UNPCKHPD256,
  IX86_BUILTIN_UNPCKLPD256,
  IX86_BUILTIN_UNPCKHPS256,
  IX86_BUILTIN_UNPCKLPS256,

  IX86_BUILTIN_SI256_SI,
  IX86_BUILTIN_PS256_PS,
  IX86_BUILTIN_PD256_PD,
  IX86_BUILTIN_SI_SI256,
  IX86_BUILTIN_PS_PS256,
  IX86_BUILTIN_PD_PD256,

  IX86_BUILTIN_VTESTZPD,
  IX86_BUILTIN_VTESTCPD,
  IX86_BUILTIN_VTESTNZCPD,
  IX86_BUILTIN_VTESTZPS,
  IX86_BUILTIN_VTESTCPS,
  IX86_BUILTIN_VTESTNZCPS,
  IX86_BUILTIN_VTESTZPD256,
  IX86_BUILTIN_VTESTCPD256,
  IX86_BUILTIN_VTESTNZCPD256,
  IX86_BUILTIN_VTESTZPS256,
  IX86_BUILTIN_VTESTCPS256,
  IX86_BUILTIN_VTESTNZCPS256,
  IX86_BUILTIN_PTESTZ256,
  IX86_BUILTIN_PTESTC256,
  IX86_BUILTIN_PTESTNZC256,

  IX86_BUILTIN_MOVMSKPD256,
  IX86_BUILTIN_MOVMSKPS256,

  /* TFmode support builtins.  */
  IX86_BUILTIN_HUGE_VALQ,
  IX86_BUILTIN_FABSQ,
  IX86_BUILTIN_COPYSIGNQ,

  /* Vectorizer support builtins.  */
  IX86_BUILTIN_CPYSGNPS,
  IX86_BUILTIN_CPYSGNPD,
  IX86_BUILTIN_CPYSGNPS256,
  IX86_BUILTIN_CPYSGNPD256,

  IX86_BUILTIN_CVTUDQ2PS,

  IX86_BUILTIN_VEC_PERM_V2DF,
  IX86_BUILTIN_VEC_PERM_V4SF,
  IX86_BUILTIN_VEC_PERM_V2DI,
  IX86_BUILTIN_VEC_PERM_V4SI,
  IX86_BUILTIN_VEC_PERM_V8HI,
  IX86_BUILTIN_VEC_PERM_V16QI,
  IX86_BUILTIN_VEC_PERM_V2DI_U,
  IX86_BUILTIN_VEC_PERM_V4SI_U,
  IX86_BUILTIN_VEC_PERM_V8HI_U,
  IX86_BUILTIN_VEC_PERM_V16QI_U,
  IX86_BUILTIN_VEC_PERM_V4DF,
  IX86_BUILTIN_VEC_PERM_V8SF,

  /* FMA4 and XOP instructions.  */
  IX86_BUILTIN_VFMADDSS,
  IX86_BUILTIN_VFMADDSD,
  IX86_BUILTIN_VFMADDPS,
  IX86_BUILTIN_VFMADDPD,
  IX86_BUILTIN_VFMADDPS256,
  IX86_BUILTIN_VFMADDPD256,
  IX86_BUILTIN_VFMADDSUBPS,
  IX86_BUILTIN_VFMADDSUBPD,
  IX86_BUILTIN_VFMADDSUBPS256,
  IX86_BUILTIN_VFMADDSUBPD256,

  IX86_BUILTIN_VPCMOV,
  IX86_BUILTIN_VPCMOV_V2DI,
  IX86_BUILTIN_VPCMOV_V4SI,
  IX86_BUILTIN_VPCMOV_V8HI,
  IX86_BUILTIN_VPCMOV_V16QI,
  IX86_BUILTIN_VPCMOV_V4SF,
  IX86_BUILTIN_VPCMOV_V2DF,
  IX86_BUILTIN_VPCMOV256,
  IX86_BUILTIN_VPCMOV_V4DI256,
  IX86_BUILTIN_VPCMOV_V8SI256,
  IX86_BUILTIN_VPCMOV_V16HI256,
  IX86_BUILTIN_VPCMOV_V32QI256,
  IX86_BUILTIN_VPCMOV_V8SF256,
  IX86_BUILTIN_VPCMOV_V4DF256,

  IX86_BUILTIN_VPPERM,

  IX86_BUILTIN_VPMACSSWW,
  IX86_BUILTIN_VPMACSWW,
  IX86_BUILTIN_VPMACSSWD,
  IX86_BUILTIN_VPMACSWD,
  IX86_BUILTIN_VPMACSSDD,
  IX86_BUILTIN_VPMACSDD,
  IX86_BUILTIN_VPMACSSDQL,
  IX86_BUILTIN_VPMACSSDQH,
  IX86_BUILTIN_VPMACSDQL,
  IX86_BUILTIN_VPMACSDQH,
  IX86_BUILTIN_VPMADCSSWD,
  IX86_BUILTIN_VPMADCSWD,

  IX86_BUILTIN_VPHADDBW,
  IX86_BUILTIN_VPHADDBD,
  IX86_BUILTIN_VPHADDBQ,
  IX86_BUILTIN_VPHADDWD,
  IX86_BUILTIN_VPHADDWQ,
  IX86_BUILTIN_VPHADDDQ,
  IX86_BUILTIN_VPHADDUBW,
  IX86_BUILTIN_VPHADDUBD,
  IX86_BUILTIN_VPHADDUBQ,
  IX86_BUILTIN_VPHADDUWD,
  IX86_BUILTIN_VPHADDUWQ,
  IX86_BUILTIN_VPHADDUDQ,
  IX86_BUILTIN_VPHSUBBW,
  IX86_BUILTIN_VPHSUBWD,
  IX86_BUILTIN_VPHSUBDQ,

  IX86_BUILTIN_VPROTB,
  IX86_BUILTIN_VPROTW,
  IX86_BUILTIN_VPROTD,
  IX86_BUILTIN_VPROTQ,
  IX86_BUILTIN_VPROTB_IMM,
  IX86_BUILTIN_VPROTW_IMM,
  IX86_BUILTIN_VPROTD_IMM,
  IX86_BUILTIN_VPROTQ_IMM,

  IX86_BUILTIN_VPSHLB,
  IX86_BUILTIN_VPSHLW,
  IX86_BUILTIN_VPSHLD,
  IX86_BUILTIN_VPSHLQ,
  IX86_BUILTIN_VPSHAB,
  IX86_BUILTIN_VPSHAW,
  IX86_BUILTIN_VPSHAD,
  IX86_BUILTIN_VPSHAQ,

  IX86_BUILTIN_VFRCZSS,
  IX86_BUILTIN_VFRCZSD,
  IX86_BUILTIN_VFRCZPS,
  IX86_BUILTIN_VFRCZPD,
  IX86_BUILTIN_VFRCZPS256,
  IX86_BUILTIN_VFRCZPD256,

  IX86_BUILTIN_VPCOMEQUB,
  IX86_BUILTIN_VPCOMNEUB,
  IX86_BUILTIN_VPCOMLTUB,
  IX86_BUILTIN_VPCOMLEUB,
  IX86_BUILTIN_VPCOMGTUB,
  IX86_BUILTIN_VPCOMGEUB,
  IX86_BUILTIN_VPCOMFALSEUB,
  IX86_BUILTIN_VPCOMTRUEUB,

  IX86_BUILTIN_VPCOMEQUW,
  IX86_BUILTIN_VPCOMNEUW,
  IX86_BUILTIN_VPCOMLTUW,
  IX86_BUILTIN_VPCOMLEUW,
  IX86_BUILTIN_VPCOMGTUW,
  IX86_BUILTIN_VPCOMGEUW,
  IX86_BUILTIN_VPCOMFALSEUW,
  IX86_BUILTIN_VPCOMTRUEUW,

  IX86_BUILTIN_VPCOMEQUD,
  IX86_BUILTIN_VPCOMNEUD,
  IX86_BUILTIN_VPCOMLTUD,
  IX86_BUILTIN_VPCOMLEUD,
  IX86_BUILTIN_VPCOMGTUD,
  IX86_BUILTIN_VPCOMGEUD,
  IX86_BUILTIN_VPCOMFALSEUD,
  IX86_BUILTIN_VPCOMTRUEUD,

  IX86_BUILTIN_VPCOMEQUQ,
  IX86_BUILTIN_VPCOMNEUQ,
  IX86_BUILTIN_VPCOMLTUQ,
  IX86_BUILTIN_VPCOMLEUQ,
  IX86_BUILTIN_VPCOMGTUQ,
  IX86_BUILTIN_VPCOMGEUQ,
  IX86_BUILTIN_VPCOMFALSEUQ,
  IX86_BUILTIN_VPCOMTRUEUQ,

  IX86_BUILTIN_VPCOMEQB,
  IX86_BUILTIN_VPCOMNEB,
  IX86_BUILTIN_VPCOMLTB,
  IX86_BUILTIN_VPCOMLEB,
  IX86_BUILTIN_VPCOMGTB,
  IX86_BUILTIN_VPCOMGEB,
  IX86_BUILTIN_VPCOMFALSEB,
  IX86_BUILTIN_VPCOMTRUEB,

  IX86_BUILTIN_VPCOMEQW,
  IX86_BUILTIN_VPCOMNEW,
  IX86_BUILTIN_VPCOMLTW,
  IX86_BUILTIN_VPCOMLEW,
  IX86_BUILTIN_VPCOMGTW,
  IX86_BUILTIN_VPCOMGEW,
  IX86_BUILTIN_VPCOMFALSEW,
  IX86_BUILTIN_VPCOMTRUEW,

  IX86_BUILTIN_VPCOMEQD,
  IX86_BUILTIN_VPCOMNED,
  IX86_BUILTIN_VPCOMLTD,
  IX86_BUILTIN_VPCOMLED,
  IX86_BUILTIN_VPCOMGTD,
  IX86_BUILTIN_VPCOMGED,
  IX86_BUILTIN_VPCOMFALSED,
  IX86_BUILTIN_VPCOMTRUED,

  IX86_BUILTIN_VPCOMEQQ,
  IX86_BUILTIN_VPCOMNEQ,
  IX86_BUILTIN_VPCOMLTQ,
  IX86_BUILTIN_VPCOMLEQ,
  IX86_BUILTIN_VPCOMGTQ,
  IX86_BUILTIN_VPCOMGEQ,
  IX86_BUILTIN_VPCOMFALSEQ,
  IX86_BUILTIN_VPCOMTRUEQ,

  /* LWP instructions.  */
  IX86_BUILTIN_LLWPCB,
  IX86_BUILTIN_SLWPCB,
  IX86_BUILTIN_LWPVAL32,
  IX86_BUILTIN_LWPVAL64,
  IX86_BUILTIN_LWPINS32,
  IX86_BUILTIN_LWPINS64,

  /* BMI instructions.  */
  IX86_BUILTIN_BEXTR32,
  IX86_BUILTIN_BEXTR64,

  /* TBM instructions.  */
  IX86_BUILTIN_BEXTRI32,
  IX86_BUILTIN_BEXTRI64,

  /* FSGSBASE instructions.  */
  IX86_BUILTIN_RDFSBASE32,
  IX86_BUILTIN_RDFSBASE64,
  IX86_BUILTIN_RDGSBASE32,
  IX86_BUILTIN_RDGSBASE64,
  IX86_BUILTIN_WRFSBASE32,
  IX86_BUILTIN_WRFSBASE64,
  IX86_BUILTIN_WRGSBASE32,
  IX86_BUILTIN_WRGSBASE64,

  /* RDRND instructions.  */
  IX86_BUILTIN_RDRAND16_STEP,
  IX86_BUILTIN_RDRAND32_STEP,
  IX86_BUILTIN_RDRAND64_STEP,

  /* F16C instructions.  */
  IX86_BUILTIN_CVTPH2PS,
  IX86_BUILTIN_CVTPH2PS256,
  IX86_BUILTIN_CVTPS2PH,
  IX86_BUILTIN_CVTPS2PH256,

  /* CFString built-in for darwin */
  IX86_BUILTIN_CFSTRING,

  IX86_BUILTIN_MAX
};

/* Table for the ix86 builtin decls.  */
static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];

/* Table of all of the builtin functions that are possible with different ISA's
   but are waiting to be built until a function is declared to use that
   ISA.  */
struct builtin_isa {
  const char *name;                  /* function name */
  enum ix86_builtin_func_type tcode; /* type to use in the declaration */
  int isa;                           /* isa_flags this builtin is defined for */
  bool const_p;                      /* true if the declaration is constant */
  bool set_and_not_built_p;          /* true while the decl is recorded
                                        but not yet built */
};

static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];

/* Add an ix86 target builtin function with CODE, NAME and TYPE.  Save the MASK
   of which isa_flags to use in the ix86_builtins_isa array.  Stores the
   function decl in the ix86_builtins array.  Returns the function decl or
   NULL_TREE, if the builtin was not added.

   If the front end has a special hook for builtin functions, delay adding
   builtin functions that aren't in the current ISA until the ISA is changed
   with function specific optimization.  Doing so can save about 300K for the
   default compiler.  When the builtin is expanded, check at that time whether
   it is valid.

   If the front end doesn't have a special hook, record all builtins, even
   those not in the current ISA, in case the user uses function specific
   options for a different ISA, so that we don't get scope errors if a
   builtin is added in the middle of a function scope.  */

static inline tree
def_builtin (int mask, const char *name, enum ix86_builtin_func_type tcode,
             enum ix86_builtins code)
{
  tree decl = NULL_TREE;

  if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
    {
      ix86_builtins_isa[(int) code].isa = mask;

      mask &= ~OPTION_MASK_ISA_64BIT;
      if (mask == 0
          || (mask & ix86_isa_flags) != 0
          || (lang_hooks.builtin_function
              == lang_hooks.builtin_function_ext_scope))
        {
          tree type = ix86_get_builtin_func_type (tcode);
          decl = add_builtin_function (name, type, code, BUILT_IN_MD,
                                       NULL, NULL_TREE);
          ix86_builtins[(int) code] = decl;
          ix86_builtins_isa[(int) code].set_and_not_built_p = false;
        }
      else
        {
          ix86_builtins[(int) code] = NULL_TREE;
          ix86_builtins_isa[(int) code].tcode = tcode;
          ix86_builtins_isa[(int) code].name = name;
          ix86_builtins_isa[(int) code].const_p = false;
          ix86_builtins_isa[(int) code].set_and_not_built_p = true;
        }
    }

  return decl;
}
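
/* Usage sketch (illustrative only; "__builtin_ia32_foo" and
   IX86_BUILTIN_FOO are hypothetical names):

     def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_foo",
                  V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_FOO);

   With SSE2 enabled the decl is built immediately; otherwise, for front
   ends with an extended-scope hook, the builtin is merely recorded in
   ix86_builtins_isa and built later by ix86_add_new_builtins.  */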

/* Like def_builtin, but also marks the function decl "const".  */

static inline tree
def_builtin_const (int mask, const char *name,
                   enum ix86_builtin_func_type tcode, enum ix86_builtins code)
{
  tree decl = def_builtin (mask, name, tcode, code);
  if (decl)
    TREE_READONLY (decl) = 1;
  else
    ix86_builtins_isa[(int) code].const_p = true;

  return decl;
}

/* Add any new builtin functions for a given ISA that may not have been
   declared.  This saves a bit of space compared to adding all of the
   declarations to the tree, even if we didn't use them.  */

static void
ix86_add_new_builtins (int isa)
{
  int i;

  for (i = 0; i < (int) IX86_BUILTIN_MAX; i++)
    {
      if ((ix86_builtins_isa[i].isa & isa) != 0
          && ix86_builtins_isa[i].set_and_not_built_p)
        {
          tree decl, type;

          /* Don't define the builtin again.  */
          ix86_builtins_isa[i].set_and_not_built_p = false;

          type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
          decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
                                                 type, i, BUILT_IN_MD, NULL,
                                                 NULL_TREE);

          ix86_builtins[i] = decl;
          if (ix86_builtins_isa[i].const_p)
            TREE_READONLY (decl) = 1;
        }
    }
}
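
/* Illustrative scenario (a sketch, not a call site): when a function is
   compiled with function specific options, e.g.
   __attribute__((target ("avx"))), the ISA flags change for its body and
   the deferred AVX builtins recorded by def_builtin are added here, so
   that using them inside that function does not cause scope errors.  */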

/* Bits for builtin_description.flag.  */

/* Set when we don't support the comparison natively, and should
   swap_comparison in order to support it.  */
#define BUILTIN_DESC_SWAP_OPERANDS	1

struct builtin_description
{
  const unsigned int mask;
  const enum insn_code icode;
  const char *const name;
  const enum ix86_builtins code;
  const enum rtx_code comparison;
  const int flag;
};
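
/* The FLAG member is multiplexed by the tables below: most entries store
   an ix86_builtin_func_type in it, and comparisons that the hardware only
   supports with reversed operands either set BUILTIN_DESC_SWAP_OPERANDS
   or use a function type with a _SWAP suffix.  For instance,
   __builtin_ia32_cmpgtps is listed with comparison LT and type
   V4SF_FTYPE_V4SF_V4SF_SWAP: GT is emitted as the supported LT compare
   with the operands exchanged.  */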

static const struct builtin_description bdesc_comi[] =
{
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
};
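
/* Note the comparison codes above: "equal" maps to UNEQ and "not equal"
   to LTGT because [U]COMISS/[U]COMISD set ZF for unordered operands as
   well, so comieq is true and comineq is false when either input is a
   NaN.  */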

static const struct builtin_description bdesc_pcmpestr[] =
{
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
};

static const struct builtin_description bdesc_pcmpistr[] =
{
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
};

/* Special builtins with variable number of arguments.  */
static const struct builtin_description bdesc_special_args[] =
{
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtsc, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtscp, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },

  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },

  /* SSE or 3DNow!A */
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },

  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },

  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },

  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },

  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },

  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
};
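
/* Rough division of labor between the two tables: the "special" builtins
   above take pointer arguments or have side effects (loads, stores,
   fences), while bdesc_args below holds pure value operations; they are
   expanded by ix86_expand_special_args_builtin and
   ix86_expand_args_builtin respectively, later in this file.  */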
24661 /* Builtins with variable number of arguments. */
24662 static const struct builtin_description bdesc_args[] =
24664 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
24665 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
24666 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdpmc, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
24667 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
24668 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
24669 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
24670 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
24673 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24674 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24675 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24676 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24677 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24678 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24680 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24681 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24682 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24683 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24684 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24685 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24686 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24687 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24689 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24690 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24692 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24693 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24694 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24695 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24697 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24698 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24699 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24700 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24701 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24702 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24704 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24705 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24706 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24707 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24708 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
24709 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
24711 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
24712 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
24713 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
24715 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
24717 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
24718 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
24719 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
24720 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
24721 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
24722 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
24724 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
24725 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
24726 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
24727 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
24728 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
24729 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
24731 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
24732 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
24733 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
24734 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
24737 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
24738 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
24739 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
24740 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
24742 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24743 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24744 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24745 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
24746 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
24747 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
24748 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24749 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24750 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24751 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24752 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24753 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24754 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24755 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24756 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24759 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
24760 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
24761 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
24762 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
24763 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24764 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24767 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
24768 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
24769 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
24770 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
24771 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
24772 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
24773 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
24774 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
24775 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
24776 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
24777 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
24778 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
24780 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
24782 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24783 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24784 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24785 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24786 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24787 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24788 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24789 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24791 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
24792 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
24793 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
24794 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
24795 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
24796 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
24797 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
24798 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
24799 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
24800 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
24801 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP},
24802 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
24803 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
24804 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
24805 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
24806 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
24807 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
24808 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
24809 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
24810 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
24811 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
24812 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
24814 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24815 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24816 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24817 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24819 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24820 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24821 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24822 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24824 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24826 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24827 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24828 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24829 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24830 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24832 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
24833 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
24834 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, V4SF_FTYPE_V4SF_DI },
24836 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
24838 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
24839 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
24840 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
24842 /* SSE MMX or 3Dnow!A */
24843 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24844 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24845 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24847 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24848 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24849 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24850 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24852 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
24853 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
24855 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
24858 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
24860 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2df", IX86_BUILTIN_VEC_PERM_V2DF, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI },
24861 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4sf", IX86_BUILTIN_VEC_PERM_V4SF, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI },
24862 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di", IX86_BUILTIN_VEC_PERM_V2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI },
24863 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si", IX86_BUILTIN_VEC_PERM_V4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
24864 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi", IX86_BUILTIN_VEC_PERM_V8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI },
24865 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi", IX86_BUILTIN_VEC_PERM_V16QI, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
24866 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di_u", IX86_BUILTIN_VEC_PERM_V2DI_U, UNKNOWN, (int) V2UDI_FTYPE_V2UDI_V2UDI_V2UDI },
24867 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si_u", IX86_BUILTIN_VEC_PERM_V4SI_U, UNKNOWN, (int) V4USI_FTYPE_V4USI_V4USI_V4USI },
24868 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi_u", IX86_BUILTIN_VEC_PERM_V8HI_U, UNKNOWN, (int) V8UHI_FTYPE_V8UHI_V8UHI_V8UHI },
24869 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi_u", IX86_BUILTIN_VEC_PERM_V16QI_U, UNKNOWN, (int) V16UQI_FTYPE_V16UQI_V16UQI_V16UQI },
24870 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4df", IX86_BUILTIN_VEC_PERM_V4DF, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI },
24871 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8sf", IX86_BUILTIN_VEC_PERM_V8SF, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI },
24873 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
24874 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
24875 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
24876 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
24877 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
24878 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtudq2ps, "__builtin_ia32_cvtudq2ps", IX86_BUILTIN_CVTUDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
24880 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
24881 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
24882 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
24883 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
24884 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
24886 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
24888 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
24889 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
24890 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
24891 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
24893 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
24894 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
24895 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
24897 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24898 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24899 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24900 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24901 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24902 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24903 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24904 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24906 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
24907 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
24908 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
24909 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
24910 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP},
24911 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
24912 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
24913 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
24914 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
24915 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
24916 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
24917 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
24918 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
24919 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
24920 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
24921 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
24922 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
24923 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
24924 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
24925 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
24927 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24928 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24929 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24930 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24932 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24933 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24934 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24935 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24937 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24939 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24940 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24941 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24943 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
24945 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24946 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24947 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24948 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24949 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24950 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24951 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24952 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
{ OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
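
/* Note that the shift builtins come in pairs sharing one insn pattern:
   the *_SI_COUNT forms (e.g. __builtin_ia32_psllwi128) take the shift
   count as a scalar int, while the *_V8HI_COUNT/_V4SI_COUNT/_V2DI_COUNT
   forms (e.g. __builtin_ia32_psllw128) take it in a vector operand; the
   _COUNT suffix on the ftype tells the expander how to treat that last
   operand.  */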

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },

/* SSE2 MMX */
{ OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },

/* SSE3 */
{ OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
{ OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },

{ OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

/* SSSE3 */
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },

{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },

/* SSE4.1 */
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },

{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },

{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },

{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },

{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },

{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
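
/* The floor/ceil/trunc/rint variants above reuse the same
   CODE_FOR_sse4_1_round* patterns as the generic round builtins: the
   rounding selector (ROUND_FLOOR, ROUND_CEIL, ...) is carried in the
   comparison field of the descriptor, and the *_ROUND ftype makes the
   expander supply it as the rounding-mode immediate.  */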

{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
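
/* For the ptest (and the analogous vtest) builtins the comparison
   field selects which flag the result is read from: EQ tests ZF
   (ptestz), LTU tests CF (ptestc), and GTU tests that both ZF and CF
   are clear (ptestnzc).  */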

/* SSE4.2 */
{ OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
{ OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
{ OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
{ OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },

/* SSE4A */
{ OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
{ OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
{ OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
{ OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

/* AES */
{ OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

/* PCLMUL */
{ OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
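
/* The AES and PCLMUL entries have a null name field: the table walk
   that registers builtins skips such entries, and these builtins are
   instead defined separately, presumably so that they can be gated on
   their own ISA flags rather than on the SSE2 mask given here.  */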

/* AVX */
{ OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },

{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpsdv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpssv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },

{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },

{ OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },

{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },

{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },

{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },

{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },

{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },

{ OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },

/* ABM */
{ OPTION_MASK_ISA_ABM, CODE_FOR_clzhi2_abm, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },

/* BMI */
{ OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
{ OPTION_MASK_ISA_BMI | OPTION_MASK_ISA_64BIT, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
{ OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },

/* TBM */
{ OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
{ OPTION_MASK_ISA_TBM | OPTION_MASK_ISA_64BIT, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },

/* F16C */
{ OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
{ OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
{ OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
{ OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
};

/* FMA4 and XOP.  */
#define MULTI_ARG_4_DF2_DI_I	V2DF_FTYPE_V2DF_V2DF_V2DI_INT
#define MULTI_ARG_4_DF2_DI_I1	V4DF_FTYPE_V4DF_V4DF_V4DI_INT
#define MULTI_ARG_4_SF2_SI_I	V4SF_FTYPE_V4SF_V4SF_V4SI_INT
#define MULTI_ARG_4_SF2_SI_I1	V8SF_FTYPE_V8SF_V8SF_V8SI_INT
#define MULTI_ARG_3_SF		V4SF_FTYPE_V4SF_V4SF_V4SF
#define MULTI_ARG_3_DF		V2DF_FTYPE_V2DF_V2DF_V2DF
#define MULTI_ARG_3_SF2		V8SF_FTYPE_V8SF_V8SF_V8SF
#define MULTI_ARG_3_DF2		V4DF_FTYPE_V4DF_V4DF_V4DF
#define MULTI_ARG_3_DI		V2DI_FTYPE_V2DI_V2DI_V2DI
#define MULTI_ARG_3_SI		V4SI_FTYPE_V4SI_V4SI_V4SI
#define MULTI_ARG_3_SI_DI	V4SI_FTYPE_V4SI_V4SI_V2DI
#define MULTI_ARG_3_HI		V8HI_FTYPE_V8HI_V8HI_V8HI
#define MULTI_ARG_3_HI_SI	V8HI_FTYPE_V8HI_V8HI_V4SI
#define MULTI_ARG_3_QI		V16QI_FTYPE_V16QI_V16QI_V16QI
#define MULTI_ARG_3_DI2		V4DI_FTYPE_V4DI_V4DI_V4DI
#define MULTI_ARG_3_SI2		V8SI_FTYPE_V8SI_V8SI_V8SI
#define MULTI_ARG_3_HI2		V16HI_FTYPE_V16HI_V16HI_V16HI
#define MULTI_ARG_3_QI2		V32QI_FTYPE_V32QI_V32QI_V32QI
#define MULTI_ARG_2_SF		V4SF_FTYPE_V4SF_V4SF
#define MULTI_ARG_2_DF		V2DF_FTYPE_V2DF_V2DF
#define MULTI_ARG_2_DI		V2DI_FTYPE_V2DI_V2DI
#define MULTI_ARG_2_SI		V4SI_FTYPE_V4SI_V4SI
#define MULTI_ARG_2_HI		V8HI_FTYPE_V8HI_V8HI
#define MULTI_ARG_2_QI		V16QI_FTYPE_V16QI_V16QI
#define MULTI_ARG_2_DI_IMM	V2DI_FTYPE_V2DI_SI
#define MULTI_ARG_2_SI_IMM	V4SI_FTYPE_V4SI_SI
#define MULTI_ARG_2_HI_IMM	V8HI_FTYPE_V8HI_SI
#define MULTI_ARG_2_QI_IMM	V16QI_FTYPE_V16QI_SI
#define MULTI_ARG_2_DI_CMP	V2DI_FTYPE_V2DI_V2DI_CMP
#define MULTI_ARG_2_SI_CMP	V4SI_FTYPE_V4SI_V4SI_CMP
#define MULTI_ARG_2_HI_CMP	V8HI_FTYPE_V8HI_V8HI_CMP
#define MULTI_ARG_2_QI_CMP	V16QI_FTYPE_V16QI_V16QI_CMP
#define MULTI_ARG_2_SF_TF	V4SF_FTYPE_V4SF_V4SF_TF
#define MULTI_ARG_2_DF_TF	V2DF_FTYPE_V2DF_V2DF_TF
#define MULTI_ARG_2_DI_TF	V2DI_FTYPE_V2DI_V2DI_TF
#define MULTI_ARG_2_SI_TF	V4SI_FTYPE_V4SI_V4SI_TF
#define MULTI_ARG_2_HI_TF	V8HI_FTYPE_V8HI_V8HI_TF
#define MULTI_ARG_2_QI_TF	V16QI_FTYPE_V16QI_V16QI_TF
#define MULTI_ARG_1_SF		V4SF_FTYPE_V4SF
#define MULTI_ARG_1_DF		V2DF_FTYPE_V2DF
#define MULTI_ARG_1_SF2		V8SF_FTYPE_V8SF
#define MULTI_ARG_1_DF2		V4DF_FTYPE_V4DF
#define MULTI_ARG_1_DI		V2DI_FTYPE_V2DI
#define MULTI_ARG_1_SI		V4SI_FTYPE_V4SI
#define MULTI_ARG_1_HI		V8HI_FTYPE_V8HI
#define MULTI_ARG_1_QI		V16QI_FTYPE_V16QI
#define MULTI_ARG_1_SI_DI	V2DI_FTYPE_V4SI
#define MULTI_ARG_1_HI_DI	V2DI_FTYPE_V8HI
#define MULTI_ARG_1_HI_SI	V4SI_FTYPE_V8HI
#define MULTI_ARG_1_QI_DI	V2DI_FTYPE_V16QI
#define MULTI_ARG_1_QI_SI	V4SI_FTYPE_V16QI
#define MULTI_ARG_1_QI_HI	V8HI_FTYPE_V16QI
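
/* Naming scheme for the MULTI_ARG_* aliases above: the digit is the
   operand count, the element tag (SF/DF/SI/HI/QI/DI) names the vector
   element mode with a trailing 2 marking the 256-bit wide variant, and
   suffixes such as _IMM and _CMP describe how the trailing operand is
   interpreted.  */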

static const struct builtin_description bdesc_multi_arg[] =
{
{ OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
  "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
  UNKNOWN, (int)MULTI_ARG_3_SF },
{ OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
  "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
  UNKNOWN, (int)MULTI_ARG_3_DF },

{ OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
  "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
  UNKNOWN, (int)MULTI_ARG_3_SF },
{ OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
  "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
  UNKNOWN, (int)MULTI_ARG_3_DF },
{ OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
  "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
  UNKNOWN, (int)MULTI_ARG_3_SF2 },
{ OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
  "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
  UNKNOWN, (int)MULTI_ARG_3_DF2 },

{ OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
  "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
  UNKNOWN, (int)MULTI_ARG_3_SF },
{ OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
  "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
  UNKNOWN, (int)MULTI_ARG_3_DF },
{ OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
  "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
  UNKNOWN, (int)MULTI_ARG_3_SF2 },
{ OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
  "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
  UNKNOWN, (int)MULTI_ARG_3_DF2 },

{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },

{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },

{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },

{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },

{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },

{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },

{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
25485 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
25486 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
25487 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
25488 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
25489 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
25490 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
25491 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
25493 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
25494 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
25495 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
25496 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
25497 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
25498 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
25499 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
25501 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
25502 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
25503 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
25504 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
25505 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
25506 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
25507 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
25509 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
25510 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
25511 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
25512 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
25513 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
25514 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
25515 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
25517 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
25518 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
25519 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
25520 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
25521 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
25522 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
25523 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
25525 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
25526 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
25527 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
25528 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
25529 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
25530 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
25531 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
25533 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
25534 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
25535 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
25536 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
25537 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
25538 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
25539 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
25541 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
25542 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
25543 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
25544 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
25545 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
25546 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
25547 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
25549 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
25550 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
25551 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
25552 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
25553 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
25554 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
25555 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
25556 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
25558 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
25559 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
25560 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
25561 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
25562 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
25563 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
25564 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
25565 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
25567 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
25568 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
25569 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
25570 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
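/* Each bdesc_multi_arg entry above ties together an ISA mask, an insn
   pattern (CODE_FOR_*), a builtin name and code, an optional comparison
   code, and a function-type tag stored in the flag field.  A minimal
   sketch of how the table is consumed -- see the registration loop in
   ix86_init_mmx_sse_builtins below:

     for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg);
          i++, d++)
       def_builtin_const (d->mask, d->name,
                          (enum ix86_builtin_func_type) d->flag, d->code);  */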
25574 /* Set up all the MMX/SSE builtins, even builtins for instructions that are
25575    not in the current target ISA, to allow the user to compile particular
25576    modules with target-specific options that differ from the command line.  */
25579 ix86_init_mmx_sse_builtins (void)
25581 const struct builtin_description * d;
25582 enum ix86_builtin_func_type ftype;
25585 /* Add all special builtins with variable number of operands. */
25586 for (i = 0, d = bdesc_special_args;
25587 i < ARRAY_SIZE (bdesc_special_args);
25593 ftype = (enum ix86_builtin_func_type) d->flag;
25594 def_builtin (d->mask, d->name, ftype, d->code);
25597 /* Add all builtins with variable number of operands. */
25598 for (i = 0, d = bdesc_args;
25599 i < ARRAY_SIZE (bdesc_args);
25605 ftype = (enum ix86_builtin_func_type) d->flag;
25606 def_builtin_const (d->mask, d->name, ftype, d->code);
25609 /* pcmpestr[im] insns. */
25610 for (i = 0, d = bdesc_pcmpestr;
25611 i < ARRAY_SIZE (bdesc_pcmpestr);
25614 if (d->code == IX86_BUILTIN_PCMPESTRM128)
25615 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
25617 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
25618 def_builtin_const (d->mask, d->name, ftype, d->code);
25621 /* pcmpistr[im] insns. */
25622 for (i = 0, d = bdesc_pcmpistr;
25623 i < ARRAY_SIZE (bdesc_pcmpistr);
25626 if (d->code == IX86_BUILTIN_PCMPISTRM128)
25627 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
25629 ftype = INT_FTYPE_V16QI_V16QI_INT;
25630 def_builtin_const (d->mask, d->name, ftype, d->code);
25633 /* comi/ucomi insns. */
25634 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
25636 if (d->mask == OPTION_MASK_ISA_SSE2)
25637 ftype = INT_FTYPE_V2DF_V2DF;
25639 ftype = INT_FTYPE_V4SF_V4SF;
25640 def_builtin_const (d->mask, d->name, ftype, d->code);
25644 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
25645 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
25646 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
25647 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
25649 /* SSE or 3DNow!A */
25650 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
25651 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
25652 IX86_BUILTIN_MASKMOVQ);
25655 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
25656 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
25658 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
25659 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
25660 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
25661 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
25664 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
25665 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
25666 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
25667 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
25670 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
25671 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
25672 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
25673 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
25674 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
25675 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
25676 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
25677 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
25678 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
25679 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
25680 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
25681 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
25684 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
25685 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
25688 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
25689 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
25690 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
25691 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
25692 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
25693 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
25694 IX86_BUILTIN_RDRAND64_STEP);
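/* A usage sketch (hypothetical user code, not part of GCC): each *_step
   builtin stores a hardware random value through its pointer argument
   and returns nonzero on success, mirroring RDRAND's carry-flag
   protocol:

     unsigned int v;
     while (!__builtin_ia32_rdrand32_step (&v))
       ;  */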
25696 /* MMX access to the vec_init patterns. */
25697 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
25698 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
25700 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
25701 V4HI_FTYPE_HI_HI_HI_HI,
25702 IX86_BUILTIN_VEC_INIT_V4HI);
25704 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
25705 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
25706 IX86_BUILTIN_VEC_INIT_V8QI);
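/* Usage sketch (hypothetical user code): the vec_init builtins build an
   MMX vector from scalars, so wrapper headers can construct __m64 values
   without vector-literal syntax, e.g.

     __m64 v = (__m64) __builtin_ia32_vec_init_v2si (e0, e1);

   (The element order shown is an assumption; the arguments are simply
   forwarded to ix86_expand_vec_init_builtin below.)  */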
25708 /* Access to the vec_extract patterns. */
25709 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
25710 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
25711 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
25712 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
25713 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
25714 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
25715 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
25716 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
25717 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
25718 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
25720 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
25721 "__builtin_ia32_vec_ext_v4hi",
25722 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
25724 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
25725 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
25727 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
25728 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
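/* Usage sketch (hypothetical user code): the vec_ext builtins extract a
   single element; the selector must be an in-range integer constant,
   enforced by get_element_number below, e.g.

     float f = __builtin_ia32_vec_ext_v4sf (v, 0);  */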
25730 /* Access to the vec_set patterns. */
25731 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
25732 "__builtin_ia32_vec_set_v2di",
25733 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
25735 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
25736 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
25738 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
25739 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
25741 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
25742 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
25744 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
25745 "__builtin_ia32_vec_set_v4hi",
25746 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
25748 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
25749 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
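/* Usage sketch (hypothetical user code): the vec_set builtins are
   non-destructive; ix86_expand_vec_set_builtin below copies the input
   vector and returns the copy with one element replaced, e.g.

     __v8hi w = __builtin_ia32_vec_set_v8hi (v, s, 3);  */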
25751 /* Add the FMA4 and XOP multi-arg instructions.  */
25752 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
25757 ftype = (enum ix86_builtin_func_type) d->flag;
25758 def_builtin_const (d->mask, d->name, ftype, d->code);
25762 /* Internal method for ix86_init_builtins. */
25765 ix86_init_builtins_va_builtins_abi (void)
25767 tree ms_va_ref, sysv_va_ref;
25768 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
25769 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
25770 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
25771 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
25775 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
25776 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
25777 ms_va_ref = build_reference_type (ms_va_list_type_node);
25779 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
25782 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
25783 fnvoid_va_start_ms =
25784 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
25785 fnvoid_va_end_sysv =
25786 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
25787 fnvoid_va_start_sysv =
25788 build_varargs_function_type_list (void_type_node, sysv_va_ref,
25790 fnvoid_va_copy_ms =
25791 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
25793 fnvoid_va_copy_sysv =
25794 build_function_type_list (void_type_node, sysv_va_ref,
25795 sysv_va_ref, NULL_TREE);
25797 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
25798 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
25799 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
25800 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
25801 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
25802 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
25803 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
25804 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
25805 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
25806 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
25807 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
25808 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
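/* Usage sketch (hypothetical user code): with both ABI flavors
   registered, a function compiled for the other ABI can still walk its
   variadic arguments, e.g.

     __builtin_ms_va_list ap;
     __builtin_ms_va_start (ap, last);
     ...
     __builtin_ms_va_end (ap);

   (The __builtin_ms_va_list spelling is an assumption; it is the
   user-visible name of ms_va_list_type_node.)  */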
25812 ix86_init_builtin_types (void)
25814 tree float128_type_node, float80_type_node;
25816 /* The __float80 type. */
25817 float80_type_node = long_double_type_node;
25818 if (TYPE_MODE (float80_type_node) != XFmode)
25820 /* The __float80 type. */
25821 float80_type_node = make_node (REAL_TYPE);
25823 TYPE_PRECISION (float80_type_node) = 80;
25824 layout_type (float80_type_node);
25826 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
25828 /* The __float128 type. */
25829 float128_type_node = make_node (REAL_TYPE);
25830 TYPE_PRECISION (float128_type_node) = 128;
25831 layout_type (float128_type_node);
25832 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
25834 /* This macro is built by i386-builtin-types.awk. */
25835 DEFINE_BUILTIN_PRIMITIVE_TYPES;
25839 ix86_init_builtins (void)
25843 ix86_init_builtin_types ();
25845 /* TFmode support builtins. */
25846 def_builtin_const (0, "__builtin_infq",
25847 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
25848 def_builtin_const (0, "__builtin_huge_valq",
25849 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
25851 /* We will expand these to a normal call if SSE2 isn't available,
25852    since they are used by libgcc.  */
25853 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
25854 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
25855 BUILT_IN_MD, "__fabstf2", NULL_TREE);
25856 TREE_READONLY (t) = 1;
25857 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
25859 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
25860 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
25861 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
25862 TREE_READONLY (t) = 1;
25863 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
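/* Usage sketch (hypothetical user code): since these are registered with
   the library names __fabstf2 and __copysigntf3, they degrade to plain
   libgcc calls when SSE2 is unavailable:

     __float128 m = __builtin_fabsq (x);
     __float128 s = __builtin_copysignq (x, y);  */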
25865 ix86_init_mmx_sse_builtins ();
25868 ix86_init_builtins_va_builtins_abi ();
25870 #ifdef SUBTARGET_INIT_BUILTINS
25871 SUBTARGET_INIT_BUILTINS;
25875 /* Return the ix86 builtin for CODE. */
25878 ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
25880 if (code >= IX86_BUILTIN_MAX)
25881 return error_mark_node;
25883 return ix86_builtins[code];
25886 /* Errors in the source file can cause expand_expr to return const0_rtx
25887 where we expect a vector. To avoid crashing, use one of the vector
25888 clear instructions. */
25890 safe_vector_operand (rtx x, enum machine_mode mode)
25892 if (x == const0_rtx)
25893 x = CONST0_RTX (mode);
25897 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
25900 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
25903 tree arg0 = CALL_EXPR_ARG (exp, 0);
25904 tree arg1 = CALL_EXPR_ARG (exp, 1);
25905 rtx op0 = expand_normal (arg0);
25906 rtx op1 = expand_normal (arg1);
25907 enum machine_mode tmode = insn_data[icode].operand[0].mode;
25908 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
25909 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
25911 if (VECTOR_MODE_P (mode0))
25912 op0 = safe_vector_operand (op0, mode0);
25913 if (VECTOR_MODE_P (mode1))
25914 op1 = safe_vector_operand (op1, mode1);
25916 if (optimize || !target
25917 || GET_MODE (target) != tmode
25918 || !insn_data[icode].operand[0].predicate (target, tmode))
25919 target = gen_reg_rtx (tmode);
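/* If the insn pattern wants a TImode operand but the builtin was handed
   a 32-bit value (e.g. a shift count), widen it: sse2_loadd puts the
   SImode value into the low element of a zeroed V4SI register, and
   gen_lowpart then reinterprets that 128-bit register as TImode.  */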
25921 if (GET_MODE (op1) == SImode && mode1 == TImode)
25923 rtx x = gen_reg_rtx (V4SImode);
25924 emit_insn (gen_sse2_loadd (x, op1));
25925 op1 = gen_lowpart (TImode, x);
25928 if (!insn_data[icode].operand[1].predicate (op0, mode0))
25929 op0 = copy_to_mode_reg (mode0, op0);
25930 if (!insn_data[icode].operand[2].predicate (op1, mode1))
25931 op1 = copy_to_mode_reg (mode1, op1);
25933 pat = GEN_FCN (icode) (target, op0, op1);
25942 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
25945 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
25946 enum ix86_builtin_func_type m_type,
25947 enum rtx_code sub_code)
25952 bool comparison_p = false;
25954 bool last_arg_constant = false;
25955 int num_memory = 0;
25958 enum machine_mode mode;
25961 enum machine_mode tmode = insn_data[icode].operand[0].mode;
25965 case MULTI_ARG_4_DF2_DI_I:
25966 case MULTI_ARG_4_DF2_DI_I1:
25967 case MULTI_ARG_4_SF2_SI_I:
25968 case MULTI_ARG_4_SF2_SI_I1:
25970 last_arg_constant = true;
25973 case MULTI_ARG_3_SF:
25974 case MULTI_ARG_3_DF:
25975 case MULTI_ARG_3_SF2:
25976 case MULTI_ARG_3_DF2:
25977 case MULTI_ARG_3_DI:
25978 case MULTI_ARG_3_SI:
25979 case MULTI_ARG_3_SI_DI:
25980 case MULTI_ARG_3_HI:
25981 case MULTI_ARG_3_HI_SI:
25982 case MULTI_ARG_3_QI:
25983 case MULTI_ARG_3_DI2:
25984 case MULTI_ARG_3_SI2:
25985 case MULTI_ARG_3_HI2:
25986 case MULTI_ARG_3_QI2:
25990 case MULTI_ARG_2_SF:
25991 case MULTI_ARG_2_DF:
25992 case MULTI_ARG_2_DI:
25993 case MULTI_ARG_2_SI:
25994 case MULTI_ARG_2_HI:
25995 case MULTI_ARG_2_QI:
25999 case MULTI_ARG_2_DI_IMM:
26000 case MULTI_ARG_2_SI_IMM:
26001 case MULTI_ARG_2_HI_IMM:
26002 case MULTI_ARG_2_QI_IMM:
26004 last_arg_constant = true;
26007 case MULTI_ARG_1_SF:
26008 case MULTI_ARG_1_DF:
26009 case MULTI_ARG_1_SF2:
26010 case MULTI_ARG_1_DF2:
26011 case MULTI_ARG_1_DI:
26012 case MULTI_ARG_1_SI:
26013 case MULTI_ARG_1_HI:
26014 case MULTI_ARG_1_QI:
26015 case MULTI_ARG_1_SI_DI:
26016 case MULTI_ARG_1_HI_DI:
26017 case MULTI_ARG_1_HI_SI:
26018 case MULTI_ARG_1_QI_DI:
26019 case MULTI_ARG_1_QI_SI:
26020 case MULTI_ARG_1_QI_HI:
26024 case MULTI_ARG_2_DI_CMP:
26025 case MULTI_ARG_2_SI_CMP:
26026 case MULTI_ARG_2_HI_CMP:
26027 case MULTI_ARG_2_QI_CMP:
26029 comparison_p = true;
26032 case MULTI_ARG_2_SF_TF:
26033 case MULTI_ARG_2_DF_TF:
26034 case MULTI_ARG_2_DI_TF:
26035 case MULTI_ARG_2_SI_TF:
26036 case MULTI_ARG_2_HI_TF:
26037 case MULTI_ARG_2_QI_TF:
26043 gcc_unreachable ();
26046 if (optimize || !target
26047 || GET_MODE (target) != tmode
26048 || !insn_data[icode].operand[0].predicate (target, tmode))
26049 target = gen_reg_rtx (tmode);
26051 gcc_assert (nargs <= 4);
26053 for (i = 0; i < nargs; i++)
26055 tree arg = CALL_EXPR_ARG (exp, i);
26056 rtx op = expand_normal (arg);
26057 int adjust = (comparison_p) ? 1 : 0;
26058 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
26060 if (last_arg_constant && i == nargs-1)
26062 if (!CONST_INT_P (op))
26064 error ("last argument must be an immediate");
26065 return gen_reg_rtx (tmode);
26070 if (VECTOR_MODE_P (mode))
26071 op = safe_vector_operand (op, mode);
26073 /* If we aren't optimizing, only allow one memory operand to be generated.  */
26075 if (memory_operand (op, mode))
26078 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
26081 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
26083 op = force_reg (mode, op);
26087 args[i].mode = mode;
26093 pat = GEN_FCN (icode) (target, args[0].op);
26098 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
26099 GEN_INT ((int)sub_code));
26100 else if (! comparison_p)
26101 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
26104 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
26108 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
26113 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
26117 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
26121 gcc_unreachable ();
26131 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
26132 insns with vec_merge. */
26135 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
26139 tree arg0 = CALL_EXPR_ARG (exp, 0);
26140 rtx op1, op0 = expand_normal (arg0);
26141 enum machine_mode tmode = insn_data[icode].operand[0].mode;
26142 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
26144 if (optimize || !target
26145 || GET_MODE (target) != tmode
26146 || !insn_data[icode].operand[0].predicate (target, tmode))
26147 target = gen_reg_rtx (tmode);
26149 if (VECTOR_MODE_P (mode0))
26150 op0 = safe_vector_operand (op0, mode0);
26152 if ((optimize && !register_operand (op0, mode0))
26153 || !insn_data[icode].operand[1].predicate (op0, mode0))
26154 op0 = copy_to_mode_reg (mode0, op0);
26157 if (!insn_data[icode].operand[2].predicate (op1, mode0))
26158 op1 = copy_to_mode_reg (mode0, op1);
26160 pat = GEN_FCN (icode) (target, op0, op1);
26167 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
26170 ix86_expand_sse_compare (const struct builtin_description *d,
26171 tree exp, rtx target, bool swap)
26174 tree arg0 = CALL_EXPR_ARG (exp, 0);
26175 tree arg1 = CALL_EXPR_ARG (exp, 1);
26176 rtx op0 = expand_normal (arg0);
26177 rtx op1 = expand_normal (arg1);
26179 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
26180 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
26181 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
26182 enum rtx_code comparison = d->comparison;
26184 if (VECTOR_MODE_P (mode0))
26185 op0 = safe_vector_operand (op0, mode0);
26186 if (VECTOR_MODE_P (mode1))
26187 op1 = safe_vector_operand (op1, mode1);
26189 /* Swap operands if we have a comparison that isn't available in SSE.  */
26193 rtx tmp = gen_reg_rtx (mode1);
26194 emit_move_insn (tmp, op1);
26199 if (optimize || !target
26200 || GET_MODE (target) != tmode
26201 || !insn_data[d->icode].operand[0].predicate (target, tmode))
26202 target = gen_reg_rtx (tmode);
26204 if ((optimize && !register_operand (op0, mode0))
26205 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
26206 op0 = copy_to_mode_reg (mode0, op0);
26207 if ((optimize && !register_operand (op1, mode1))
26208 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
26209 op1 = copy_to_mode_reg (mode1, op1);
26211 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
26212 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
26219 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
26222 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
26226 tree arg0 = CALL_EXPR_ARG (exp, 0);
26227 tree arg1 = CALL_EXPR_ARG (exp, 1);
26228 rtx op0 = expand_normal (arg0);
26229 rtx op1 = expand_normal (arg1);
26230 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
26231 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
26232 enum rtx_code comparison = d->comparison;
26234 if (VECTOR_MODE_P (mode0))
26235 op0 = safe_vector_operand (op0, mode0);
26236 if (VECTOR_MODE_P (mode1))
26237 op1 = safe_vector_operand (op1, mode1);
26239 /* Swap operands if we have a comparison that isn't available in SSE.  */
26241 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
26248 target = gen_reg_rtx (SImode);
26249 emit_move_insn (target, const0_rtx);
26250 target = gen_rtx_SUBREG (QImode, target, 0);
26252 if ((optimize && !register_operand (op0, mode0))
26253 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
26254 op0 = copy_to_mode_reg (mode0, op0);
26255 if ((optimize && !register_operand (op1, mode1))
26256 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
26257 op1 = copy_to_mode_reg (mode1, op1);
26259 pat = GEN_FCN (d->icode) (op0, op1);
26263 emit_insn (gen_rtx_SET (VOIDmode,
26264 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
26265 gen_rtx_fmt_ee (comparison, QImode,
26269 return SUBREG_REG (target);
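/* The sequence above is the usual flags-to-value idiom: clear an SImode
   register, set only its QImode low part (via STRICT_LOW_PART) from the
   comparison of the flags register, and return the SImode SUBREG_REG so
   the caller sees a clean 0/1 result without partial-register stalls.  */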
26272 /* Subroutine of ix86_expand_args_builtin to take care of round insns. */
26275 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
26279 tree arg0 = CALL_EXPR_ARG (exp, 0);
26280 rtx op1, op0 = expand_normal (arg0);
26281 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
26282 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
26284 if (optimize || target == 0
26285 || GET_MODE (target) != tmode
26286 || !insn_data[d->icode].operand[0].predicate (target, tmode))
26287 target = gen_reg_rtx (tmode);
26289 if (VECTOR_MODE_P (mode0))
26290 op0 = safe_vector_operand (op0, mode0);
26292 if ((optimize && !register_operand (op0, mode0))
26293 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
26294 op0 = copy_to_mode_reg (mode0, op0);
26296 op1 = GEN_INT (d->comparison);
26298 pat = GEN_FCN (d->icode) (target, op0, op1);
26305 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
26308 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
26312 tree arg0 = CALL_EXPR_ARG (exp, 0);
26313 tree arg1 = CALL_EXPR_ARG (exp, 1);
26314 rtx op0 = expand_normal (arg0);
26315 rtx op1 = expand_normal (arg1);
26316 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
26317 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
26318 enum rtx_code comparison = d->comparison;
26320 if (VECTOR_MODE_P (mode0))
26321 op0 = safe_vector_operand (op0, mode0);
26322 if (VECTOR_MODE_P (mode1))
26323 op1 = safe_vector_operand (op1, mode1);
26325 target = gen_reg_rtx (SImode);
26326 emit_move_insn (target, const0_rtx);
26327 target = gen_rtx_SUBREG (QImode, target, 0);
26329 if ((optimize && !register_operand (op0, mode0))
26330 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
26331 op0 = copy_to_mode_reg (mode0, op0);
26332 if ((optimize && !register_operand (op1, mode1))
26333 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
26334 op1 = copy_to_mode_reg (mode1, op1);
26336 pat = GEN_FCN (d->icode) (op0, op1);
26340 emit_insn (gen_rtx_SET (VOIDmode,
26341 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
26342 gen_rtx_fmt_ee (comparison, QImode,
26346 return SUBREG_REG (target);
26349 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
26352 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
26353 tree exp, rtx target)
26356 tree arg0 = CALL_EXPR_ARG (exp, 0);
26357 tree arg1 = CALL_EXPR_ARG (exp, 1);
26358 tree arg2 = CALL_EXPR_ARG (exp, 2);
26359 tree arg3 = CALL_EXPR_ARG (exp, 3);
26360 tree arg4 = CALL_EXPR_ARG (exp, 4);
26361 rtx scratch0, scratch1;
26362 rtx op0 = expand_normal (arg0);
26363 rtx op1 = expand_normal (arg1);
26364 rtx op2 = expand_normal (arg2);
26365 rtx op3 = expand_normal (arg3);
26366 rtx op4 = expand_normal (arg4);
26367 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
26369 tmode0 = insn_data[d->icode].operand[0].mode;
26370 tmode1 = insn_data[d->icode].operand[1].mode;
26371 modev2 = insn_data[d->icode].operand[2].mode;
26372 modei3 = insn_data[d->icode].operand[3].mode;
26373 modev4 = insn_data[d->icode].operand[4].mode;
26374 modei5 = insn_data[d->icode].operand[5].mode;
26375 modeimm = insn_data[d->icode].operand[6].mode;
26377 if (VECTOR_MODE_P (modev2))
26378 op0 = safe_vector_operand (op0, modev2);
26379 if (VECTOR_MODE_P (modev4))
26380 op2 = safe_vector_operand (op2, modev4);
26382 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
26383 op0 = copy_to_mode_reg (modev2, op0);
26384 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
26385 op1 = copy_to_mode_reg (modei3, op1);
26386 if ((optimize && !register_operand (op2, modev4))
26387 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
26388 op2 = copy_to_mode_reg (modev4, op2);
26389 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
26390 op3 = copy_to_mode_reg (modei5, op3);
26392 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
26394 error ("the fifth argument must be a 8-bit immediate");
26398 if (d->code == IX86_BUILTIN_PCMPESTRI128)
26400 if (optimize || !target
26401 || GET_MODE (target) != tmode0
26402 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
26403 target = gen_reg_rtx (tmode0);
26405 scratch1 = gen_reg_rtx (tmode1);
26407 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
26409 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
26411 if (optimize || !target
26412 || GET_MODE (target) != tmode1
26413 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
26414 target = gen_reg_rtx (tmode1);
26416 scratch0 = gen_reg_rtx (tmode0);
26418 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
26422 gcc_assert (d->flag);
26424 scratch0 = gen_reg_rtx (tmode0);
26425 scratch1 = gen_reg_rtx (tmode1);
26427 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
26437 target = gen_reg_rtx (SImode);
26438 emit_move_insn (target, const0_rtx);
26439 target = gen_rtx_SUBREG (QImode, target, 0);
26442 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
26443 gen_rtx_fmt_ee (EQ, QImode,
26444 gen_rtx_REG ((enum machine_mode) d->flag,
26447 return SUBREG_REG (target);
26454 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
26457 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
26458 tree exp, rtx target)
26461 tree arg0 = CALL_EXPR_ARG (exp, 0);
26462 tree arg1 = CALL_EXPR_ARG (exp, 1);
26463 tree arg2 = CALL_EXPR_ARG (exp, 2);
26464 rtx scratch0, scratch1;
26465 rtx op0 = expand_normal (arg0);
26466 rtx op1 = expand_normal (arg1);
26467 rtx op2 = expand_normal (arg2);
26468 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
26470 tmode0 = insn_data[d->icode].operand[0].mode;
26471 tmode1 = insn_data[d->icode].operand[1].mode;
26472 modev2 = insn_data[d->icode].operand[2].mode;
26473 modev3 = insn_data[d->icode].operand[3].mode;
26474 modeimm = insn_data[d->icode].operand[4].mode;
26476 if (VECTOR_MODE_P (modev2))
26477 op0 = safe_vector_operand (op0, modev2);
26478 if (VECTOR_MODE_P (modev3))
26479 op1 = safe_vector_operand (op1, modev3);
26481 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
26482 op0 = copy_to_mode_reg (modev2, op0);
26483 if ((optimize && !register_operand (op1, modev3))
26484 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
26485 op1 = copy_to_mode_reg (modev3, op1);
26487 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
26489 error ("the third argument must be a 8-bit immediate");
26493 if (d->code == IX86_BUILTIN_PCMPISTRI128)
26495 if (optimize || !target
26496 || GET_MODE (target) != tmode0
26497 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
26498 target = gen_reg_rtx (tmode0);
26500 scratch1 = gen_reg_rtx (tmode1);
26502 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
26504 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
26506 if (optimize || !target
26507 || GET_MODE (target) != tmode1
26508 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
26509 target = gen_reg_rtx (tmode1);
26511 scratch0 = gen_reg_rtx (tmode0);
26513 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
26517 gcc_assert (d->flag);
26519 scratch0 = gen_reg_rtx (tmode0);
26520 scratch1 = gen_reg_rtx (tmode1);
26522 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
26532 target = gen_reg_rtx (SImode);
26533 emit_move_insn (target, const0_rtx);
26534 target = gen_rtx_SUBREG (QImode, target, 0);
26537 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
26538 gen_rtx_fmt_ee (EQ, QImode,
26539 gen_rtx_REG ((enum machine_mode) d->flag,
26542 return SUBREG_REG (target);
26548 /* Subroutine of ix86_expand_builtin to take care of insns with
26549 variable number of operands. */
26552 ix86_expand_args_builtin (const struct builtin_description *d,
26553 tree exp, rtx target)
26555 rtx pat, real_target;
26556 unsigned int i, nargs;
26557 unsigned int nargs_constant = 0;
26558 int num_memory = 0;
26562 enum machine_mode mode;
26564 bool last_arg_count = false;
26565 enum insn_code icode = d->icode;
26566 const struct insn_data_d *insn_p = &insn_data[icode];
26567 enum machine_mode tmode = insn_p->operand[0].mode;
26568 enum machine_mode rmode = VOIDmode;
26570 enum rtx_code comparison = d->comparison;
26572 switch ((enum ix86_builtin_func_type) d->flag)
26574 case V2DF_FTYPE_V2DF_ROUND:
26575 case V4DF_FTYPE_V4DF_ROUND:
26576 case V4SF_FTYPE_V4SF_ROUND:
26577 case V8SF_FTYPE_V8SF_ROUND:
26578 return ix86_expand_sse_round (d, exp, target);
26579 case INT_FTYPE_V8SF_V8SF_PTEST:
26580 case INT_FTYPE_V4DI_V4DI_PTEST:
26581 case INT_FTYPE_V4DF_V4DF_PTEST:
26582 case INT_FTYPE_V4SF_V4SF_PTEST:
26583 case INT_FTYPE_V2DI_V2DI_PTEST:
26584 case INT_FTYPE_V2DF_V2DF_PTEST:
26585 return ix86_expand_sse_ptest (d, exp, target);
26586 case FLOAT128_FTYPE_FLOAT128:
26587 case FLOAT_FTYPE_FLOAT:
26588 case INT_FTYPE_INT:
26589 case UINT64_FTYPE_INT:
26590 case UINT16_FTYPE_UINT16:
26591 case INT64_FTYPE_INT64:
26592 case INT64_FTYPE_V4SF:
26593 case INT64_FTYPE_V2DF:
26594 case INT_FTYPE_V16QI:
26595 case INT_FTYPE_V8QI:
26596 case INT_FTYPE_V8SF:
26597 case INT_FTYPE_V4DF:
26598 case INT_FTYPE_V4SF:
26599 case INT_FTYPE_V2DF:
26600 case V16QI_FTYPE_V16QI:
26601 case V8SI_FTYPE_V8SF:
26602 case V8SI_FTYPE_V4SI:
26603 case V8HI_FTYPE_V8HI:
26604 case V8HI_FTYPE_V16QI:
26605 case V8QI_FTYPE_V8QI:
26606 case V8SF_FTYPE_V8SF:
26607 case V8SF_FTYPE_V8SI:
26608 case V8SF_FTYPE_V4SF:
26609 case V8SF_FTYPE_V8HI:
26610 case V4SI_FTYPE_V4SI:
26611 case V4SI_FTYPE_V16QI:
26612 case V4SI_FTYPE_V4SF:
26613 case V4SI_FTYPE_V8SI:
26614 case V4SI_FTYPE_V8HI:
26615 case V4SI_FTYPE_V4DF:
26616 case V4SI_FTYPE_V2DF:
26617 case V4HI_FTYPE_V4HI:
26618 case V4DF_FTYPE_V4DF:
26619 case V4DF_FTYPE_V4SI:
26620 case V4DF_FTYPE_V4SF:
26621 case V4DF_FTYPE_V2DF:
26622 case V4SF_FTYPE_V4SF:
26623 case V4SF_FTYPE_V4SI:
26624 case V4SF_FTYPE_V8SF:
26625 case V4SF_FTYPE_V4DF:
26626 case V4SF_FTYPE_V8HI:
26627 case V4SF_FTYPE_V2DF:
26628 case V2DI_FTYPE_V2DI:
26629 case V2DI_FTYPE_V16QI:
26630 case V2DI_FTYPE_V8HI:
26631 case V2DI_FTYPE_V4SI:
26632 case V2DF_FTYPE_V2DF:
26633 case V2DF_FTYPE_V4SI:
26634 case V2DF_FTYPE_V4DF:
26635 case V2DF_FTYPE_V4SF:
26636 case V2DF_FTYPE_V2SI:
26637 case V2SI_FTYPE_V2SI:
26638 case V2SI_FTYPE_V4SF:
26639 case V2SI_FTYPE_V2SF:
26640 case V2SI_FTYPE_V2DF:
26641 case V2SF_FTYPE_V2SF:
26642 case V2SF_FTYPE_V2SI:
26645 case V4SF_FTYPE_V4SF_VEC_MERGE:
26646 case V2DF_FTYPE_V2DF_VEC_MERGE:
26647 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
26648 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
26649 case V16QI_FTYPE_V16QI_V16QI:
26650 case V16QI_FTYPE_V8HI_V8HI:
26651 case V8QI_FTYPE_V8QI_V8QI:
26652 case V8QI_FTYPE_V4HI_V4HI:
26653 case V8HI_FTYPE_V8HI_V8HI:
26654 case V8HI_FTYPE_V16QI_V16QI:
26655 case V8HI_FTYPE_V4SI_V4SI:
26656 case V8SF_FTYPE_V8SF_V8SF:
26657 case V8SF_FTYPE_V8SF_V8SI:
26658 case V4SI_FTYPE_V4SI_V4SI:
26659 case V4SI_FTYPE_V8HI_V8HI:
26660 case V4SI_FTYPE_V4SF_V4SF:
26661 case V4SI_FTYPE_V2DF_V2DF:
26662 case V4HI_FTYPE_V4HI_V4HI:
26663 case V4HI_FTYPE_V8QI_V8QI:
26664 case V4HI_FTYPE_V2SI_V2SI:
26665 case V4DF_FTYPE_V4DF_V4DF:
26666 case V4DF_FTYPE_V4DF_V4DI:
26667 case V4SF_FTYPE_V4SF_V4SF:
26668 case V4SF_FTYPE_V4SF_V4SI:
26669 case V4SF_FTYPE_V4SF_V2SI:
26670 case V4SF_FTYPE_V4SF_V2DF:
26671 case V4SF_FTYPE_V4SF_DI:
26672 case V4SF_FTYPE_V4SF_SI:
26673 case V2DI_FTYPE_V2DI_V2DI:
26674 case V2DI_FTYPE_V16QI_V16QI:
26675 case V2DI_FTYPE_V4SI_V4SI:
26676 case V2DI_FTYPE_V2DI_V16QI:
26677 case V2DI_FTYPE_V2DF_V2DF:
26678 case V2SI_FTYPE_V2SI_V2SI:
26679 case V2SI_FTYPE_V4HI_V4HI:
26680 case V2SI_FTYPE_V2SF_V2SF:
26681 case V2DF_FTYPE_V2DF_V2DF:
26682 case V2DF_FTYPE_V2DF_V4SF:
26683 case V2DF_FTYPE_V2DF_V2DI:
26684 case V2DF_FTYPE_V2DF_DI:
26685 case V2DF_FTYPE_V2DF_SI:
26686 case V2SF_FTYPE_V2SF_V2SF:
26687 case V1DI_FTYPE_V1DI_V1DI:
26688 case V1DI_FTYPE_V8QI_V8QI:
26689 case V1DI_FTYPE_V2SI_V2SI:
26690 if (comparison == UNKNOWN)
26691 return ix86_expand_binop_builtin (icode, exp, target);
26694 case V4SF_FTYPE_V4SF_V4SF_SWAP:
26695 case V2DF_FTYPE_V2DF_V2DF_SWAP:
26696 gcc_assert (comparison != UNKNOWN);
26700 case V8HI_FTYPE_V8HI_V8HI_COUNT:
26701 case V8HI_FTYPE_V8HI_SI_COUNT:
26702 case V4SI_FTYPE_V4SI_V4SI_COUNT:
26703 case V4SI_FTYPE_V4SI_SI_COUNT:
26704 case V4HI_FTYPE_V4HI_V4HI_COUNT:
26705 case V4HI_FTYPE_V4HI_SI_COUNT:
26706 case V2DI_FTYPE_V2DI_V2DI_COUNT:
26707 case V2DI_FTYPE_V2DI_SI_COUNT:
26708 case V2SI_FTYPE_V2SI_V2SI_COUNT:
26709 case V2SI_FTYPE_V2SI_SI_COUNT:
26710 case V1DI_FTYPE_V1DI_V1DI_COUNT:
26711 case V1DI_FTYPE_V1DI_SI_COUNT:
26713 last_arg_count = true;
26715 case UINT64_FTYPE_UINT64_UINT64:
26716 case UINT_FTYPE_UINT_UINT:
26717 case UINT_FTYPE_UINT_USHORT:
26718 case UINT_FTYPE_UINT_UCHAR:
26719 case UINT16_FTYPE_UINT16_INT:
26720 case UINT8_FTYPE_UINT8_INT:
26723 case V2DI_FTYPE_V2DI_INT_CONVERT:
26726 nargs_constant = 1;
26728 case V8HI_FTYPE_V8HI_INT:
26729 case V8HI_FTYPE_V8SF_INT:
26730 case V8HI_FTYPE_V4SF_INT:
26731 case V8SF_FTYPE_V8SF_INT:
26732 case V4SI_FTYPE_V4SI_INT:
26733 case V4SI_FTYPE_V8SI_INT:
26734 case V4HI_FTYPE_V4HI_INT:
26735 case V4DF_FTYPE_V4DF_INT:
26736 case V4SF_FTYPE_V4SF_INT:
26737 case V4SF_FTYPE_V8SF_INT:
26738 case V2DI_FTYPE_V2DI_INT:
26739 case V2DF_FTYPE_V2DF_INT:
26740 case V2DF_FTYPE_V4DF_INT:
26742 nargs_constant = 1;
26744 case V16QI_FTYPE_V16QI_V16QI_V16QI:
26745 case V8SF_FTYPE_V8SF_V8SF_V8SF:
26746 case V4DF_FTYPE_V4DF_V4DF_V4DF:
26747 case V4SF_FTYPE_V4SF_V4SF_V4SF:
26748 case V2DF_FTYPE_V2DF_V2DF_V2DF:
26751 case V16QI_FTYPE_V16QI_V16QI_INT:
26752 case V8HI_FTYPE_V8HI_V8HI_INT:
26753 case V8SI_FTYPE_V8SI_V8SI_INT:
26754 case V8SI_FTYPE_V8SI_V4SI_INT:
26755 case V8SF_FTYPE_V8SF_V8SF_INT:
26756 case V8SF_FTYPE_V8SF_V4SF_INT:
26757 case V4SI_FTYPE_V4SI_V4SI_INT:
26758 case V4DF_FTYPE_V4DF_V4DF_INT:
26759 case V4DF_FTYPE_V4DF_V2DF_INT:
26760 case V4SF_FTYPE_V4SF_V4SF_INT:
26761 case V2DI_FTYPE_V2DI_V2DI_INT:
26762 case V2DF_FTYPE_V2DF_V2DF_INT:
26764 nargs_constant = 1;
26766 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
26769 nargs_constant = 1;
26771 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
26774 nargs_constant = 1;
26776 case V2DI_FTYPE_V2DI_UINT_UINT:
26778 nargs_constant = 2;
26780 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
26781 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
26782 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
26783 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
26785 nargs_constant = 1;
26787 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
26789 nargs_constant = 2;
26792 gcc_unreachable ();
26795 gcc_assert (nargs <= ARRAY_SIZE (args));
26797 if (comparison != UNKNOWN)
26799 gcc_assert (nargs == 2);
26800 return ix86_expand_sse_compare (d, exp, target, swap);
26803 if (rmode == VOIDmode || rmode == tmode)
26807 || GET_MODE (target) != tmode
26808 || !insn_p->operand[0].predicate (target, tmode))
26809 target = gen_reg_rtx (tmode);
26810 real_target = target;
26814 target = gen_reg_rtx (rmode);
26815 real_target = simplify_gen_subreg (tmode, target, rmode, 0);
26818 for (i = 0; i < nargs; i++)
26820 tree arg = CALL_EXPR_ARG (exp, i);
26821 rtx op = expand_normal (arg);
26822 enum machine_mode mode = insn_p->operand[i + 1].mode;
26823 bool match = insn_p->operand[i + 1].predicate (op, mode);
26825 if (last_arg_count && (i + 1) == nargs)
26827 /* SIMD shift insns take either an 8-bit immediate or a
26828    register as the count, but the builtin functions take an int.
26829    If the count doesn't match, put it in a register.  */
26832 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
26833 if (!insn_p->operand[i + 1].predicate (op, mode))
26834 op = copy_to_reg (op);
26837 else if ((nargs - i) <= nargs_constant)
26842 case CODE_FOR_sse4_1_roundpd:
26843 case CODE_FOR_sse4_1_roundps:
26844 case CODE_FOR_sse4_1_roundsd:
26845 case CODE_FOR_sse4_1_roundss:
26846 case CODE_FOR_sse4_1_blendps:
26847 case CODE_FOR_avx_blendpd256:
26848 case CODE_FOR_avx_vpermilv4df:
26849 case CODE_FOR_avx_roundpd256:
26850 case CODE_FOR_avx_roundps256:
26851 error ("the last argument must be a 4-bit immediate");
26854 case CODE_FOR_sse4_1_blendpd:
26855 case CODE_FOR_avx_vpermilv2df:
26856 case CODE_FOR_xop_vpermil2v2df3:
26857 case CODE_FOR_xop_vpermil2v4sf3:
26858 case CODE_FOR_xop_vpermil2v4df3:
26859 case CODE_FOR_xop_vpermil2v8sf3:
26860 error ("the last argument must be a 2-bit immediate");
26863 case CODE_FOR_avx_vextractf128v4df:
26864 case CODE_FOR_avx_vextractf128v8sf:
26865 case CODE_FOR_avx_vextractf128v8si:
26866 case CODE_FOR_avx_vinsertf128v4df:
26867 case CODE_FOR_avx_vinsertf128v8sf:
26868 case CODE_FOR_avx_vinsertf128v8si:
26869 error ("the last argument must be a 1-bit immediate");
26872 case CODE_FOR_avx_cmpsdv2df3:
26873 case CODE_FOR_avx_cmpssv4sf3:
26874 case CODE_FOR_avx_cmppdv2df3:
26875 case CODE_FOR_avx_cmppsv4sf3:
26876 case CODE_FOR_avx_cmppdv4df3:
26877 case CODE_FOR_avx_cmppsv8sf3:
26878 error ("the last argument must be a 5-bit immediate");
26882 switch (nargs_constant)
26885 if ((nargs - i) == nargs_constant)
26887 error ("the next to last argument must be an 8-bit immediate");
26891 error ("the last argument must be an 8-bit immediate");
26894 gcc_unreachable ();
26901 if (VECTOR_MODE_P (mode))
26902 op = safe_vector_operand (op, mode);
26904 /* If we aren't optimizing, only allow one memory operand to be generated.  */
26906 if (memory_operand (op, mode))
26909 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
26911 if (optimize || !match || num_memory > 1)
26912 op = copy_to_mode_reg (mode, op);
26916 op = copy_to_reg (op);
26917 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
26922 args[i].mode = mode;
26928 pat = GEN_FCN (icode) (real_target, args[0].op);
26931 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
26934 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
26938 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
26939 args[2].op, args[3].op);
26942 gcc_unreachable ();
26952 /* Subroutine of ix86_expand_builtin to take care of special insns
26953 with variable number of operands. */
26956 ix86_expand_special_args_builtin (const struct builtin_description *d,
26957 tree exp, rtx target)
26961 unsigned int i, nargs, arg_adjust, memory;
26965 enum machine_mode mode;
26967 enum insn_code icode = d->icode;
26968 bool last_arg_constant = false;
26969 const struct insn_data_d *insn_p = &insn_data[icode];
26970 enum machine_mode tmode = insn_p->operand[0].mode;
26971 enum { load, store } klass;
26973 switch ((enum ix86_builtin_func_type) d->flag)
26975 case VOID_FTYPE_VOID:
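/* For avx_vzeroupper the "target" slot carries the vzeroupper_intrinsic
   marker rather than a real operand, letting the vzeroupper optimization
   pass distinguish intrinsic-generated instances.  */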
26976 if (icode == CODE_FOR_avx_vzeroupper)
26977 target = GEN_INT (vzeroupper_intrinsic);
26978 emit_insn (GEN_FCN (icode) (target));
26980 case VOID_FTYPE_UINT64:
26981 case VOID_FTYPE_UNSIGNED:
26987 case UINT64_FTYPE_VOID:
26988 case UNSIGNED_FTYPE_VOID:
26993 case UINT64_FTYPE_PUNSIGNED:
26994 case V2DI_FTYPE_PV2DI:
26995 case V32QI_FTYPE_PCCHAR:
26996 case V16QI_FTYPE_PCCHAR:
26997 case V8SF_FTYPE_PCV4SF:
26998 case V8SF_FTYPE_PCFLOAT:
26999 case V4SF_FTYPE_PCFLOAT:
27000 case V4DF_FTYPE_PCV2DF:
27001 case V4DF_FTYPE_PCDOUBLE:
27002 case V2DF_FTYPE_PCDOUBLE:
27003 case VOID_FTYPE_PVOID:
27008 case VOID_FTYPE_PV2SF_V4SF:
27009 case VOID_FTYPE_PV4DI_V4DI:
27010 case VOID_FTYPE_PV2DI_V2DI:
27011 case VOID_FTYPE_PCHAR_V32QI:
27012 case VOID_FTYPE_PCHAR_V16QI:
27013 case VOID_FTYPE_PFLOAT_V8SF:
27014 case VOID_FTYPE_PFLOAT_V4SF:
27015 case VOID_FTYPE_PDOUBLE_V4DF:
27016 case VOID_FTYPE_PDOUBLE_V2DF:
27017 case VOID_FTYPE_PULONGLONG_ULONGLONG:
27018 case VOID_FTYPE_PINT_INT:
27021 /* Reserve memory operand for target. */
27022 memory = ARRAY_SIZE (args);
27024 case V4SF_FTYPE_V4SF_PCV2SF:
27025 case V2DF_FTYPE_V2DF_PCDOUBLE:
27030 case V8SF_FTYPE_PCV8SF_V8SI:
27031 case V4DF_FTYPE_PCV4DF_V4DI:
27032 case V4SF_FTYPE_PCV4SF_V4SI:
27033 case V2DF_FTYPE_PCV2DF_V2DI:
27038 case VOID_FTYPE_PV8SF_V8SI_V8SF:
27039 case VOID_FTYPE_PV4DF_V4DI_V4DF:
27040 case VOID_FTYPE_PV4SF_V4SI_V4SF:
27041 case VOID_FTYPE_PV2DF_V2DI_V2DF:
27044 /* Reserve memory operand for target. */
27045 memory = ARRAY_SIZE (args);
27047 case VOID_FTYPE_UINT_UINT_UINT:
27048 case VOID_FTYPE_UINT64_UINT_UINT:
27049 case UCHAR_FTYPE_UINT_UINT_UINT:
27050 case UCHAR_FTYPE_UINT64_UINT_UINT:
27053 memory = ARRAY_SIZE (args);
27054 last_arg_constant = true;
27057 gcc_unreachable ();
27060 gcc_assert (nargs <= ARRAY_SIZE (args));
27062 if (klass == store)
27064 arg = CALL_EXPR_ARG (exp, 0);
27065 op = expand_normal (arg);
27066 gcc_assert (target == 0);
27068 target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
27070 target = force_reg (tmode, op);
27078 || GET_MODE (target) != tmode
27079 || !insn_p->operand[0].predicate (target, tmode))
27080 target = gen_reg_rtx (tmode);
27083 for (i = 0; i < nargs; i++)
27085 enum machine_mode mode = insn_p->operand[i + 1].mode;
27088 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
27089 op = expand_normal (arg);
27090 match = insn_p->operand[i + 1].predicate (op, mode);
27092 if (last_arg_constant && (i + 1) == nargs)
27096 if (icode == CODE_FOR_lwp_lwpvalsi3
27097 || icode == CODE_FOR_lwp_lwpinssi3
27098 || icode == CODE_FOR_lwp_lwpvaldi3
27099 || icode == CODE_FOR_lwp_lwpinsdi3)
27100 error ("the last argument must be a 32-bit immediate");
27102 error ("the last argument must be an 8-bit immediate");
27110 /* This must be the memory operand. */
27111 op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
27112 gcc_assert (GET_MODE (op) == mode
27113 || GET_MODE (op) == VOIDmode);
27117 /* This must be a register.  */
27118 if (VECTOR_MODE_P (mode))
27119 op = safe_vector_operand (op, mode);
27121 gcc_assert (GET_MODE (op) == mode
27122 || GET_MODE (op) == VOIDmode);
27123 op = copy_to_mode_reg (mode, op);
27128 args[i].mode = mode;
27134 pat = GEN_FCN (icode) (target);
27137 pat = GEN_FCN (icode) (target, args[0].op);
27140 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
27143 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
27146 gcc_unreachable ();
27152 return klass == store ? 0 : target;
27155 /* Return the integer constant in ARG. Constrain it to be in the range
27156 of the subparts of VEC_TYPE; issue an error if not. */
27159 get_element_number (tree vec_type, tree arg)
27161 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
27163 if (!host_integerp (arg, 1)
27164 || (elt = tree_low_cst (arg, 1), elt > max))
27166 error ("selector must be an integer constant in the range 0..%wi", max);
27173 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
27174 ix86_expand_vector_init. We DO have language-level syntax for this, in
27175 the form of (type){ init-list }. Except that since we can't place emms
27176 instructions from inside the compiler, we can't allow the use of MMX
27177 registers unless the user explicitly asks for it. So we do *not* define
27178 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
27179 we have builtins invoked by mmintrin.h that give us license to emit
27180 these sorts of instructions. */
27183 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
27185 enum machine_mode tmode = TYPE_MODE (type);
27186 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
27187 int i, n_elt = GET_MODE_NUNITS (tmode);
27188 rtvec v = rtvec_alloc (n_elt);
27190 gcc_assert (VECTOR_MODE_P (tmode));
27191 gcc_assert (call_expr_nargs (exp) == n_elt);
27193 for (i = 0; i < n_elt; ++i)
27195 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
27196 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
27199 if (!target || !register_operand (target, tmode))
27200 target = gen_reg_rtx (tmode);
27202 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
27206 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
27207 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
27208 had a language-level syntax for referencing vector elements. */
27211 ix86_expand_vec_ext_builtin (tree exp, rtx target)
27213 enum machine_mode tmode, mode0;
27218 arg0 = CALL_EXPR_ARG (exp, 0);
27219 arg1 = CALL_EXPR_ARG (exp, 1);
27221 op0 = expand_normal (arg0);
27222 elt = get_element_number (TREE_TYPE (arg0), arg1);
27224 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
27225 mode0 = TYPE_MODE (TREE_TYPE (arg0));
27226 gcc_assert (VECTOR_MODE_P (mode0));
27228 op0 = force_reg (mode0, op0);
27230 if (optimize || !target || !register_operand (target, tmode))
27231 target = gen_reg_rtx (tmode);
27233 ix86_expand_vector_extract (true, target, op0, elt);
27238 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
27239 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
27240 a language-level syntax for referencing vector elements. */
27243 ix86_expand_vec_set_builtin (tree exp)
27245 enum machine_mode tmode, mode1;
27246 tree arg0, arg1, arg2;
27248 rtx op0, op1, target;
27250 arg0 = CALL_EXPR_ARG (exp, 0);
27251 arg1 = CALL_EXPR_ARG (exp, 1);
27252 arg2 = CALL_EXPR_ARG (exp, 2);
27254 tmode = TYPE_MODE (TREE_TYPE (arg0));
27255 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
27256 gcc_assert (VECTOR_MODE_P (tmode));
27258 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
27259 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
27260 elt = get_element_number (TREE_TYPE (arg0), arg2);
27262 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
27263 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
27265 op0 = force_reg (tmode, op0);
27266 op1 = force_reg (mode1, op1);
27268 /* OP0 is the source of these builtin functions and shouldn't be
27269 modified. Create a copy, use it, and return it as the target. */
27270 target = gen_reg_rtx (tmode);
27271 emit_move_insn (target, op0);
27272 ix86_expand_vector_set (true, target, op1, elt);
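/* Informal usage sketch (mapping assumed from emmintrin.h): intrinsics
   such as _mm_insert_epi16 funnel into this expander, e.g.

       #include <emmintrin.h>
       __m128i f (__m128i v, int x)
       {
         return _mm_insert_epi16 (v, x, 2);  // __builtin_ia32_vec_set_v8hi
       }

   The copy made above keeps the user-visible source operand unmodified. */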
27277 /* Expand an expression EXP that calls a built-in function,
27278 with result going to TARGET if that's convenient
27279 (and in mode MODE if that's convenient).
27280 SUBTARGET may be used as the target for computing one of EXP's operands.
27281 IGNORE is nonzero if the value is to be ignored. */
27284 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
27285 enum machine_mode mode ATTRIBUTE_UNUSED,
27286 int ignore ATTRIBUTE_UNUSED)
27288 const struct builtin_description *d;
27290 enum insn_code icode;
27291 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
27292 tree arg0, arg1, arg2;
27293 rtx op0, op1, op2, pat;
27294 enum machine_mode mode0, mode1, mode2;
27295 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
27297 /* Determine whether the builtin function is available under the current ISA.
27298 Originally the builtin was not created if it wasn't applicable to the
27299 current ISA based on the command-line switches. With function-specific
27300 options, we need to check in the context of the function making the call
27301 whether it is supported. */
27302 if (ix86_builtins_isa[fcode].isa
27303 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
27305 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
27306 NULL, NULL, false);
27309 error ("%qE needs unknown isa option", fndecl);
27312 gcc_assert (opts != NULL);
27313 error ("%qE needs isa option %s", fndecl, opts);
27321 case IX86_BUILTIN_MASKMOVQ:
27322 case IX86_BUILTIN_MASKMOVDQU:
27323 icode = (fcode == IX86_BUILTIN_MASKMOVQ
27324 ? CODE_FOR_mmx_maskmovq
27325 : CODE_FOR_sse2_maskmovdqu);
27326 /* Note the arg order is different from the operand order. */
27327 arg1 = CALL_EXPR_ARG (exp, 0);
27328 arg2 = CALL_EXPR_ARG (exp, 1);
27329 arg0 = CALL_EXPR_ARG (exp, 2);
27330 op0 = expand_normal (arg0);
27331 op1 = expand_normal (arg1);
27332 op2 = expand_normal (arg2);
27333 mode0 = insn_data[icode].operand[0].mode;
27334 mode1 = insn_data[icode].operand[1].mode;
27335 mode2 = insn_data[icode].operand[2].mode;
27337 op0 = force_reg (Pmode, op0);
27338 op0 = gen_rtx_MEM (mode1, op0);
27340 if (!insn_data[icode].operand[0].predicate (op0, mode0))
27341 op0 = copy_to_mode_reg (mode0, op0);
27342 if (!insn_data[icode].operand[1].predicate (op1, mode1))
27343 op1 = copy_to_mode_reg (mode1, op1);
27344 if (!insn_data[icode].operand[2].predicate (op2, mode2))
27345 op2 = copy_to_mode_reg (mode2, op2);
27346 pat = GEN_FCN (icode) (op0, op1, op2);
27352 case IX86_BUILTIN_LDMXCSR:
27353 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
27354 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
27355 emit_move_insn (target, op0);
27356 emit_insn (gen_sse_ldmxcsr (target));
27359 case IX86_BUILTIN_STMXCSR:
27360 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
27361 emit_insn (gen_sse_stmxcsr (target));
27362 return copy_to_mode_reg (SImode, target);
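/* Informal note (mapping assumed from xmmintrin.h): _mm_getcsr () expands
   through IX86_BUILTIN_STMXCSR; the MXCSR value is bounced through a stack
   slot because stmxcsr can only store to memory. */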
27364 case IX86_BUILTIN_CLFLUSH:
27365 arg0 = CALL_EXPR_ARG (exp, 0);
27366 op0 = expand_normal (arg0);
27367 icode = CODE_FOR_sse2_clflush;
27368 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
27369 op0 = copy_to_mode_reg (Pmode, op0);
27371 emit_insn (gen_sse2_clflush (op0));
27374 case IX86_BUILTIN_MONITOR:
27375 arg0 = CALL_EXPR_ARG (exp, 0);
27376 arg1 = CALL_EXPR_ARG (exp, 1);
27377 arg2 = CALL_EXPR_ARG (exp, 2);
27378 op0 = expand_normal (arg0);
27379 op1 = expand_normal (arg1);
27380 op2 = expand_normal (arg2);
27382 op0 = copy_to_mode_reg (Pmode, op0);
27384 op1 = copy_to_mode_reg (SImode, op1);
27386 op2 = copy_to_mode_reg (SImode, op2);
27387 emit_insn (ix86_gen_monitor (op0, op1, op2));
27390 case IX86_BUILTIN_MWAIT:
27391 arg0 = CALL_EXPR_ARG (exp, 0);
27392 arg1 = CALL_EXPR_ARG (exp, 1);
27393 op0 = expand_normal (arg0);
27394 op1 = expand_normal (arg1);
27396 op0 = copy_to_mode_reg (SImode, op0);
27398 op1 = copy_to_mode_reg (SImode, op1);
27399 emit_insn (gen_sse3_mwait (op0, op1));
27402 case IX86_BUILTIN_VEC_INIT_V2SI:
27403 case IX86_BUILTIN_VEC_INIT_V4HI:
27404 case IX86_BUILTIN_VEC_INIT_V8QI:
27405 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
27407 case IX86_BUILTIN_VEC_EXT_V2DF:
27408 case IX86_BUILTIN_VEC_EXT_V2DI:
27409 case IX86_BUILTIN_VEC_EXT_V4SF:
27410 case IX86_BUILTIN_VEC_EXT_V4SI:
27411 case IX86_BUILTIN_VEC_EXT_V8HI:
27412 case IX86_BUILTIN_VEC_EXT_V2SI:
27413 case IX86_BUILTIN_VEC_EXT_V4HI:
27414 case IX86_BUILTIN_VEC_EXT_V16QI:
27415 return ix86_expand_vec_ext_builtin (exp, target);
27417 case IX86_BUILTIN_VEC_SET_V2DI:
27418 case IX86_BUILTIN_VEC_SET_V4SF:
27419 case IX86_BUILTIN_VEC_SET_V4SI:
27420 case IX86_BUILTIN_VEC_SET_V8HI:
27421 case IX86_BUILTIN_VEC_SET_V4HI:
27422 case IX86_BUILTIN_VEC_SET_V16QI:
27423 return ix86_expand_vec_set_builtin (exp);
27425 case IX86_BUILTIN_VEC_PERM_V2DF:
27426 case IX86_BUILTIN_VEC_PERM_V4SF:
27427 case IX86_BUILTIN_VEC_PERM_V2DI:
27428 case IX86_BUILTIN_VEC_PERM_V4SI:
27429 case IX86_BUILTIN_VEC_PERM_V8HI:
27430 case IX86_BUILTIN_VEC_PERM_V16QI:
27431 case IX86_BUILTIN_VEC_PERM_V2DI_U:
27432 case IX86_BUILTIN_VEC_PERM_V4SI_U:
27433 case IX86_BUILTIN_VEC_PERM_V8HI_U:
27434 case IX86_BUILTIN_VEC_PERM_V16QI_U:
27435 case IX86_BUILTIN_VEC_PERM_V4DF:
27436 case IX86_BUILTIN_VEC_PERM_V8SF:
27437 return ix86_expand_vec_perm_builtin (exp);
27439 case IX86_BUILTIN_INFQ:
27440 case IX86_BUILTIN_HUGE_VALQ:
27442 REAL_VALUE_TYPE inf;
27446 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
27448 tmp = validize_mem (force_const_mem (mode, tmp));
27451 target = gen_reg_rtx (mode);
27453 emit_move_insn (target, tmp);
27457 case IX86_BUILTIN_LLWPCB:
27458 arg0 = CALL_EXPR_ARG (exp, 0);
27459 op0 = expand_normal (arg0);
27460 icode = CODE_FOR_lwp_llwpcb;
27461 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
27462 op0 = copy_to_mode_reg (Pmode, op0);
27463 emit_insn (gen_lwp_llwpcb (op0));
27466 case IX86_BUILTIN_SLWPCB:
27467 icode = CODE_FOR_lwp_slwpcb;
27469 || !insn_data[icode].operand[0].predicate (target, Pmode))
27470 target = gen_reg_rtx (Pmode);
27471 emit_insn (gen_lwp_slwpcb (target));
27474 case IX86_BUILTIN_BEXTRI32:
27475 case IX86_BUILTIN_BEXTRI64:
27476 arg0 = CALL_EXPR_ARG (exp, 0);
27477 arg1 = CALL_EXPR_ARG (exp, 1);
27478 op0 = expand_normal (arg0);
27479 op1 = expand_normal (arg1);
27480 icode = (fcode == IX86_BUILTIN_BEXTRI32
27481 ? CODE_FOR_tbm_bextri_si
27482 : CODE_FOR_tbm_bextri_di);
27483 if (!CONST_INT_P (op1))
27485 error ("last argument must be an immediate");
27490 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
27491 unsigned char lsb_index = INTVAL (op1) & 0xFF;
27492 op1 = GEN_INT (length);
27493 op2 = GEN_INT (lsb_index);
27494 pat = GEN_FCN (icode) (target, op0, op1, op2);
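/* Worked example of the immediate split above: for
   __builtin_ia32_bextri_u32 (x, 0x0508), the packed operand 0x0508
   decomposes into lsb_index = 0x08 and length = 0x05, i.e. extract the
   five bits x[12:8]. */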
27500 case IX86_BUILTIN_RDRAND16_STEP:
27501 icode = CODE_FOR_rdrandhi_1;
27505 case IX86_BUILTIN_RDRAND32_STEP:
27506 icode = CODE_FOR_rdrandsi_1;
27510 case IX86_BUILTIN_RDRAND64_STEP:
27511 icode = CODE_FOR_rdranddi_1;
27515 op0 = gen_reg_rtx (mode0);
27516 emit_insn (GEN_FCN (icode) (op0));
27518 op1 = gen_reg_rtx (SImode);
27519 emit_move_insn (op1, CONST1_RTX (SImode));
27521 /* Emit SImode conditional move. */
27522 if (mode0 == HImode)
27524 op2 = gen_reg_rtx (SImode);
27525 emit_insn (gen_zero_extendhisi2 (op2, op0));
27527 else if (mode0 == SImode)
27530 op2 = gen_rtx_SUBREG (SImode, op0, 0);
27532 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
27534 emit_insn (gen_rtx_SET (VOIDmode, op1,
27535 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
27536 emit_move_insn (target, op1);
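/* Informal reading of the sequence above: op1 starts at 1; GEU on CCCmode
   is true when the carry flag is clear, i.e. when rdrand failed, and in
   that case the conditional move replaces op1 with the hardware result,
   which the architecture documents as zero on failure. TARGET therefore
   ends up holding the 0/1 success status, while the random value itself
   is stored through the pointer argument below. */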
27538 arg0 = CALL_EXPR_ARG (exp, 0);
27539 op1 = expand_normal (arg0);
27540 if (!address_operand (op1, VOIDmode))
27541 op1 = copy_addr_to_reg (op1);
27542 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
27549 for (i = 0, d = bdesc_special_args;
27550 i < ARRAY_SIZE (bdesc_special_args);
27552 if (d->code == fcode)
27553 return ix86_expand_special_args_builtin (d, exp, target);
27555 for (i = 0, d = bdesc_args;
27556 i < ARRAY_SIZE (bdesc_args);
27558 if (d->code == fcode)
27561 case IX86_BUILTIN_FABSQ:
27562 case IX86_BUILTIN_COPYSIGNQ:
27564 /* Emit a normal call if SSE2 isn't available. */
27565 return expand_call (exp, target, ignore);
27567 return ix86_expand_args_builtin (d, exp, target);
27570 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
27571 if (d->code == fcode)
27572 return ix86_expand_sse_comi (d, exp, target);
27574 for (i = 0, d = bdesc_pcmpestr;
27575 i < ARRAY_SIZE (bdesc_pcmpestr);
27577 if (d->code == fcode)
27578 return ix86_expand_sse_pcmpestr (d, exp, target);
27580 for (i = 0, d = bdesc_pcmpistr;
27581 i < ARRAY_SIZE (bdesc_pcmpistr);
27583 if (d->code == fcode)
27584 return ix86_expand_sse_pcmpistr (d, exp, target);
27586 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
27587 if (d->code == fcode)
27588 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
27589 (enum ix86_builtin_func_type)
27590 d->flag, d->comparison);
27592 gcc_unreachable ();
27595 /* Returns a function decl for a vectorized version of the builtin function
27596 with builtin function code FN and the result vector type TYPE, or NULL_TREE
27597 if it is not available. */
27600 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
27603 enum machine_mode in_mode, out_mode;
27605 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
27607 if (TREE_CODE (type_out) != VECTOR_TYPE
27608 || TREE_CODE (type_in) != VECTOR_TYPE
27609 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
27612 out_mode = TYPE_MODE (TREE_TYPE (type_out));
27613 out_n = TYPE_VECTOR_SUBPARTS (type_out);
27614 in_mode = TYPE_MODE (TREE_TYPE (type_in));
27615 in_n = TYPE_VECTOR_SUBPARTS (type_in);
27619 case BUILT_IN_SQRT:
27620 if (out_mode == DFmode && in_mode == DFmode)
27622 if (out_n == 2 && in_n == 2)
27623 return ix86_builtins[IX86_BUILTIN_SQRTPD];
27624 else if (out_n == 4 && in_n == 4)
27625 return ix86_builtins[IX86_BUILTIN_SQRTPD256];
27629 case BUILT_IN_SQRTF:
27630 if (out_mode == SFmode && in_mode == SFmode)
27632 if (out_n == 4 && in_n == 4)
27633 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
27634 else if (out_n == 8 && in_n == 8)
27635 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR256];
27639 case BUILT_IN_LRINT:
27640 if (out_mode == SImode && out_n == 4
27641 && in_mode == DFmode && in_n == 2)
27642 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
27645 case BUILT_IN_LRINTF:
27646 if (out_mode == SImode && in_mode == SFmode)
27648 if (out_n == 4 && in_n == 4)
27649 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
27650 else if (out_n == 8 && in_n == 8)
27651 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ256];
27655 case BUILT_IN_COPYSIGN:
27656 if (out_mode == DFmode && in_mode == DFmode)
27658 if (out_n == 2 && in_n == 2)
27659 return ix86_builtins[IX86_BUILTIN_CPYSGNPD];
27660 else if (out_n == 4 && in_n == 4)
27661 return ix86_builtins[IX86_BUILTIN_CPYSGNPD256];
27665 case BUILT_IN_COPYSIGNF:
27666 if (out_mode == SFmode && in_mode == SFmode)
27668 if (out_n == 4 && in_n == 4)
27669 return ix86_builtins[IX86_BUILTIN_CPYSGNPS];
27670 else if (out_n == 8 && in_n == 8)
27671 return ix86_builtins[IX86_BUILTIN_CPYSGNPS256];
27675 case BUILT_IN_FLOOR:
27676 /* The round insn does not trap on denormals. */
27677 if (flag_trapping_math || !TARGET_ROUND)
27680 if (out_mode == DFmode && in_mode == DFmode)
27682 if (out_n == 2 && in_n == 2)
27683 return ix86_builtins[IX86_BUILTIN_FLOORPD];
27684 else if (out_n == 4 && in_n == 4)
27685 return ix86_builtins[IX86_BUILTIN_FLOORPD256];
27689 case BUILT_IN_FLOORF:
27690 /* The round insn does not trap on denormals. */
27691 if (flag_trapping_math || !TARGET_ROUND)
27694 if (out_mode == SFmode && in_mode == SFmode)
27696 if (out_n == 4 && in_n == 4)
27697 return ix86_builtins[IX86_BUILTIN_FLOORPS];
27698 else if (out_n == 8 && in_n == 8)
27699 return ix86_builtins[IX86_BUILTIN_FLOORPS256];
27703 case BUILT_IN_CEIL:
27704 /* The round insn does not trap on denormals. */
27705 if (flag_trapping_math || !TARGET_ROUND)
27708 if (out_mode == DFmode && in_mode == DFmode)
27710 if (out_n == 2 && in_n == 2)
27711 return ix86_builtins[IX86_BUILTIN_CEILPD];
27712 else if (out_n == 4 && in_n == 4)
27713 return ix86_builtins[IX86_BUILTIN_CEILPD256];
27717 case BUILT_IN_CEILF:
27718 /* The round insn does not trap on denormals. */
27719 if (flag_trapping_math || !TARGET_ROUND)
27722 if (out_mode == SFmode && in_mode == SFmode)
27724 if (out_n == 4 && in_n == 4)
27725 return ix86_builtins[IX86_BUILTIN_CEILPS];
27726 else if (out_n == 8 && in_n == 8)
27727 return ix86_builtins[IX86_BUILTIN_CEILPS256];
27731 case BUILT_IN_TRUNC:
27732 /* The round insn does not trap on denormals. */
27733 if (flag_trapping_math || !TARGET_ROUND)
27736 if (out_mode == DFmode && in_mode == DFmode)
27738 if (out_n == 2 && in_n == 2)
27739 return ix86_builtins[IX86_BUILTIN_TRUNCPD];
27740 else if (out_n == 4 && in_n == 4)
27741 return ix86_builtins[IX86_BUILTIN_TRUNCPD256];
27745 case BUILT_IN_TRUNCF:
27746 /* The round insn does not trap on denormals. */
27747 if (flag_trapping_math || !TARGET_ROUND)
27750 if (out_mode == SFmode && in_mode == SFmode)
27752 if (out_n == 4 && in_n == 4)
27753 return ix86_builtins[IX86_BUILTIN_TRUNCPS];
27754 else if (out_n == 8 && in_n == 8)
27755 return ix86_builtins[IX86_BUILTIN_TRUNCPS256];
27759 case BUILT_IN_RINT:
27760 /* The round insn does not trap on denormals. */
27761 if (flag_trapping_math || !TARGET_ROUND)
27764 if (out_mode == DFmode && in_mode == DFmode)
27766 if (out_n == 2 && in_n == 2)
27767 return ix86_builtins[IX86_BUILTIN_RINTPD];
27768 else if (out_n == 4 && in_n == 4)
27769 return ix86_builtins[IX86_BUILTIN_RINTPD256];
27773 case BUILT_IN_RINTF:
27774 /* The round insn does not trap on denormals. */
27775 if (flag_trapping_math || !TARGET_ROUND)
27778 if (out_mode == SFmode && in_mode == SFmode)
27780 if (out_n == 4 && in_n == 4)
27781 return ix86_builtins[IX86_BUILTIN_RINTPS];
27782 else if (out_n == 8 && in_n == 8)
27783 return ix86_builtins[IX86_BUILTIN_RINTPS256];
27788 if (out_mode == DFmode && in_mode == DFmode)
27790 if (out_n == 2 && in_n == 2)
27791 return ix86_builtins[IX86_BUILTIN_VFMADDPD];
27792 if (out_n == 4 && in_n == 4)
27793 return ix86_builtins[IX86_BUILTIN_VFMADDPD256];
27797 case BUILT_IN_FMAF:
27798 if (out_mode == SFmode && in_mode == SFmode)
27800 if (out_n == 4 && in_n == 4)
27801 return ix86_builtins[IX86_BUILTIN_VFMADDPS];
27802 if (out_n == 8 && in_n == 8)
27803 return ix86_builtins[IX86_BUILTIN_VFMADDPS256];
27811 /* Dispatch to a handler for a vectorization library. */
27812 if (ix86_veclib_handler)
27813 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
27819 /* Handler for an SVML-style interface to
27820 a library with vectorized intrinsics. */
27823 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
27826 tree fntype, new_fndecl, args;
27829 enum machine_mode el_mode, in_mode;
27832 /* SVML is suitable for unsafe math only. */
27833 if (!flag_unsafe_math_optimizations)
27836 el_mode = TYPE_MODE (TREE_TYPE (type_out));
27837 n = TYPE_VECTOR_SUBPARTS (type_out);
27838 in_mode = TYPE_MODE (TREE_TYPE (type_in));
27839 in_n = TYPE_VECTOR_SUBPARTS (type_in);
27840 if (el_mode != in_mode
27848 case BUILT_IN_LOG10:
27850 case BUILT_IN_TANH:
27852 case BUILT_IN_ATAN:
27853 case BUILT_IN_ATAN2:
27854 case BUILT_IN_ATANH:
27855 case BUILT_IN_CBRT:
27856 case BUILT_IN_SINH:
27858 case BUILT_IN_ASINH:
27859 case BUILT_IN_ASIN:
27860 case BUILT_IN_COSH:
27862 case BUILT_IN_ACOSH:
27863 case BUILT_IN_ACOS:
27864 if (el_mode != DFmode || n != 2)
27868 case BUILT_IN_EXPF:
27869 case BUILT_IN_LOGF:
27870 case BUILT_IN_LOG10F:
27871 case BUILT_IN_POWF:
27872 case BUILT_IN_TANHF:
27873 case BUILT_IN_TANF:
27874 case BUILT_IN_ATANF:
27875 case BUILT_IN_ATAN2F:
27876 case BUILT_IN_ATANHF:
27877 case BUILT_IN_CBRTF:
27878 case BUILT_IN_SINHF:
27879 case BUILT_IN_SINF:
27880 case BUILT_IN_ASINHF:
27881 case BUILT_IN_ASINF:
27882 case BUILT_IN_COSHF:
27883 case BUILT_IN_COSF:
27884 case BUILT_IN_ACOSHF:
27885 case BUILT_IN_ACOSF:
27886 if (el_mode != SFmode || n != 4)
27894 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
27896 if (fn == BUILT_IN_LOGF)
27897 strcpy (name, "vmlsLn4");
27898 else if (fn == BUILT_IN_LOG)
27899 strcpy (name, "vmldLn2");
27902 sprintf (name, "vmls%s", bname+10);
27903 name[strlen (name)-1] = '4';
27906 sprintf (name, "vmld%s2", bname+10);
27908 /* Convert to uppercase. */
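/* Worked example of the mangling above (assuming the usual
   name[4] &= ~0x20 uppercasing that the comment refers to):
   BUILT_IN_SINF has bname "__builtin_sinf", so bname+10 is "sinf"; the
   SFmode branch produces "vmlssin4", and the uppercasing yields
   "vmlsSin4". The DFmode branch gives "vmldSin2" for BUILT_IN_SIN. */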
27912 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
27913 args = TREE_CHAIN (args))
27917 fntype = build_function_type_list (type_out, type_in, NULL);
27919 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
27921 /* Build a function declaration for the vectorized function. */
27922 new_fndecl = build_decl (BUILTINS_LOCATION,
27923 FUNCTION_DECL, get_identifier (name), fntype);
27924 TREE_PUBLIC (new_fndecl) = 1;
27925 DECL_EXTERNAL (new_fndecl) = 1;
27926 DECL_IS_NOVOPS (new_fndecl) = 1;
27927 TREE_READONLY (new_fndecl) = 1;
27932 /* Handler for an ACML-style interface to
27933 a library with vectorized intrinsics. */
27936 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
27938 char name[20] = "__vr.._";
27939 tree fntype, new_fndecl, args;
27942 enum machine_mode el_mode, in_mode;
27945 /* ACML is 64-bit only and suitable for unsafe math only, as
27946 it does not correctly support parts of IEEE semantics, such as
27947 denormals, with the required precision. */
27949 || !flag_unsafe_math_optimizations)
27952 el_mode = TYPE_MODE (TREE_TYPE (type_out));
27953 n = TYPE_VECTOR_SUBPARTS (type_out);
27954 in_mode = TYPE_MODE (TREE_TYPE (type_in));
27955 in_n = TYPE_VECTOR_SUBPARTS (type_in);
27956 if (el_mode != in_mode
27966 case BUILT_IN_LOG2:
27967 case BUILT_IN_LOG10:
27970 if (el_mode != DFmode
27975 case BUILT_IN_SINF:
27976 case BUILT_IN_COSF:
27977 case BUILT_IN_EXPF:
27978 case BUILT_IN_POWF:
27979 case BUILT_IN_LOGF:
27980 case BUILT_IN_LOG2F:
27981 case BUILT_IN_LOG10F:
27984 if (el_mode != SFmode
27993 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
27994 sprintf (name + 7, "%s", bname+10);
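/* Worked example (assuming the name[4]/name[5] patch-up performed in the
   switch above, which is not shown here): BUILT_IN_SIN has bname
   "__builtin_sin", so the template "__vr.._" becomes "__vrd2_sin";
   BUILT_IN_SINF likewise becomes "__vrs4_sinf". */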
27997 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
27998 args = TREE_CHAIN (args))
28002 fntype = build_function_type_list (type_out, type_in, NULL);
28004 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
28006 /* Build a function declaration for the vectorized function. */
28007 new_fndecl = build_decl (BUILTINS_LOCATION,
28008 FUNCTION_DECL, get_identifier (name), fntype);
28009 TREE_PUBLIC (new_fndecl) = 1;
28010 DECL_EXTERNAL (new_fndecl) = 1;
28011 DECL_IS_NOVOPS (new_fndecl) = 1;
28012 TREE_READONLY (new_fndecl) = 1;
28018 /* Returns a decl of a function that implements conversion of an integer vector
28019 into a floating-point vector, or vice versa. DEST_TYPE and SRC_TYPE
28020 are the types involved when converting according to CODE.
28021 Return NULL_TREE if it is not available. */
28024 ix86_vectorize_builtin_conversion (unsigned int code,
28025 tree dest_type, tree src_type)
28033 switch (TYPE_MODE (src_type))
28036 switch (TYPE_MODE (dest_type))
28039 return (TYPE_UNSIGNED (src_type)
28040 ? ix86_builtins[IX86_BUILTIN_CVTUDQ2PS]
28041 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS]);
28043 return (TYPE_UNSIGNED (src_type)
28045 : ix86_builtins[IX86_BUILTIN_CVTDQ2PD256]);
28051 switch (TYPE_MODE (dest_type))
28054 return (TYPE_UNSIGNED (src_type)
28056 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS256]);
28065 case FIX_TRUNC_EXPR:
28066 switch (TYPE_MODE (dest_type))
28069 switch (TYPE_MODE (src_type))
28072 return (TYPE_UNSIGNED (dest_type)
28074 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ]);
28076 return (TYPE_UNSIGNED (dest_type)
28078 : ix86_builtins[IX86_BUILTIN_CVTTPD2DQ256]);
28085 switch (TYPE_MODE (src_type))
28088 return (TYPE_UNSIGNED (dest_type)
28090 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ256]);
28107 /* Returns a decl for a target-specific builtin that implements the
28108 reciprocal of the function, or NULL_TREE if not available. */
28111 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
28112 bool sqrt ATTRIBUTE_UNUSED)
28114 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
28115 && flag_finite_math_only && !flag_trapping_math
28116 && flag_unsafe_math_optimizations))
28120 /* Machine dependent builtins. */
28123 /* Vectorized version of sqrt to rsqrt conversion. */
28124 case IX86_BUILTIN_SQRTPS_NR:
28125 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
28127 case IX86_BUILTIN_SQRTPS_NR256:
28128 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR256];
28134 /* Normal builtins. */
28137 /* Sqrt to rsqrt conversion. */
28138 case BUILT_IN_SQRTF:
28139 return ix86_builtins[IX86_BUILTIN_RSQRTF];
28146 /* Helper for avx_vpermilps256_operand et al. This is also used by
28147 the expansion functions to turn the parallel back into a mask.
28148 The return value is 0 for no match and imm8+1 for a match. */
28151 avx_vpermilp_parallel (rtx par, enum machine_mode mode)
28153 unsigned i, nelt = GET_MODE_NUNITS (mode);
28155 unsigned char ipar[8];
28157 if (XVECLEN (par, 0) != (int) nelt)
28160 /* Validate that all of the elements are constants, and not totally
28161 out of range. Copy the data into an integral array to make the
28162 subsequent checks easier. */
28163 for (i = 0; i < nelt; ++i)
28165 rtx er = XVECEXP (par, 0, i);
28166 unsigned HOST_WIDE_INT ei;
28168 if (!CONST_INT_P (er))
28179 /* In the 256-bit DFmode case, we can only move elements within a 128-bit lane. */
28181 for (i = 0; i < 2; ++i)
28185 mask |= ipar[i] << i;
28187 for (i = 2; i < 4; ++i)
28191 mask |= (ipar[i] - 2) << i;
28196 /* In the 256-bit SFmode case, we have full freedom of movement
28197 within the low 128-bit lane, but the high 128-bit lane must
28198 mirror the exact same pattern. */
28199 for (i = 0; i < 4; ++i)
28200 if (ipar[i] + 4 != ipar[i + 4])
28207 /* In the 128-bit case, we've full freedom in the placement of
28208 the elements from the source operand. */
28209 for (i = 0; i < nelt; ++i)
28210 mask |= ipar[i] << (i * (nelt / 2));
28214 gcc_unreachable ();
28217 /* Make sure success has a non-zero value by adding one. */
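/* Worked example for the 128-bit V4SF case: the parallel [2 3 0 1]
   packs as mask = 2 | (3 << 2) | (0 << 4) | (1 << 6) = 0x4e, so the
   return value is 0x4f and callers subtract one to recover the
   vpermilps immediate. */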
28221 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
28222 the expansion functions to turn the parallel back into a mask.
28223 The return value is 0 for no match and imm8+1 for a match. */
28226 avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
28228 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
28230 unsigned char ipar[8];
28232 if (XVECLEN (par, 0) != (int) nelt)
28235 /* Validate that all of the elements are constants, and not totally
28236 out of range. Copy the data into an integral array to make the
28237 subsequent checks easier. */
28238 for (i = 0; i < nelt; ++i)
28240 rtx er = XVECEXP (par, 0, i);
28241 unsigned HOST_WIDE_INT ei;
28243 if (!CONST_INT_P (er))
28246 if (ei >= 2 * nelt)
28251 /* Validate that each half of the permute selects consecutive elements. */
28252 for (i = 0; i < nelt2 - 1; ++i)
28253 if (ipar[i] + 1 != ipar[i + 1])
28255 for (i = nelt2; i < nelt - 1; ++i)
28256 if (ipar[i] + 1 != ipar[i + 1])
28259 /* Reconstruct the mask. */
28260 for (i = 0; i < 2; ++i)
28262 unsigned e = ipar[i * nelt2];
28266 mask |= e << (i * 4);
28269 /* Make sure success has a non-zero value by adding one. */
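/* Worked example for V4DF (nelt = 4, nelt2 = 2), assuming the usual
   e /= nelt2 reduction between the two statements above: the parallel
   [0 1 4 5] selects the low lane of each source, giving e = 0 for the
   low half and e = 4 / 2 = 2 for the high half, so mask = (2 << 4) = 0x20
   and the return value is 0x21. */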
28274 /* Store OPERAND to memory after reload has completed. This means
28275 that we can't easily use assign_stack_local. */
28277 ix86_force_to_memory (enum machine_mode mode, rtx operand)
28281 gcc_assert (reload_completed);
28282 if (ix86_using_red_zone ())
28284 result = gen_rtx_MEM (mode,
28285 gen_rtx_PLUS (Pmode,
28287 GEN_INT (-RED_ZONE_SIZE)));
28288 emit_move_insn (result, operand);
28290 else if (TARGET_64BIT)
28296 operand = gen_lowpart (DImode, operand);
28300 gen_rtx_SET (VOIDmode,
28301 gen_rtx_MEM (DImode,
28302 gen_rtx_PRE_DEC (DImode,
28303 stack_pointer_rtx)),
28307 gcc_unreachable ();
28309 result = gen_rtx_MEM (mode, stack_pointer_rtx);
28318 split_double_mode (mode, &operand, 1, operands, operands + 1);
28320 gen_rtx_SET (VOIDmode,
28321 gen_rtx_MEM (SImode,
28322 gen_rtx_PRE_DEC (Pmode,
28323 stack_pointer_rtx)),
28326 gen_rtx_SET (VOIDmode,
28327 gen_rtx_MEM (SImode,
28328 gen_rtx_PRE_DEC (Pmode,
28329 stack_pointer_rtx)),
28334 /* Store HImodes as SImodes. */
28335 operand = gen_lowpart (SImode, operand);
28339 gen_rtx_SET (VOIDmode,
28340 gen_rtx_MEM (GET_MODE (operand),
28341 gen_rtx_PRE_DEC (SImode,
28342 stack_pointer_rtx)),
28346 gcc_unreachable ();
28348 result = gen_rtx_MEM (mode, stack_pointer_rtx);
28353 /* Free the operand from memory. */
28355 ix86_free_from_memory (enum machine_mode mode)
28357 if (!ix86_using_red_zone ())
28361 if (mode == DImode || TARGET_64BIT)
28365 /* Use LEA to deallocate stack space. In peephole2 it will be converted
28366 to a pop or add instruction if registers are available. */
28367 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
28368 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
28373 /* Implement TARGET_IRA_COVER_CLASSES. If -mfpmath=sse, we prefer
28374 SSE_REGS to FLOAT_REGS if their costs for a pseudo are the same. */
28376 static const reg_class_t *
28377 i386_ira_cover_classes (void)
28379 static const reg_class_t sse_fpmath_classes[] = {
28380 GENERAL_REGS, SSE_REGS, MMX_REGS, FLOAT_REGS, LIM_REG_CLASSES
28382 static const reg_class_t no_sse_fpmath_classes[] = {
28383 GENERAL_REGS, FLOAT_REGS, MMX_REGS, SSE_REGS, LIM_REG_CLASSES
28386 return TARGET_SSE_MATH ? sse_fpmath_classes : no_sse_fpmath_classes;
28389 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
28391 Put float CONST_DOUBLE in the constant pool instead of fp regs.
28392 QImode must go into class Q_REGS.
28393 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
28394 movdf to do mem-to-mem moves through integer regs. */
28397 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
28399 enum machine_mode mode = GET_MODE (x);
28401 /* We're only allowed to return a subclass of CLASS. Many of the
28402 following checks fail for NO_REGS, so eliminate that early. */
28403 if (regclass == NO_REGS)
28406 /* All classes can load zeros. */
28407 if (x == CONST0_RTX (mode))
28410 /* Force constants into memory if we are loading a (nonzero) constant into
28411 an MMX or SSE register. This is because there are no MMX/SSE instructions
28412 to load from a constant. */
28414 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
28417 /* Prefer SSE regs only, if we can use them for math. */
28418 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
28419 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
28421 /* Floating-point constants need more complex checks. */
28422 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
28424 /* General regs can load everything. */
28425 if (reg_class_subset_p (regclass, GENERAL_REGS))
28428 /* Floats can load 0 and 1 plus some others. Note that we eliminated
28429 zero above. We only want to wind up preferring 80387 registers if
28430 we plan on doing computation with them. */
28432 && standard_80387_constant_p (x))
28434 /* Limit class to non-sse. */
28435 if (regclass == FLOAT_SSE_REGS)
28437 if (regclass == FP_TOP_SSE_REGS)
28439 if (regclass == FP_SECOND_SSE_REGS)
28440 return FP_SECOND_REG;
28441 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
28448 /* Generally when we see PLUS here, it's the function invariant
28449 (plus soft-fp const_int), which can only be computed into general registers. */
28451 if (GET_CODE (x) == PLUS)
28452 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
28454 /* QImode constants are easy to load, but non-constant QImode data
28455 must go into Q_REGS. */
28456 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
28458 if (reg_class_subset_p (regclass, Q_REGS))
28460 if (reg_class_subset_p (Q_REGS, regclass))
28468 /* Discourage putting floating-point values in SSE registers unless
28469 SSE math is being used, and likewise for the 387 registers. */
28471 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
28473 enum machine_mode mode = GET_MODE (x);
28475 /* Restrict the output reload class to the register bank that we are doing
28476 math on. If we would prefer not to return a subset of CLASS, reject this
28477 alternative: if reload cannot do this, it will still use its choice. */
28479 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
28480 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
28482 if (X87_FLOAT_MODE_P (mode))
28484 if (regclass == FP_TOP_SSE_REGS)
28486 else if (regclass == FP_SECOND_SSE_REGS)
28487 return FP_SECOND_REG;
28489 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
28496 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
28497 enum machine_mode mode,
28498 secondary_reload_info *sri ATTRIBUTE_UNUSED)
28500 /* QImode spills from non-QI registers require
28501 an intermediate register on 32-bit targets. */
28503 && !in_p && mode == QImode
28504 && (rclass == GENERAL_REGS
28505 || rclass == LEGACY_REGS
28506 || rclass == INDEX_REGS))
28515 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
28516 regno = true_regnum (x);
28518 /* Return Q_REGS if the operand is in memory. */
28523 /* This condition handles the corner case where an expression involving
28524 pointers gets vectorized. We're trying to use the address of a
28525 stack slot as a vector initializer.
28527 (set (reg:V2DI 74 [ vect_cst_.2 ])
28528 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
28530 Eventually frame gets turned into sp+offset like this:
28532 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
28533 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
28534 (const_int 392 [0x188]))))
28536 That later gets turned into:
28538 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
28539 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
28540 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
28542 We'll have the following reload recorded:
28544 Reload 0: reload_in (DI) =
28545 (plus:DI (reg/f:DI 7 sp)
28546 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
28547 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
28548 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
28549 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
28550 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
28551 reload_reg_rtx: (reg:V2DI 22 xmm1)
28553 That isn't going to work, since SSE instructions can't handle scalar
28554 additions. Returning GENERAL_REGS forces the addition into an integer
28555 register, and reload can handle subsequent reloads without problems. */
28557 if (in_p && GET_CODE (x) == PLUS
28558 && SSE_CLASS_P (rclass)
28559 && SCALAR_INT_MODE_P (mode))
28560 return GENERAL_REGS;
28565 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
28568 ix86_class_likely_spilled_p (reg_class_t rclass)
28579 case SSE_FIRST_REG:
28581 case FP_SECOND_REG:
28591 /* If we are copying between general and FP registers, we need a memory
28592 location. The same is true for SSE and MMX registers.
28594 To optimize register_move_cost performance, an inline variant is provided.
28596 The macro can't work reliably when one of the CLASSES is a class containing
28597 registers from multiple units (SSE, MMX, integer). We avoid this by never
28598 combining those units in a single alternative in the machine description.
28599 Ensure that this constraint holds to avoid unexpected surprises.
28601 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
28602 enforce these sanity checks. */
28605 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
28606 enum machine_mode mode, int strict)
28608 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
28609 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
28610 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
28611 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
28612 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
28613 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
28615 gcc_assert (!strict);
28619 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
28622 /* ??? This is a lie. We do have moves between mmx/general, and for
28623 mmx/sse2. But by saying we need secondary memory we discourage the
28624 register allocator from using the mmx registers unless needed. */
28625 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
28628 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
28630 /* SSE1 doesn't have any direct moves from other classes. */
28634 /* If the target says that inter-unit moves are more expensive
28635 than moving through memory, then don't generate them. */
28636 if (!TARGET_INTER_UNIT_MOVES)
28639 /* Between SSE and general, we have moves no larger than word size. */
28640 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
28648 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
28649 enum machine_mode mode, int strict)
28651 return inline_secondary_memory_needed (class1, class2, mode, strict);
28654 /* Return true if the registers in CLASS cannot represent a change from
28655 mode FROM to mode TO. */
28658 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
28659 enum reg_class regclass)
28664 /* x87 registers can't do subreg at all, as all values are reformatted
28665 to extended precision. */
28666 if (MAYBE_FLOAT_CLASS_P (regclass))
28669 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
28671 /* Vector registers do not support QI or HImode loads. If we don't
28672 disallow a change to these modes, reload will assume it's ok to
28673 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
28674 the vec_dupv4hi pattern. */
28675 if (GET_MODE_SIZE (from) < 4)
28678 /* Vector registers do not support subreg with nonzero offsets, which
28679 are otherwise valid for integer registers. Since we can't see
28680 whether we have a nonzero offset from here, prohibit all
28681 nonparadoxical subregs changing size. */
28682 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
28689 /* Return the cost of moving data of mode M between a
28690 register and memory. A value of 2 is the default; this cost is
28691 relative to those in `REGISTER_MOVE_COST'.
28693 This function is used extensively by register_move_cost, which is used to
28694 build tables at startup. Make it inline in this case.
28695 When IN is 2, return the maximum of the in and out move costs.
28697 If moving between registers and memory is more expensive than
28698 between two registers, you should define this macro to express the relative cost.
28701 Also model the increased cost of moving QImode registers in non-Q_REGS classes. */
28705 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
28709 if (FLOAT_CLASS_P (regclass))
28727 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
28728 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
28730 if (SSE_CLASS_P (regclass))
28733 switch (GET_MODE_SIZE (mode))
28748 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
28749 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
28751 if (MMX_CLASS_P (regclass))
28754 switch (GET_MODE_SIZE (mode))
28766 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
28767 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
28769 switch (GET_MODE_SIZE (mode))
28772 if (Q_CLASS_P (regclass) || TARGET_64BIT)
28775 return ix86_cost->int_store[0];
28776 if (TARGET_PARTIAL_REG_DEPENDENCY
28777 && optimize_function_for_speed_p (cfun))
28778 cost = ix86_cost->movzbl_load;
28780 cost = ix86_cost->int_load[0];
28782 return MAX (cost, ix86_cost->int_store[0]);
28788 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
28790 return ix86_cost->movzbl_load;
28792 return ix86_cost->int_store[0] + 4;
28797 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
28798 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
28800 /* Compute the number of 32-bit moves needed. TFmode is moved as XFmode. */
28801 if (mode == TFmode)
28804 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
28806 cost = ix86_cost->int_load[2];
28808 cost = ix86_cost->int_store[2];
28809 return (cost * (((int) GET_MODE_SIZE (mode)
28810 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
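/* Informal example: on a 32-bit target XFmode is 12 bytes, so the
   expression above charges three word-sized moves; TFmode gets the same
   treatment because it was retargeted to XFmode just before. */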
28815 ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
28818 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
28822 /* Return the cost of moving data from a register in class CLASS1 to
28823 one in class CLASS2.
28825 It is not required that the cost always equal 2 when FROM is the same as TO;
28826 on some machines it is expensive to move between registers if they are not
28827 general registers. */
28830 ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
28831 reg_class_t class2_i)
28833 enum reg_class class1 = (enum reg_class) class1_i;
28834 enum reg_class class2 = (enum reg_class) class2_i;
28836 /* In case we require secondary memory, compute the cost of the store
28837 followed by the load. In order to avoid bad register allocation choices, we need
28838 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
28840 if (inline_secondary_memory_needed (class1, class2, mode, 0))
28844 cost += inline_memory_move_cost (mode, class1, 2);
28845 cost += inline_memory_move_cost (mode, class2, 2);
28847 /* In the case of copying from a general purpose register, we may emit
28848 multiple stores followed by a single load, causing a memory size mismatch
28849 stall. Count this as an arbitrarily high cost of 20. */
28850 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
28853 /* In the case of FP/MMX moves, the registers actually overlap, and we
28854 have to switch modes in order to treat them differently. */
28855 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
28856 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
28862 /* Moves between SSE/MMX and integer unit are expensive. */
28863 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
28864 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
28866 /* ??? By keeping the returned value relatively high, we limit the number
28867 of moves between integer and MMX/SSE registers for all targets.
28868 Additionally, a high value prevents problems with x86_modes_tieable_p(),
28869 where integer modes in MMX/SSE registers are not tieable
28870 because of missing QImode and HImode moves to, from, or between
28871 MMX/SSE registers. */
28872 return MAX (8, ix86_cost->mmxsse_to_integer);
28874 if (MAYBE_FLOAT_CLASS_P (class1))
28875 return ix86_cost->fp_move;
28876 if (MAYBE_SSE_CLASS_P (class1))
28877 return ix86_cost->sse_move;
28878 if (MAYBE_MMX_CLASS_P (class1))
28879 return ix86_cost->mmx_move;
28883 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
28886 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
28888 /* Only the flags register can hold CCmode values, and it can hold nothing else. */
28889 if (CC_REGNO_P (regno))
28890 return GET_MODE_CLASS (mode) == MODE_CC;
28891 if (GET_MODE_CLASS (mode) == MODE_CC
28892 || GET_MODE_CLASS (mode) == MODE_RANDOM
28893 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
28895 if (FP_REGNO_P (regno))
28896 return VALID_FP_MODE_P (mode);
28897 if (SSE_REGNO_P (regno))
28899 /* We implement the move patterns for all vector modes into and
28900 out of SSE registers, even when no operation instructions
28901 are available. OImode move is available only when AVX is
28903 return ((TARGET_AVX && mode == OImode)
28904 || VALID_AVX256_REG_MODE (mode)
28905 || VALID_SSE_REG_MODE (mode)
28906 || VALID_SSE2_REG_MODE (mode)
28907 || VALID_MMX_REG_MODE (mode)
28908 || VALID_MMX_REG_MODE_3DNOW (mode));
28910 if (MMX_REGNO_P (regno))
28912 /* We implement the move patterns for 3DNOW modes even in MMX mode,
28913 so if the register is available at all, then we can move data of
28914 the given mode into or out of it. */
28915 return (VALID_MMX_REG_MODE (mode)
28916 || VALID_MMX_REG_MODE_3DNOW (mode));
28919 if (mode == QImode)
28921 /* Take care with QImode values: they can be stored in non-QI regs,
28922 but then they do cause partial register stalls. */
28923 if (regno <= BX_REG || TARGET_64BIT)
28925 if (!TARGET_PARTIAL_REG_STALL)
28927 return reload_in_progress || reload_completed;
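/* Informal note: regno <= BX_REG covers %al/%bl/%cl/%dl, the only
   QImode-addressable registers in 32-bit mode; in 64-bit mode the REX
   prefix makes the low byte of every GPR (e.g. %sil, %dil) available. */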
28929 /* We handle both integers and floats in the general purpose registers. */
28930 else if (VALID_INT_MODE_P (mode))
28932 else if (VALID_FP_MODE_P (mode))
28934 else if (VALID_DFP_MODE_P (mode))
28936 /* Lots of MMX code casts 8-byte vector modes to DImode. If we then go
28937 on to use that value in smaller contexts, this can easily force a
28938 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
28939 supporting DImode, allow it. */
28940 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
28946 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
28947 tieable integer mode. */
28950 ix86_tieable_integer_mode_p (enum machine_mode mode)
28959 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
28962 return TARGET_64BIT;
28969 /* Return true if MODE1 is accessible in a register that can hold MODE2
28970 without copying. That is, all register classes that can hold MODE2
28971 can also hold MODE1. */
28974 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
28976 if (mode1 == mode2)
28979 if (ix86_tieable_integer_mode_p (mode1)
28980 && ix86_tieable_integer_mode_p (mode2))
28983 /* MODE2 being XFmode implies fp stack or general regs, which means we
28984 can tie any smaller floating-point modes to it. Note that we do not
28985 tie this with TFmode. */
28986 if (mode2 == XFmode)
28987 return mode1 == SFmode || mode1 == DFmode;
28989 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
28990 that we can tie it with SFmode. */
28991 if (mode2 == DFmode)
28992 return mode1 == SFmode;
28994 /* If MODE2 is only appropriate for an SSE register, then tie with
28995 any other mode acceptable to SSE registers. */
28996 if (GET_MODE_SIZE (mode2) == 16
28997 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
28998 return (GET_MODE_SIZE (mode1) == 16
28999 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
29001 /* If MODE2 is appropriate for an MMX register, then tie
29002 with any other mode acceptable to MMX registers. */
29003 if (GET_MODE_SIZE (mode2) == 8
29004 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
29005 return (GET_MODE_SIZE (mode1) == 8
29006 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
29011 /* Compute a (partial) cost for rtx X. Return true if the complete
29012 cost has been computed, and false if subexpressions should be
29013 scanned. In either case, *TOTAL contains the cost result. */
29016 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed)
29018 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
29019 enum machine_mode mode = GET_MODE (x);
29020 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
29028 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
29030 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
29032 else if (flag_pic && SYMBOLIC_CONST (x)
29034 || (GET_CODE (x) != LABEL_REF
29035 && (GET_CODE (x) != SYMBOL_REF
29036 || !SYMBOL_REF_LOCAL_P (x)))))
29043 if (mode == VOIDmode)
29046 switch (standard_80387_constant_p (x))
29051 default: /* Other constants */
29056 /* Start with (MEM (SYMBOL_REF)), since that's where
29057 it'll probably end up. Add a penalty for size. */
29058 *total = (COSTS_N_INSNS (1)
29059 + (flag_pic != 0 && !TARGET_64BIT)
29060 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
29066 /* Zero extension is often completely free on x86_64, so make
29067 it as cheap as possible. */
29068 if (TARGET_64BIT && mode == DImode
29069 && GET_MODE (XEXP (x, 0)) == SImode)
29071 else if (TARGET_ZERO_EXTEND_WITH_AND)
29072 *total = cost->add;
29074 *total = cost->movzx;
29078 *total = cost->movsx;
29082 if (CONST_INT_P (XEXP (x, 1))
29083 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
29085 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
29088 *total = cost->add;
29091 if ((value == 2 || value == 3)
29092 && cost->lea <= cost->shift_const)
29094 *total = cost->lea;
29104 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
29106 if (CONST_INT_P (XEXP (x, 1)))
29108 if (INTVAL (XEXP (x, 1)) > 32)
29109 *total = cost->shift_const + COSTS_N_INSNS (2);
29111 *total = cost->shift_const * 2;
29115 if (GET_CODE (XEXP (x, 1)) == AND)
29116 *total = cost->shift_var * 2;
29118 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
29123 if (CONST_INT_P (XEXP (x, 1)))
29124 *total = cost->shift_const;
29126 *total = cost->shift_var;
29134 gcc_assert (FLOAT_MODE_P (mode));
29135 gcc_assert (TARGET_FMA || TARGET_FMA4);
29137 /* ??? SSE scalar/vector cost should be used here. */
29138 /* ??? Bald assumption that fma has the same cost as fmul. */
29139 *total = cost->fmul;
29140 *total += rtx_cost (XEXP (x, 1), FMA, speed);
29142 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
29144 if (GET_CODE (sub) == NEG)
29146 *total += rtx_cost (sub, FMA, speed);
29149 if (GET_CODE (sub) == NEG)
29151 *total += rtx_cost (sub, FMA, speed);
29156 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
29158 /* ??? SSE scalar cost should be used here. */
29159 *total = cost->fmul;
29162 else if (X87_FLOAT_MODE_P (mode))
29164 *total = cost->fmul;
29167 else if (FLOAT_MODE_P (mode))
29169 /* ??? SSE vector cost should be used here. */
29170 *total = cost->fmul;
29175 rtx op0 = XEXP (x, 0);
29176 rtx op1 = XEXP (x, 1);
29178 if (CONST_INT_P (XEXP (x, 1)))
29180 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
29181 for (nbits = 0; value != 0; value &= value - 1)
29185 /* This is arbitrary. */
29188 /* Compute costs correctly for widening multiplication. */
29189 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
29190 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
29191 == GET_MODE_SIZE (mode))
29193 int is_mulwiden = 0;
29194 enum machine_mode inner_mode = GET_MODE (op0);
29196 if (GET_CODE (op0) == GET_CODE (op1))
29197 is_mulwiden = 1, op1 = XEXP (op1, 0);
29198 else if (CONST_INT_P (op1))
29200 if (GET_CODE (op0) == SIGN_EXTEND)
29201 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
29204 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
29208 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
29211 *total = (cost->mult_init[MODE_INDEX (mode)]
29212 + nbits * cost->mult_bit
29213 + rtx_cost (op0, outer_code, speed) + rtx_cost (op1, outer_code, speed));
29222 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
29223 /* ??? SSE cost should be used here. */
29224 *total = cost->fdiv;
29225 else if (X87_FLOAT_MODE_P (mode))
29226 *total = cost->fdiv;
29227 else if (FLOAT_MODE_P (mode))
29228 /* ??? SSE vector cost should be used here. */
29229 *total = cost->fdiv;
29231 *total = cost->divide[MODE_INDEX (mode)];
29235 if (GET_MODE_CLASS (mode) == MODE_INT
29236 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
29238 if (GET_CODE (XEXP (x, 0)) == PLUS
29239 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
29240 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
29241 && CONSTANT_P (XEXP (x, 1)))
29243 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
29244 if (val == 2 || val == 4 || val == 8)
29246 *total = cost->lea;
29247 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
29248 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
29249 outer_code, speed);
29250 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
29254 else if (GET_CODE (XEXP (x, 0)) == MULT
29255 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
29257 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
29258 if (val == 2 || val == 4 || val == 8)
29260 *total = cost->lea;
29261 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
29262 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
29266 else if (GET_CODE (XEXP (x, 0)) == PLUS)
29268 *total = cost->lea;
29269 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
29270 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
29271 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
29278 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
29280 /* ??? SSE cost should be used here. */
29281 *total = cost->fadd;
29284 else if (X87_FLOAT_MODE_P (mode))
29286 *total = cost->fadd;
29289 else if (FLOAT_MODE_P (mode))
29291 /* ??? SSE vector cost should be used here. */
29292 *total = cost->fadd;
29300 if (!TARGET_64BIT && mode == DImode)
29302 *total = (cost->add * 2
29303 + (rtx_cost (XEXP (x, 0), outer_code, speed)
29304 << (GET_MODE (XEXP (x, 0)) != DImode))
29305 + (rtx_cost (XEXP (x, 1), outer_code, speed)
29306 << (GET_MODE (XEXP (x, 1)) != DImode)));
29312 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
29314 /* ??? SSE cost should be used here. */
29315 *total = cost->fchs;
29318 else if (X87_FLOAT_MODE_P (mode))
29320 *total = cost->fchs;
29323 else if (FLOAT_MODE_P (mode))
29325 /* ??? SSE vector cost should be used here. */
29326 *total = cost->fchs;
29332 if (!TARGET_64BIT && mode == DImode)
29333 *total = cost->add * 2;
29335 *total = cost->add;
29339 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
29340 && XEXP (XEXP (x, 0), 1) == const1_rtx
29341 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
29342 && XEXP (x, 1) == const0_rtx)
29344 /* This kind of construct is implemented using test[bwl].
29345 Treat it as if we had an AND. */
29346 *total = (cost->add
29347 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed)
29348 + rtx_cost (const1_rtx, outer_code, speed));
29354 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
29359 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
29360 /* ??? SSE cost should be used here. */
29361 *total = cost->fabs;
29362 else if (X87_FLOAT_MODE_P (mode))
29363 *total = cost->fabs;
29364 else if (FLOAT_MODE_P (mode))
29365 /* ??? SSE vector cost should be used here. */
29366 *total = cost->fabs;
29370 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
29371 /* ??? SSE cost should be used here. */
29372 *total = cost->fsqrt;
29373 else if (X87_FLOAT_MODE_P (mode))
29374 *total = cost->fsqrt;
29375 else if (FLOAT_MODE_P (mode))
29376 /* ??? SSE vector cost should be used here. */
29377 *total = cost->fsqrt;
29381 if (XINT (x, 1) == UNSPEC_TP)
29388 case VEC_DUPLICATE:
29389 /* ??? Assume all of these vector manipulation patterns are
29390 recognizable, in which case they all pretty much have the same cost. */
29392 *total = COSTS_N_INSNS (1);
29402 static int current_machopic_label_num;
29404 /* Given a symbol name and its associated stub, write out the
29405 definition of the stub. */
29408 machopic_output_stub (FILE *file, const char *symb, const char *stub)
29410 unsigned int length;
29411 char *binder_name, *symbol_name, lazy_ptr_name[32];
29412 int label = ++current_machopic_label_num;
29414 /* For 64-bit we shouldn't get here. */
29415 gcc_assert (!TARGET_64BIT);
29417 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
29418 symb = targetm.strip_name_encoding (symb);
29420 length = strlen (stub);
29421 binder_name = XALLOCAVEC (char, length + 32);
29422 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
29424 length = strlen (symb);
29425 symbol_name = XALLOCAVEC (char, length + 32);
29426 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
29428 sprintf (lazy_ptr_name, "L%d$lz", label);
29430 if (MACHOPIC_ATT_STUB)
29431 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
29432 else if (MACHOPIC_PURE)
29434 if (TARGET_DEEP_BRANCH_PREDICTION)
29435 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
29437 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
29440 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
29442 fprintf (file, "%s:\n", stub);
29443 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
29445 if (MACHOPIC_ATT_STUB)
29447 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
29449 else if (MACHOPIC_PURE)
29452 if (TARGET_DEEP_BRANCH_PREDICTION)
29454 /* 25-byte PIC stub using "CALL get_pc_thunk". */
29455 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
29456 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
29457 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n", label, lazy_ptr_name, label);
29461 /* 26-byte PIC stub using inline picbase: "CALL L42 ! L42: pop %ecx". */
29462 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%ecx\n", label, label);
29463 fprintf (file, "\tmovl %s-LPC$%d(%%ecx),%%ecx\n", lazy_ptr_name, label);
29465 fprintf (file, "\tjmp\t*%%ecx\n");
29468 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
29470 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
29471 it needs no stub-binding-helper. */
29472 if (MACHOPIC_ATT_STUB)
29475 fprintf (file, "%s:\n", binder_name);
29479 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
29480 fprintf (file, "\tpushl\t%%ecx\n");
29483 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
29485 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
29487 /* N.B. Keep the correspondence of these
29488 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
29489 old-pic/new-pic/non-pic stubs; altering this will break
29490 compatibility with existing dylibs. */
29494 if (TARGET_DEEP_BRANCH_PREDICTION)
29495 /* 25-byte PIC stub using "CALL get_pc_thunk". */
29496 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
29498 /* 26-byte PIC stub using inline picbase: "CALL L42 ! L42: pop %ecx". */
29499 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
29502 /* 16-byte -mdynamic-no-pic stub. */
29503 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
29505 fprintf (file, "%s:\n", lazy_ptr_name);
29506 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
29507 fprintf (file, ASM_LONG "%s\n", binder_name);
29509 #endif /* TARGET_MACHO */
29511 /* Order the registers for the register allocator. */
29514 x86_order_regs_for_local_alloc (void)
29519 /* First allocate the local general purpose registers. */
29520 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
29521 if (GENERAL_REGNO_P (i) && call_used_regs[i])
29522 reg_alloc_order [pos++] = i;
29524 /* Global general purpose registers. */
29525 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
29526 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
29527 reg_alloc_order [pos++] = i;
29529 /* x87 registers come first in case we are doing FP math using them. */
29531 if (!TARGET_SSE_MATH)
29532 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
29533 reg_alloc_order [pos++] = i;
29535 /* SSE registers. */
29536 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
29537 reg_alloc_order [pos++] = i;
29538 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
29539 reg_alloc_order [pos++] = i;
29541 /* x87 registers. */
29542 if (TARGET_SSE_MATH)
29543 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
29544 reg_alloc_order [pos++] = i;
29546 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
29547 reg_alloc_order [pos++] = i;
29549 /* Initialize the rest of the array, as we do not allocate some registers at all. */
29551 while (pos < FIRST_PSEUDO_REGISTER)
29552 reg_alloc_order [pos++] = 0;
29555 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
29556 in struct attribute_spec handler. */
29558 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
29560 int flags ATTRIBUTE_UNUSED,
29561 bool *no_add_attrs)
29563 if (TREE_CODE (*node) != FUNCTION_TYPE
29564 && TREE_CODE (*node) != METHOD_TYPE
29565 && TREE_CODE (*node) != FIELD_DECL
29566 && TREE_CODE (*node) != TYPE_DECL)
29568 warning (OPT_Wattributes, "%qE attribute only applies to functions",
29570 *no_add_attrs = true;
29575 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
29577 *no_add_attrs = true;
29580 if (is_attribute_p ("callee_pop_aggregate_return", name))
29584 cst = TREE_VALUE (args);
29585 if (TREE_CODE (cst) != INTEGER_CST)
29587 warning (OPT_Wattributes,
29588 "%qE attribute requires an integer constant argument",
29590 *no_add_attrs = true;
29592 else if (compare_tree_int (cst, 0) != 0
29593 && compare_tree_int (cst, 1) != 0)
29595 warning (OPT_Wattributes,
29596 "argument to %qE attribute is neither zero, nor one",
29598 *no_add_attrs = true;
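/* An illustrative use of the attribute from user code (a sketch, not
   part of this file); the 0/1 argument selects whether the callee pops
   the hidden pointer through which the aggregate is returned:

     struct big { int x[4]; };
     struct big ret_big (void)
       __attribute__ ((callee_pop_aggregate_return (1)));
*/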
29607 /* Handle a "ms_abi" or "sysv_abi" attribute; arguments as in
29608 struct attribute_spec.handler. */
29610 ix86_handle_abi_attribute (tree *node, tree name,
29611 tree args ATTRIBUTE_UNUSED,
29612 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
29614 if (TREE_CODE (*node) != FUNCTION_TYPE
29615 && TREE_CODE (*node) != METHOD_TYPE
29616 && TREE_CODE (*node) != FIELD_DECL
29617 && TREE_CODE (*node) != TYPE_DECL)
29619 warning (OPT_Wattributes, "%qE attribute only applies to functions",
29621 *no_add_attrs = true;
29626 warning (OPT_Wattributes, "%qE attribute only available for 64-bit",
29628 *no_add_attrs = true;
29632 /* The ms_abi and sysv_abi attributes are mutually exclusive. */
29633 if (is_attribute_p ("ms_abi", name))
29635 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
29637 error ("ms_abi and sysv_abi attributes are not compatible");
29642 else if (is_attribute_p ("sysv_abi", name))
29644 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
29646 error ("ms_abi and sysv_abi attributes are not compatible");
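/* Illustrative usage from user code (a sketch, not part of this file):
   in 64-bit code either attribute selects the other ABI for individual
   functions.

     int msfunc (int a, int b, int c, int d) __attribute__ ((ms_abi));
     int svfunc (int a, int b, int c, int d) __attribute__ ((sysv_abi));
*/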
29655 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
29656 struct attribute_spec.handler. */
29658 ix86_handle_struct_attribute (tree *node, tree name,
29659 tree args ATTRIBUTE_UNUSED,
29660 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
29663 if (DECL_P (*node))
29665 if (TREE_CODE (*node) == TYPE_DECL)
29666 type = &TREE_TYPE (*node);
29671 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
29672 || TREE_CODE (*type) == UNION_TYPE)))
29674 warning (OPT_Wattributes, "%qE attribute ignored",
29676 *no_add_attrs = true;
29679 else if ((is_attribute_p ("ms_struct", name)
29680 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
29681 || ((is_attribute_p ("gcc_struct", name)
29682 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
29684 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
29686 *no_add_attrs = true;
29693 ix86_handle_fndecl_attribute (tree *node, tree name,
29694 tree args ATTRIBUTE_UNUSED,
29695 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
29697 if (TREE_CODE (*node) != FUNCTION_DECL)
29699 warning (OPT_Wattributes, "%qE attribute only applies to functions",
29701 *no_add_attrs = true;
29707 ix86_ms_bitfield_layout_p (const_tree record_type)
29709 return ((TARGET_MS_BITFIELD_LAYOUT
29710 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
29711 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
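/* A concrete (illustrative) effect of the choice above: MS layout gives
   a bit-field a storage unit of its own declared type whenever the
   previous member has a different type, so

     struct __attribute__ ((ms_struct)) s { char c; int i : 4; };

   occupies 8 bytes, while the default gcc_struct layout packs the
   bit-field next to the char and needs only 4.  */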
29714 /* Returns an expression indicating where the this parameter is
29715 located on entry to the FUNCTION. */
29718 x86_this_parameter (tree function)
29720 tree type = TREE_TYPE (function);
29721 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
29726 const int *parm_regs;
29728 if (ix86_function_type_abi (type) == MS_ABI)
29729 parm_regs = x86_64_ms_abi_int_parameter_registers;
29731 parm_regs = x86_64_int_parameter_registers;
29732 return gen_rtx_REG (DImode, parm_regs[aggr]);
29735 nregs = ix86_function_regparm (type, function);
29737 if (nregs > 0 && !stdarg_p (type))
29741 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
29742 regno = aggr ? DX_REG : CX_REG;
29743 else if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type)))
29747 return gen_rtx_MEM (SImode,
29748 plus_constant (stack_pointer_rtx, 4));
29757 return gen_rtx_MEM (SImode,
29758 plus_constant (stack_pointer_rtx, 4));
29761 return gen_rtx_REG (SImode, regno);
29764 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
29767 /* Determine whether x86_output_mi_thunk can succeed. */
29770 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
29771 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
29772 HOST_WIDE_INT vcall_offset, const_tree function)
29774 /* 64-bit can handle anything. */
29778 /* For 32-bit, everything's fine if we have one free register. */
29779 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
29782 /* Need a free register for vcall_offset. */
29786 /* Need a free register for GOT references. */
29787 if (flag_pic && !targetm.binds_local_p (function))
29790 /* Otherwise ok. */
29794 /* Output the assembler code for a thunk function. THUNK_DECL is the
29795 declaration for the thunk function itself, FUNCTION is the decl for
29796 the target function. DELTA is an immediate constant offset to be
29797 added to THIS. If VCALL_OFFSET is nonzero, the word at
29798 *(*this + vcall_offset) should be added to THIS. */
29801 x86_output_mi_thunk (FILE *file,
29802 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
29803 HOST_WIDE_INT vcall_offset, tree function)
29806 rtx this_param = x86_this_parameter (function);
29809 /* Make sure unwind info is emitted for the thunk if needed. */
29810 final_start_function (emit_barrier (), file, 1);
29812 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
29813 pull it in now and let DELTA benefit. */
29814 if (REG_P (this_param))
29815 this_reg = this_param;
29816 else if (vcall_offset)
29818 /* Put the this parameter into %eax. */
29819 xops[0] = this_param;
29820 xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
29821 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
29824 this_reg = NULL_RTX;
29826 /* Adjust the this parameter by a fixed constant. */
29829 xops[0] = GEN_INT (delta);
29830 xops[1] = this_reg ? this_reg : this_param;
29833 if (!x86_64_general_operand (xops[0], DImode))
29835 tmp = gen_rtx_REG (DImode, R10_REG);
29837 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
29839 xops[1] = this_param;
29841 if (x86_maybe_negate_const_int (&xops[0], DImode))
29842 output_asm_insn ("sub{q}\t{%0, %1|%1, %0}", xops);
29844 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
29846 else if (x86_maybe_negate_const_int (&xops[0], SImode))
29847 output_asm_insn ("sub{l}\t{%0, %1|%1, %0}", xops);
29849 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
29852 /* Adjust the this parameter by a value stored in the vtable. */
29856 tmp = gen_rtx_REG (DImode, R10_REG);
29859 int tmp_regno = CX_REG;
29860 if (lookup_attribute ("fastcall",
29861 TYPE_ATTRIBUTES (TREE_TYPE (function)))
29862 || lookup_attribute ("thiscall",
29863 TYPE_ATTRIBUTES (TREE_TYPE (function))))
29864 tmp_regno = AX_REG;
29865 tmp = gen_rtx_REG (SImode, tmp_regno);
29868 xops[0] = gen_rtx_MEM (Pmode, this_reg);
29870 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
29872 /* Adjust the this parameter. */
29873 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
29874 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
29876 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
29877 xops[0] = GEN_INT (vcall_offset);
29879 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
29880 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
29882 xops[1] = this_reg;
29883 output_asm_insn ("add%z1\t{%0, %1|%1, %0}", xops);
29886 /* If necessary, drop THIS back to its stack slot. */
29887 if (this_reg && this_reg != this_param)
29889 xops[0] = this_reg;
29890 xops[1] = this_param;
29891 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
29894 xops[0] = XEXP (DECL_RTL (function), 0);
29897 if (!flag_pic || targetm.binds_local_p (function)
29898 || DEFAULT_ABI == MS_ABI)
29899 output_asm_insn ("jmp\t%P0", xops);
29900 /* All thunks should be in the same object as their target,
29901 and thus binds_local_p should be true. */
29902 else if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
29903 gcc_unreachable ();
29906 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
29907 tmp = gen_rtx_CONST (Pmode, tmp);
29908 tmp = gen_rtx_MEM (QImode, tmp);
29910 output_asm_insn ("jmp\t%A0", xops);
29915 if (!flag_pic || targetm.binds_local_p (function))
29916 output_asm_insn ("jmp\t%P0", xops);
29921 rtx sym_ref = XEXP (DECL_RTL (function), 0);
29922 if (TARGET_MACHO_BRANCH_ISLANDS)
29923 sym_ref = (gen_rtx_SYMBOL_REF
29925 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
29926 tmp = gen_rtx_MEM (QImode, sym_ref);
29928 output_asm_insn ("jmp\t%0", xops);
29931 #endif /* TARGET_MACHO */
29933 tmp = gen_rtx_REG (SImode, CX_REG);
29934 output_set_got (tmp, NULL_RTX);
29937 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
29938 output_asm_insn ("jmp\t{*}%1", xops);
29941 final_end_function ();
29945 x86_file_start (void)
29947 default_file_start ();
29949 darwin_file_start ();
29951 if (X86_FILE_START_VERSION_DIRECTIVE)
29952 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
29953 if (X86_FILE_START_FLTUSED)
29954 fputs ("\t.global\t__fltused\n", asm_out_file);
29955 if (ix86_asm_dialect == ASM_INTEL)
29956 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
29960 x86_field_alignment (tree field, int computed)
29962 enum machine_mode mode;
29963 tree type = TREE_TYPE (field);
29965 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
29967 mode = TYPE_MODE (strip_array_types (type));
29968 if (mode == DFmode || mode == DCmode
29969 || GET_MODE_CLASS (mode) == MODE_INT
29970 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
29971 return MIN (32, computed);
29975 /* Output assembler code to FILE to increment profiler label # LABELNO
29976 for profiling a function entry. */
29978 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
29980 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
29985 #ifndef NO_PROFILE_COUNTERS
29986 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
29989 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
29990 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
29992 fprintf (file, "\tcall\t%s\n", mcount_name);
29996 #ifndef NO_PROFILE_COUNTERS
29997 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
30000 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
30004 #ifndef NO_PROFILE_COUNTERS
30005 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
30008 fprintf (file, "\tcall\t%s\n", mcount_name);
30012 /* We don't have exact information about the insn sizes, but we may assume
30013 quite safely that we are informed about all 1 byte insns and memory
30014 address sizes. This is enough to eliminate unnecessary padding in the vast majority of cases. */
30018 min_insn_size (rtx insn)
30022 if (!INSN_P (insn) || !active_insn_p (insn))
30025 /* Discard alignments we've emitted, and jump-table data. */
30026 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
30027 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
30029 if (JUMP_TABLE_DATA_P (insn))
30032 /* Important case - calls are always 5 bytes.
30033 It is common to have many calls in a row. */
30035 && symbolic_reference_mentioned_p (PATTERN (insn))
30036 && !SIBLING_CALL_P (insn))
30038 len = get_attr_length (insn);
30042 /* For normal instructions we rely on get_attr_length being exact,
30043 with a few exceptions. */
30044 if (!JUMP_P (insn))
30046 enum attr_type type = get_attr_type (insn);
30051 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
30052 || asm_noperands (PATTERN (insn)) >= 0)
30059 /* Otherwise trust get_attr_length. */
30063 l = get_attr_length_address (insn);
30064 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
30073 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
30075 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in a 16-byte window. */
30079 ix86_avoid_jump_mispredicts (void)
30081 rtx insn, start = get_insns ();
30082 int nbytes = 0, njumps = 0;
30085 /* Look for all minimal intervals of instructions containing 4 jumps.
30086 The intervals are bounded by START and INSN. NBYTES is the total
30087 size of instructions in the interval including INSN and not including
30088 START. When NBYTES is smaller than 16 bytes, it is possible
30089 that the end of START and the end of INSN land in the same 16-byte window.
30091 The smallest window offset at which INSN can start corresponds to START
30092 ending at offset 0; the offset of INSN is then NBYTES - sizeof (INSN).
30093 We add a p2align to the 16-byte window with max skip 15 - NBYTES + sizeof (INSN).
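/* A worked instance (illustrative): suppose an interval of 4 jumps has
   NBYTES = 12 and INSN itself is 2 bytes.  If START ends at window
   offset 0, INSN occupies offsets 10-11 and all four jumps share one
   16-byte window.  Padding before INSN with max skip 15 - 12 + 2 = 5
   pushes INSN past the window boundary in that worst case.  */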
30095 for (insn = start; insn; insn = NEXT_INSN (insn))
30099 if (LABEL_P (insn))
30101 int align = label_to_alignment (insn);
30102 int max_skip = label_to_max_skip (insn);
30106 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
30107 already in the current 16 byte page, because otherwise
30108 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
30109 bytes to reach 16 byte boundary. */
30111 || (align <= 3 && max_skip != (1 << align) - 1))
30114 fprintf (dump_file, "Label %i with max_skip %i\n",
30115 INSN_UID (insn), max_skip);
30118 while (nbytes + max_skip >= 16)
30120 start = NEXT_INSN (start);
30121 if ((JUMP_P (start)
30122 && GET_CODE (PATTERN (start)) != ADDR_VEC
30123 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
30125 njumps--, isjump = 1;
30128 nbytes -= min_insn_size (start);
30134 min_size = min_insn_size (insn);
30135 nbytes += min_size;
30137 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
30138 INSN_UID (insn), min_size);
30140 && GET_CODE (PATTERN (insn)) != ADDR_VEC
30141 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
30149 start = NEXT_INSN (start);
30150 if ((JUMP_P (start)
30151 && GET_CODE (PATTERN (start)) != ADDR_VEC
30152 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
30154 njumps--, isjump = 1;
30157 nbytes -= min_insn_size (start);
30159 gcc_assert (njumps >= 0);
30161 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
30162 INSN_UID (start), INSN_UID (insn), nbytes);
30164 if (njumps == 3 && isjump && nbytes < 16)
30166 int padsize = 15 - nbytes + min_insn_size (insn);
30169 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
30170 INSN_UID (insn), padsize);
30171 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
30177 /* AMD Athlon works faster
30178 when RET is not the destination of a conditional jump or directly preceded
30179 by another jump instruction. We avoid the penalty by inserting a NOP just
30180 before the RET instruction in such cases. */
30182 ix86_pad_returns (void)
30187 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
30189 basic_block bb = e->src;
30190 rtx ret = BB_END (bb);
30192 bool replace = false;
30194 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
30195 || optimize_bb_for_size_p (bb))
30197 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
30198 if (active_insn_p (prev) || LABEL_P (prev))
30200 if (prev && LABEL_P (prev))
30205 FOR_EACH_EDGE (e, ei, bb->preds)
30206 if (EDGE_FREQUENCY (e) && e->src->index >= 0
30207 && !(e->flags & EDGE_FALLTHRU))
30212 prev = prev_active_insn (ret);
30214 && ((JUMP_P (prev) && any_condjump_p (prev))
30217 /* Empty functions get a branch mispredict even when
30218 the jump destination is not visible to us. */
30219 if (!prev && !optimize_function_for_size_p (cfun))
30224 emit_jump_insn_before (gen_return_internal_long (), ret);
30230 /* Count the minimum number of instructions in BB. Return 4 if the
30231 number of instructions >= 4. */
30234 ix86_count_insn_bb (basic_block bb)
30237 int insn_count = 0;
30239 /* Count number of instructions in this block. Return 4 if the number
30240 of instructions >= 4. */
30241 FOR_BB_INSNS (bb, insn)
30243 /* This only happens in exit blocks. */
30245 && GET_CODE (PATTERN (insn)) == RETURN)
30248 if (NONDEBUG_INSN_P (insn)
30249 && GET_CODE (PATTERN (insn)) != USE
30250 && GET_CODE (PATTERN (insn)) != CLOBBER)
30253 if (insn_count >= 4)
30262 /* Count the minimum number of instructions in a code path through BB.
30263 Return 4 if the number of instructions >= 4. */
30266 ix86_count_insn (basic_block bb)
30270 int min_prev_count;
30272 /* Only bother counting instructions along paths with no
30273 more than 2 basic blocks between entry and exit. Given
30274 that BB has an edge to exit, determine if a predecessor
30275 of BB has an edge from entry. If so, compute the number
30276 of instructions in the predecessor block. If there
30277 happen to be multiple such blocks, compute the minimum. */
30278 min_prev_count = 4;
30279 FOR_EACH_EDGE (e, ei, bb->preds)
30282 edge_iterator prev_ei;
30284 if (e->src == ENTRY_BLOCK_PTR)
30286 min_prev_count = 0;
30289 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
30291 if (prev_e->src == ENTRY_BLOCK_PTR)
30293 int count = ix86_count_insn_bb (e->src);
30294 if (count < min_prev_count)
30295 min_prev_count = count;
30301 if (min_prev_count < 4)
30302 min_prev_count += ix86_count_insn_bb (bb);
30304 return min_prev_count;
30307 /* Pad a short function to 4 instructions. */
30310 ix86_pad_short_function (void)
30315 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
30317 rtx ret = BB_END (e->src);
30318 if (JUMP_P (ret) && GET_CODE (PATTERN (ret)) == RETURN)
30320 int insn_count = ix86_count_insn (e->src);
30322 /* Pad short function. */
30323 if (insn_count < 4)
30327 /* Find epilogue. */
30330 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
30331 insn = PREV_INSN (insn);
30336 /* Two NOPs count as one instruction. */
30337 insn_count = 2 * (4 - insn_count);
30338 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
30344 /* Implement machine-specific optimizations. We implement padding of returns
30345 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
30349 /* We are freeing block_for_insn in the toplev to keep compatibility
30350 with old MDEP_REORGS that are not CFG based. Recompute it now. */
30351 compute_bb_for_insn ();
30353 if (optimize && optimize_function_for_speed_p (cfun))
30355 if (TARGET_PAD_SHORT_FUNCTION)
30356 ix86_pad_short_function ();
30357 else if (TARGET_PAD_RETURNS)
30358 ix86_pad_returns ();
30359 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
30360 if (TARGET_FOUR_JUMP_LIMIT)
30361 ix86_avoid_jump_mispredicts ();
30365 /* Run the vzeroupper optimization if needed. */
30366 if (TARGET_VZEROUPPER)
30367 move_or_delete_vzeroupper ();
30370 /* Return nonzero when a QImode register that must be represented via a REX prefix is used. */
30373 x86_extended_QIreg_mentioned_p (rtx insn)
30376 extract_insn_cached (insn);
30377 for (i = 0; i < recog_data.n_operands; i++)
30378 if (REG_P (recog_data.operand[i])
30379 && REGNO (recog_data.operand[i]) > BX_REG)
30384 /* Return nonzero when P points to register encoded via REX prefix.
30385 Called via for_each_rtx. */
30387 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
30389 unsigned int regno;
30392 regno = REGNO (*p);
30393 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
30396 /* Return true when INSN mentions a register that must be encoded using a REX prefix. */
30399 x86_extended_reg_mentioned_p (rtx insn)
30401 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
30402 extended_reg_mentioned_1, NULL);
30405 /* If profitable, negate (without causing overflow) integer constant
30406 of mode MODE at location LOC. Return true in this case. */
30408 x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
30412 if (!CONST_INT_P (*loc))
30418 /* DImode x86_64 constants must fit in 32 bits. */
30419 gcc_assert (x86_64_immediate_operand (*loc, mode));
30430 gcc_unreachable ();
30433 /* Avoid overflows. */
30434 if (mode_signbit_p (mode, *loc))
30437 val = INTVAL (*loc);
30439 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
30440 Exception: -128 encodes smaller than 128, so we swap sign and op to favor the -128 immediate. */
30441 if ((val < 0 && val != -128)
30444 *loc = GEN_INT (-val);
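/* Concretely (illustrative encodings): "addl $-4, %eax" and
   "subl $4, %eax" both fit a sign-extended 8-bit immediate, so the SUB
   spelling is chosen purely for readability.  But 128 needs a 32-bit
   immediate while -128 still fits in 8 bits, so "addl $-128" is kept
   as-is and "addl $128" becomes "subl $-128".  */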
30451 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
30452 optabs would emit if we didn't have TFmode patterns. */
30455 x86_emit_floatuns (rtx operands[2])
30457 rtx neglab, donelab, i0, i1, f0, in, out;
30458 enum machine_mode mode, inmode;
30460 inmode = GET_MODE (operands[1]);
30461 gcc_assert (inmode == SImode || inmode == DImode);
30464 in = force_reg (inmode, operands[1]);
30465 mode = GET_MODE (out);
30466 neglab = gen_label_rtx ();
30467 donelab = gen_label_rtx ();
30468 f0 = gen_reg_rtx (mode);
30470 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
30472 expand_float (out, in, 0);
30474 emit_jump_insn (gen_jump (donelab));
30477 emit_label (neglab);
30479 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
30481 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
30483 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
30485 expand_float (f0, i0, 0);
30487 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
30489 emit_label (donelab);
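/* An equivalent C sketch of the sequence emitted above (illustrative
   only, assuming a 64-bit input; not part of the compiler):

     double u64_to_double (unsigned long long u)
     {
       if ((long long) u >= 0)
         return (double) (long long) u;     // signed convert suffices
       // Halve, keeping the low bit so the final doubling rounds the
       // same way the exact value would, then convert and double.
       unsigned long long h = (u >> 1) | (u & 1);
       double d = (double) (long long) h;
       return d + d;
     }
*/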
30492 /* AVX does not support 32-byte integer vector operations,
30493 thus the longest vector we are faced with is V16QImode. */
30494 #define MAX_VECT_LEN 16
30496 struct expand_vec_perm_d
30498 rtx target, op0, op1;
30499 unsigned char perm[MAX_VECT_LEN];
30500 enum machine_mode vmode;
30501 unsigned char nelt;
30505 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
30506 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
30508 /* Get a vector mode of the same size as the original but with elements
30509 twice as wide. This is only guaranteed to apply to integral vectors. */
30511 static inline enum machine_mode
30512 get_mode_wider_vector (enum machine_mode o)
30514 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
30515 enum machine_mode n = GET_MODE_WIDER_MODE (o);
30516 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
30517 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
30521 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
30522 with all elements equal to VAR. Return true if successful. */
30525 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
30526 rtx target, rtx val)
30549 /* First attempt to recognize VAL as-is. */
30550 dup = gen_rtx_VEC_DUPLICATE (mode, val);
30551 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
30552 if (recog_memoized (insn) < 0)
30555 /* If that fails, force VAL into a register. */
30558 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
30559 seq = get_insns ();
30562 emit_insn_before (seq, insn);
30564 ok = recog_memoized (insn) >= 0;
30573 if (TARGET_SSE || TARGET_3DNOW_A)
30577 val = gen_lowpart (SImode, val);
30578 x = gen_rtx_TRUNCATE (HImode, val);
30579 x = gen_rtx_VEC_DUPLICATE (mode, x);
30580 emit_insn (gen_rtx_SET (VOIDmode, target, x));
30593 struct expand_vec_perm_d dperm;
30597 memset (&dperm, 0, sizeof (dperm));
30598 dperm.target = target;
30599 dperm.vmode = mode;
30600 dperm.nelt = GET_MODE_NUNITS (mode);
30601 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
30603 /* Extend to SImode using a paradoxical SUBREG. */
30604 tmp1 = gen_reg_rtx (SImode);
30605 emit_move_insn (tmp1, gen_lowpart (SImode, val));
30607 /* Insert the SImode value as low element of a V4SImode vector. */
30608 tmp2 = gen_lowpart (V4SImode, dperm.op0);
30609 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
30611 ok = (expand_vec_perm_1 (&dperm)
30612 || expand_vec_perm_broadcast_1 (&dperm));
30624 /* Replicate the value once into the next wider mode and recurse. */
30626 enum machine_mode smode, wsmode, wvmode;
30629 smode = GET_MODE_INNER (mode);
30630 wvmode = get_mode_wider_vector (mode);
30631 wsmode = GET_MODE_INNER (wvmode);
30633 val = convert_modes (wsmode, smode, val, true);
30634 x = expand_simple_binop (wsmode, ASHIFT, val,
30635 GEN_INT (GET_MODE_BITSIZE (smode)),
30636 NULL_RTX, 1, OPTAB_LIB_WIDEN);
30637 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
30639 x = gen_lowpart (wvmode, target);
30640 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
30648 enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
30649 rtx x = gen_reg_rtx (hvmode);
30651 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
30654 x = gen_rtx_VEC_CONCAT (mode, x, x);
30655 emit_insn (gen_rtx_SET (VOIDmode, target, x));
30664 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
30665 whose ONE_VAR element is VAR, and other elements are zero. Return true if successful. */
30669 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
30670 rtx target, rtx var, int one_var)
30672 enum machine_mode vsimode;
30675 bool use_vector_set = false;
30680 /* For SSE4.1, we normally use vector set. But if the second
30681 element is zero and inter-unit moves are OK, we use movq instead. */
30683 use_vector_set = (TARGET_64BIT
30685 && !(TARGET_INTER_UNIT_MOVES
30691 use_vector_set = TARGET_SSE4_1;
30694 use_vector_set = TARGET_SSE2;
30697 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
30704 use_vector_set = TARGET_AVX;
30707 /* Use ix86_expand_vector_set in 64bit mode only. */
30708 use_vector_set = TARGET_AVX && TARGET_64BIT;
30714 if (use_vector_set)
30716 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
30717 var = force_reg (GET_MODE_INNER (mode), var);
30718 ix86_expand_vector_set (mmx_ok, target, var, one_var);
30734 var = force_reg (GET_MODE_INNER (mode), var);
30735 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
30736 emit_insn (gen_rtx_SET (VOIDmode, target, x));
30741 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
30742 new_target = gen_reg_rtx (mode);
30744 new_target = target;
30745 var = force_reg (GET_MODE_INNER (mode), var);
30746 x = gen_rtx_VEC_DUPLICATE (mode, var);
30747 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
30748 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
30751 /* We need to shuffle the value to the correct position, so
30752 create a new pseudo to store the intermediate result. */
30754 /* With SSE2, we can use the integer shuffle insns. */
30755 if (mode != V4SFmode && TARGET_SSE2)
30757 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
30759 GEN_INT (one_var == 1 ? 0 : 1),
30760 GEN_INT (one_var == 2 ? 0 : 1),
30761 GEN_INT (one_var == 3 ? 0 : 1)));
30762 if (target != new_target)
30763 emit_move_insn (target, new_target);
30767 /* Otherwise convert the intermediate result to V4SFmode and
30768 use the SSE1 shuffle instructions. */
30769 if (mode != V4SFmode)
30771 tmp = gen_reg_rtx (V4SFmode);
30772 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
30777 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
30779 GEN_INT (one_var == 1 ? 0 : 1),
30780 GEN_INT (one_var == 2 ? 0+4 : 1+4),
30781 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
30783 if (mode != V4SFmode)
30784 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
30785 else if (tmp != target)
30786 emit_move_insn (target, tmp);
30788 else if (target != new_target)
30789 emit_move_insn (target, new_target);
30794 vsimode = V4SImode;
30800 vsimode = V2SImode;
30806 /* Zero extend the variable element to SImode and recurse. */
30807 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
30809 x = gen_reg_rtx (vsimode);
30810 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
30812 gcc_unreachable ();
30814 emit_move_insn (target, gen_lowpart (mode, x));
30822 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
30823 consisting of the values in VALS. It is known that all elements
30824 except ONE_VAR are constants. Return true if successful. */
30827 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
30828 rtx target, rtx vals, int one_var)
30830 rtx var = XVECEXP (vals, 0, one_var);
30831 enum machine_mode wmode;
30834 const_vec = copy_rtx (vals);
30835 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
30836 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
30844 /* For the two element vectors, it's just as easy to use
30845 the general case. */
30849 /* Use ix86_expand_vector_set in 64bit mode only. */
30872 /* There's no way to set one QImode entry easily. Combine
30873 the variable value with its adjacent constant value, and
30874 promote to an HImode set. */
30875 x = XVECEXP (vals, 0, one_var ^ 1);
30878 var = convert_modes (HImode, QImode, var, true);
30879 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
30880 NULL_RTX, 1, OPTAB_LIB_WIDEN);
30881 x = GEN_INT (INTVAL (x) & 0xff);
30885 var = convert_modes (HImode, QImode, var, true);
30886 x = gen_int_mode (INTVAL (x) << 8, HImode);
30888 if (x != const0_rtx)
30889 var = expand_simple_binop (HImode, IOR, var, x, var,
30890 1, OPTAB_LIB_WIDEN);
30892 x = gen_reg_rtx (wmode);
30893 emit_move_insn (x, gen_lowpart (wmode, const_vec));
30894 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
30896 emit_move_insn (target, gen_lowpart (mode, x));
30903 emit_move_insn (target, const_vec);
30904 ix86_expand_vector_set (mmx_ok, target, var, one_var);
30908 /* A subroutine of ix86_expand_vector_init_general. Use vector
30909 concatenate to handle the most general case: all values variable,
30910 and none identical. */
30913 ix86_expand_vector_init_concat (enum machine_mode mode,
30914 rtx target, rtx *ops, int n)
30916 enum machine_mode cmode, hmode = VOIDmode;
30917 rtx first[8], second[4];
30957 gcc_unreachable ();
30960 if (!register_operand (ops[1], cmode))
30961 ops[1] = force_reg (cmode, ops[1]);
30962 if (!register_operand (ops[0], cmode))
30963 ops[0] = force_reg (cmode, ops[0]);
30964 emit_insn (gen_rtx_SET (VOIDmode, target,
30965 gen_rtx_VEC_CONCAT (mode, ops[0],
30985 gcc_unreachable ();
31001 gcc_unreachable ();
31006 /* FIXME: We process inputs backward to help RA. PR 36222. */
31009 for (; i > 0; i -= 2, j--)
31011 first[j] = gen_reg_rtx (cmode);
31012 v = gen_rtvec (2, ops[i - 1], ops[i]);
31013 ix86_expand_vector_init (false, first[j],
31014 gen_rtx_PARALLEL (cmode, v));
31020 gcc_assert (hmode != VOIDmode);
31021 for (i = j = 0; i < n; i += 2, j++)
31023 second[j] = gen_reg_rtx (hmode);
31024 ix86_expand_vector_init_concat (hmode, second [j],
31028 ix86_expand_vector_init_concat (mode, target, second, n);
31031 ix86_expand_vector_init_concat (mode, target, first, n);
31035 gcc_unreachable ();
31039 /* A subroutine of ix86_expand_vector_init_general. Use vector
31040 interleave to handle the most general case: all values variable,
31041 and none identical. */
31044 ix86_expand_vector_init_interleave (enum machine_mode mode,
31045 rtx target, rtx *ops, int n)
31047 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
31050 rtx (*gen_load_even) (rtx, rtx, rtx);
31051 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
31052 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
31057 gen_load_even = gen_vec_setv8hi;
31058 gen_interleave_first_low = gen_vec_interleave_lowv4si;
31059 gen_interleave_second_low = gen_vec_interleave_lowv2di;
31060 inner_mode = HImode;
31061 first_imode = V4SImode;
31062 second_imode = V2DImode;
31063 third_imode = VOIDmode;
31066 gen_load_even = gen_vec_setv16qi;
31067 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
31068 gen_interleave_second_low = gen_vec_interleave_lowv4si;
31069 inner_mode = QImode;
31070 first_imode = V8HImode;
31071 second_imode = V4SImode;
31072 third_imode = V2DImode;
31075 gcc_unreachable ();
31078 for (i = 0; i < n; i++)
31080 /* Extend the odd element to SImode using a paradoxical SUBREG. */
31081 op0 = gen_reg_rtx (SImode);
31082 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
31084 /* Insert the SImode value as low element of V4SImode vector. */
31085 op1 = gen_reg_rtx (V4SImode);
31086 op0 = gen_rtx_VEC_MERGE (V4SImode,
31087 gen_rtx_VEC_DUPLICATE (V4SImode,
31089 CONST0_RTX (V4SImode),
31091 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
31093 /* Cast the V4SImode vector back to a vector in the original mode. */
31094 op0 = gen_reg_rtx (mode);
31095 emit_move_insn (op0, gen_lowpart (mode, op1));
31097 /* Load even elements into the second position. */
31098 emit_insn (gen_load_even (op0,
31099 force_reg (inner_mode,
31103 /* Cast vector to FIRST_IMODE vector. */
31104 ops[i] = gen_reg_rtx (first_imode);
31105 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
31108 /* Interleave low FIRST_IMODE vectors. */
31109 for (i = j = 0; i < n; i += 2, j++)
31111 op0 = gen_reg_rtx (first_imode);
31112 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
31114 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
31115 ops[j] = gen_reg_rtx (second_imode);
31116 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
31119 /* Interleave low SECOND_IMODE vectors. */
31120 switch (second_imode)
31123 for (i = j = 0; i < n / 2; i += 2, j++)
31125 op0 = gen_reg_rtx (second_imode);
31126 emit_insn (gen_interleave_second_low (op0, ops[i],
31129 /* Cast the SECOND_IMODE vector to the THIRD_IMODE vector. */
31131 ops[j] = gen_reg_rtx (third_imode);
31132 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
31134 second_imode = V2DImode;
31135 gen_interleave_second_low = gen_vec_interleave_lowv2di;
31139 op0 = gen_reg_rtx (second_imode);
31140 emit_insn (gen_interleave_second_low (op0, ops[0],
31143 /* Cast the SECOND_IMODE vector back to a vector of the original mode. */
31145 emit_insn (gen_rtx_SET (VOIDmode, target,
31146 gen_lowpart (mode, op0)));
31150 gcc_unreachable ();
31154 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
31155 all values variable, and none identical. */
31158 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
31159 rtx target, rtx vals)
31161 rtx ops[32], op0, op1;
31162 enum machine_mode half_mode = VOIDmode;
31169 if (!mmx_ok && !TARGET_SSE)
31181 n = GET_MODE_NUNITS (mode);
31182 for (i = 0; i < n; i++)
31183 ops[i] = XVECEXP (vals, 0, i);
31184 ix86_expand_vector_init_concat (mode, target, ops, n);
31188 half_mode = V16QImode;
31192 half_mode = V8HImode;
31196 n = GET_MODE_NUNITS (mode);
31197 for (i = 0; i < n; i++)
31198 ops[i] = XVECEXP (vals, 0, i);
31199 op0 = gen_reg_rtx (half_mode);
31200 op1 = gen_reg_rtx (half_mode);
31201 ix86_expand_vector_init_interleave (half_mode, op0, ops,
31203 ix86_expand_vector_init_interleave (half_mode, op1,
31204 &ops [n >> 1], n >> 2);
31205 emit_insn (gen_rtx_SET (VOIDmode, target,
31206 gen_rtx_VEC_CONCAT (mode, op0, op1)));
31210 if (!TARGET_SSE4_1)
31218 /* Don't use ix86_expand_vector_init_interleave if we can't
31219 move from GPR to SSE register directly. */
31220 if (!TARGET_INTER_UNIT_MOVES)
31223 n = GET_MODE_NUNITS (mode);
31224 for (i = 0; i < n; i++)
31225 ops[i] = XVECEXP (vals, 0, i);
31226 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
31234 gcc_unreachable ();
31238 int i, j, n_elts, n_words, n_elt_per_word;
31239 enum machine_mode inner_mode;
31240 rtx words[4], shift;
31242 inner_mode = GET_MODE_INNER (mode);
31243 n_elts = GET_MODE_NUNITS (mode);
31244 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
31245 n_elt_per_word = n_elts / n_words;
31246 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
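/* For example (illustrative): V8HImode on a 32-bit target gives
   n_elts = 8, n_words = 4 and n_elt_per_word = 2, so each SImode word
   below is assembled as (elt[2i+1] << 16) | elt[2i] before the words
   themselves are combined into the vector.  */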
31248 for (i = 0; i < n_words; ++i)
31250 rtx word = NULL_RTX;
31252 for (j = 0; j < n_elt_per_word; ++j)
31254 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
31255 elt = convert_modes (word_mode, inner_mode, elt, true);
31261 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
31262 word, 1, OPTAB_LIB_WIDEN);
31263 word = expand_simple_binop (word_mode, IOR, word, elt,
31264 word, 1, OPTAB_LIB_WIDEN);
31272 emit_move_insn (target, gen_lowpart (mode, words[0]));
31273 else if (n_words == 2)
31275 rtx tmp = gen_reg_rtx (mode);
31276 emit_clobber (tmp);
31277 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
31278 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
31279 emit_move_insn (target, tmp);
31281 else if (n_words == 4)
31283 rtx tmp = gen_reg_rtx (V4SImode);
31284 gcc_assert (word_mode == SImode);
31285 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
31286 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
31287 emit_move_insn (target, gen_lowpart (mode, tmp));
31290 gcc_unreachable ();
31294 /* Initialize vector TARGET via VALS. Suppress the use of MMX
31295 instructions unless MMX_OK is true. */
31298 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
31300 enum machine_mode mode = GET_MODE (target);
31301 enum machine_mode inner_mode = GET_MODE_INNER (mode);
31302 int n_elts = GET_MODE_NUNITS (mode);
31303 int n_var = 0, one_var = -1;
31304 bool all_same = true, all_const_zero = true;
31308 for (i = 0; i < n_elts; ++i)
31310 x = XVECEXP (vals, 0, i);
31311 if (!(CONST_INT_P (x)
31312 || GET_CODE (x) == CONST_DOUBLE
31313 || GET_CODE (x) == CONST_FIXED))
31314 n_var++, one_var = i;
31315 else if (x != CONST0_RTX (inner_mode))
31316 all_const_zero = false;
31317 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
31321 /* Constants are best loaded from the constant pool. */
31324 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
31328 /* If all values are identical, broadcast the value. */
31330 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
31331 XVECEXP (vals, 0, 0)))
31334 /* Values where only one field is non-constant are best loaded from
31335 the pool and overwritten via move later. */
31339 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
31340 XVECEXP (vals, 0, one_var),
31344 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
31348 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
31352 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
31354 enum machine_mode mode = GET_MODE (target);
31355 enum machine_mode inner_mode = GET_MODE_INNER (mode);
31356 enum machine_mode half_mode;
31357 bool use_vec_merge = false;
31359 static rtx (*gen_extract[6][2]) (rtx, rtx)
31361 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
31362 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
31363 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
31364 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
31365 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
31366 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
31368 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
31370 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
31371 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
31372 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
31373 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
31374 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
31375 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
31385 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
31386 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
31388 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
31390 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
31391 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
31397 use_vec_merge = TARGET_SSE4_1;
31405 /* For the two element vectors, we implement a VEC_CONCAT with
31406 the extraction of the other element. */
31408 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
31409 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
31412 op0 = val, op1 = tmp;
31414 op0 = tmp, op1 = val;
31416 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
31417 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
31422 use_vec_merge = TARGET_SSE4_1;
31429 use_vec_merge = true;
31433 /* tmp = target = A B C D */
31434 tmp = copy_to_reg (target);
31435 /* target = A A B B */
31436 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
31437 /* target = X A B B */
31438 ix86_expand_vector_set (false, target, val, 0);
31439 /* target = A X C D */
31440 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
31441 const1_rtx, const0_rtx,
31442 GEN_INT (2+4), GEN_INT (3+4)));
31446 /* tmp = target = A B C D */
31447 tmp = copy_to_reg (target);
31448 /* tmp = X B C D */
31449 ix86_expand_vector_set (false, tmp, val, 0);
31450 /* target = A B X D */
31451 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
31452 const0_rtx, const1_rtx,
31453 GEN_INT (0+4), GEN_INT (3+4)));
31457 /* tmp = target = A B C D */
31458 tmp = copy_to_reg (target);
31459 /* tmp = X B C D */
31460 ix86_expand_vector_set (false, tmp, val, 0);
31461 /* target = A B C X */
31462 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
31463 const0_rtx, const1_rtx,
31464 GEN_INT (2+4), GEN_INT (0+4)));
31468 gcc_unreachable ();
31473 use_vec_merge = TARGET_SSE4_1;
31477 /* Element 0 handled by vec_merge below. */
31480 use_vec_merge = true;
31486 /* With SSE2, use integer shuffles to swap element 0 and ELT,
31487 store into element 0, then shuffle them back. */
31491 order[0] = GEN_INT (elt);
31492 order[1] = const1_rtx;
31493 order[2] = const2_rtx;
31494 order[3] = GEN_INT (3);
31495 order[elt] = const0_rtx;
31497 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
31498 order[1], order[2], order[3]));
31500 ix86_expand_vector_set (false, target, val, 0);
31502 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
31503 order[1], order[2], order[3]));
31507 /* For SSE1, we have to reuse the V4SF code. */
31508 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
31509 gen_lowpart (SFmode, val), elt);
31514 use_vec_merge = TARGET_SSE2;
31517 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
31521 use_vec_merge = TARGET_SSE4_1;
31528 half_mode = V16QImode;
31534 half_mode = V8HImode;
31540 half_mode = V4SImode;
31546 half_mode = V2DImode;
31552 half_mode = V4SFmode;
31558 half_mode = V2DFmode;
31564 /* Compute offset. */
31568 gcc_assert (i <= 1);
31570 /* Extract the half. */
31571 tmp = gen_reg_rtx (half_mode);
31572 emit_insn (gen_extract[j][i] (tmp, target));
31574 /* Put val in tmp at elt. */
31575 ix86_expand_vector_set (false, tmp, val, elt);
31578 emit_insn (gen_insert[j][i] (target, target, tmp));
31587 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
31588 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
31589 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
31593 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
31595 emit_move_insn (mem, target);
31597 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
31598 emit_move_insn (tmp, val);
31600 emit_move_insn (target, mem);
31605 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
31607 enum machine_mode mode = GET_MODE (vec);
31608 enum machine_mode inner_mode = GET_MODE_INNER (mode);
31609 bool use_vec_extr = false;
31622 use_vec_extr = true;
31626 use_vec_extr = TARGET_SSE4_1;
31638 tmp = gen_reg_rtx (mode);
31639 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
31640 GEN_INT (elt), GEN_INT (elt),
31641 GEN_INT (elt+4), GEN_INT (elt+4)));
31645 tmp = gen_reg_rtx (mode);
31646 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
31650 gcc_unreachable ();
31653 use_vec_extr = true;
31658 use_vec_extr = TARGET_SSE4_1;
31672 tmp = gen_reg_rtx (mode);
31673 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
31674 GEN_INT (elt), GEN_INT (elt),
31675 GEN_INT (elt), GEN_INT (elt)));
31679 tmp = gen_reg_rtx (mode);
31680 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
31684 gcc_unreachable ();
31687 use_vec_extr = true;
31692 /* For SSE1, we have to reuse the V4SF code. */
31693 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
31694 gen_lowpart (V4SFmode, vec), elt);
31700 use_vec_extr = TARGET_SSE2;
31703 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
31707 use_vec_extr = TARGET_SSE4_1;
31711 /* ??? Could extract the appropriate HImode element and shift. */
31718 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
31719 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
31721 /* Let the rtl optimizers know about the zero extension performed. */
31722 if (inner_mode == QImode || inner_mode == HImode)
31724 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
31725 target = gen_lowpart (SImode, target);
31728 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
31732 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
31734 emit_move_insn (mem, vec);
31736 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
31737 emit_move_insn (target, tmp);
31741 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
31742 pattern to reduce; DEST is the destination; IN is the input vector. */
31745 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
31747 rtx tmp1, tmp2, tmp3;
31749 tmp1 = gen_reg_rtx (V4SFmode);
31750 tmp2 = gen_reg_rtx (V4SFmode);
31751 tmp3 = gen_reg_rtx (V4SFmode);
31753 emit_insn (gen_sse_movhlps (tmp1, in, in));
31754 emit_insn (fn (tmp2, tmp1, in));
31756 emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
31757 const1_rtx, const1_rtx,
31758 GEN_INT (1+4), GEN_INT (1+4)));
31759 emit_insn (fn (dest, tmp2, tmp3));
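/* A lane-by-lane trace of the sequence above (illustrative), with
   IN = [a b c d] (low lane first): movhlps gives tmp1 = [c d c d];
   tmp2 = fn (tmp1, in) then holds fn(c,a) in lane 0 and fn(d,b) in
   lane 1; the shufps broadcasts lane 1 into tmp3; and the final fn
   leaves the full reduction fn(fn(c,a), fn(d,b)) in lane 0 of DEST.  */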
31762 /* Target hook for scalar_mode_supported_p. */
31764 ix86_scalar_mode_supported_p (enum machine_mode mode)
31766 if (DECIMAL_FLOAT_MODE_P (mode))
31767 return default_decimal_float_supported_p ();
31768 else if (mode == TFmode)
31771 return default_scalar_mode_supported_p (mode);
31774 /* Implements target hook vector_mode_supported_p. */
31776 ix86_vector_mode_supported_p (enum machine_mode mode)
31778 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
31780 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
31782 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
31784 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
31786 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
31791 /* Target hook for c_mode_for_suffix. */
31792 static enum machine_mode
31793 ix86_c_mode_for_suffix (char suffix)
31803 /* Worker function for TARGET_MD_ASM_CLOBBERS.
31805 We do this in the new i386 backend to maintain source compatibility
31806 with the old cc0-based compiler. */
31809 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
31810 tree inputs ATTRIBUTE_UNUSED,
31813 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
31815 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
31820 /* Implements target vector targetm.asm.encode_section_info. This
31821 is not used by NetWare. */
31823 static void ATTRIBUTE_UNUSED
31824 ix86_encode_section_info (tree decl, rtx rtl, int first)
31826 default_encode_section_info (decl, rtl, first);
31828 if (TREE_CODE (decl) == VAR_DECL
31829 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
31830 && ix86_in_large_data_p (decl))
31831 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
31834 /* Worker function for REVERSE_CONDITION. */
31837 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
31839 return (mode != CCFPmode && mode != CCFPUmode
31840 ? reverse_condition (code)
31841 : reverse_condition_maybe_unordered (code));
31844 /* Output code to perform an x87 FP register move, from OPERANDS[1] to OPERANDS[0]. */
31848 output_387_reg_move (rtx insn, rtx *operands)
31850 if (REG_P (operands[0]))
31852 if (REG_P (operands[1])
31853 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
31855 if (REGNO (operands[0]) == FIRST_STACK_REG)
31856 return output_387_ffreep (operands, 0);
31857 return "fstp\t%y0";
31859 if (STACK_TOP_P (operands[0]))
31860 return "fld%Z1\t%y1";
31863 else if (MEM_P (operands[0]))
31865 gcc_assert (REG_P (operands[1]));
31866 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
31867 return "fstp%Z0\t%y0";
31870 /* There is no non-popping store to memory for XFmode.
31871 So if we need one, follow the store with a load. */
31872 if (GET_MODE (operands[0]) == XFmode)
31873 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
31875 return "fst%Z0\t%y0";
31882 /* Output code to perform a conditional jump to LABEL, if the C2 flag in
31883 the FP status register is set. */
31886 ix86_emit_fp_unordered_jump (rtx label)
31888 rtx reg = gen_reg_rtx (HImode);
31891 emit_insn (gen_x86_fnstsw_1 (reg));
31893 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
31895 emit_insn (gen_x86_sahf_1 (reg));
31897 temp = gen_rtx_REG (CCmode, FLAGS_REG);
31898 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
31902 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
31904 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
31905 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
31908 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
31909 gen_rtx_LABEL_REF (VOIDmode, label),
31911 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
31913 emit_jump_insn (temp);
31914 predict_jump (REG_BR_PROB_BASE * 10 / 100);
31917 /* Output code to perform a log1p XFmode calculation. */
31919 void ix86_emit_i387_log1p (rtx op0, rtx op1)
31921 rtx label1 = gen_label_rtx ();
31922 rtx label2 = gen_label_rtx ();
31924 rtx tmp = gen_reg_rtx (XFmode);
31925 rtx tmp2 = gen_reg_rtx (XFmode);
31928 emit_insn (gen_absxf2 (tmp, op1));
31929 test = gen_rtx_GE (VOIDmode, tmp,
31930 CONST_DOUBLE_FROM_REAL_VALUE (
31931 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
31933 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
31935 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
31936 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
31937 emit_jump (label2);
31939 emit_label (label1);
31940 emit_move_insn (tmp, CONST1_RTX (XFmode));
31941 emit_insn (gen_addxf3 (tmp, op1, tmp));
31942 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
31943 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
31945 emit_label (label2);
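/* A note on the magic constant above: it is 1 - sqrt(2)/2 ~= 0.2928932,
   the bound below which the x87 fyl2xp1 instruction is specified to
   give full accuracy; larger |op1| falls through to fyl2x on 1 + op1
   instead.  */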
31948 /* Output code to perform a Newton-Raphson approximation of a single precision
31949 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
31951 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
31953 rtx x0, x1, e0, e1;
31955 x0 = gen_reg_rtx (mode);
31956 e0 = gen_reg_rtx (mode);
31957 e1 = gen_reg_rtx (mode);
31958 x1 = gen_reg_rtx (mode);
31960 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
31962 /* x0 = rcp(b) estimate */
31963 emit_insn (gen_rtx_SET (VOIDmode, x0,
31964 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
31967 emit_insn (gen_rtx_SET (VOIDmode, e0,
31968 gen_rtx_MULT (mode, x0, b)));
31971 emit_insn (gen_rtx_SET (VOIDmode, e0,
31972 gen_rtx_MULT (mode, x0, e0)));
31975 emit_insn (gen_rtx_SET (VOIDmode, e1,
31976 gen_rtx_PLUS (mode, x0, x0)));
31979 emit_insn (gen_rtx_SET (VOIDmode, x1,
31980 gen_rtx_MINUS (mode, e1, e0)));
31983 emit_insn (gen_rtx_SET (VOIDmode, res,
31984 gen_rtx_MULT (mode, a, x1)));
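/* A scalar C sketch of the refinement above (illustrative; rcp_estimate
   is a hypothetical stand-in for the hardware RCPSS estimate, which
   provides roughly 12 bits of precision):

     float swdiv (float a, float b)
     {
       float x0 = rcp_estimate (b);        // x0 ~= 1/b
       float x1 = x0 + x0 - b * x0 * x0;   // one Newton-Raphson step
       return a * x1;                      // about twice the bits of x0
     }
*/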
31987 /* Output code to perform a Newton-Raphson approximation of a
31988 single precision floating point [reciprocal] square root. */
31990 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
31993 rtx x0, e0, e1, e2, e3, mthree, mhalf;
31996 x0 = gen_reg_rtx (mode);
31997 e0 = gen_reg_rtx (mode);
31998 e1 = gen_reg_rtx (mode);
31999 e2 = gen_reg_rtx (mode);
32000 e3 = gen_reg_rtx (mode);
32002 real_from_integer (&r, VOIDmode, -3, -1, 0);
32003 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
32005 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
32006 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
32008 if (VECTOR_MODE_P (mode))
32010 mthree = ix86_build_const_vector (mode, true, mthree);
32011 mhalf = ix86_build_const_vector (mode, true, mhalf);
32014 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
32015 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
32017 /* x0 = rsqrt(a) estimate */
32018 emit_insn (gen_rtx_SET (VOIDmode, x0,
32019 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
32022 /* If a == 0.0, filter out the infinite rsqrt estimate to prevent a NaN result for sqrt(0.0). */
32027 zero = gen_reg_rtx (mode);
32028 mask = gen_reg_rtx (mode);
32030 zero = force_reg (mode, CONST0_RTX(mode));
32031 emit_insn (gen_rtx_SET (VOIDmode, mask,
32032 gen_rtx_NE (mode, zero, a)));
32034 emit_insn (gen_rtx_SET (VOIDmode, x0,
32035 gen_rtx_AND (mode, x0, mask)));
32039 emit_insn (gen_rtx_SET (VOIDmode, e0,
32040 gen_rtx_MULT (mode, x0, a)));
32042 emit_insn (gen_rtx_SET (VOIDmode, e1,
32043 gen_rtx_MULT (mode, e0, x0)));
32046 mthree = force_reg (mode, mthree);
32047 emit_insn (gen_rtx_SET (VOIDmode, e2,
32048 gen_rtx_PLUS (mode, e1, mthree)));
32050 mhalf = force_reg (mode, mhalf);
32052 /* e3 = -.5 * x0 */
32053 emit_insn (gen_rtx_SET (VOIDmode, e3,
32054 gen_rtx_MULT (mode, x0, mhalf)));
32056 /* e3 = -.5 * e0 */
32057 emit_insn (gen_rtx_SET (VOIDmode, e3,
32058 gen_rtx_MULT (mode, e0, mhalf)));
32059 /* ret = e2 * e3 */
32060 emit_insn (gen_rtx_SET (VOIDmode, res,
32061 gen_rtx_MULT (mode, e2, e3)));
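/* A scalar C sketch of the sqrt variant above (illustrative;
   rsqrt_estimate is a hypothetical stand-in for the hardware RSQRTSS
   estimate):

     float swsqrt (float a)
     {
       float x0 = rsqrt_estimate (a);      // x0 ~= 1/sqrt(a)
       if (a == 0.0f)
         x0 = 0.0f;                        // avoid 0 * inf = NaN below
       float e0 = x0 * a;
       float e1 = e0 * x0;                 // a * x0 * x0
       float e2 = e1 - 3.0f;
       float e3 = e0 * -0.5f;
       return e2 * e3;                     // -.5*a*x0 * (a*x0*x0 - 3)
     }
*/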
32064 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
32066 static void ATTRIBUTE_UNUSED
32067 i386_solaris_elf_named_section (const char *name, unsigned int flags,
32070 /* With Binutils 2.15, the "@unwind" marker must be specified on
32071 every occurrence of the ".eh_frame" section, not just the first one. */
32074 && strcmp (name, ".eh_frame") == 0)
32076 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
32077 flags & SECTION_WRITE ? "aw" : "a");
32080 default_elf_asm_named_section (name, flags, decl);
32083 /* Return the mangling of TYPE if it is an extended fundamental type. */
32085 static const char *
32086 ix86_mangle_type (const_tree type)
32088 type = TYPE_MAIN_VARIANT (type);
32090 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
32091 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
32094 switch (TYPE_MODE (type))
32097 /* __float128 is "g". */
32100 /* "long double" or __float80 is "e". */
32107 /* For 32-bit code we can save PIC register setup by using
32108 __stack_chk_fail_local hidden function instead of calling
32109 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
32110 register, so it is better to call __stack_chk_fail directly. */
32113 ix86_stack_protect_fail (void)
32115 return TARGET_64BIT
32116 ? default_external_stack_protect_fail ()
32117 : default_hidden_stack_protect_fail ();
32120 /* Select a format to encode pointers in exception handling data. CODE
32121 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
32122 true if the symbol may be affected by dynamic relocations.
32124 ??? All x86 object file formats are capable of representing this.
32125 After all, the relocation needed is the same as for the call insn.
32126 Whether or not a particular assembler allows us to enter such, I
32127 guess we'll have to see. */
32129 asm_preferred_eh_data_format (int code, int global)
32133 int type = DW_EH_PE_sdata8;
32135 || ix86_cmodel == CM_SMALL_PIC
32136 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
32137 type = DW_EH_PE_sdata4;
32138 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
32140 if (ix86_cmodel == CM_SMALL
32141 || (ix86_cmodel == CM_MEDIUM && code))
32142 return DW_EH_PE_udata4;
32143 return DW_EH_PE_absptr;
32146 /* Expand copysign from SIGN to the positive value ABS_VALUE
32147 storing it in RESULT. If MASK is non-null, it shall be a mask to mask out the sign bit. */
32150 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
32152 enum machine_mode mode = GET_MODE (sign);
32153 rtx sgn = gen_reg_rtx (mode);
32154 if (mask == NULL_RTX)
32156 enum machine_mode vmode;
32158 if (mode == SFmode)
32160 else if (mode == DFmode)
32165 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
32166 if (!VECTOR_MODE_P (mode))
32168 /* We need to generate a scalar mode mask in this case. */
32169 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
32170 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
32171 mask = gen_reg_rtx (mode);
32172 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
32176 mask = gen_rtx_NOT (mode, mask);
32177 emit_insn (gen_rtx_SET (VOIDmode, sgn,
32178 gen_rtx_AND (mode, mask, sign)));
32179 emit_insn (gen_rtx_SET (VOIDmode, result,
32180 gen_rtx_IOR (mode, abs_value, sgn)));
32183 /* Expand fabs (OP0) and return a new rtx that holds the result. The
32184 mask for masking out the sign-bit is stored in *SMASK, if that is non-null. */
32187 ix86_expand_sse_fabs (rtx op0, rtx *smask)
32189 enum machine_mode vmode, mode = GET_MODE (op0);
32192 xa = gen_reg_rtx (mode);
32193 if (mode == SFmode)
32195 else if (mode == DFmode)
32199 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
32200 if (!VECTOR_MODE_P (mode))
32202 /* We need to generate a scalar mode mask in this case. */
32203 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
32204 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
32205 mask = gen_reg_rtx (mode);
32206 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
32208 emit_insn (gen_rtx_SET (VOIDmode, xa,
32209 gen_rtx_AND (mode, op0, mask)));
32217 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
32218 swapping the operands if SWAP_OPERANDS is true. The expanded
32219 code is a forward jump to a newly created label in case the
32220 comparison is true. The generated label rtx is returned. */
32222 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
32223 bool swap_operands)
32234 label = gen_label_rtx ();
32235 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
32236 emit_insn (gen_rtx_SET (VOIDmode, tmp,
32237 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
32238 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
32239 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
32240 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
32241 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
32242 JUMP_LABEL (tmp) = label;
32247 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
32248 using comparison code CODE. Operands are swapped for the comparison if
32249 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
32251 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
32252 bool swap_operands)
32254 enum machine_mode mode = GET_MODE (op0);
32255 rtx mask = gen_reg_rtx (mode);
32264 if (mode == DFmode)
32265 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
32266 gen_rtx_fmt_ee (code, mode, op0, op1)));
32268 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
32269 gen_rtx_fmt_ee (code, mode, op0, op1)));
32274 /* Generate and return a rtx of mode MODE for 2**n where n is the number
32275 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
32277 ix86_gen_TWO52 (enum machine_mode mode)
32279 REAL_VALUE_TYPE TWO52r;
32282 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
32283 TWO52 = const_double_from_real_value (TWO52r, mode);
32284 TWO52 = force_reg (mode, TWO52);
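/* Why 2**52 works, as a numeric sketch (not part of the original code):
   for any 0 <= x < 2**52 in DFmode, x + 2**52 pins the exponent so the
   FPU must round away all fraction bits; subtracting 2**52 then leaves
   x rounded to an integer in the current rounding mode, e.g.

     (1.7 + 0x1p52) - 0x1p52 == 2.0    (round-to-nearest-even)  */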
/* Expand SSE sequence for computing lround from OP1 storing
   into OP0. */
32292 ix86_expand_lround (rtx op0, rtx op1)
32294 /* C code for the stuff we're doing below:
32295 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
32298 enum machine_mode mode = GET_MODE (op1);
32299 const struct real_format *fmt;
32300 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
32303 /* load nextafter (0.5, 0.0) */
32304 fmt = REAL_MODE_FORMAT (mode);
32305 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
32306 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
32308 /* adj = copysign (0.5, op1) */
32309 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
32310 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
32312 /* adj = op1 + adj */
32313 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
32315 /* op0 = (imode)adj */
32316 expand_fix (op0, adj, 0);
/* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1 storing
   into OPERAND0. */
32322 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
32324 /* C code for the stuff we're doing below (for do_floor):
32326 xi -= (double)xi > op1 ? 1 : 0;
32329 enum machine_mode fmode = GET_MODE (op1);
32330 enum machine_mode imode = GET_MODE (op0);
32331 rtx ireg, freg, label, tmp;
32333 /* reg = (long)op1 */
32334 ireg = gen_reg_rtx (imode);
32335 expand_fix (ireg, op1, 0);
32337 /* freg = (double)reg */
32338 freg = gen_reg_rtx (fmode);
32339 expand_float (freg, ireg, 0);
32341 /* ireg = (freg > op1) ? ireg - 1 : ireg */
32342 label = ix86_expand_sse_compare_and_jump (UNLE,
32343 freg, op1, !do_floor);
32344 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
32345 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
32346 emit_move_insn (ireg, tmp);
32348 emit_label (label);
32349 LABEL_NUSES (label) = 1;
32351 emit_move_insn (op0, ireg);
32354 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
32355 result in OPERAND0. */
32357 ix86_expand_rint (rtx operand0, rtx operand1)
32359 /* C code for the stuff we're doing below:
32360 xa = fabs (operand1);
32361 if (!isless (xa, 2**52))
32363 xa = xa + 2**52 - 2**52;
32364 return copysign (xa, operand1);
32366 enum machine_mode mode = GET_MODE (operand0);
32367 rtx res, xa, label, TWO52, mask;
32369 res = gen_reg_rtx (mode);
32370 emit_move_insn (res, operand1);
32372 /* xa = abs (operand1) */
32373 xa = ix86_expand_sse_fabs (res, &mask);
32375 /* if (!isless (xa, TWO52)) goto label; */
32376 TWO52 = ix86_gen_TWO52 (mode);
32377 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32379 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
32380 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
32382 ix86_sse_copysign_to_positive (res, xa, res, mask);
32384 emit_label (label);
32385 LABEL_NUSES (label) = 1;
32387 emit_move_insn (operand0, res);
/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
   into OPERAND0 without relying on DImode truncation via cvttsd2siq
   that is only available on 64bit targets. */
32393 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
32395 /* C code for the stuff we expand below.
32396 double xa = fabs (x), x2;
32397 if (!isless (xa, TWO52))
32399 xa = xa + TWO52 - TWO52;
32400 x2 = copysign (xa, x);
32409 enum machine_mode mode = GET_MODE (operand0);
32410 rtx xa, TWO52, tmp, label, one, res, mask;
32412 TWO52 = ix86_gen_TWO52 (mode);
32414 /* Temporary for holding the result, initialized to the input
32415 operand to ease control flow. */
32416 res = gen_reg_rtx (mode);
32417 emit_move_insn (res, operand1);
32419 /* xa = abs (operand1) */
32420 xa = ix86_expand_sse_fabs (res, &mask);
32422 /* if (!isless (xa, TWO52)) goto label; */
32423 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32425 /* xa = xa + TWO52 - TWO52; */
32426 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
32427 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
32429 /* xa = copysign (xa, operand1) */
32430 ix86_sse_copysign_to_positive (xa, xa, res, mask);
32432 /* generate 1.0 or -1.0 */
32433 one = force_reg (mode,
32434 const_double_from_real_value (do_floor
32435 ? dconst1 : dconstm1, mode));
32437 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
32438 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
32439 emit_insn (gen_rtx_SET (VOIDmode, tmp,
32440 gen_rtx_AND (mode, one, tmp)));
32441 /* We always need to subtract here to preserve signed zero. */
32442 tmp = expand_simple_binop (mode, MINUS,
32443 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
32444 emit_move_insn (res, tmp);
32446 emit_label (label);
32447 LABEL_NUSES (label) = 1;
32449 emit_move_insn (operand0, res);
/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
   into OPERAND0. */
32455 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
32457 /* C code for the stuff we expand below.
32458 double xa = fabs (x), x2;
32459 if (!isless (xa, TWO52))
32461 x2 = (double)(long)x;
32468 if (HONOR_SIGNED_ZEROS (mode))
32469 return copysign (x2, x);
32472 enum machine_mode mode = GET_MODE (operand0);
32473 rtx xa, xi, TWO52, tmp, label, one, res, mask;
32475 TWO52 = ix86_gen_TWO52 (mode);
32477 /* Temporary for holding the result, initialized to the input
32478 operand to ease control flow. */
32479 res = gen_reg_rtx (mode);
32480 emit_move_insn (res, operand1);
32482 /* xa = abs (operand1) */
32483 xa = ix86_expand_sse_fabs (res, &mask);
32485 /* if (!isless (xa, TWO52)) goto label; */
32486 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32488 /* xa = (double)(long)x */
32489 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
32490 expand_fix (xi, res, 0);
32491 expand_float (xa, xi, 0);
32494 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
32496 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
32497 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
32498 emit_insn (gen_rtx_SET (VOIDmode, tmp,
32499 gen_rtx_AND (mode, one, tmp)));
32500 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
32501 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
32502 emit_move_insn (res, tmp);
32504 if (HONOR_SIGNED_ZEROS (mode))
32505 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
32507 emit_label (label);
32508 LABEL_NUSES (label) = 1;
32510 emit_move_insn (operand0, res);
32513 /* Expand SSE sequence for computing round from OPERAND1 storing
32514 into OPERAND0. Sequence that works without relying on DImode truncation
32515 via cvttsd2siq that is only available on 64bit targets. */
32517 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
32519 /* C code for the stuff we expand below.
32520 double xa = fabs (x), xa2, x2;
32521 if (!isless (xa, TWO52))
32523 Using the absolute value and copying back sign makes
32524 -0.0 -> -0.0 correct.
32525 xa2 = xa + TWO52 - TWO52;
32530 else if (dxa > 0.5)
32532 x2 = copysign (xa2, x);
32535 enum machine_mode mode = GET_MODE (operand0);
32536 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
32538 TWO52 = ix86_gen_TWO52 (mode);
32540 /* Temporary for holding the result, initialized to the input
32541 operand to ease control flow. */
32542 res = gen_reg_rtx (mode);
32543 emit_move_insn (res, operand1);
32545 /* xa = abs (operand1) */
32546 xa = ix86_expand_sse_fabs (res, &mask);
32548 /* if (!isless (xa, TWO52)) goto label; */
32549 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32551 /* xa2 = xa + TWO52 - TWO52; */
32552 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
32553 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
32555 /* dxa = xa2 - xa; */
32556 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
32558 /* generate 0.5, 1.0 and -0.5 */
32559 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
32560 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
32561 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
32565 tmp = gen_reg_rtx (mode);
32566 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
32567 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
32568 emit_insn (gen_rtx_SET (VOIDmode, tmp,
32569 gen_rtx_AND (mode, one, tmp)));
32570 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
32571 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
32572 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
32573 emit_insn (gen_rtx_SET (VOIDmode, tmp,
32574 gen_rtx_AND (mode, one, tmp)));
32575 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
32577 /* res = copysign (xa2, operand1) */
32578 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
32580 emit_label (label);
32581 LABEL_NUSES (label) = 1;
32583 emit_move_insn (operand0, res);
/* Expand SSE sequence for computing trunc from OPERAND1 storing
   into OPERAND0. */
32589 ix86_expand_trunc (rtx operand0, rtx operand1)
32591 /* C code for SSE variant we expand below.
32592 double xa = fabs (x), x2;
32593 if (!isless (xa, TWO52))
32595 x2 = (double)(long)x;
32596 if (HONOR_SIGNED_ZEROS (mode))
32597 return copysign (x2, x);
32600 enum machine_mode mode = GET_MODE (operand0);
32601 rtx xa, xi, TWO52, label, res, mask;
32603 TWO52 = ix86_gen_TWO52 (mode);
32605 /* Temporary for holding the result, initialized to the input
32606 operand to ease control flow. */
32607 res = gen_reg_rtx (mode);
32608 emit_move_insn (res, operand1);
32610 /* xa = abs (operand1) */
32611 xa = ix86_expand_sse_fabs (res, &mask);
32613 /* if (!isless (xa, TWO52)) goto label; */
32614 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32616 /* x = (double)(long)x */
32617 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
32618 expand_fix (xi, res, 0);
32619 expand_float (res, xi, 0);
32621 if (HONOR_SIGNED_ZEROS (mode))
32622 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
32624 emit_label (label);
32625 LABEL_NUSES (label) = 1;
32627 emit_move_insn (operand0, res);
/* Expand SSE sequence for computing trunc from OPERAND1 storing
   into OPERAND0 without relying on DImode truncation via cvttsd2siq
   that is only available on 64bit targets. */
32633 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
32635 enum machine_mode mode = GET_MODE (operand0);
32636 rtx xa, mask, TWO52, label, one, res, smask, tmp;
32638 /* C code for SSE variant we expand below.
32639 double xa = fabs (x), x2;
32640 if (!isless (xa, TWO52))
32642 xa2 = xa + TWO52 - TWO52;
32646 x2 = copysign (xa2, x);
32650 TWO52 = ix86_gen_TWO52 (mode);
32652 /* Temporary for holding the result, initialized to the input
32653 operand to ease control flow. */
32654 res = gen_reg_rtx (mode);
32655 emit_move_insn (res, operand1);
32657 /* xa = abs (operand1) */
32658 xa = ix86_expand_sse_fabs (res, &smask);
32660 /* if (!isless (xa, TWO52)) goto label; */
32661 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32663 /* res = xa + TWO52 - TWO52; */
32664 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
32665 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
32666 emit_move_insn (res, tmp);
32669 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
32671 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
32672 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
32673 emit_insn (gen_rtx_SET (VOIDmode, mask,
32674 gen_rtx_AND (mode, mask, one)));
32675 tmp = expand_simple_binop (mode, MINUS,
32676 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
32677 emit_move_insn (res, tmp);
32679 /* res = copysign (res, operand1) */
32680 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
32682 emit_label (label);
32683 LABEL_NUSES (label) = 1;
32685 emit_move_insn (operand0, res);
/* Expand SSE sequence for computing round from OPERAND1 storing
   into OPERAND0. */
32691 ix86_expand_round (rtx operand0, rtx operand1)
32693 /* C code for the stuff we're doing below:
32694 double xa = fabs (x);
32695 if (!isless (xa, TWO52))
32697 xa = (double)(long)(xa + nextafter (0.5, 0.0));
32698 return copysign (xa, x);
32700 enum machine_mode mode = GET_MODE (operand0);
32701 rtx res, TWO52, xa, label, xi, half, mask;
32702 const struct real_format *fmt;
32703 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
32705 /* Temporary for holding the result, initialized to the input
32706 operand to ease control flow. */
32707 res = gen_reg_rtx (mode);
32708 emit_move_insn (res, operand1);
32710 TWO52 = ix86_gen_TWO52 (mode);
32711 xa = ix86_expand_sse_fabs (res, &mask);
32712 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32714 /* load nextafter (0.5, 0.0) */
32715 fmt = REAL_MODE_FORMAT (mode);
32716 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
32717 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
32719 /* xa = xa + 0.5 */
32720 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
32721 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
32723 /* xa = (double)(int64_t)xa */
32724 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
32725 expand_fix (xi, xa, 0);
32726 expand_float (xa, xi, 0);
32728 /* res = copysign (xa, operand1) */
32729 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
32731 emit_label (label);
32732 LABEL_NUSES (label) = 1;
32734 emit_move_insn (operand0, res);
32738 /* Table of valid machine attributes. */
32739 static const struct attribute_spec ix86_attribute_table[] =
32741 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
32742 affects_type_identity } */
32743 /* Stdcall attribute says callee is responsible for popping arguments
32744 if they are not variable. */
32745 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
32747 /* Fastcall attribute says callee is responsible for popping arguments
32748 if they are not variable. */
32749 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
32751 /* Thiscall attribute says callee is responsible for popping arguments
32752 if they are not variable. */
32753 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
32755 /* Cdecl attribute says the callee is a normal C declaration */
32756 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
32758 /* Regparm attribute specifies how many integer arguments are to be
32759 passed in registers. */
32760 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
32762 /* Sseregparm attribute says we are using x86_64 calling conventions
32763 for FP arguments. */
32764 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
32766 /* force_align_arg_pointer says this function realigns the stack at entry. */
32767 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
32768 false, true, true, ix86_handle_cconv_attribute, false },
32769 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
32770 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
32771 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
32772 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
32775 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
32777 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
32779 #ifdef SUBTARGET_ATTRIBUTE_TABLE
32780 SUBTARGET_ATTRIBUTE_TABLE,
32782 /* ms_abi and sysv_abi calling convention function attributes. */
32783 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
32784 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
32785 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
32787 { "callee_pop_aggregate_return", 1, 1, false, true, true,
32788 ix86_handle_callee_pop_aggregate_return, true },
32790 { NULL, 0, 0, false, false, false, NULL, false }
32793 /* Implement targetm.vectorize.builtin_vectorization_cost. */
32795 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
32796 tree vectype ATTRIBUTE_UNUSED,
32797 int misalign ATTRIBUTE_UNUSED)
32799 switch (type_of_cost)
32802 return ix86_cost->scalar_stmt_cost;
32805 return ix86_cost->scalar_load_cost;
32808 return ix86_cost->scalar_store_cost;
32811 return ix86_cost->vec_stmt_cost;
32814 return ix86_cost->vec_align_load_cost;
32817 return ix86_cost->vec_store_cost;
32819 case vec_to_scalar:
32820 return ix86_cost->vec_to_scalar_cost;
32822 case scalar_to_vec:
32823 return ix86_cost->scalar_to_vec_cost;
32825 case unaligned_load:
32826 case unaligned_store:
32827 return ix86_cost->vec_unalign_load_cost;
32829 case cond_branch_taken:
32830 return ix86_cost->cond_taken_branch_cost;
32832 case cond_branch_not_taken:
32833 return ix86_cost->cond_not_taken_branch_cost;
32839 gcc_unreachable ();
32844 /* Implement targetm.vectorize.builtin_vec_perm. */
32847 ix86_vectorize_builtin_vec_perm (tree vec_type, tree *mask_type)
32849 tree itype = TREE_TYPE (vec_type);
32850 bool u = TYPE_UNSIGNED (itype);
32851 enum machine_mode vmode = TYPE_MODE (vec_type);
32852 enum ix86_builtins fcode;
32853 bool ok = TARGET_SSE2;
32859 fcode = IX86_BUILTIN_VEC_PERM_V4DF;
32862 fcode = IX86_BUILTIN_VEC_PERM_V2DF;
32864 itype = ix86_get_builtin_type (IX86_BT_DI);
32869 fcode = IX86_BUILTIN_VEC_PERM_V8SF;
32873 fcode = IX86_BUILTIN_VEC_PERM_V4SF;
32875 itype = ix86_get_builtin_type (IX86_BT_SI);
32879 fcode = u ? IX86_BUILTIN_VEC_PERM_V2DI_U : IX86_BUILTIN_VEC_PERM_V2DI;
32882 fcode = u ? IX86_BUILTIN_VEC_PERM_V4SI_U : IX86_BUILTIN_VEC_PERM_V4SI;
32885 fcode = u ? IX86_BUILTIN_VEC_PERM_V8HI_U : IX86_BUILTIN_VEC_PERM_V8HI;
32888 fcode = u ? IX86_BUILTIN_VEC_PERM_V16QI_U : IX86_BUILTIN_VEC_PERM_V16QI;
32898 *mask_type = itype;
32899 return ix86_builtins[(int) fcode];
32902 /* Return a vector mode with twice as many elements as VMODE. */
32903 /* ??? Consider moving this to a table generated by genmodes.c. */
32905 static enum machine_mode
32906 doublesize_vector_mode (enum machine_mode vmode)
32910 case V2SFmode: return V4SFmode;
32911 case V1DImode: return V2DImode;
32912 case V2SImode: return V4SImode;
32913 case V4HImode: return V8HImode;
32914 case V8QImode: return V16QImode;
32916 case V2DFmode: return V4DFmode;
32917 case V4SFmode: return V8SFmode;
32918 case V2DImode: return V4DImode;
32919 case V4SImode: return V8SImode;
32920 case V8HImode: return V16HImode;
32921 case V16QImode: return V32QImode;
32923 case V4DFmode: return V8DFmode;
32924 case V8SFmode: return V16SFmode;
32925 case V4DImode: return V8DImode;
32926 case V8SImode: return V16SImode;
32927 case V16HImode: return V32HImode;
32928 case V32QImode: return V64QImode;
32931 gcc_unreachable ();
32935 /* Construct (set target (vec_select op0 (parallel perm))) and
32936 return true if that's a valid instruction in the active ISA. */
32939 expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
32941 rtx rperm[MAX_VECT_LEN], x;
32944 for (i = 0; i < nelt; ++i)
32945 rperm[i] = GEN_INT (perm[i]);
32947 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
32948 x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
32949 x = gen_rtx_SET (VOIDmode, target, x);
32952 if (recog_memoized (x) < 0)
32960 /* Similar, but generate a vec_concat from op0 and op1 as well. */
32963 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
32964 const unsigned char *perm, unsigned nelt)
32966 enum machine_mode v2mode;
32969 v2mode = doublesize_vector_mode (GET_MODE (op0));
32970 x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
32971 return expand_vselect (target, x, perm, nelt);
32974 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
32975 in terms of blendp[sd] / pblendw / pblendvb. */
32978 expand_vec_perm_blend (struct expand_vec_perm_d *d)
32980 enum machine_mode vmode = d->vmode;
32981 unsigned i, mask, nelt = d->nelt;
32982 rtx target, op0, op1, x;
32984 if (!TARGET_SSE4_1 || d->op0 == d->op1)
32986 if (!(GET_MODE_SIZE (vmode) == 16 || vmode == V4DFmode || vmode == V8SFmode))
32989 /* This is a blend, not a permute. Elements must stay in their
32990 respective lanes. */
32991 for (i = 0; i < nelt; ++i)
32993 unsigned e = d->perm[i];
32994 if (!(e == i || e == i + nelt))
33001 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
33002 decision should be extracted elsewhere, so that we only try that
33003 sequence once all budget==3 options have been tried. */
33005 /* For bytes, see if bytes move in pairs so we can use pblendw with
33006 an immediate argument, rather than pblendvb with a vector argument. */
33007 if (vmode == V16QImode)
33009 bool pblendw_ok = true;
33010 for (i = 0; i < 16 && pblendw_ok; i += 2)
33011 pblendw_ok = (d->perm[i] + 1 == d->perm[i + 1]);
33015 rtx rperm[16], vperm;
33017 for (i = 0; i < nelt; ++i)
33018 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
33020 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
33021 vperm = force_reg (V16QImode, vperm);
33023 emit_insn (gen_sse4_1_pblendvb (d->target, d->op0, d->op1, vperm));
33028 target = d->target;
33040 for (i = 0; i < nelt; ++i)
33041 mask |= (d->perm[i] >= nelt) << i;
33045 for (i = 0; i < 2; ++i)
33046 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
33050 for (i = 0; i < 4; ++i)
33051 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
33055 for (i = 0; i < 8; ++i)
33056 mask |= (d->perm[i * 2] >= 16) << i;
33060 target = gen_lowpart (vmode, target);
33061 op0 = gen_lowpart (vmode, op0);
33062 op1 = gen_lowpart (vmode, op1);
33066 gcc_unreachable ();
33069 /* This matches five different patterns with the different modes. */
33070 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
33071 x = gen_rtx_SET (VOIDmode, target, x);
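/* Worked example of the mask packing above (illustrative): for V4SFmode
   with perm = { 0, 5, 2, 7 }, elements 1 and 3 come from op1, so
   mask == 0b1010 and the VEC_MERGE matches blendps with immediate 0xa. */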
33077 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
33078 in terms of the variable form of vpermilps.
33080 Note that we will have already failed the immediate input vpermilps,
33081 which requires that the high and low part shuffle be identical; the
33082 variable form doesn't require that. */
33085 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
33087 rtx rperm[8], vperm;
33090 if (!TARGET_AVX || d->vmode != V8SFmode || d->op0 != d->op1)
33093 /* We can only permute within the 128-bit lane. */
33094 for (i = 0; i < 8; ++i)
33096 unsigned e = d->perm[i];
33097 if (i < 4 ? e >= 4 : e < 4)
33104 for (i = 0; i < 8; ++i)
33106 unsigned e = d->perm[i];
33108 /* Within each 128-bit lane, the elements of op0 are numbered
33109 from 0 and the elements of op1 are numbered from 4. */
33115 rperm[i] = GEN_INT (e);
33118 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
33119 vperm = force_reg (V8SImode, vperm);
33120 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
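/* Illustrative example: the variable form of vpermilps permutes within
   each 128-bit lane independently, so perm = { 1, 0, 3, 2, 5, 4, 7, 6 }
   swaps adjacent pairs and the selector built above becomes
   { 1, 0, 3, 2, 1, 0, 3, 2 } after folding the op1 indices modulo 4. */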
33125 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
33126 in terms of pshufb or vpperm. */
33129 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
33131 unsigned i, nelt, eltsz;
33132 rtx rperm[16], vperm, target, op0, op1;
33134 if (!(d->op0 == d->op1 ? TARGET_SSSE3 : TARGET_XOP))
33136 if (GET_MODE_SIZE (d->vmode) != 16)
33143 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
33145 for (i = 0; i < nelt; ++i)
33147 unsigned j, e = d->perm[i];
33148 for (j = 0; j < eltsz; ++j)
33149 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
33152 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
33153 vperm = force_reg (V16QImode, vperm);
33155 target = gen_lowpart (V16QImode, d->target);
33156 op0 = gen_lowpart (V16QImode, d->op0);
33157 if (d->op0 == d->op1)
33158 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
33161 op1 = gen_lowpart (V16QImode, d->op1);
33162 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
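/* Byte-mask sketch for the selector loop above (illustrative): with
   V8HImode, eltsz == 2, so a permutation element e expands to the byte
   selectors { 2*e, 2*e + 1 }; element 3 contributes bytes { 6, 7 }. */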
33168 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
33169 in a single instruction. */
33172 expand_vec_perm_1 (struct expand_vec_perm_d *d)
33174 unsigned i, nelt = d->nelt;
33175 unsigned char perm2[MAX_VECT_LEN];
33177 /* Check plain VEC_SELECT first, because AVX has instructions that could
33178 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
33179 input where SEL+CONCAT may not. */
33180 if (d->op0 == d->op1)
33182 int mask = nelt - 1;
33184 for (i = 0; i < nelt; i++)
33185 perm2[i] = d->perm[i] & mask;
33187 if (expand_vselect (d->target, d->op0, perm2, nelt))
33190 /* There are plenty of patterns in sse.md that are written for
33191 SEL+CONCAT and are not replicated for a single op. Perhaps
33192 that should be changed, to avoid the nastiness here. */
33194 /* Recognize interleave style patterns, which means incrementing
33195 every other permutation operand. */
33196 for (i = 0; i < nelt; i += 2)
33198 perm2[i] = d->perm[i] & mask;
33199 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
33201 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
33204 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
33207 for (i = 0; i < nelt; i += 4)
33209 perm2[i + 0] = d->perm[i + 0] & mask;
33210 perm2[i + 1] = d->perm[i + 1] & mask;
33211 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
33212 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
33215 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
33220 /* Finally, try the fully general two operand permute. */
33221 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
33224 /* Recognize interleave style patterns with reversed operands. */
33225 if (d->op0 != d->op1)
33227 for (i = 0; i < nelt; ++i)
33229 unsigned e = d->perm[i];
33237 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
33241 /* Try the SSE4.1 blend variable merge instructions. */
33242 if (expand_vec_perm_blend (d))
33245 /* Try one of the AVX vpermil variable permutations. */
33246 if (expand_vec_perm_vpermil (d))
33249 /* Try the SSSE3 pshufb or XOP vpperm variable permutation. */
33250 if (expand_vec_perm_pshufb (d))
33256 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
33257 in terms of a pair of pshuflw + pshufhw instructions. */
33260 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
33262 unsigned char perm2[MAX_VECT_LEN];
33266 if (d->vmode != V8HImode || d->op0 != d->op1)
33269 /* The two permutations only operate in 64-bit lanes. */
33270 for (i = 0; i < 4; ++i)
33271 if (d->perm[i] >= 4)
33273 for (i = 4; i < 8; ++i)
33274 if (d->perm[i] < 4)
33280 /* Emit the pshuflw. */
33281 memcpy (perm2, d->perm, 4);
33282 for (i = 4; i < 8; ++i)
33284 ok = expand_vselect (d->target, d->op0, perm2, 8);
33287 /* Emit the pshufhw. */
33288 memcpy (perm2 + 4, d->perm + 4, 4);
33289 for (i = 0; i < 4; ++i)
33291 ok = expand_vselect (d->target, d->target, perm2, 8);
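/* A sketch of the two-insn split (illustrative): for
   perm = { 2, 0, 3, 1, 5, 4, 7, 6 }, the first expand_vselect emits
   pshuflw reordering the low quad as { 2, 0, 3, 1 } with an identity
   high half, and the second emits pshufhw reordering the high quad. */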
33297 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
33298 the permutation using the SSSE3 palignr instruction. This succeeds
33299 when all of the elements in PERM fit within one vector and we merely
33300 need to shift them down so that a single vector permutation has a
33301 chance to succeed. */
33304 expand_vec_perm_palignr (struct expand_vec_perm_d *d)
33306 unsigned i, nelt = d->nelt;
33311 /* Even with AVX, palignr only operates on 128-bit vectors. */
33312 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
33315 min = nelt, max = 0;
33316 for (i = 0; i < nelt; ++i)
33318 unsigned e = d->perm[i];
33324 if (min == 0 || max - min >= nelt)
33327 /* Given that we have SSSE3, we know we'll be able to implement the
33328 single operand permutation after the palignr with pshufb. */
33332 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
33333 emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode, d->target),
33334 gen_lowpart (TImode, d->op1),
33335 gen_lowpart (TImode, d->op0), shift));
33337 d->op0 = d->op1 = d->target;
33340 for (i = 0; i < nelt; ++i)
33342 unsigned e = d->perm[i] - min;
33348 /* Test for the degenerate case where the alignment by itself
33349 produces the desired permutation. */
33353 ok = expand_vec_perm_1 (d);
33359 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
33360 a two vector permutation into a single vector permutation by using
33361 an interleave operation to merge the vectors. */
33364 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
33366 struct expand_vec_perm_d dremap, dfinal;
33367 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
33368 unsigned contents, h1, h2, h3, h4;
33369 unsigned char remap[2 * MAX_VECT_LEN];
33373 if (d->op0 == d->op1)
33376 /* The 256-bit unpck[lh]p[sd] instructions only operate within the 128-bit
33377 lanes. We can use similar techniques with the vperm2f128 instruction,
33378 but it requires slightly different logic. */
33379 if (GET_MODE_SIZE (d->vmode) != 16)
33382 /* Examine from whence the elements come. */
33384 for (i = 0; i < nelt; ++i)
33385 contents |= 1u << d->perm[i];
33387 /* Split the two input vectors into 4 halves. */
33388 h1 = (1u << nelt2) - 1;
33393 memset (remap, 0xff, sizeof (remap));
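/* Worked example (illustrative): for V4SImode, nelt == 4 and nelt2 == 2,
   so h1 == 0x3 (low half of op0), h2 == 0xc (high half of op0),
   h3 == 0x30 (low half of op1) and h4 == 0xc0 (high half of op1);
   contents is the OR of 1u << perm[i] over all elements. */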
/* If all elements come from the two low halves, use interleave low;
   similarly for interleave high. If the elements come from mismatched
   halves, we can use shufps for V4SF/V4SI or do a DImode shuffle. */
33399 if ((contents & (h1 | h3)) == contents)
33401 for (i = 0; i < nelt2; ++i)
33404 remap[i + nelt] = i * 2 + 1;
33405 dremap.perm[i * 2] = i;
33406 dremap.perm[i * 2 + 1] = i + nelt;
33409 else if ((contents & (h2 | h4)) == contents)
33411 for (i = 0; i < nelt2; ++i)
33413 remap[i + nelt2] = i * 2;
33414 remap[i + nelt + nelt2] = i * 2 + 1;
33415 dremap.perm[i * 2] = i + nelt2;
33416 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
33419 else if ((contents & (h1 | h4)) == contents)
33421 for (i = 0; i < nelt2; ++i)
33424 remap[i + nelt + nelt2] = i + nelt2;
33425 dremap.perm[i] = i;
33426 dremap.perm[i + nelt2] = i + nelt + nelt2;
33430 dremap.vmode = V2DImode;
33432 dremap.perm[0] = 0;
33433 dremap.perm[1] = 3;
33436 else if ((contents & (h2 | h3)) == contents)
33438 for (i = 0; i < nelt2; ++i)
33440 remap[i + nelt2] = i;
33441 remap[i + nelt] = i + nelt2;
33442 dremap.perm[i] = i + nelt2;
33443 dremap.perm[i + nelt2] = i + nelt;
33447 dremap.vmode = V2DImode;
33449 dremap.perm[0] = 1;
33450 dremap.perm[1] = 2;
33456 /* Use the remapping array set up above to move the elements from their
33457 swizzled locations into their final destinations. */
33459 for (i = 0; i < nelt; ++i)
33461 unsigned e = remap[d->perm[i]];
33462 gcc_assert (e < nelt);
33463 dfinal.perm[i] = e;
33465 dfinal.op0 = gen_reg_rtx (dfinal.vmode);
33466 dfinal.op1 = dfinal.op0;
33467 dremap.target = dfinal.op0;
33469 /* Test if the final remap can be done with a single insn. For V4SFmode or
33470 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
33472 ok = expand_vec_perm_1 (&dfinal);
33473 seq = get_insns ();
33479 if (dremap.vmode != dfinal.vmode)
33481 dremap.target = gen_lowpart (dremap.vmode, dremap.target);
33482 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
33483 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
33486 ok = expand_vec_perm_1 (&dremap);
33493 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
33494 permutation with two pshufb insns and an ior. We should have already
33495 failed all two instruction sequences. */
33498 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
33500 rtx rperm[2][16], vperm, l, h, op, m128;
33501 unsigned int i, nelt, eltsz;
33503 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
33505 gcc_assert (d->op0 != d->op1);
33508 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
33510 /* Generate two permutation masks. If the required element is within
33511 the given vector it is shuffled into the proper lane. If the required
33512 element is in the other vector, force a zero into the lane by setting
33513 bit 7 in the permutation mask. */
33514 m128 = GEN_INT (-128);
33515 for (i = 0; i < nelt; ++i)
33517 unsigned j, e = d->perm[i];
33518 unsigned which = (e >= nelt);
33522 for (j = 0; j < eltsz; ++j)
33524 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
33525 rperm[1-which][i*eltsz + j] = m128;
33529 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
33530 vperm = force_reg (V16QImode, vperm);
33532 l = gen_reg_rtx (V16QImode);
33533 op = gen_lowpart (V16QImode, d->op0);
33534 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
33536 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
33537 vperm = force_reg (V16QImode, vperm);
33539 h = gen_reg_rtx (V16QImode);
33540 op = gen_lowpart (V16QImode, d->op1);
33541 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
33543 op = gen_lowpart (V16QImode, d->target);
33544 emit_insn (gen_iorv16qi3 (op, l, h));
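/* Scalar sketch of the zeroing trick above (illustrative): pshufb
   writes zero to any destination byte whose selector has bit 7 set,
   and m128 == -128 is exactly such a selector.  Thus L holds the op0
   bytes (zero where the element lives in op1), H holds the op1 bytes
   (zero where it lives in op0), and

     target = l | h;

   merges the two halves of the permutation. */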
33549 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
33550 and extract-odd permutations. */
33553 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
33560 t1 = gen_reg_rtx (V4DFmode);
33561 t2 = gen_reg_rtx (V4DFmode);
33563 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
33564 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
33565 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
33567 /* Now an unpck[lh]pd will produce the result required. */
33569 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
33571 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
33577 int mask = odd ? 0xdd : 0x88;
33579 t1 = gen_reg_rtx (V8SFmode);
33580 t2 = gen_reg_rtx (V8SFmode);
33581 t3 = gen_reg_rtx (V8SFmode);
33583 /* Shuffle within the 128-bit lanes to produce:
33584 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
33585 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
33588 /* Shuffle the lanes around to produce:
33589 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
33590 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
33593 /* Shuffle within the 128-bit lanes to produce:
33594 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
33595 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
33597 /* Shuffle within the 128-bit lanes to produce:
33598 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
33599 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
33601 /* Shuffle the lanes around to produce:
33602 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
33603 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
33612 /* These are always directly implementable by expand_vec_perm_1. */
33613 gcc_unreachable ();
33617 return expand_vec_perm_pshufb2 (d);
33620 /* We need 2*log2(N)-1 operations to achieve odd/even
33621 with interleave. */
33622 t1 = gen_reg_rtx (V8HImode);
33623 t2 = gen_reg_rtx (V8HImode);
33624 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
33625 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
33626 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
33627 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
33629 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
33631 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
33638 return expand_vec_perm_pshufb2 (d);
33641 t1 = gen_reg_rtx (V16QImode);
33642 t2 = gen_reg_rtx (V16QImode);
33643 t3 = gen_reg_rtx (V16QImode);
33644 emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
33645 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
33646 emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
33647 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
33648 emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
33649 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
33651 t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
33653 t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
33659 gcc_unreachable ();
33665 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
33666 extract-even and extract-odd permutations. */
33669 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
33671 unsigned i, odd, nelt = d->nelt;
33674 if (odd != 0 && odd != 1)
33677 for (i = 1; i < nelt; ++i)
33678 if (d->perm[i] != 2 * i + odd)
33681 return expand_vec_perm_even_odd_1 (d, odd);
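/* Example (illustrative): on V4SImode the extract-even permutation is
   perm = { 0, 2, 4, 6 } and extract-odd is { 1, 3, 5, 7 }; the loop
   above accepts exactly these two shapes. */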
33684 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
33685 permutations. We assume that expand_vec_perm_1 has already failed. */
33688 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
33690 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
33691 enum machine_mode vmode = d->vmode;
33692 unsigned char perm2[4];
33700 /* These are special-cased in sse.md so that we can optionally
33701 use the vbroadcast instruction. They expand to two insns
33702 if the input happens to be in a register. */
33703 gcc_unreachable ();
33709 /* These are always implementable using standard shuffle patterns. */
33710 gcc_unreachable ();
33714 /* These can be implemented via interleave. We save one insn by
33715 stopping once we have promoted to V4SImode and then use pshufd. */
33718 optab otab = vec_interleave_low_optab;
33722 otab = vec_interleave_high_optab;
33727 op0 = expand_binop (vmode, otab, op0, op0, NULL, 0, OPTAB_DIRECT);
33728 vmode = get_mode_wider_vector (vmode);
33729 op0 = gen_lowpart (vmode, op0);
33731 while (vmode != V4SImode);
33733 memset (perm2, elt, 4);
33734 ok = expand_vselect (gen_lowpart (V4SImode, d->target), op0, perm2, 4);
33739 gcc_unreachable ();
33743 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
33744 broadcast permutations. */
33747 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
33749 unsigned i, elt, nelt = d->nelt;
33751 if (d->op0 != d->op1)
33755 for (i = 1; i < nelt; ++i)
33756 if (d->perm[i] != elt)
33759 return expand_vec_perm_broadcast_1 (d);
33762 /* The guts of ix86_expand_vec_perm_builtin, also used by the ok hook.
33763 With all of the interface bits taken care of, perform the expansion
33764 in D and return true on success. */
33767 ix86_expand_vec_perm_builtin_1 (struct expand_vec_perm_d *d)
33769 /* Try a single instruction expansion. */
33770 if (expand_vec_perm_1 (d))
33773 /* Try sequences of two instructions. */
33775 if (expand_vec_perm_pshuflw_pshufhw (d))
33778 if (expand_vec_perm_palignr (d))
33781 if (expand_vec_perm_interleave2 (d))
33784 if (expand_vec_perm_broadcast (d))
33787 /* Try sequences of three instructions. */
33789 if (expand_vec_perm_pshufb2 (d))
33792 /* ??? Look for narrow permutations whose element orderings would
33793 allow the promotion to a wider mode. */
33795 /* ??? Look for sequences of interleave or a wider permute that place
33796 the data into the correct lanes for a half-vector shuffle like
33797 pshuf[lh]w or vpermilps. */
33799 /* ??? Look for sequences of interleave that produce the desired results.
33800 The combinatorics of punpck[lh] get pretty ugly... */
33802 if (expand_vec_perm_even_odd (d))
33808 /* Extract the values from the vector CST into the permutation array in D.
33809 Return 0 on error, 1 if all values from the permutation come from the
33810 first vector, 2 if all values from the second vector, and 3 otherwise. */
33813 extract_vec_perm_cst (struct expand_vec_perm_d *d, tree cst)
33815 tree list = TREE_VECTOR_CST_ELTS (cst);
33816 unsigned i, nelt = d->nelt;
33819 for (i = 0; i < nelt; ++i, list = TREE_CHAIN (list))
33821 unsigned HOST_WIDE_INT e;
33823 if (!host_integerp (TREE_VALUE (list), 1))
33825 e = tree_low_cst (TREE_VALUE (list), 1);
33829 ret |= (e < nelt ? 1 : 2);
33832 gcc_assert (list == NULL);
33834 /* For all elements from second vector, fold the elements to first. */
33836 for (i = 0; i < nelt; ++i)
33837 d->perm[i] -= nelt;
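/* For instance (a sketch): on V4SImode the constant { 4, 5, 6, 7 }
   uses only the second vector, so the function returns 2 and the loop
   above folds the permutation to { 0, 1, 2, 3 }, letting the caller
   treat op1 as the single input operand. */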
33843 ix86_expand_vec_perm_builtin (tree exp)
33845 struct expand_vec_perm_d d;
33846 tree arg0, arg1, arg2;
33848 arg0 = CALL_EXPR_ARG (exp, 0);
33849 arg1 = CALL_EXPR_ARG (exp, 1);
33850 arg2 = CALL_EXPR_ARG (exp, 2);
33852 d.vmode = TYPE_MODE (TREE_TYPE (arg0));
33853 d.nelt = GET_MODE_NUNITS (d.vmode);
33854 d.testing_p = false;
33855 gcc_assert (VECTOR_MODE_P (d.vmode));
33857 if (TREE_CODE (arg2) != VECTOR_CST)
33859 error_at (EXPR_LOCATION (exp),
33860 "vector permutation requires vector constant");
33864 switch (extract_vec_perm_cst (&d, arg2))
33870 error_at (EXPR_LOCATION (exp), "invalid vector permutation constant");
33874 if (!operand_equal_p (arg0, arg1, 0))
33876 d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
33877 d.op0 = force_reg (d.vmode, d.op0);
33878 d.op1 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
33879 d.op1 = force_reg (d.vmode, d.op1);
33883 /* The elements of PERM do not suggest that only the first operand
33884 is used, but both operands are identical. Allow easier matching
of the permutation by folding the permutation into the single
   input vector. */
33888 unsigned i, nelt = d.nelt;
33889 for (i = 0; i < nelt; ++i)
33890 if (d.perm[i] >= nelt)
33896 d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
33897 d.op0 = force_reg (d.vmode, d.op0);
33902 d.op0 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
33903 d.op0 = force_reg (d.vmode, d.op0);
33908 d.target = gen_reg_rtx (d.vmode);
33909 if (ix86_expand_vec_perm_builtin_1 (&d))
/* For compiler generated permutations, we should never get here, because
   the compiler should also be checking the ok hook. But since this is a
   builtin the user has access to, don't abort. */
33918 sorry ("vector permutation (%d %d)", d.perm[0], d.perm[1]);
33921 sorry ("vector permutation (%d %d %d %d)",
33922 d.perm[0], d.perm[1], d.perm[2], d.perm[3]);
33925 sorry ("vector permutation (%d %d %d %d %d %d %d %d)",
33926 d.perm[0], d.perm[1], d.perm[2], d.perm[3],
33927 d.perm[4], d.perm[5], d.perm[6], d.perm[7]);
33930 sorry ("vector permutation "
33931 "(%d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d)",
33932 d.perm[0], d.perm[1], d.perm[2], d.perm[3],
33933 d.perm[4], d.perm[5], d.perm[6], d.perm[7],
33934 d.perm[8], d.perm[9], d.perm[10], d.perm[11],
33935 d.perm[12], d.perm[13], d.perm[14], d.perm[15]);
33938 gcc_unreachable ();
33941 return CONST0_RTX (d.vmode);
33944 /* Implement targetm.vectorize.builtin_vec_perm_ok. */
33947 ix86_vectorize_builtin_vec_perm_ok (tree vec_type, tree mask)
33949 struct expand_vec_perm_d d;
33953 d.vmode = TYPE_MODE (vec_type);
33954 d.nelt = GET_MODE_NUNITS (d.vmode);
33955 d.testing_p = true;
33957 /* Given sufficient ISA support we can just return true here
33958 for selected vector modes. */
33959 if (GET_MODE_SIZE (d.vmode) == 16)
33961 /* All implementable with a single vpperm insn. */
33964 /* All implementable with 2 pshufb + 1 ior. */
33967 /* All implementable with shufpd or unpck[lh]pd. */
33972 vec_mask = extract_vec_perm_cst (&d, mask);
/* This hook cannot be called in response to something that the
   user does (unlike the builtin expander), so we shouldn't ever see
   an error generated from the extract. */
33977 gcc_assert (vec_mask > 0 && vec_mask <= 3);
33978 one_vec = (vec_mask != 3);
33980 /* Implementable with shufps or pshufd. */
33981 if (one_vec && (d.vmode == V4SFmode || d.vmode == V4SImode))
33984 /* Otherwise we have to go through the motions and see if we can
33985 figure out how to generate the requested permutation. */
33986 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
33987 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
33989 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
33992 ret = ix86_expand_vec_perm_builtin_1 (&d);
33999 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
34001 struct expand_vec_perm_d d;
34007 d.vmode = GET_MODE (targ);
34008 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
34009 d.testing_p = false;
34011 for (i = 0; i < nelt; ++i)
34012 d.perm[i] = i * 2 + odd;
34014 /* We'll either be able to implement the permutation directly... */
34015 if (expand_vec_perm_1 (&d))
34018 /* ... or we use the special-case patterns. */
34019 expand_vec_perm_even_odd_1 (&d, odd);
/* This function returns the calling-ABI-specific va_list type node.
   It returns the va_list type appropriate for FNDECL. */
34026 ix86_fn_abi_va_list (tree fndecl)
34029 return va_list_type_node;
34030 gcc_assert (fndecl != NULL_TREE);
34032 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
34033 return ms_va_list_type_node;
34035 return sysv_va_list_type_node;
/* Returns the canonical va_list type specified by TYPE. If there
   is no valid TYPE provided, it returns NULL_TREE. */
34042 ix86_canonical_va_list_type (tree type)
34046 /* Resolve references and pointers to va_list type. */
34047 if (TREE_CODE (type) == MEM_REF)
34048 type = TREE_TYPE (type);
34049 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
34050 type = TREE_TYPE (type);
34051 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
34052 type = TREE_TYPE (type);
34054 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
34056 wtype = va_list_type_node;
34057 gcc_assert (wtype != NULL_TREE);
34059 if (TREE_CODE (wtype) == ARRAY_TYPE)
34061 /* If va_list is an array type, the argument may have decayed
34062 to a pointer type, e.g. by being passed to another function.
34063 In that case, unwrap both types so that we can compare the
34064 underlying records. */
34065 if (TREE_CODE (htype) == ARRAY_TYPE
34066 || POINTER_TYPE_P (htype))
34068 wtype = TREE_TYPE (wtype);
34069 htype = TREE_TYPE (htype);
34072 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
34073 return va_list_type_node;
34074 wtype = sysv_va_list_type_node;
34075 gcc_assert (wtype != NULL_TREE);
34077 if (TREE_CODE (wtype) == ARRAY_TYPE)
34079 /* If va_list is an array type, the argument may have decayed
34080 to a pointer type, e.g. by being passed to another function.
34081 In that case, unwrap both types so that we can compare the
34082 underlying records. */
34083 if (TREE_CODE (htype) == ARRAY_TYPE
34084 || POINTER_TYPE_P (htype))
34086 wtype = TREE_TYPE (wtype);
34087 htype = TREE_TYPE (htype);
34090 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
34091 return sysv_va_list_type_node;
34092 wtype = ms_va_list_type_node;
34093 gcc_assert (wtype != NULL_TREE);
34095 if (TREE_CODE (wtype) == ARRAY_TYPE)
34097 /* If va_list is an array type, the argument may have decayed
34098 to a pointer type, e.g. by being passed to another function.
34099 In that case, unwrap both types so that we can compare the
34100 underlying records. */
34101 if (TREE_CODE (htype) == ARRAY_TYPE
34102 || POINTER_TYPE_P (htype))
34104 wtype = TREE_TYPE (wtype);
34105 htype = TREE_TYPE (htype);
34108 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
34109 return ms_va_list_type_node;
34112 return std_canonical_va_list_type (type);
34115 /* Iterate through the target-specific builtin types for va_list.
34116 IDX denotes the iterator, *PTREE is set to the result type of
34117 the va_list builtin, and *PNAME to its internal type.
34118 Returns zero if there is no element for this index, otherwise
34119 IDX should be increased upon the next call.
34120 Note, do not iterate a base builtin's name like __builtin_va_list.
34121 Used from c_common_nodes_and_builtins. */
34124 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
34134 *ptree = ms_va_list_type_node;
34135 *pname = "__builtin_ms_va_list";
34139 *ptree = sysv_va_list_type_node;
34140 *pname = "__builtin_sysv_va_list";
34148 #undef TARGET_SCHED_DISPATCH
34149 #define TARGET_SCHED_DISPATCH has_dispatch
34150 #undef TARGET_SCHED_DISPATCH_DO
34151 #define TARGET_SCHED_DISPATCH_DO do_dispatch
34153 /* The size of the dispatch window is the total number of bytes of
34154 object code allowed in a window. */
34155 #define DISPATCH_WINDOW_SIZE 16
34157 /* Number of dispatch windows considered for scheduling. */
34158 #define MAX_DISPATCH_WINDOWS 3
34160 /* Maximum number of instructions in a window. */
34163 /* Maximum number of immediate operands in a window. */
34166 /* Maximum number of immediate bits allowed in a window. */
34167 #define MAX_IMM_SIZE 128
34169 /* Maximum number of 32 bit immediates allowed in a window. */
34170 #define MAX_IMM_32 4
34172 /* Maximum number of 64 bit immediates allowed in a window. */
34173 #define MAX_IMM_64 2
34175 /* Maximum total of loads or prefetches allowed in a window. */
34178 /* Maximum total of stores allowed in a window. */
34179 #define MAX_STORE 1
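/* An illustrative budget derived from the macros above: one window may
   hold at most MAX_IMM_32 (4) 32-bit immediates or MAX_IMM_64 (2)
   64-bit immediates, with mixed cases capped by both counters as
   checked in count_num_restricted below. */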
/* Dispatch groups. Instructions that affect the mix in a dispatch window. */
34186 enum dispatch_group {
/* Number of allowable groups in a dispatch window. It is an array
   indexed by dispatch_group enum. 100 is used as a big number,
   because the number of these kinds of operations does not have any
   effect in a dispatch window, but we need them for other reasons in
   the table. */
34206 static unsigned int num_allowable_groups[disp_last] = {
34207 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
34210 char group_name[disp_last + 1][16] = {
34211 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
34212 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
34213 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
34216 /* Instruction path. */
34219 path_single, /* Single micro op. */
34220 path_double, /* Double micro op. */
path_multi, /* Instructions with more than 2 micro ops. */
34225 /* sched_insn_info defines a window to the instructions scheduled in
34226 the basic block. It contains a pointer to the insn_info table and
34227 the instruction scheduled.
Windows are allocated for each basic block and are linked
   with each other. */
34231 typedef struct sched_insn_info_s {
34233 enum dispatch_group group;
34234 enum insn_path path;
34239 /* Linked list of dispatch windows. This is a two way list of
34240 dispatch windows of a basic block. It contains information about
34241 the number of uops in the window and the total number of
34242 instructions and of bytes in the object code for this dispatch
34244 typedef struct dispatch_windows_s {
int num_insn; /* Number of insns in the window. */
34246 int num_uops; /* Number of uops in the window. */
34247 int window_size; /* Number of bytes in the window. */
int window_num; /* Window number, either 0 or 1. */
34249 int num_imm; /* Number of immediates in an insn. */
34250 int num_imm_32; /* Number of 32 bit immediates in an insn. */
34251 int num_imm_64; /* Number of 64 bit immediates in an insn. */
34252 int imm_size; /* Total immediates in the window. */
34253 int num_loads; /* Total memory loads in the window. */
34254 int num_stores; /* Total memory stores in the window. */
34255 int violation; /* Violation exists in window. */
34256 sched_insn_info *window; /* Pointer to the window. */
34257 struct dispatch_windows_s *next;
34258 struct dispatch_windows_s *prev;
34259 } dispatch_windows;
/* Immediate values used in an insn. */
34262 typedef struct imm_info_s
34269 static dispatch_windows *dispatch_window_list;
34270 static dispatch_windows *dispatch_window_list1;
34272 /* Get dispatch group of insn. */
34274 static enum dispatch_group
34275 get_mem_group (rtx insn)
34277 enum attr_memory memory;
34279 if (INSN_CODE (insn) < 0)
34280 return disp_no_group;
34281 memory = get_attr_memory (insn);
34282 if (memory == MEMORY_STORE)
34285 if (memory == MEMORY_LOAD)
34288 if (memory == MEMORY_BOTH)
34289 return disp_load_store;
34291 return disp_no_group;
34294 /* Return true if insn is a compare instruction. */
34299 enum attr_type type;
34301 type = get_attr_type (insn);
34302 return (type == TYPE_TEST
34303 || type == TYPE_ICMP
34304 || type == TYPE_FCMP
34305 || GET_CODE (PATTERN (insn)) == COMPARE);
/* Return true if a dispatch violation was encountered. */
34311 dispatch_violation (void)
34313 if (dispatch_window_list->next)
34314 return dispatch_window_list->next->violation;
34315 return dispatch_window_list->violation;
34318 /* Return true if insn is a branch instruction. */
34321 is_branch (rtx insn)
34323 return (CALL_P (insn) || JUMP_P (insn));
34326 /* Return true if insn is a prefetch instruction. */
34329 is_prefetch (rtx insn)
34331 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
34334 /* This function initializes a dispatch window and the list container holding a
34335 pointer to the window. */
34338 init_window (int window_num)
34341 dispatch_windows *new_list;
34343 if (window_num == 0)
34344 new_list = dispatch_window_list;
34346 new_list = dispatch_window_list1;
34348 new_list->num_insn = 0;
34349 new_list->num_uops = 0;
34350 new_list->window_size = 0;
34351 new_list->next = NULL;
34352 new_list->prev = NULL;
34353 new_list->window_num = window_num;
34354 new_list->num_imm = 0;
34355 new_list->num_imm_32 = 0;
34356 new_list->num_imm_64 = 0;
34357 new_list->imm_size = 0;
34358 new_list->num_loads = 0;
34359 new_list->num_stores = 0;
34360 new_list->violation = false;
34362 for (i = 0; i < MAX_INSN; i++)
34364 new_list->window[i].insn = NULL;
34365 new_list->window[i].group = disp_no_group;
34366 new_list->window[i].path = no_path;
34367 new_list->window[i].byte_len = 0;
34368 new_list->window[i].imm_bytes = 0;
34373 /* This function allocates and initializes a dispatch window and the
34374 list container holding a pointer to the window. */
34376 static dispatch_windows *
34377 allocate_window (void)
34379 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
34380 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
34385 /* This routine initializes the dispatch scheduling information. It
34386 initiates building dispatch scheduler tables and constructs the
34387 first dispatch window. */
34390 init_dispatch_sched (void)
34392 /* Allocate a dispatch list and a window. */
34393 dispatch_window_list = allocate_window ();
34394 dispatch_window_list1 = allocate_window ();
/* This function returns true if a branch is detected. The end of a
   basic block does not have to be a branch, but here we assume only
   branches end a basic block. */
34404 is_end_basic_block (enum dispatch_group group)
34406 return group == disp_branch;
34409 /* This function is called when the end of a window processing is reached. */
34412 process_end_window (void)
34414 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
34415 if (dispatch_window_list->next)
34417 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
34418 gcc_assert (dispatch_window_list->window_size
34419 + dispatch_window_list1->window_size <= 48);
34425 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
34426 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
for 48 bytes of instructions. Note that these windows are not dispatch
   windows of size DISPATCH_WINDOW_SIZE. */
34430 static dispatch_windows *
34431 allocate_next_window (int window_num)
34433 if (window_num == 0)
34435 if (dispatch_window_list->next)
34438 return dispatch_window_list;
34441 dispatch_window_list->next = dispatch_window_list1;
34442 dispatch_window_list1->prev = dispatch_window_list;
34444 return dispatch_window_list1;
34447 /* Increment the number of immediate operands of an instruction. */
34450 find_constant_1 (rtx *in_rtx, imm_info *imm_values)
switch (GET_CODE (*in_rtx))
34460 (imm_values->imm)++;
34461 if (x86_64_immediate_operand (*in_rtx, SImode))
34462 (imm_values->imm32)++;
34464 (imm_values->imm64)++;
34468 (imm_values->imm)++;
34469 (imm_values->imm64)++;
34473 if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
34475 (imm_values->imm)++;
34476 (imm_values->imm32)++;
34487 /* Compute number of immediate operands of an instruction. */
34490 find_constant (rtx in_rtx, imm_info *imm_values)
34492 for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
34493 (rtx_function) find_constant_1, (void *) imm_values);
/* Return total size of immediate operands of an instruction along with number
   of corresponding immediate-operands.  It initializes its parameters to zero
   before calling FIND_CONSTANT.
   INSN is the input instruction.  IMM is the total of immediates.
   IMM32 is the number of 32 bit immediates.  IMM64 is the number of 64
   bit immediates.  */

static int
get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
{
  imm_info imm_values = {0, 0, 0};

  find_constant (insn, &imm_values);
  *imm = imm_values.imm;
  *imm32 = imm_values.imm32;
  *imm64 = imm_values.imm64;
  return imm_values.imm32 * 4 + imm_values.imm64 * 8;
}
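/* Editorial worked example (added commentary): an insn carrying two
   32-bit immediates and one 64-bit immediate yields *IMM = 3,
   *IMM32 = 2, *IMM64 = 1 and a return value of 2 * 4 + 1 * 8 = 16
   bytes of immediate data, which callers fold into imm_size.  */
#if 0
/* Illustrative helper under those assumptions (an unused editorial
   sketch): true if INSN's immediates alone would exceed an empty
   window's immediate byte budget MAX_IMM_SIZE.  */
static bool
insn_imm_overflows_empty_window (rtx insn)
{
  int imm, imm32, imm64;
  return get_num_immediates (insn, &imm, &imm32, &imm64) > MAX_IMM_SIZE;
}
#endif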
/* This function indicates if an operand of an instruction is an
   immediate.  */

static bool
has_immediate (rtx insn)
{
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (insn)
    return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
			       &num_imm64_operand);
  return false;
}
/* Return single or double path for instructions.  */

static enum insn_path
get_insn_path (rtx insn)
{
  enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);

  if ((int) path == 0)
    return path_single;

  if ((int) path == 1)
    return path_double;

  return path_multi;
}
/* Return insn dispatch group.  */

static enum dispatch_group
get_insn_group (rtx insn)
{
  enum dispatch_group group = get_mem_group (insn);
  if (group)
    return group;

  if (is_branch (insn))
    return disp_branch;

  if (is_cmp (insn))
    return disp_cmp;

  if (has_immediate (insn))
    return disp_imm;

  if (is_prefetch (insn))
    return disp_prefetch;

  return disp_no_group;
}
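/* Editorial note (added commentary): classification order matters here.
   get_mem_group wins first, so e.g. a compare that touches memory is
   grouped as a memory op rather than disp_cmp; only non-memory insns
   fall through to the branch/compare/immediate/prefetch tests.  */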
/* Count number of GROUP restricted instructions in a dispatch
   window WINDOW_LIST.  */

static int
count_num_restricted (rtx insn, dispatch_windows *window_list)
{
  enum dispatch_group group = get_insn_group (insn);
  int imm_size;
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (group == disp_no_group)
    return 0;

  if (group == disp_imm)
    {
      imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
				     &num_imm64_operand);
      if (window_list->imm_size + imm_size > MAX_IMM_SIZE
	  || num_imm_operand + window_list->num_imm > MAX_IMM
	  || (num_imm32_operand > 0
	      && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
		  || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
	  || (num_imm64_operand > 0
	      && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
		  || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
	  || (window_list->imm_size + imm_size == MAX_IMM_SIZE
	      && num_imm64_operand > 0
	      && ((window_list->num_imm_64 > 0
		   && window_list->num_insn >= 2)
		  || window_list->num_insn >= 3)))
	return BIG;

      return 1;
    }

  if ((group == disp_load_store
       && (window_list->num_loads >= MAX_LOAD
	   || window_list->num_stores >= MAX_STORE))
      || ((group == disp_load
	   || group == disp_prefetch)
	  && window_list->num_loads >= MAX_LOAD)
      || (group == disp_store
	  && window_list->num_stores >= MAX_STORE))
    return BIG;

  return 1;
}
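/* Editorial worked example (added commentary; assumes for illustration
   that MAX_IMM_32 is 4): a window already holding two 64-bit immediates
   contributes num_imm_64 * 2 == 4 toward the 32-bit immediate slots, so
   one more 32-bit immediate trips the
   "num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32" test above and the
   insn counts as BIG, i.e. it cannot be placed in this window.  */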
/* This function returns true if insn satisfies dispatch rules on the
   last window scheduled.  */

static bool
fits_dispatch_window (rtx insn)
{
  dispatch_windows *window_list = dispatch_window_list;
  dispatch_windows *window_list_next = dispatch_window_list->next;
  unsigned int num_restrict;
  enum dispatch_group group = get_insn_group (insn);
  enum insn_path path = get_insn_path (insn);
  int sum;

  /* Make disp_cmp and disp_jcc get scheduled at the latest.  These
     instructions should be given the lowest priority in the
     scheduling process in the Haifa scheduler to make sure they will be
     scheduled in the same dispatch window as the reference to them.  */
  if (group == disp_jcc || group == disp_cmp)
    return false;

  /* Check nonrestricted.  */
  if (group == disp_no_group || group == disp_branch)
    return true;

  /* Get last dispatch window.  */
  if (window_list_next)
    window_list = window_list_next;

  if (window_list->window_num == 1)
    {
      sum = window_list->prev->window_size + window_list->window_size;

      if (sum == 32
	  || (min_insn_size (insn) + sum) >= 48)
	/* Window 1 is full.  Go for next window.  */
	return true;
    }

  num_restrict = count_num_restricted (insn, window_list);

  if (num_restrict > num_allowable_groups[group])
    return false;

  /* See if it fits in the first window.  */
  if (window_list->window_num == 0)
    {
      /* The first window should have only single and double path
	 uops.  */
      if (path == path_double
	  && (window_list->num_uops + 2) > MAX_INSN)
	return false;
      else if (path != path_single)
	return false;
    }
  return true;
}
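/* Editorial worked example (added commentary): window 1 counts as full
   either when the pair already holds exactly 32 bytes or when the new
   insn would push the pair to 48 bytes or more.  With window 0 at 20
   bytes and window 1 at 10, sum == 30: a 4-byte insn still fits
   (30 + 4 < 48), while an 18-byte insn does not (30 + 18 >= 48).  */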
/* Add an instruction INSN with NUM_UOPS micro-operations to the
   dispatch window WINDOW_LIST.  */

static void
add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
{
  int byte_len = min_insn_size (insn);
  int num_insn = window_list->num_insn;
  int imm_size;
  sched_insn_info *window = window_list->window;
  enum dispatch_group group = get_insn_group (insn);
  enum insn_path path = get_insn_path (insn);
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (!window_list->violation && group != disp_cmp
      && !fits_dispatch_window (insn))
    window_list->violation = true;

  imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
				 &num_imm64_operand);

  /* Initialize window with new instruction.  */
  window[num_insn].insn = insn;
  window[num_insn].byte_len = byte_len;
  window[num_insn].group = group;
  window[num_insn].path = path;
  window[num_insn].imm_bytes = imm_size;

  window_list->window_size += byte_len;
  window_list->num_insn = num_insn + 1;
  window_list->num_uops = window_list->num_uops + num_uops;
  window_list->imm_size += imm_size;
  window_list->num_imm += num_imm_operand;
  window_list->num_imm_32 += num_imm32_operand;
  window_list->num_imm_64 += num_imm64_operand;

  if (group == disp_store)
    window_list->num_stores += 1;
  else if (group == disp_load
	   || group == disp_prefetch)
    window_list->num_loads += 1;
  else if (group == disp_load_store)
    {
      window_list->num_stores += 1;
      window_list->num_loads += 1;
    }
}
/* Adds a scheduled instruction, INSN, to the current dispatch window.
   If the total bytes of instructions or the number of instructions in
   the window exceed the allowable limits, it allocates a new window.  */

static void
add_to_dispatch_window (rtx insn)
{
  int byte_len;
  dispatch_windows *window_list;
  dispatch_windows *next_list;
  dispatch_windows *window0_list;
  enum insn_path path;
  enum dispatch_group insn_group;
  bool insn_fits;
  int num_insn;
  int num_uops;
  int window_num;
  int insn_num_uops;
  int sum;

  if (INSN_CODE (insn) < 0)
    return;

  byte_len = min_insn_size (insn);
  window_list = dispatch_window_list;
  next_list = window_list->next;
  path = get_insn_path (insn);
  insn_group = get_insn_group (insn);

  /* Get the last dispatch window.  */
  if (next_list)
    window_list = dispatch_window_list->next;

  if (path == path_single)
    insn_num_uops = 1;
  else if (path == path_double)
    insn_num_uops = 2;
  else
    insn_num_uops = (int) path;

  /* If current window is full, get a new window.
     Window number zero is full, if MAX_INSN uops are scheduled in it.
     Window number one is full, if window zero's bytes plus window
     one's bytes is 32, or if the bytes of the new instruction added
     to the total makes it greater than 48, or it has already MAX_INSN
     instructions in it.  */
  num_insn = window_list->num_insn;
  num_uops = window_list->num_uops;
  window_num = window_list->window_num;
  insn_fits = fits_dispatch_window (insn);

  if (num_insn >= MAX_INSN
      || num_uops + insn_num_uops > MAX_INSN
      || !insn_fits)
    {
      window_num = ~window_num & 1;
      window_list = allocate_next_window (window_num);
    }

  if (window_num == 0)
    {
      add_insn_window (insn, window_list, insn_num_uops);
      if (window_list->num_insn >= MAX_INSN
	  && insn_group == disp_branch)
	{
	  process_end_window ();
	  return;
	}
    }
  else if (window_num == 1)
    {
      window0_list = window_list->prev;
      sum = window0_list->window_size + window_list->window_size;
      if (sum == 32
	  || (byte_len + sum) >= 48)
	{
	  process_end_window ();
	  window_list = dispatch_window_list;
	}

      add_insn_window (insn, window_list, insn_num_uops);
    }
  else
    gcc_unreachable ();

  if (is_end_basic_block (insn_group))
    {
      /* End of basic block is reached; do end-basic-block process.  */
      process_end_window ();
      return;
    }
}
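/* Editorial sketch (added commentary): "~window_num & 1" above simply
   toggles between the two window numbers.  An equivalent, more explicit
   form of the same computation:  */
#if 0
static int
toggle_window_num (int window_num)
{
  return window_num == 0 ? 1 : 0;
}
#endif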
/* Print the dispatch window, WINDOW_NUM, to FILE.  */

DEBUG_FUNCTION static void
debug_dispatch_window_file (FILE *file, int window_num)
{
  dispatch_windows *list;
  int i;

  if (window_num == 0)
    list = dispatch_window_list;
  else
    list = dispatch_window_list1;

  fprintf (file, "Window #%d:\n", list->window_num);
  fprintf (file, "  num_insn = %d, num_uops = %d, window_size = %d\n",
	   list->num_insn, list->num_uops, list->window_size);
  fprintf (file, "  num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
	   list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);

  fprintf (file, "  num_loads = %d, num_stores = %d\n", list->num_loads,
	   list->num_stores);
  fprintf (file, " insn info:\n");

  for (i = 0; i < MAX_INSN; i++)
    {
      if (!list->window[i].insn)
	break;
      fprintf (file, "    group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
	       i, group_name[list->window[i].group],
	       i, (void *) list->window[i].insn,
	       i, list->window[i].path,
	       i, list->window[i].byte_len,
	       i, list->window[i].imm_bytes);
    }
}
/* Print to stdout a dispatch window.  */

DEBUG_FUNCTION void
debug_dispatch_window (int window_num)
{
  debug_dispatch_window_file (stdout, window_num);
}
/* Print INSN dispatch information to FILE.  */

DEBUG_FUNCTION static void
debug_insn_dispatch_info_file (FILE *file, rtx insn)
{
  int byte_len;
  enum insn_path path;
  enum dispatch_group group;
  int imm_size;
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (INSN_CODE (insn) < 0)
    return;

  byte_len = min_insn_size (insn);
  path = get_insn_path (insn);
  group = get_insn_group (insn);
  imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
				 &num_imm64_operand);

  fprintf (file, " insn info:\n");
  fprintf (file, "  group = %s, path = %d, byte_len = %d\n",
	   group_name[group], path, byte_len);
  fprintf (file, "  num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
	   num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
}
/* Print to stdout the status of the ready list with respect to
   dispatch windows.  */

DEBUG_FUNCTION void
debug_ready_dispatch (void)
{
  int i;
  int no_ready = number_in_ready ();

  fprintf (stdout, "Number of ready: %d\n", no_ready);

  for (i = 0; i < no_ready; i++)
    debug_insn_dispatch_info_file (stdout, get_ready_element (i));
}
/* This routine is the driver of the dispatch scheduler.  */

static void
do_dispatch (rtx insn, int mode)
{
  if (mode == DISPATCH_INIT)
    init_dispatch_sched ();
  else if (mode == ADD_TO_DISPATCH_WINDOW)
    add_to_dispatch_window (insn);
}
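/* Editorial note (added commentary): this driver is expected to be
   called once with DISPATCH_INIT to reset both windows and then once
   per scheduled insn with ADD_TO_DISPATCH_WINDOW; the actual call sites
   live in the Haifa scheduler, not in this file.  */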
/* Return TRUE if Dispatch Scheduling is supported.  */

static bool
has_dispatch (rtx insn, int action)
{
  if (ix86_tune == PROCESSOR_BDVER1 && flag_dispatch_scheduler)
    switch (action)
      {
      default:
	return false;

      case IS_DISPATCH_ON:
	return true;

      case IS_CMP:
	return is_cmp (insn);

      case DISPATCH_VIOLATION:
	return dispatch_violation ();

      case FITS_DISPATCH_WINDOW:
	return fits_dispatch_window (insn);
      }

  return false;
}
/* ??? No autovectorization into MMX or 3DNOW until we can reliably
   place emms and femms instructions.  */

static enum machine_mode
ix86_preferred_simd_mode (enum machine_mode mode)
{
  /* Disable double precision vectorizer if needed.  */
  if (mode == DFmode && !TARGET_VECTORIZE_DOUBLE)
    return word_mode;

  if (!TARGET_AVX && !TARGET_SSE)
    return word_mode;

  switch (mode)
    {
    case SFmode:
      return (TARGET_AVX && !flag_prefer_avx128) ? V8SFmode : V4SFmode;
    case DFmode:
      return (TARGET_AVX && !flag_prefer_avx128) ? V4DFmode : V2DFmode;
    case DImode:
      return V2DImode;
    case SImode:
      return V4SImode;
    case HImode:
      return V8HImode;
    case QImode:
      return V16QImode;

    default:
      return word_mode;
    }
}
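/* Editorial example (added commentary): with AVX enabled and
   -mprefer-avx128 not given, SFmode vectorizes as V8SFmode (eight
   floats per 256-bit vector); otherwise it falls back to the 128-bit
   V4SFmode.  */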
/* If AVX is enabled then try vectorizing with both 256bit and 128bit
   vectors.  */

static unsigned int
ix86_autovectorize_vector_sizes (void)
{
  return TARGET_AVX ? 32 | 16 : 0;
}
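/* Editorial note (added commentary): the return value is a bitmask of
   candidate vector sizes in bytes; 32 | 16 == 48 advertises both the
   32-byte (256-bit) and 16-byte (128-bit) sizes to the vectorizer.  */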
/* Initialize the GCC target structure.  */
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY ix86_return_in_memory

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
# undef TARGET_MERGE_DECL_ATTRIBUTES
# define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL ix86_builtin_decl
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  ix86_builtin_vectorized_function

#undef TARGET_VECTORIZE_BUILTIN_CONVERSION
#define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion

#undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue

#undef TARGET_ENCODE_SECTION_INFO
#ifndef SUBTARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
#endif

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_BYTE_OP
#define TARGET_ASM_BYTE_OP ASM_BYTE

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif
#undef TARGET_PROFILE_BEFORE_PROLOGUE
#define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND ix86_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra

#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p

#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
#endif

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START x86_file_start

#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS	\
  (TARGET_DEFAULT			\
   | TARGET_SUBTARGET_DEFAULT		\
   | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
#undef TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION ix86_handle_option

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE ix86_option_override
#undef TARGET_OPTION_OPTIMIZATION_TABLE
#define TARGET_OPTION_OPTIMIZATION_TABLE ix86_option_optimization_table
#undef TARGET_OPTION_INIT_STRUCT
#define TARGET_OPTION_INIT_STRUCT ix86_option_init_struct

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
#define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list

#undef TARGET_ENUM_VA_LIST_P
#define TARGET_ENUM_VA_LIST_P ix86_enum_va_list

#undef TARGET_FN_ABI_VA_LIST
#define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list

#undef TARGET_CANONICAL_VA_LIST_TYPE
#define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start

#undef TARGET_MD_ASM_CLOBBERS
#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers

#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG ix86_function_arg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
#undef TARGET_UPDATE_STACK_BOUNDARY
#define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
#undef TARGET_GET_DRAP_RTX
#define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_STATIC_CHAIN
#define TARGET_STATIC_CHAIN ix86_static_chain
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
#undef TARGET_RETURN_POPS_ARGS
#define TARGET_RETURN_POPS_ARGS ix86_return_pops_args

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
#endif
#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ix86_mangle_type

#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail

#undef TARGET_SUPPORTS_SPLIT_STACK
#define TARGET_SUPPORTS_SPLIT_STACK ix86_supports_split_stack

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD ix86_secondary_reload

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
#undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
#define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  ix86_builtin_vectorization_cost
#undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
#define TARGET_VECTORIZE_BUILTIN_VEC_PERM \
  ix86_vectorize_builtin_vec_perm
#undef TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK
#define TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK \
  ix86_vectorize_builtin_vec_perm_ok
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  ix86_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  ix86_autovectorize_vector_sizes
#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE ix86_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE ix86_function_specific_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT ix86_function_specific_print

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P ix86_can_inline_p

#undef TARGET_EXPAND_TO_RTL_HOOK
#define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p

#undef TARGET_IRA_COVER_CLASSES
#define TARGET_IRA_COVER_CLASSES i386_ira_cover_classes

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ix86_can_eliminate

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry

#undef TARGET_ASM_CODE_END
#define TARGET_ASM_CODE_END ix86_code_end

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage

#if TARGET_MACHO
#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS darwin_rename_builtins
#endif

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-i386.h"